From 67fab22a7adcec0279b9b057eb3dc669e32834f0 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:29 -0700 Subject: net: add getsockopt_iter callback to proto_ops Add a new getsockopt_iter callback to struct proto_ops that uses sockopt_t, a type-safe wrapper around iov_iter. This provides a clean interface for socket option operations that works with both user and kernel buffers. The sockopt_t type encapsulates two iov_iters (iter_in and iter_out) and an optlen field. The optlen field, although not suggested by Linus, serves as both input (buffer size) and output (returned data size), allowing callbacks to return arbitrary values independent of the bytes written via copy_to_iter(); it is therefore kept separate from iov_iter.count. This is preparatory work for removing the SOL_SOCKET level restriction from io_uring getsockopt operations. Keep in mind that both iter_out and iter_in point to the same data at all times, and we just have two of them to make the callback implementation sane. Suggested-by: Linus Torvalds Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-1-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- include/linux/net.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index ca6a7bc5c9ae..f268f395ce47 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -23,9 +23,30 @@ #include #include #include +#include #include +/** + * struct sockopt - socket option value container + * @iter_in: iov_iter for reading optval with the content from the caller. + * Use copy_from_iter() given this iov direction is ITER_SOURCE + * @iter_out: iov_iter for protocols to update optval data to userspace + * Use copy_to_iter() given iov direction is ITER_DEST + * @optlen: serves as both input (buffer size) and output (returned data size). + * + * Type-safe wrapper for socket option data that works with both + * user and kernel buffers. 
+ * + * The optlen field allows callbacks to return a specific length value + * independent of the bytes written via copy_to_iter(). + */ +typedef struct sockopt { + struct iov_iter iter_in; + struct iov_iter iter_out; + int optlen; +} sockopt_t; + struct poll_table_struct; struct pipe_inode_info; struct inode; @@ -192,6 +213,8 @@ struct proto_ops { unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); + int (*getsockopt_iter)(struct socket *sock, int level, + int optname, sockopt_t *opt); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); -- cgit v1.3-3-g829e From 5bd0dec150f56b6307d599132dcb7c01007bbecc Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:30 -0700 Subject: net: call getsockopt_iter if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update do_sock_getsockopt() to use the new getsockopt_iter callback when available. The new path, built on a sockptr_to_sockopt() helper: 1. Reads optlen from user/kernel space 2. Initializes a sockopt_t with the appropriate iov_iter (kvec for kernel, ubuf for user buffers) and sets opt.optlen 3. Calls the protocol's getsockopt_iter callback 4. Writes opt.optlen back to user/kernel space The optlen is always written back, even on failure. Some protocols (e.g. CAN raw) return -ERANGE and set optlen to the required buffer size so userspace knows how much to allocate. The callback is responsible for setting opt.optlen to indicate the returned data size. Note that iter_out does not need to be copied back in do_sock_getsockopt(). When optval is not a kernel pointer (the userspace path), sockptr_to_sockopt() sets up opt->iter_out as an ITER_DEST ubuf iterator pointing directly at the userspace buffer (optval.user). 
So when getsockopt_iter implementations call copy_to_iter(..., &opt->iter_out), the data is written directly to userspace — no intermediate kernel buffer is involved. When optval.is_kernel is true (the in-kernel path, e.g. from io_uring), the kvec points at the already-provided kernel buffer (optval.kernel), so the data lands in the caller's buffer directly via the kvec-backed iterator. In both cases the iterator writes to the final destination in-place at protocol callback. There's nothing to copy back — only optlen needs to be written back. Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-2-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/socket.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index ade2ff5845a0..a25e513cf0f4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -2349,11 +2350,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +/* + * Initialize a sockopt_t from sockptr optval/optlen, setting up iov_iter + * for both input and output directions. 
+ * It is important to remember that both iterators point to the same data, but + * .iter_in is read-only and .iter_out is write-only for the protocol callbacks + */ +static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval, + sockptr_t optlen, struct kvec *kvec) +{ + int koptlen; + + if (copy_from_sockptr(&koptlen, optlen, sizeof(int))) + return -EFAULT; + + if (koptlen < 0) + return -EINVAL; + + if (optval.is_kernel) { + kvec->iov_base = optval.kernel; + kvec->iov_len = koptlen; + iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen); + iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen); + } else { + iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen); + iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user, + koptlen); + } + opt->optlen = koptlen; + + return 0; +} + int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { int max_optlen __maybe_unused = 0; const struct proto_ops *ops; + struct kvec kvec; + sockopt_t opt; int err; err = security_socket_getsockopt(sock, level, optname); @@ -2366,15 +2401,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { err = sk_getsockopt(sock->sk, level, optname, optval, optlen); - } else if (unlikely(!ops->getsockopt)) { - err = -EOPNOTSUPP; - } else { + } else if (ops->getsockopt_iter) { + err = sockptr_to_sockopt(&opt, optval, optlen, &kvec); + if (err) + return err; + + err = ops->getsockopt_iter(sock, level, optname, &opt); + + /* Always write back optlen, even on failure. Some protocols + * (e.g. CAN raw) return -ERANGE and set optlen to the + * required buffer size so userspace can discover it. 
+ */ + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int))) + return -EFAULT; + } else if (ops->getsockopt) { if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, "Invalid argument type")) return -EOPNOTSUPP; err = ops->getsockopt(sock, level, optname, optval.user, optlen.user); + } else { + err = -EOPNOTSUPP; } if (!compat) -- cgit v1.3-3-g829e From 9c99d62705692db7fc8b8921efa0db189e84e694 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:31 -0700 Subject: af_packet: convert to getsockopt_iter Convert AF_PACKET's getsockopt implementation to use the new getsockopt_iter callback with sockopt_t. Key changes: - Replace (char __user *optval, int __user *optlen) with sockopt_t *opt - Use opt->optlen for buffer length (input) and returned size (output) - Use copy_to_iter() instead of put_user()/copy_to_user() - For PACKET_HDRLEN which reads from optval: use opt->iter_in with copy_from_iter() for the input read, then the common opt->iter_out copy_to_iter() epilogue handles the output Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-3-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bb2d88205e5a..1da78b6ad3d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,6 +49,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } static int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { int len; int val, lv = sizeof(val); @@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_PACKET) return -ENOPROTOOPT; - if (get_user(len, optlen)) - 
return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); if (len < sizeof(int)) return -EINVAL; - if (copy_from_user(&val, optval, len)) + if (copy_from_iter(&val, len, &opt->iter_in) != len) return -EFAULT; switch (val) { case TPACKET_V1: @@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, data, len)) + opt->optlen = len; + if (copy_to_iter(data, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, - .getsockopt = packet_getsockopt, + .getsockopt_iter = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, -- cgit v1.3-3-g829e From 5b75e7d6769557fbee2ae46181deaff0c98ca795 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 8 Apr 2026 03:30:32 -0700 Subject: can: raw: convert to getsockopt_iter Convert CAN raw socket's getsockopt implementation to use the new getsockopt_iter callback with sockopt_t. Key changes: - Replace (char __user *optval, int __user *optlen) with sockopt_t *opt - Use opt->optlen for buffer length (input) and returned size (output) - Use copy_to_iter() instead of copy_to_user() - For CAN_RAW_FILTER and CAN_RAW_XL_VCID_OPTS: on -ERANGE, set opt->optlen to the required buffer size. The wrapper writes this back to userspace even on error, preserving the existing API that lets userspace discover the needed allocation size. 
Signed-off-by: Breno Leitao Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20260408-getsockopt-v3-4-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski --- net/can/raw.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index 56c95c768778..a0c0f5e946d8 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -761,7 +761,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } static int raw_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -771,8 +771,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -788,12 +787,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(fsize, optlen)) - err = -EFAULT; + opt->optlen = fsize; } else { if (len > fsize) len = fsize; - if (copy_to_user(optval, ro->filter, len)) + if (copy_to_iter(ro->filter, len, + &opt->iter_out) != len) err = -EFAULT; } } else { @@ -802,7 +801,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, release_sock(sk); if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_ERR_FILTER: @@ -846,16 +845,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(sizeof(ro->raw_vcid_opts), optlen)) - err = -EFAULT; + opt->optlen = sizeof(ro->raw_vcid_opts); } else { if (len > sizeof(ro->raw_vcid_opts)) len = sizeof(ro->raw_vcid_opts); - if (copy_to_user(optval, &ro->raw_vcid_opts, len)) + if 
(copy_to_iter(&ro->raw_vcid_opts, len, + &opt->iter_out) != len) err = -EFAULT; } if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_JOIN_FILTERS: @@ -869,9 +868,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, val, len)) + opt->optlen = len; + if (copy_to_iter(val, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -1078,7 +1076,7 @@ static const struct proto_ops raw_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, - .getsockopt = raw_getsockopt, + .getsockopt_iter = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, -- cgit v1.3-3-g829e