diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-04-13 14:56:33 -0700 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-04-13 14:56:33 -0700 |
| commit | 2e652049a4db27d052f9c987710920ed328a70e9 (patch) | |
| tree | f4f61b82c8af8270232cc38c1fc706897748728b | |
| parent | 1b9707e6f1a9d5f9e1b91750f24743108b093e2b (diff) | |
| parent | 5b75e7d6769557fbee2ae46181deaff0c98ca795 (diff) | |
| download | wireguard-linux-2e652049a4db27d052f9c987710920ed328a70e9.tar.xz wireguard-linux-2e652049a4db27d052f9c987710920ed328a70e9.zip | |
Merge branch 'net-move-getsockopt-away-from-__user-buffers'
Breno Leitao says:
====================
net: move .getsockopt away from __user buffers
Currently, the .getsockopt callback requires __user pointers:
    int (*getsockopt)(struct socket *sock, int level,
                      int optname, char __user *optval, int __user *optlen);
This prevents kernel callers (io_uring, BPF) from using getsockopt on
levels other than SOL_SOCKET, since they pass kernel pointers.
Following Linus' suggestion [0], this series introduces sockopt_t, a
type-safe wrapper around iov_iter, and a getsockopt_iter callback that
works with both user and kernel buffers. AF_PACKET and CAN raw are
converted as initial users, with selftests covering the trickiest
conversion patterns.
[0] https://lore.kernel.org/all/CAHk-=whmzrO-BMU=uSVXbuoLi-3tJsO=0kHj1BCPBE3F2kVhTA@mail.gmail.com/
====================
Link: https://patch.msgid.link/20260408-getsockopt-v3-0-061bb9cb355d@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | include/linux/net.h | 23 |
| -rw-r--r-- | net/can/raw.c | 28 |
| -rw-r--r-- | net/packet/af_packet.c | 15 |
| -rw-r--r-- | net/socket.c | 54 |
4 files changed, 94 insertions, 26 deletions
diff --git a/include/linux/net.h b/include/linux/net.h index ca6a7bc5c9ae..f268f395ce47 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -23,9 +23,30 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/sockptr.h> +#include <linux/uio.h> #include <uapi/linux/net.h> +/** + * struct sockopt - socket option value container + * @iter_in: iov_iter for reading optval with the content from the caller. + * Use copy_from_iter() given this iov direction is ITER_SOURCE + * @iter_out: iov_iter for protocols to update optval data to userspace + * Use _copy_to_iter() given iov direction is ITER_DEST + * @optlen: serves as both input (buffer size) and output (returned data size). + * + * Type-safe wrapper for socket option data that works with both + * user and kernel buffers. + * + * The optlen field allows callbacks to return a specific length value + * independent of the bytes written via copy_to_iter(). + */ +typedef struct sockopt { + struct iov_iter iter_in; + struct iov_iter iter_out; + int optlen; +} sockopt_t; + struct poll_table_struct; struct pipe_inode_info; struct inode; @@ -192,6 +213,8 @@ struct proto_ops { unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); + int (*getsockopt_iter)(struct socket *sock, int level, + int optname, sockopt_t *opt); void (*show_fdinfo)(struct seq_file *m, struct socket *sock); int (*sendmsg) (struct socket *sock, struct msghdr *m, size_t total_len); diff --git a/net/can/raw.c b/net/can/raw.c index 56c95c768778..a0c0f5e946d8 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -761,7 +761,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } static int raw_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -771,8 +771,7 @@ static int raw_getsockopt(struct socket *sock, int level, int 
optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -788,12 +787,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(fsize, optlen)) - err = -EFAULT; + opt->optlen = fsize; } else { if (len > fsize) len = fsize; - if (copy_to_user(optval, ro->filter, len)) + if (copy_to_iter(ro->filter, len, + &opt->iter_out) != len) err = -EFAULT; } } else { @@ -802,7 +801,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, release_sock(sk); if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_ERR_FILTER: @@ -846,16 +845,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(sizeof(ro->raw_vcid_opts), optlen)) - err = -EFAULT; + opt->optlen = sizeof(ro->raw_vcid_opts); } else { if (len > sizeof(ro->raw_vcid_opts)) len = sizeof(ro->raw_vcid_opts); - if (copy_to_user(optval, &ro->raw_vcid_opts, len)) + if (copy_to_iter(&ro->raw_vcid_opts, len, + &opt->iter_out) != len) err = -EFAULT; } if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_JOIN_FILTERS: @@ -869,9 +868,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, val, len)) + opt->optlen = len; + if (copy_to_iter(val, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -1078,7 +1076,7 @@ static const struct proto_ops raw_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, - .getsockopt = raw_getsockopt, + .getsockopt_iter = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, diff 
--git a/net/packet/af_packet.c b/net/packet/af_packet.c index bb2d88205e5a..1da78b6ad3d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,6 +49,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> +#include <linux/uio.h> #include <linux/filter.h> #include <linux/types.h> #include <linux/mm.h> @@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } static int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { int len; int val, lv = sizeof(val); @@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_PACKET) return -ENOPROTOOPT; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); if (len < sizeof(int)) return -EINVAL; - if (copy_from_user(&val, optval, len)) + if (copy_from_iter(&val, len, &opt->iter_in) != len) return -EFAULT; switch (val) { case TPACKET_V1: @@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, data, len)) + opt->optlen = len; + if (copy_to_iter(data, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, - .getsockopt = packet_getsockopt, + .getsockopt_iter = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, diff --git a/net/socket.c b/net/socket.c index ade2ff5845a0..a25e513cf0f4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,6 +77,7 @@ #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/security.h> +#include 
<linux/uio.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/kmod.h> @@ -2349,11 +2350,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +/* + * Initialize a sockopt_t from sockptr optval/optlen, setting up iov_iter + * for both input and output directions. + * It is important to remember that both iov points to the same data, but, + * .iter_in is read-only and .iter_out is write-only by the protocol callbacks + */ +static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval, + sockptr_t optlen, struct kvec *kvec) +{ + int koptlen; + + if (copy_from_sockptr(&koptlen, optlen, sizeof(int))) + return -EFAULT; + + if (koptlen < 0) + return -EINVAL; + + if (optval.is_kernel) { + kvec->iov_base = optval.kernel; + kvec->iov_len = koptlen; + iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen); + iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen); + } else { + iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen); + iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user, + koptlen); + } + opt->optlen = koptlen; + + return 0; +} + int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { int max_optlen __maybe_unused = 0; const struct proto_ops *ops; + struct kvec kvec; + sockopt_t opt; int err; err = security_socket_getsockopt(sock, level, optname); @@ -2366,15 +2401,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { err = sk_getsockopt(sock->sk, level, optname, optval, optlen); - } else if (unlikely(!ops->getsockopt)) { - err = -EOPNOTSUPP; - } else { + } else if (ops->getsockopt_iter) { + err = sockptr_to_sockopt(&opt, optval, optlen, &kvec); + if (err) + return err; + + err = ops->getsockopt_iter(sock, level, optname, &opt); + + /* Always write back optlen, even on failure. 
Some protocols + * (e.g. CAN raw) return -ERANGE and set optlen to the + * required buffer size so userspace can discover it. + */ + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int))) + return -EFAULT; + } else if (ops->getsockopt) { if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, "Invalid argument type")) return -EOPNOTSUPP; err = ops->getsockopt(sock, level, optname, optval.user, optlen.user); + } else { + err = -EOPNOTSUPP; } if (!compat) |
