summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-04-13 14:56:33 -0700
committerJakub Kicinski <kuba@kernel.org>2026-04-13 14:56:33 -0700
commit2e652049a4db27d052f9c987710920ed328a70e9 (patch)
treef4f61b82c8af8270232cc38c1fc706897748728b
parent1b9707e6f1a9d5f9e1b91750f24743108b093e2b (diff)
parent5b75e7d6769557fbee2ae46181deaff0c98ca795 (diff)
downloadwireguard-linux-2e652049a4db27d052f9c987710920ed328a70e9.tar.xz
wireguard-linux-2e652049a4db27d052f9c987710920ed328a70e9.zip
Merge branch 'net-move-getsockopt-away-from-__user-buffers'
Breno Leitao says: ==================== net: move .getsockopt away from __user buffers Currently, the .getsockopt callback requires __user pointers: int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); This prevents kernel callers (io_uring, BPF) from using getsockopt on levels other than SOL_SOCKET, since they pass kernel pointers. Following Linus' suggestion [0], this series introduces sockopt_t, a type-safe wrapper around iov_iter, and a getsockopt_iter callback that works with both user and kernel buffers. AF_PACKET and CAN raw are converted as initial users, with selftests covering the trickiest conversion patterns. [0] https://lore.kernel.org/all/CAHk-=whmzrO-BMU=uSVXbuoLi-3tJsO=0kHj1BCPBE3F2kVhTA@mail.gmail.com/ ==================== Link: https://patch.msgid.link/20260408-getsockopt-v3-0-061bb9cb355d@debian.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--include/linux/net.h23
-rw-r--r--net/can/raw.c28
-rw-r--r--net/packet/af_packet.c15
-rw-r--r--net/socket.c54
4 files changed, 94 insertions, 26 deletions
diff --git a/include/linux/net.h b/include/linux/net.h
index ca6a7bc5c9ae..f268f395ce47 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -23,9 +23,30 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
+#include <linux/uio.h>
#include <uapi/linux/net.h>
+/**
+ * struct sockopt - socket option value container
+ * @iter_in: iov_iter for reading optval with the content from the caller.
+ * Use copy_from_iter() given this iov direction is ITER_SOURCE.
+ * @iter_out: iov_iter for protocols to write optval data back to the caller.
+ * Use copy_to_iter() given this iov direction is ITER_DEST.
+ * @optlen: serves as both input (buffer size) and output (returned data size).
+ *
+ * Type-safe wrapper for socket option data that works with both
+ * user and kernel buffers.
+ *
+ * The optlen field allows callbacks to return a specific length value
+ * independent of the bytes written via copy_to_iter().
+ */
+typedef struct sockopt {
+ struct iov_iter iter_in;
+ struct iov_iter iter_out;
+ int optlen;
+} sockopt_t;
+
struct poll_table_struct;
struct pipe_inode_info;
struct inode;
@@ -192,6 +213,8 @@ struct proto_ops {
unsigned int optlen);
int (*getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
+ int (*getsockopt_iter)(struct socket *sock, int level,
+ int optname, sockopt_t *opt);
void (*show_fdinfo)(struct seq_file *m, struct socket *sock);
int (*sendmsg) (struct socket *sock, struct msghdr *m,
size_t total_len);
diff --git a/net/can/raw.c b/net/can/raw.c
index 56c95c768778..a0c0f5e946d8 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -761,7 +761,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
}
static int raw_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockopt_t *opt)
{
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
@@ -771,8 +771,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (level != SOL_CAN_RAW)
return -EINVAL;
- if (get_user(len, optlen))
- return -EFAULT;
+ len = opt->optlen;
if (len < 0)
return -EINVAL;
@@ -788,12 +787,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (len < fsize) {
/* return -ERANGE and needed space in optlen */
err = -ERANGE;
- if (put_user(fsize, optlen))
- err = -EFAULT;
+ opt->optlen = fsize;
} else {
if (len > fsize)
len = fsize;
- if (copy_to_user(optval, ro->filter, len))
+ if (copy_to_iter(ro->filter, len,
+ &opt->iter_out) != len)
err = -EFAULT;
}
} else {
@@ -802,7 +801,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
release_sock(sk);
if (!err)
- err = put_user(len, optlen);
+ opt->optlen = len;
return err;
}
case CAN_RAW_ERR_FILTER:
@@ -846,16 +845,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(ro->raw_vcid_opts)) {
/* return -ERANGE and needed space in optlen */
err = -ERANGE;
- if (put_user(sizeof(ro->raw_vcid_opts), optlen))
- err = -EFAULT;
+ opt->optlen = sizeof(ro->raw_vcid_opts);
} else {
if (len > sizeof(ro->raw_vcid_opts))
len = sizeof(ro->raw_vcid_opts);
- if (copy_to_user(optval, &ro->raw_vcid_opts, len))
+ if (copy_to_iter(&ro->raw_vcid_opts, len,
+ &opt->iter_out) != len)
err = -EFAULT;
}
if (!err)
- err = put_user(len, optlen);
+ opt->optlen = len;
return err;
}
case CAN_RAW_JOIN_FILTERS:
@@ -869,9 +868,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return -ENOPROTOOPT;
}
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, val, len))
+ opt->optlen = len;
+ if (copy_to_iter(val, len, &opt->iter_out) != len)
return -EFAULT;
return 0;
}
@@ -1078,7 +1076,7 @@ static const struct proto_ops raw_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = raw_setsockopt,
- .getsockopt = raw_getsockopt,
+ .getsockopt_iter = raw_getsockopt,
.sendmsg = raw_sendmsg,
.recvmsg = raw_recvmsg,
.mmap = sock_no_mmap,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bb2d88205e5a..1da78b6ad3d5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -49,6 +49,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/ethtool.h>
+#include <linux/uio.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
}
static int packet_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockopt_t *opt)
{
int len;
int val, lv = sizeof(val);
@@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (level != SOL_PACKET)
return -ENOPROTOOPT;
- if (get_user(len, optlen))
- return -EFAULT;
+ len = opt->optlen;
if (len < 0)
return -EINVAL;
@@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
len = sizeof(int);
if (len < sizeof(int))
return -EINVAL;
- if (copy_from_user(&val, optval, len))
+ if (copy_from_iter(&val, len, &opt->iter_in) != len)
return -EFAULT;
switch (val) {
case TPACKET_V1:
@@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (len > lv)
len = lv;
- if (put_user(len, optlen))
- return -EFAULT;
- if (copy_to_user(optval, data, len))
+ opt->optlen = len;
+ if (copy_to_iter(data, len, &opt->iter_out) != len)
return -EFAULT;
return 0;
}
@@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = {
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = packet_setsockopt,
- .getsockopt = packet_getsockopt,
+ .getsockopt_iter = packet_getsockopt,
.sendmsg = packet_sendmsg,
.recvmsg = packet_recvmsg,
.mmap = packet_mmap,
diff --git a/net/socket.c b/net/socket.c
index ade2ff5845a0..a25e513cf0f4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -77,6 +77,7 @@
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/security.h>
+#include <linux/uio.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
@@ -2349,11 +2350,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
int optname));
+/*
+ * Initialize a sockopt_t from sockptr optval/optlen, setting up iov_iter
+ * for both input and output directions. Returns 0, -EFAULT or -EINVAL.
+ * Both iov_iters point to the same buffer (and share @kvec for kernel
+ * pointers); .iter_in is read-only and .iter_out is write-only for callbacks.
+ */
+static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval,
+ sockptr_t optlen, struct kvec *kvec)
+{
+ int koptlen;
+
+ if (copy_from_sockptr(&koptlen, optlen, sizeof(int)))
+ return -EFAULT;
+
+ if (koptlen < 0)
+ return -EINVAL;
+
+ if (optval.is_kernel) {
+ kvec->iov_base = optval.kernel;
+ kvec->iov_len = koptlen;
+ iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen);
+ iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen);
+ } else {
+ iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen);
+ iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user,
+ koptlen);
+ }
+ opt->optlen = koptlen;
+
+ return 0;
+}
+
int do_sock_getsockopt(struct socket *sock, bool compat, int level,
int optname, sockptr_t optval, sockptr_t optlen)
{
int max_optlen __maybe_unused = 0;
const struct proto_ops *ops;
+ struct kvec kvec;
+ sockopt_t opt;
int err;
err = security_socket_getsockopt(sock, level, optname);
@@ -2366,15 +2401,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level,
ops = READ_ONCE(sock->ops);
if (level == SOL_SOCKET) {
err = sk_getsockopt(sock->sk, level, optname, optval, optlen);
- } else if (unlikely(!ops->getsockopt)) {
- err = -EOPNOTSUPP;
- } else {
+ } else if (ops->getsockopt_iter) {
+ err = sockptr_to_sockopt(&opt, optval, optlen, &kvec);
+ if (err)
+ return err;
+
+ err = ops->getsockopt_iter(sock, level, optname, &opt);
+
+ /* Always write back optlen, even on failure. Some protocols
+ * (e.g. CAN raw) return -ERANGE and set optlen to the
+ * required buffer size so userspace can discover it.
+ */
+ if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int)))
+ return -EFAULT;
+ } else if (ops->getsockopt) {
if (WARN_ONCE(optval.is_kernel || optlen.is_kernel,
"Invalid argument type"))
return -EOPNOTSUPP;
err = ops->getsockopt(sock, level, optname, optval.user,
optlen.user);
+ } else {
+ err = -EOPNOTSUPP;
}
if (!compat)