summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--cmd/tailscaled/tailscaled.go8
-rw-r--r--go.mod4
-rw-r--r--go.sum8
-rw-r--r--net/tstun/tap_linux.go1
-rw-r--r--net/tstun/tun.go12
-rw-r--r--net/tstun/veth_linux.go285
-rw-r--r--net/tstun/wrap.go56
-rw-r--r--wgengine/userspace.go6
8 files changed, 374 insertions, 6 deletions
diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go
index 160ec1677..bd41988a2 100644
--- a/cmd/tailscaled/tailscaled.go
+++ b/cmd/tailscaled/tailscaled.go
@@ -99,8 +99,8 @@ func defaultTunName() string {
var args struct {
// tunname is a /dev/net/tun tunnel name ("tailscale0"), the
- // string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]"
- // or comma-separated list thereof.
+ // string "userspace-networking", "tap:TAPNAME[:BRIDGENAME]",
+ // "veth:VETHNAME", or comma-separated list thereof.
tunname string
cleanup bool
@@ -524,6 +524,10 @@ func tryEngine(logf logger.Logf, linkMon *monitor.Mon, dialer *tsdial.Dialer, na
e, err := wgengine.NewUserspaceEngine(logf, conf)
return e, false, err
}
+ if strings.HasPrefix(name, "veth:") {
+ conf.IsVETH = true
+ // fall through, we want a router and DNS config.
+ }
r, err := router.New(logf, dev, linkMon)
if err != nil {
diff --git a/go.mod b/go.mod
index f052d4643..06382210e 100644
--- a/go.mod
+++ b/go.mod
@@ -8,6 +8,7 @@ require (
github.com/alexbrainman/sspi v0.0.0-20210105120005-909beea2cc74
github.com/andybalholm/brotli v1.0.3
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be
+ github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029
github.com/aws/aws-sdk-go-v2 v1.11.2
github.com/aws/aws-sdk-go-v2/config v1.11.0
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.7.4
@@ -112,6 +113,7 @@ require (
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/charithe/durationcheck v0.0.9 // indirect
github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af // indirect
+ github.com/cilium/ebpf v0.8.1 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.11.4 // indirect
github.com/daixiang0/gci v0.2.9 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
@@ -282,3 +284,5 @@ require (
mvdan.cc/unparam v0.0.0-20211002134041-24922b6997ca // indirect
software.sslmate.com/src/go-pkcs12 v0.0.0-20210415151418-c5206de65a78 // indirect
)
+
+//replace github.com/asavie/xdp => ./gohack/xdp
diff --git a/go.sum b/go.sum
index cedf51792..f0e73db07 100644
--- a/go.sum
+++ b/go.sum
@@ -127,6 +127,8 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
+github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029 h1:jRVqYkwLmMX0LshDb2RgU/iTKAlEGBPBrMVOa7V1FHQ=
+github.com/asavie/xdp v0.3.4-0.20220212172814-56d71236a029/go.mod h1:Vv5p+3mZiDh7ImdSvdon3E78wXyre7df5V58ATdIYAY=
github.com/ashanbrown/forbidigo v1.2.0 h1:RMlEFupPCxQ1IogYOQUnIQwGEUGK8g5vAPMRyJoSxbc=
github.com/ashanbrown/forbidigo v1.2.0/go.mod h1:vVW7PEdqEFqapJe95xHkTfB1+XvZXBFg8t0sG2FIxmI=
github.com/ashanbrown/makezero v0.0.0-20210520155254-b6261585ddde h1:YOsoVXsZQPA9aOTy1g0lAJv5VzZUvwQuZqug8XPeqfM=
@@ -204,6 +206,7 @@ github.com/chavacava/garif v0.0.0-20210405164556-e8a0a408d6af/go.mod h1:Qjyv4H3/
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
+github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs=
github.com/cilium/ebpf v0.8.1 h1:bLSSEbBLqGPXxls55pGr5qWZaTqcmfDJHhou7t254ao=
github.com/cilium/ebpf v0.8.1/go.mod h1:f5zLIM0FSNuAkSyLAN7X+Hy6yznlF1mNiWUMfxMtrgk=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
@@ -292,6 +295,7 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF
github.com/fatih/structtag v1.2.0 h1:/OdNE99OxoI/PqaW/SuSK9uxxT3f/tcSZgon/ssNSx4=
github.com/fatih/structtag v1.2.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
+github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
github.com/frankban/quicktest v1.14.0 h1:+cqqvzZV87b4adx/5ayVOaYZ2CrvM4ejQvUdBzPPUss=
github.com/frankban/quicktest v1.14.0/go.mod h1:NeW+ay9A/U67EYXNFA1nPE8e/tnQv/09mUdL/ijj8og=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
@@ -497,6 +501,7 @@ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN
github.com/google/go-containerregistry v0.9.0 h1:5Ths7RjxyFV0huKChQTgY6fLzvHhZMpLTFNja8U0/0w=
github.com/google/go-containerregistry v0.9.0/go.mod h1:9eq4BnSufyT1kHNffX+vSXVonaJ7yaIOulrKZejMxnQ=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
+github.com/google/gopacket v1.1.19/go.mod h1:iJ8V8n6KS+z2U1A8pUwu8bW5SyEMkXJB8Yo/Vo+TKTo=
github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2 h1:CVuJwN34x4xM2aT4sIKhmeib40NeBPhRihNjQmpJsA4=
github.com/google/goterm v0.0.0-20200907032337-555d40f16ae2/go.mod h1:nOFQdrUlIlx6M6ODdSpBj1NVA+VgLC6kmw60mkw34H4=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
@@ -1184,8 +1189,10 @@ github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7Fw
github.com/vbatts/tar-split v0.11.2 h1:Via6XqJr0hceW4wff3QRzD5gAk/tatMw/4ZA7cTlIME=
github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI=
github.com/viki-org/dnscache v0.0.0-20130720023526-c70c1f23c5d8/go.mod h1:dniwbG03GafCjFohMDmz6Zc6oCuiqgH6tGNyXTkHzXE=
+github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54 h1:8mhqcHPqTMhSPoslhGYihEgSfc77+7La1P6kiB6+9So=
github.com/vishvananda/netlink v1.1.1-0.20211118161826-650dca95af54/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho=
+github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74 h1:gga7acRE695APm9hlsSMoOoE65U4/TcqNj90mc69Rlg=
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
@@ -1447,6 +1454,7 @@ golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606122018-79a91cf218c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/net/tstun/tap_linux.go b/net/tstun/tap_linux.go
index e6b699aec..c7185ce2f 100644
--- a/net/tstun/tap_linux.go
+++ b/net/tstun/tap_linux.go
@@ -94,7 +94,6 @@ const (
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
// it's been handled (that is, whether it should NOT be passed to wireguard).
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
-
if len(ethBuf) < ethernetFrameSize {
// Corrupt. Ignore.
if tapDebug {
diff --git a/net/tstun/tun.go b/net/tstun/tun.go
index 3030112d5..dcc47f205 100644
--- a/net/tstun/tun.go
+++ b/net/tstun/tun.go
@@ -31,6 +31,9 @@ func init() {
// createTAP is non-nil on Linux.
var createTAP func(tapName, bridgeName string) (tun.Device, error)
+// createVETH is non-nil on Linux.
+var createVETH func(tapName string) (tun.Device, error)
+
// New returns a tun.Device for the requested device name, along with
// the OS-dependent name that was allocated to the device.
func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
@@ -51,6 +54,15 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
return nil, "", errors.New("bogus tap argument")
}
dev, err = createTAP(tapName, bridgeName)
+ } else if strings.HasPrefix(tunName, "veth:") {
+ if runtime.GOOS != "linux" {
+ return nil, "", errors.New("veth/xdp only works on Linux")
+ }
+ f := strings.Split(tunName, ":")
+ if len(f) != 2 {
+ return nil, "", errors.New("bogus veth argument")
+ }
+ dev, err = createVETH(f[1])
} else {
dev, err = tun.CreateTUN(tunName, tunMTU)
}
diff --git a/net/tstun/veth_linux.go b/net/tstun/veth_linux.go
new file mode 100644
index 000000000..65707da15
--- /dev/null
+++ b/net/tstun/veth_linux.go
@@ -0,0 +1,285 @@
+// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tstun
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "net"
+ "os"
+ "os/exec"
+ "strconv"
+ "sync"
+
+ "github.com/asavie/xdp"
+ "golang.org/x/sys/unix"
+ "golang.zx2c4.com/wireguard/tun"
+ "gvisor.dev/gvisor/pkg/tcpip/link/rawfile"
+)
+
+// This file borrows a lot of values from tap_linux.go, e.g. ourMAC.
+
+func init() { createVETH = createVETHLinux }
+
+const queueID = 0 // TODO explore alternatives
+
+// TODO: do better
+var dstMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x94}
+
+func createVETHLinux(vethName string) (device tun.Device, err error) {
+ defer func() {
+ if err != nil {
+ err = fmt.Errorf("tstun: veth: %w", err)
+ }
+ }()
+ vethBEName := vethName + "be"
+ cmd := exec.Command("ip", "link", "add", vethName, "type", "veth", "peer", "name", vethBEName)
+ if b, err := cmd.CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ip link add %s: %v: %s", vethName, err, b)
+ }
+ if b, err := exec.Command("ip", "link", "set", "dev", vethName, "arp", "off").CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b)
+ }
+ // TODO why do we need to set the MTU here?
+ if b, err := exec.Command("ip", "link", "set", "dev", vethName, "mtu", strconv.Itoa(DefaultMTU)).CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ip link mtu %s: %v: %s", vethName, err, b)
+ }
+ if b, err := exec.Command("ip", "link", "set", "dev", vethName, "up").CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ip link up %s: %v: %s", vethName, err, b)
+ }
+ if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "up").CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ip link up %s: %v: %s", vethBEName, err, b)
+ }
+ ourMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", ourMAC[0], ourMAC[1], ourMAC[2], ourMAC[3], ourMAC[4], ourMAC[5])
+ if b, err := exec.Command("ip", "link", "set", "dev", vethBEName, "address", ourMACStr).CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b)
+ }
+ dstMACStr := fmt.Sprintf("%x:%x:%x:%x:%x:%x", dstMAC[0], dstMAC[1], dstMAC[2], dstMAC[3], dstMAC[4], dstMAC[5])
+ if b, err := exec.Command("ip", "link", "set", "dev", vethName, "address", dstMACStr).CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("ifconfig %s hw ether: %v: %s", vethBEName, err, b)
+ }
+ // TODO ubuntu does not ship with ethtool by default.
+ // If we don't have ethtool we can calculate checksums ourselves, see wrap.go.
+ // I took a quick look at the strace and it's mysterious.
+ if b, err := exec.Command("ethtool", "-K", vethName, "tx", "off", "rx", "off").CombinedOutput(); err != nil {
+ return nil, fmt.Errorf("checksum offloading: %v: %s", err, b)
+ }
+
+ ifaces, err := net.Interfaces()
+ if err != nil {
+ return nil, fmt.Errorf("net.Interfaces: %v", err)
+ }
+
+ ifBEIndex := -1
+ for _, iface := range ifaces {
+ if iface.Name == vethBEName {
+ ifBEIndex = iface.Index
+ break
+ }
+ }
+ if ifBEIndex == -1 {
+ return nil, errors.New("could not find veth index")
+ }
+
+ program, err := xdp.NewProgram(queueID + 1)
+ if err != nil {
+ return nil, fmt.Errorf("xdp.NewProgram: %w", err)
+ }
+ if err := program.Attach(ifBEIndex); err != nil {
+ return nil, fmt.Errorf("xdp program failed to attach: %w", err)
+ }
+
+ xsk, err := xdp.NewSocket(ifBEIndex, queueID, nil)
+ if err != nil {
+ return nil, fmt.Errorf("xdp failed to create socket: %w", err)
+ }
+ if err := program.Register(queueID, xsk.FD()); err != nil {
+ return nil, fmt.Errorf("xdp failed to register socket: %w", err)
+ }
+
+ //ip link add ve_A type veth peer name ve_B
+ d := &vethDevice{
+ name: vethName,
+ events: make(chan tun.Event, 1),
+ program: program,
+ ifBEIndex: ifBEIndex,
+ xsk: xsk,
+ }
+
+ fmt.Printf("createVETHLinux: NumFreeFillSlots=%d\n", d.xsk.NumFreeFillSlots())
+ fmt.Printf("createVETHLinux: NumFreeTxSlots=%d\n", d.xsk.NumFreeTxSlots())
+ return d, nil
+}
+
+// vethDevice implements tun.Device, using a virtual ethernet pair and AF_XDP.
+type vethDevice struct {
+ name string
+ events chan tun.Event
+ program *xdp.Program
+ ifBEIndex int
+ xsk *xdp.Socket
+
+ pollEvent rawfile.PollEvent
+
+ rxMu sync.Mutex // guards rxDescs
+ rxDescs []xdp.Desc
+
+ txMu sync.Mutex // guards txDescs, txCur
+ txDescs []xdp.Desc
+ txNext int
+}
+
+// File implements tun.Device.File.
+func (d *vethDevice) File() *os.File { panic("no file for veth/xdp") }
+
+func (d *vethDevice) blockingPoll(events pollEvent) (numReceived, numCompleted int, err error) {
+ //return d.xsk.Poll(-1)
+ d.pollEvent = rawfile.PollEvent{
+ FD: int32(d.xsk.FD()),
+ Events: int16(events),
+ }
+ /*if d.xsk.NumFilled() > 0 {
+ d.pollEvent.Events |= unix.POLLIN
+ }
+ /*if d.xsk.NumTransmitted() > 0 {
+ d.pollEvent.Events |= unix.POLLOUT
+ }*/
+ if d.pollEvent.Events == 0 {
+ return
+ }
+ for err = unix.EINTR; err == unix.EINTR; {
+ _, errNo := rawfile.BlockingPoll(&d.pollEvent, 1, nil)
+ if errNo == 0 {
+ err = nil
+ } else {
+ err = errNo
+ }
+ }
+ if err != nil {
+ fmt.Printf("blockingPoll err=%v (%d)", err, err)
+ return 0, 0, err
+ }
+ numReceived = d.xsk.NumReceived()
+ numCompleted = d.xsk.NumCompleted()
+ if numCompleted > 0 {
+ d.xsk.Complete(numCompleted)
+ }
+ return numReceived, numCompleted, err
+}
+
+// Read implements tun.Device.Read.
+// read a packet from the device (without any additional headers)
+func (d *vethDevice) Read(b []byte, off int) (int, error) {
+ d.rxMu.Lock()
+ defer d.rxMu.Unlock()
+
+ for len(d.rxDescs) == 0 {
+ if n := d.xsk.NumFreeFillSlots(); n > 0 {
+ d.xsk.Fill(d.xsk.GetDescs(n, true))
+ }
+ numRx, _, err := d.blockingPoll(pollRx)
+ if err != nil {
+ return 0, fmt.Errorf("veth: %w", err)
+ }
+ d.rxDescs = d.xsk.Receive(numRx)
+ //fmt.Printf("vethDevice.Read numRx=%d, len(d.rxDescs)=%d\n", numRx, len(d.rxDescs))
+ //fmt.Printf("--- vethDevice.Read: NumFreeFillSlots: %d numRx: %d, len(rxDescs): %d\n", n, numRx, len(d.rxDescs))
+ }
+ if len(d.rxDescs) == 0 {
+ return 0, fmt.Errorf("veth: %w", io.EOF) // TODO: what error?
+ }
+ data := d.xsk.GetFrame(d.rxDescs[0])
+ n := copy(b[off:], data)
+ //fmt.Printf("--- vethDevice.Read: frame data len=%d (n=%d): %x\n", len(data), n, data)
+ d.rxDescs = d.rxDescs[1:]
+ return n, nil
+}
+
+// Write implements tun.Device.Write.
+// writes a packet to the device (without any additional headers)
+func (d *vethDevice) Write(b []byte, off int) (int, error) {
+ d.txMu.Lock()
+ defer d.txMu.Unlock()
+
+ for len(d.txDescs) == 0 {
+ if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 {
+ d.xsk.Complete(numCompleted)
+ }
+ d.txNext = 0
+ d.txDescs = d.xsk.GetDescs(d.xsk.NumFreeTxSlots(), false)
+ if len(d.txDescs) == 0 {
+ _, _, err := d.blockingPoll(pollTx)
+ if err != nil {
+ return 0, fmt.Errorf("veth: Write: %w", io.EOF)
+ }
+ }
+ //fmt.Printf("veth.Write len(txDescs)=%d\n", len(d.txDescs))
+ }
+
+ n := copy(d.xsk.GetFrame(d.txDescs[d.txNext]), b[off:])
+ d.txDescs[d.txNext].Len = uint32(n)
+ d.txNext++
+
+ if d.txNext == len(d.txDescs) {
+ return len(b), d.flushTxLocked()
+ }
+ return len(b), nil
+}
+
+// Flush implements tun.Device.Flush.
+// flush all previous writes to the device
+func (d *vethDevice) Flush() error {
+ d.txMu.Lock()
+ defer d.txMu.Unlock()
+ return d.flushTxLocked()
+}
+
+func (d *vethDevice) flushTxLocked() error {
+ if d.txNext == 0 {
+ return nil
+ }
+ if d.xsk.Transmit(d.txDescs[:d.txNext]) > 0 {
+ if numCompleted := d.xsk.NumCompleted(); numCompleted > 0 {
+ d.xsk.Complete(numCompleted)
+ }
+ }
+ // We pick up the transmission using the blocking poll in Write.
+ d.txDescs = nil
+ d.txNext = 0
+ return nil
+}
+
+type pollEvent int
+
+const (
+ pollRx = pollEvent(unix.POLLIN)
+ pollTx = pollEvent(unix.POLLOUT)
+)
+
+// MTU implements tun.Device.MTU.
+func (d *vethDevice) MTU() (int, error) { return DefaultMTU, nil } // TODO
+
+// Name implements tun.Device.Name.
+func (d *vethDevice) Name() (string, error) { return d.name, nil }
+
+// Events implements tun.Device.Events.
+// returns a constant channel of events related to the device
+func (d *vethDevice) Events() chan tun.Event {
+ d.events <- tun.EventUp
+ // TODO EventDown, EventMTUUpdate
+ return d.events
+}
+
+// Close implements tun.Device.Close.
+// stops the device and closes the event channel
+func (d *vethDevice) Close() error {
+ close(d.events)
+ d.program.Unregister(queueID)
+ d.xsk.Close()
+ d.program.Detach(d.ifBEIndex)
+ d.program.Close()
+ return nil
+}
diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go
index 909340416..572527950 100644
--- a/net/tstun/wrap.go
+++ b/net/tstun/wrap.go
@@ -73,7 +73,7 @@ type Wrapper struct {
limitedLogf logger.Logf // aggressively rate-limited logf used for potentially high volume errors
// tdev is the underlying Wrapper device.
tdev tun.Device
- isTAP bool // whether tdev is a TAP device
+ isTAP bool // whether tdev is a TAP or veth+xdp device
closeOnce sync.Once
@@ -183,7 +183,25 @@ type tunReadResult struct {
}
func WrapTAP(logf logger.Logf, tdev tun.Device) *Wrapper {
- return wrap(logf, tdev, true)
+ w := wrap(logf, tdev, true)
+ if name, _ := tdev.Name(); strings.HasPrefix(name, "veth:") {
+ var mac [6]byte
+ copy(mac[:], dstMAC)
+ w.destMACAtomic.Store(mac)
+ logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac)
+ } else {
+ logf("XXXXXXXXXXXXXXXXXXXXXXXXX tdev name: %s", name)
+ }
+ return w
+}
+
+func WrapVETH(logf logger.Logf, tdev tun.Device) *Wrapper {
+ w := wrap(logf, tdev, true)
+ var mac [6]byte
+ copy(mac[:], dstMAC)
+ w.destMACAtomic.Store(mac)
+ logf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX destMACAtomic: %x", mac)
+ return w
}
func Wrap(logf logger.Logf, tdev tun.Device) *Wrapper {
@@ -362,6 +380,9 @@ const ethernetFrameSize = 14 // 2 six byte MACs, 2 bytes ethertype
// This is needed because t.tdev.Read in general may block (it does on Windows),
// so packets may be stuck in t.outbound if t.Read called t.tdev.Read directly.
func (t *Wrapper) poll() {
+ if t.isTAP { // TODO: only if isVETH
+ //runtime.LockOSThread()
+ }
for range t.bufferConsumed {
DoRead:
var n int
@@ -402,9 +423,38 @@ func (t *Wrapper) poll() {
if n >= ethernetFrameSize {
n -= ethernetFrameSize
}
+ ipbuf := t.buffer[PacketStartOffset : PacketStartOffset+n]
if tapDebug {
- t.logf("tap regular frame: %x", t.buffer[PacketStartOffset:PacketStartOffset+n])
+ t.logf("tap regular frame: %x", ipbuf)
+ }
+ if len(ipbuf) == 0 { // hmmmmm
+ continue
}
+ // By default veth attempts to offload checksums to hardware,
+ // which doesn't exist. We fix this with ethtool.
+ // If ethtool isn't installed, we can fix it this way.
+ /*hdr := header.IPv4(ipbuf)
+ //t.logf("tap ipv4 checksum valid: %v", hdr.IsChecksumValid())
+ if hdr.Protocol() == 0x06 { // TCP
+ tcp := header.TCP(ipbuf[header.IPv4MinimumSize:])
+ payload := tcp.Payload()
+ payloadChecksum := header.Checksum(payload, 0)
+ valid := tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload)))
+ //t.logf("tap tcp checksum valid: %v", valid)
+ if !valid {
+ xsum := header.PseudoHeaderChecksum(
+ header.TCPProtocolNumber,
+ hdr.SourceAddress(),
+ hdr.DestinationAddress(),
+ uint16(len(tcp)),
+ )
+ xsum = header.Checksum(payload, xsum)
+ tcp.SetChecksum(0)
+ tcp.SetChecksum(^tcp.CalculateChecksum(xsum))
+ valid = tcp.IsChecksumValid(hdr.SourceAddress(), hdr.DestinationAddress(), payloadChecksum, uint16(len(payload)))
+ //t.logf("tap tcp checksum valid after set: %v", valid)
+ }
+ }*/
}
t.sendOutbound(tunReadResult{data: t.buffer[PacketStartOffset : PacketStartOffset+n], err: err})
}
diff --git a/wgengine/userspace.go b/wgengine/userspace.go
index ca4aecdc6..e33f8f844 100644
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -190,6 +190,10 @@ type Config struct {
// require ethernet headers.
IsTAP bool
+ // IsVETh is whether Tun is actually a veth/xdp (Layer 2) device
+ // that will require ethernet headers.
+ IsVETH bool
+
// Router interfaces the Engine to the OS network stack.
// If nil, a fake Router that does nothing is used.
Router router.Router
@@ -290,6 +294,8 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
var tsTUNDev *tstun.Wrapper
if conf.IsTAP {
tsTUNDev = tstun.WrapTAP(logf, conf.Tun)
+ } else if conf.IsVETH {
+ tsTUNDev = tstun.WrapVETH(logf, conf.Tun)
} else {
tsTUNDev = tstun.Wrap(logf, conf.Tun)
}