summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: salman <salman@tailscale.com> 2023-06-27 18:26:32 +0000
committer: salman <salman@tailscale.com> 2023-07-27 12:35:23 +0000
commit: 737a21f07821a9fe0b522ac4ac77109a164a5e64 (patch)
tree: fb77bb6c331b3a5dba3fe0f2f624dd79e2765624
parent: 49f24034eaf4d0f779b0f0e0673b77a794b1a3c3 (diff)
download: tailscale-s/pmtud.tar.xz
tailscale-s/pmtud.zip
WIP wgengine: inject ICMP PTB for oversize packets (branch: s/pmtud)
-rw-r--r--net/packet/icmp4.go3
-rw-r--r--net/packet/icmp6.go1
-rw-r--r--wgengine/magicsock/magicsock.go50
-rw-r--r--wgengine/userspace.go67
4 files changed, 112 insertions, 9 deletions
diff --git a/net/packet/icmp4.go b/net/packet/icmp4.go
index 730239abf..9375aebd6 100644
--- a/net/packet/icmp4.go
+++ b/net/packet/icmp4.go
@@ -45,7 +45,8 @@ func (t ICMP4Type) String() string {
type ICMP4Code uint8
const (
- ICMP4NoCode ICMP4Code = 0
+ ICMP4NoCode ICMP4Code = 0x00
+ ICMP4FragmentationNeeded ICMP4Code = 0x04 // Destination Unreachable code 4: fragmentation needed and DF set
)
// ICMP4Header is an IPv4+ICMPv4 header.
diff --git a/net/packet/icmp6.go b/net/packet/icmp6.go
index 518746b55..d74667c6d 100644
--- a/net/packet/icmp6.go
+++ b/net/packet/icmp6.go
@@ -20,6 +20,7 @@ type ICMP6Type uint8
const (
ICMP6Unreachable ICMP6Type = 1
+ ICMP6PacketTooBig ICMP6Type = 2
ICMP6TimeExceeded ICMP6Type = 3
ICMP6EchoRequest ICMP6Type = 128
ICMP6EchoReply ICMP6Type = 129
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 797f2a104..81ebb8f14 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -56,6 +56,7 @@ import (
"tailscale.com/net/sockstats"
"tailscale.com/net/stun"
"tailscale.com/net/tsaddr"
+ "tailscale.com/net/tstun"
"tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
@@ -2090,14 +2091,14 @@ var debugIPv4DiscoPingPenalty = envknob.RegisterDuration("TS_DISCO_PONG_IPV4_DEL
// 3. Send a single ping with ./tool/go run ./cmd/tailscale ping --mtu=1000 <host>
var usefulMtus = [...]int{
- 0,
- //576, // Smallest MTU for IPv4, probably useless?
- //1124, // An observed max mtu in the wild, maybe 1100 instead?
- //1280, // Smallest MTU for IPv6, current default
- //1400, // A little less, for tunnels or such
- //1500, // Most common real world MTU
- //8000, // Some jumbo frames are this size
- //9000, // Most jumbo frames are this size or slightly larger
+ //0,
+ 576, // Largest datagram every IPv4 host must accept (RFC 791), probably useless?
+ 1124, // An observed max MTU in the wild, maybe 1100 instead?
+ 1280, // Smallest MTU for IPv6, current default
+ 1400, // A little less, for tunnels or such
+ 1500, // Most common real world MTU
+ 8000, // Some jumbo frames are this size
+ 9000, // Most jumbo frames are this size or slightly larger
}
// sendDiscoMessage sends discovery message m to dstDisco at dst.
@@ -3321,6 +3322,39 @@ func (c *Conn) shouldDoPeriodicReSTUNLocked() bool {
return true
}
+// PathMTU returns the path MTU to the peer at dst (a Tailscale address).
+//
+// It returns tstun.DefaultMTU() when the PMTUD debug knob is off, when c is
+// closed, when dst does not map to a known peer or endpoint, or when the
+// endpoint has no currently trusted best address.
+func (c *Conn) PathMTU(dst netip.Addr) int {
+	defaultMTU := int(tstun.DefaultMTU())
+
+	// TODO(s): this method is pretty expensive. Reduce lookups before
+	// removing the envknob guard.
+	if !debugPMTUD() {
+		return defaultMTU
+	}
+
+	// Take c.mu before the netmap lookup too, so that all Conn state used
+	// here is read under the lock.
+	// NOTE(review): assumes c.netMap is among the fields guarded by c.mu — confirm.
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if c.closed {
+		return defaultMTU
+	}
+	peer, ok := c.netMap.PeerByTailscaleIP(dst)
+	if !ok {
+		return defaultMTU
+	}
+	ep, ok := c.peerMap.endpointForNodeKey(peer.Key)
+	if !ok {
+		return defaultMTU
+	}
+
+	// NOTE(review): ep.bestAddr and ep.trustBestAddrUntil are read while
+	// holding only c.mu; if the endpoint has its own lock for these fields,
+	// it should be held here — confirm.
+	now := mono.Now()
+	if !ep.bestAddr.AddrPort.IsValid() || now.After(ep.trustBestAddrUntil) {
+		// We have not done the disco pings yet. ep.send() will kick that off
+		// down the line.
+		return defaultMTU
+	}
+
+	return ep.bestAddr.mtu
+}
+
func (c *Conn) onPortMapChanged() { c.ReSTUN("portmap-changed") }
// ReSTUN triggers an address discovery.
diff --git a/wgengine/userspace.go b/wgengine/userspace.go
index e861d029c..1f1d7d797 100644
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -7,6 +7,7 @@ import (
"bufio"
"context"
crand "crypto/rand"
+ "encoding/binary"
"errors"
"fmt"
"io"
@@ -455,6 +456,60 @@ func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response {
return filter.Accept
}
+// debugPMTUD is the TS_DEBUG_PMTUD envknob; it gates the experimental
+// path MTU handling in handleLocalPackets.
+var debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD")
+
+// injectICMPPTB injects an ICMP "packet too big" error for p into the local
+// network stack via e.tundev. If p's source address is IPv4 the error is an
+// ICMPv4 Destination Unreachable with code Fragmentation Needed; otherwise it
+// is an ICMPv6 Packet Too Big. The error is addressed from p.Dst to p.Src,
+// carries mtu in its MTU field, and quotes the leading bytes of p.
+func (e *userspaceEngine) injectICMPPTB(p *packet.Parsed, mtu int) {
+	var icmph packet.Header
+	// quoteLen is the number of leading bytes of the offending packet that
+	// are echoed back after the 4-byte MTU word.
+	var quoteLen int
+	if p.Src.Addr().Is4() {
+		// From https://www.ietf.org/rfc/rfc1191.html#section-4
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |     Type      |     Code      |           Checksum            |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |          unused = 0           |              MTU              |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |      Internet Header + 64 bits of Original Datagram Data      |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		icmph = packet.ICMP4Header{
+			IP4Header: packet.IP4Header{
+				IPProto: ipproto.ICMPv4,
+				Src:     p.Dst.Addr(),
+				Dst:     p.Src.Addr(),
+			},
+			Type: packet.ICMP4Unreachable,
+			Code: packet.ICMP4FragmentationNeeded,
+		}
+		quoteLen = 20 + 8 // option-less IPv4 header + 64 bits of data
+	} else {
+		// https://www.ietf.org/rfc/rfc4443.html#section-3.2
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |     Type      |     Code      |           Checksum            |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |                              MTU                              |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		// |                  As much of invoking packet                   |
+		// +             as possible without the ICMPv6 packet             +
+		// |             exceeding the minimum IPv6 MTU [IPv6]             |
+		// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+		icmph = packet.ICMP6Header{
+			IP6Header: packet.IP6Header{
+				IPProto: ipproto.ICMPv6,
+				Src:     p.Dst.Addr(),
+				Dst:     p.Src.Addr(),
+			},
+			Type: packet.ICMP6PacketTooBig,
+			Code: packet.ICMP6NoCode,
+		}
+		// RFC says add as much of invoking packet, but headers should be enough.
+		quoteLen = 40 + 8 // fixed IPv6 header + 8 bytes of data
+	}
+
+	// Never quote more bytes than the packet holds: slicing p.Buffer() past
+	// its length would panic or echo stale bytes from the backing array.
+	orig := p.Buffer()
+	if len(orig) < quoteLen {
+		quoteLen = len(orig)
+	}
+
+	// The payload is the 4-byte word after the ICMP checksum (the MTU,
+	// big-endian) followed by the quoted bytes.
+	payload := make([]byte, 4+quoteLen)
+	binary.BigEndian.PutUint32(payload, uint32(mtu))
+	copy(payload[4:], orig[:quoteLen])
+	e.tundev.InjectInboundCopy(packet.Generate(icmph, payload))
+}
+
// handleLocalPackets inspects packets coming from the local network
// stack, and intercepts any packets that should be handled by
// tailscaled directly. Other packets are allowed to proceed into the
@@ -476,6 +531,18 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper)
}
}
+ if debugPMTUD() {
+ const tailscaleOverhead = 40 + 8 + 32 // IP + UDP + WireGuard
+ // TODO IPv4 is 20 bytes but IPv6 is 40 - move this into magicsock where we know
+ // which we're using.
+ // TODO consts to avoid numbers.
+ pmtu := e.magicConn.PathMTU(p.Dst.Addr())
+ if len(p.Buffer())+tailscaleOverhead > pmtu {
+ e.injectICMPPTB(p, pmtu)
+ return filter.Drop
+ }
+ }
+
return filter.Accept
}