diff options
| author | salman <salman@tailscale.com> | 2023-06-27 18:26:32 +0000 |
|---|---|---|
| committer | salman <salman@tailscale.com> | 2023-07-27 12:35:23 +0000 |
| commit | 737a21f07821a9fe0b522ac4ac77109a164a5e64 (patch) | |
| tree | fb77bb6c331b3a5dba3fe0f2f624dd79e2765624 | |
| parent | 49f24034eaf4d0f779b0f0e0673b77a794b1a3c3 (diff) | |
| download | tailscale-s/pmtud.tar.xz tailscale-s/pmtud.zip | |
WIP wgengine: inject ICMP PTB for oversize packets (branch: s/pmtud)
| -rw-r--r-- | net/packet/icmp4.go | 3 | ||||
| -rw-r--r-- | net/packet/icmp6.go | 1 | ||||
| -rw-r--r-- | wgengine/magicsock/magicsock.go | 50 | ||||
| -rw-r--r-- | wgengine/userspace.go | 67 |
4 files changed, 112 insertions, 9 deletions
diff --git a/net/packet/icmp4.go b/net/packet/icmp4.go index 730239abf..9375aebd6 100644 --- a/net/packet/icmp4.go +++ b/net/packet/icmp4.go @@ -45,7 +45,8 @@ func (t ICMP4Type) String() string { type ICMP4Code uint8 const ( - ICMP4NoCode ICMP4Code = 0 + ICMP4NoCode ICMP4Code = 0x00 + ICMP4FragmentationNeeded = 0x04 ) // ICMP4Header is an IPv4+ICMPv4 header. diff --git a/net/packet/icmp6.go b/net/packet/icmp6.go index 518746b55..d74667c6d 100644 --- a/net/packet/icmp6.go +++ b/net/packet/icmp6.go @@ -20,6 +20,7 @@ type ICMP6Type uint8 const ( ICMP6Unreachable ICMP6Type = 1 + ICMP6PacketTooBig ICMP6Type = 2 ICMP6TimeExceeded ICMP6Type = 3 ICMP6EchoRequest ICMP6Type = 128 ICMP6EchoReply ICMP6Type = 129 diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 797f2a104..81ebb8f14 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -56,6 +56,7 @@ import ( "tailscale.com/net/sockstats" "tailscale.com/net/stun" "tailscale.com/net/tsaddr" + "tailscale.com/net/tstun" "tailscale.com/syncs" "tailscale.com/tailcfg" "tailscale.com/tstime" @@ -2090,14 +2091,14 @@ var debugIPv4DiscoPingPenalty = envknob.RegisterDuration("TS_DISCO_PONG_IPV4_DEL // 3. Send a single ping with ./tool/go run ./cmd/tailscale ping --mtu=1000 <host> var usefulMtus = [...]int{ - 0, - //576, // Smallest MTU for IPv4, probably useless? - //1124, // An observed max mtu in the wild, maybe 1100 instead? - //1280, // Smallest MTU for IPv6, current default - //1400, // A little less, for tunnels or such - //1500, // Most common real world MTU - //8000, // Some jumbo frames are this size - //9000, // Most jumbo frames are this size or slightly larger + //0, + 576, // Smallest MTU for IPv4, probably useless? + 1124, // An observed max mtu in the wild, maybe 1100 instead? 
+ 1280, // Smallest MTU for IPv6, current default + 1400, // A little less, for tunnels or such + 1500, // Most common real world MTU + 8000, // Some jumbo frames are this size + 9000, // Most jumbo frames are this size or slightly larger } // sendDiscoMessage sends discovery message m to dstDisco at dst. @@ -3321,6 +3322,39 @@ func (c *Conn) shouldDoPeriodicReSTUNLocked() bool { return true } +// PathMTU returns the path MTU to the peer at dst (tailscale address). +func (c *Conn) PathMTU(dst netip.Addr) int { + // TODO(s): this method is pretty expensive. Reduce lookups before + // removing the envknob guard. + if !debugPMTUD() { + return int(tstun.DefaultMTU()) + } + + peer, ok := c.netMap.PeerByTailscaleIP(dst) + if !ok { + return int(tstun.DefaultMTU()) + } + + c.mu.Lock() + defer c.mu.Unlock() + if c.closed { + return int(tstun.DefaultMTU()) + } + ep, ok := c.peerMap.endpointForNodeKey(peer.Key) + if !ok { + return int(tstun.DefaultMTU()) + } + + now := mono.Now() + if !ep.bestAddr.AddrPort.IsValid() || now.After(ep.trustBestAddrUntil) { + // We have not done the disco pings yet. ep.send() will kick that off + // down the line. + return int(tstun.DefaultMTU()) + } + + return ep.bestAddr.mtu +} + func (c *Conn) onPortMapChanged() { c.ReSTUN("portmap-changed") } // ReSTUN triggers an address discovery. 
diff --git a/wgengine/userspace.go b/wgengine/userspace.go index e861d029c..1f1d7d797 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -7,6 +7,7 @@ import ( "bufio" "context" crand "crypto/rand" + "encoding/binary" "errors" "fmt" "io" @@ -455,6 +456,60 @@ func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response { return filter.Accept } +var debugPMTUD = envknob.RegisterBool("TS_DEBUG_PMTUD") + +func (e *userspaceEngine) injectICMPPTB(p *packet.Parsed, mtu int) { + var icmph packet.Header + var payload []byte + if p.Src.Addr().Is4() { + // From https://www.ietf.org/rfc/rfc1191.html#section-4 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Type | Code | Checksum | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | unused = 0 | MTU | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Internet Header + 64 bits of Original Datagram Data | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + icmph = packet.ICMP4Header{ + IP4Header: packet.IP4Header{ + IPProto: ipproto.ICMPv4, + Src: p.Dst.Addr(), + Dst: p.Src.Addr(), + }, + Type: packet.ICMP4Unreachable, + Code: packet.ICMP4FragmentationNeeded, + } + payload = make([]byte, 4+20+8) + binary.BigEndian.PutUint32(payload, uint32(mtu)) + copy(payload[4:], p.Buffer()[:len(payload)-4]) + } else { + // https://www.ietf.org/rfc/rfc4443.html#section-3.2 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | Type | Code | Checksum | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | MTU | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // | As much of invoking packet | + // + as possible without the ICMPv6 packet + + // | exceeding the minimum IPv6 MTU [IPv6] | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + icmph = packet.ICMP6Header{ + IP6Header: packet.IP6Header{ + IPProto: ipproto.ICMPv6, + 
Src: p.Dst.Addr(), + Dst: p.Src.Addr(), + }, + Type: packet.ICMP6PacketTooBig, + Code: packet.ICMP6NoCode, + } + // RFC says add as much of invoking packet, but headers should be enough. + payload = make([]byte, 4+40+8) + binary.BigEndian.PutUint32(payload, uint32(mtu)) + copy(payload[4:], p.Buffer()[:len(payload)-4]) + } + e.tundev.InjectInboundCopy(packet.Generate(icmph, payload)) +} + // handleLocalPackets inspects packets coming from the local network // stack, and intercepts any packets that should be handled by // tailscaled directly. Other packets are allowed to proceed into the @@ -476,6 +531,18 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) } } + if debugPMTUD() { + const tailscaleOverhead = 40 + 8 + 32 // IP + UDP + WireGuard + // TODO IPv4 is 20 bytes but IPv6 is 40 - move this into magicsock where we know + // which we're using. + // TODO consts to avoid numbers. + pmtu := e.magicConn.PathMTU(p.Dst.Addr()) + if len(p.Buffer())+tailscaleOverhead > pmtu { + e.injectICMPPTB(p, pmtu) + return filter.Drop + } + } + return filter.Accept } |
