diff options
| author | Val <valerie@tailscale.com> | 2023-08-07 23:05:24 +0200 |
|---|---|---|
| committer | Val <valerie@tailscale.com> | 2023-08-07 23:05:24 +0200 |
| commit | 0a6ddae0de54e39240bb4f621fdae34d5917ca6d (patch) | |
| tree | ae90b76336e76f1858adfb1768688f7f5e9f80a4 | |
| parent | 7d18398d7f430b37890845bd7396e6fa4d801ba9 (diff) | |
| download | tailscale-valscale/ptb.tar.xz tailscale-valscale/ptb.zip | |
tmp: introduce wire/user/safe mtu (valscale/ptb)
| -rw-r--r-- | net/tstun/mtu.go | 65 | ||||
| -rw-r--r-- | net/tstun/mtu_test.go | 14 | ||||
| -rw-r--r-- | net/tstun/tun.go | 2 | ||||
| -rw-r--r-- | wgengine/magicsock/endpoint.go | 83 | ||||
| -rw-r--r-- | wgengine/netstack/netstack.go | 4 | ||||
| -rw-r--r-- | wgengine/router/ifconfig_windows.go | 2 |
6 files changed, 115 insertions, 55 deletions
diff --git a/net/tstun/mtu.go b/net/tstun/mtu.go index 2307d47f9..43bbc28ec 100644 --- a/net/tstun/mtu.go +++ b/net/tstun/mtu.go @@ -4,15 +4,66 @@ package tstun import "tailscale.com/envknob" +// There are several kinds of MTU. +// +// On-the-wire MTU: This is what the network device advertises as the +// maximum packet size available above the physical link layer. This +// includes IP headers and everything at a higher level. For Ethernet, +// this is typically 1500 bytes but can be larger or smaller. +// +// Tailscale interface MTU: This is what we advertise to userspace as +// the largest possible packet it can send through the tailscale +// interface. This is 80 bytes lower than the largest interface we +// have available to send things on, which is the size of the headers +// Wireguard adds (80 for IPv6, 60 for IPv4, but we don't know which +// it will be so we always subtract 80). E.g. if the largest interface +// MTU is 1500, we set the tailscale interface MTU to 1420. +// +// Peer MTU: The MTU that we have probed for the path to a specific +// peer's various endpoints. If this is smaller than the advertised +// tailscale interface, and the packet is larger than the peer MTU, +// then we generate ICMP Packet Too Big (IPv6) or Fragmentation Needed +// (IPv4) packets inside tailscale and drop the packet. +// +// Historically, we set the tailscale interface MTU to 1280. This +// means we treated the "on the wire" MTU as 1360. This is now the +// "Safe" value we use when we do not know what the path MTU is. +// +// Internally, we store the peer MTU as the MTU advertised to the user. +// +// We have to call these by different names or it is way way too confusing. +// +// Wire MTU +// User MTU +// Peer MTU +// +// What should happen when we set TS_DEBUG_MTU? It should set the +// interface to that, but we should not assume that the path MTU is +// this. 
So distinguish between what we set the interface MTU to and +// what we assume the path MTU is in the absence of probe information. + const ( - maxMTU uint32 = 65536 - defaultMTU uint32 = 1280 + maxMTU uint32 = 65536 + wireguardOverhead = 80 + DefaultUserMTU uint32 = 1280 + DefaultWireMTU uint32 = 1280 + wireguardOverhead ) -// DefaultMTU returns either the constant default MTU of 1280, or the value set -// in TS_DEBUG_MTU clamped to a maximum of 65536. -func DefaultMTU() uint32 { - // DefaultMTU is the Tailscale default MTU for now. +func userMTUToWireMTU(userMTU uint32) uint32 { + return userMTU + wireguardOverhead +} + +func wireMTUToUserMTU(wireMTU uint32) uint32 { + if wireMTU < wireguardOverhead { + return 0 + } + return wireMTU - wireguardOverhead +} + +// TunMTU returns either the constant default user MTU of 1280, or the +// value set in TS_DEBUG_MTU clamped to a maximum of 65536. +func TunMTU() uint32 { + // TunMTU is the Tailscale default MTU for now. // // wireguard-go defaults to 1420 bytes, which only works if the // "outer" MTU is 1500 bytes. This breaks on DSL connections @@ -21,7 +72,7 @@ func DefaultMTU() uint32 { // 1280 is the smallest MTU allowed for IPv6, which is a sensible // "probably works everywhere" setting until we develop proper PMTU // discovery. 
- tunMTU := defaultMTU + tunMTU := DefaultUserMTU if mtu, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok { mtu := uint32(mtu) if mtu > maxMTU { diff --git a/net/tstun/mtu_test.go b/net/tstun/mtu_test.go index f3aea4697..1e01c5b12 100644 --- a/net/tstun/mtu_test.go +++ b/net/tstun/mtu_test.go @@ -7,22 +7,22 @@ import ( "testing" ) -func TestDefaultMTU(t *testing.T) { +func TestTunMTU(t *testing.T) { orig := os.Getenv("TS_DEBUG_MTU") defer os.Setenv("TS_DEBUG_MTU", orig) os.Setenv("TS_DEBUG_MTU", "") - if DefaultMTU() != 1280 { - t.Errorf("DefaultMTU() = %d, want 1280", DefaultMTU()) + if TunMTU() != 1280 { + t.Errorf("TunMTU() = %d, want 1280", TunMTU()) } os.Setenv("TS_DEBUG_MTU", "9000") - if DefaultMTU() != 9000 { - t.Errorf("DefaultMTU() = %d, want 9000", DefaultMTU()) + if TunMTU() != 9000 { + t.Errorf("TunMTU() = %d, want 9000", TunMTU()) } os.Setenv("TS_DEBUG_MTU", "123456789") - if DefaultMTU() != maxMTU { - t.Errorf("DefaultMTU() = %d, want %d", DefaultMTU(), maxMTU) + if TunMTU() != maxMTU { + t.Errorf("TunMTU() = %d, want %d", TunMTU(), maxMTU) } } diff --git a/net/tstun/tun.go b/net/tstun/tun.go index b31ffa7ca..0373c7400 100644 --- a/net/tstun/tun.go +++ b/net/tstun/tun.go @@ -44,7 +44,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) { } dev, err = createTAP(tapName, bridgeName) } else { - dev, err = tun.CreateTUN(tunName, int(DefaultMTU())) + dev, err = tun.CreateTUN(tunName, int(TunMTU())) } if err != nil { return nil, "", err diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index b74082b65..cec2c4dd0 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -146,9 +146,10 @@ type pongReply struct { pongSrc netip.AddrPort // what they reported they heard } -// mtusToProbe are likely MTUs we might see in the wild. They are used -// by the peer MTU probing code. Set this to a single zero to disable -// path MTU probing. 
+// mtusToProbe are likely on-the-wire MTUs we might see in the +// wild. They are used by the peer MTU probing code. +// +// Set this array to a single zero to disable path MTU probing. var mtusToProbe = [...]int{ //576, // Smallest MTU for IPv4, probably useless? //1124, // An observed max mtu in the wild, maybe 1100 instead? @@ -612,8 +613,8 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo de.recordAndSendDiscoPingLocked(ep, now, purpose, epDisco.key, size) } else { for _, mtu := range mtusToProbe { - de.c.logf("probing mtu %v with disco message size %v", mtu, mtuToPingSize(ep, mtu)) - de.recordAndSendDiscoPingLocked(ep, now, purpose, epDisco.key, mtuToPingSize(ep, mtu)) + de.c.logf("probing mtu %v with disco message size %v", mtu, wireMTUToPingSize(ep, mtu)) + de.recordAndSendDiscoPingLocked(ep, now, purpose, epDisco.key, wireMTUToPingSize(ep, mtu)) } } } @@ -896,34 +897,40 @@ func (de *endpoint) noteConnectivityChange() { } // mtuToPingSize takes a desired on-the-wire MTU and calculates the -// disco ping message size that would produce a packet that is exactly MTU -// bytes in length. +// disco ping message size that would produce a packet that is exactly +// MTU bytes in length including all the headers above the link layer +// (IP and UDP). // -// If mtu is zero, return zero which means don't pad the ping packet at all. -func mtuToPingSize(ep netip.AddrPort, mtu int) int { +// Zero return value means don't pad the ping packet at all. An mtu +// argument of zero or less than the necessary header length results +// in a zero return value. 
+func wireMTUToPingSize(ep netip.AddrPort, mtu int) int { if mtu == 0 { return 0 } - size := mtu headerLen := ipv4.HeaderLen if ep.Addr().Is6() { headerLen = ipv6.HeaderLen } headerLen += 8 // UDP header length - size -= headerLen - if size < 0 { + if mtu < headerLen { return 0 } - return size + return (mtu - headerLen) } -// pingSizeToMTU calculates the minimum path MTU that would permit a -// disco ping message of sp.size to reach this endpoint. sp.size is -// the length of the entire disco message. -func pingSizeToMTU(sp sentPing) int { +// pingSizeToWireMTU calculates the minimum wire MTU that would permit the +// specified disco ping message to reach this endpoint. The size +// recorded in sp.size does not include the IP/UDP headers at the +// beginning of the disco message. +// +// If sp.size is zero, that means the ping was not padded at all and +// the MTU was not tested, in which case return the largest safe +// on-the-wire MTU. +func pingSizeToWireMTU(sp sentPing) int { mtu := sp.size if mtu == 0 { - return int(tstun.DefaultMTU()) + return int(tstun.DefaultWireMTU) } headerLen := ipv4.HeaderLen if sp.to.Addr().Is6() { @@ -933,9 +940,9 @@ func pingSizeToMTU(sp sentPing) int { return mtu + headerLen } -// pingSizeToExternalMTU calculates the path MTU as perceived by the -// layer above Tailscale - that is, how much room for data there is -// after accounting for WireGuard overhead. +// pingSizeToUserMTU calculates the minimum MTU on the tailscale +// interface that would permit this ping to reach this endpoint. It is +// the size of the on-the-wire MTU minus the Wireguard overhead: // // - 20-byte IPv4 header or 40 byte IPv6 header // - 8-byte UDP header @@ -943,21 +950,23 @@ func pingSizeToMTU(sp sentPing) int { // - 4-byte key index // - 8-byte nonce // - 16-byte authentication tag +// +// We have to assume IPv6 because we give the same number to everyone +// when we set the external interface MTU. 
const wgHeaderLen = 4 + 4 + 8 + 16 -func pingSizeToExternalMTU(sp sentPing) int { - mtu := sp.size - if mtu == 0 { - mtu = int(tstun.DefaultMTU()) +func pingSizeToUserMTU(sp sentPing) int { + size := sp.size + if size == 0 { + return int(tstun.DefaultUserMTU) } // The size stored in the sentPing already has the IP/UDP // headers removed. Now remove the Wireguard overhead. - mtu -= wgHeaderLen - if mtu < 0 { - mtu = 0 + if size < wgHeaderLen { + return 0 } - return mtu + return size - wgHeaderLen } // Update MTU-related metrics. Should be called with Conn.mu held. @@ -965,7 +974,7 @@ func updateMTUMetricsLocked(sp sentPing, logf logger.Logf) { if sp.size == 0 { return } - mtu := pingSizeToExternalMTU(sp) + mtu := pingSizeToUserMTU(sp) if metricHighestPeerMTU.Value() < int64(mtu) { metricHighestPeerMTU.Set(int64(mtu)) logf("\n\n\nhighest MTU %v\n\n\n", mtu) @@ -977,29 +986,29 @@ func (c *Conn) PathMTU(dst netip.Addr) int { // TODO(s): this is method is pretty expensive. Reduce lookups before // removing the envknob guard. if !debugPMTUD() { - return int(tstun.DefaultMTU()) + return int(tstun.TunMTU()) } peer, ok := c.netMap.PeerByTailscaleIP(dst) if !ok { - return int(tstun.DefaultMTU()) + return int(tstun.TunMTU()) } c.mu.Lock() defer c.mu.Unlock() if c.closed { - return int(tstun.DefaultMTU()) + return int(tstun.TunMTU()) } ep, ok := c.peerMap.endpointForNodeKey(peer.Key) if !ok { - return int(tstun.DefaultMTU()) + return int(tstun.TunMTU()) } now := mono.Now() if !ep.bestAddr.AddrPort.IsValid() || now.After(ep.trustBestAddrUntil) { // We have not done the disco pings yet. ep.send() will kick that off // down the line. 
- return int(tstun.DefaultMTU()) + return int(tstun.TunMTU()) } return ep.bestAddr.mtu @@ -1044,7 +1053,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip } if sp.purpose != pingHeartbeat { - de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v mtu=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToMTU(sp), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { + de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v mtu=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToWireMTU(sp), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { if sp.to != src { fmt.Fprintf(bw, " ping.to=%v", sp.to) } @@ -1060,7 +1069,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip // Promote this pong response to our current best address if it's lower latency. // TODO(bradfitz): decide how latency vs. 
preference order affects decision if !isDerp { - thisPong := addrQuality{sp.to, latency, pingSizeToMTU(sp)} + thisPong := addrQuality{sp.to, latency, pingSizeToWireMTU(sp)} if betterAddr(thisPong, de.bestAddr) { de.c.logf("\n\n\nSETTING BEST MTU %v\n\n\n", thisPong.mtu) de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.mtu) diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index 0a27f97f2..f2c7e14e7 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -179,7 +179,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi if tcpipErr != nil { return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr) } - linkEP := channel.New(512, tstun.DefaultMTU(), "") + linkEP := channel.New(512, tstun.TunMTU(), "") if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil { return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem) } @@ -1044,7 +1044,7 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) { func (ns *Impl) handleMagicDNSUDP(srcAddr netip.AddrPort, c *gonet.UDPConn) { // In practice, implementations are advised not to exceed 512 bytes // due to fragmenting. Just to be sure, we bump all the way to the MTU. - var maxUDPReqSize = tstun.DefaultMTU() + var maxUDPReqSize = tstun.TunMTU() // Packets are being generated by the local host, so there should be // very, very little latency. 
150ms was chosen as something of an upper // bound on resource usage, while hopefully still being long enough for diff --git a/wgengine/router/ifconfig_windows.go b/wgengine/router/ifconfig_windows.go index 1cd01eee1..7b3aca423 100644 --- a/wgengine/router/ifconfig_windows.go +++ b/wgengine/router/ifconfig_windows.go @@ -241,7 +241,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy")) func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) { - var mtu = tstun.DefaultMTU() + var mtu = tstun.TunMTU() luid := winipcfg.LUID(tun.LUID()) iface, err := interfaceFromLUID(luid, // Issue 474: on early boot, when the network is still |
