summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--wgengine/netstack/netstack.go53
-rw-r--r--wgengine/netstack/netstack_test.go184
2 files changed, 223 insertions, 14 deletions
diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go
index 9f65b50c0..11900edbf 100644
--- a/wgengine/netstack/netstack.go
+++ b/wgengine/netstack/netstack.go
@@ -845,20 +845,27 @@ func (ns *Impl) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper, gro *gro.
serviceName, isVIPServiceIP := ns.atomicIPVIPServiceMap.Load()[dst]
switch {
case dst == serviceIP || dst == serviceIPv6:
- // We want to intercept some traffic to the "service IP" (e.g.
- // 100.100.100.100 for IPv4). However, of traffic to the
- // service IP, we only care about UDP 53, and TCP on port 53,
- // 80, and 8080.
- switch p.IPProto {
- case ipproto.TCP:
- if port := p.Dst.Port(); port != 53 && port != 80 && port != 8080 && !ns.isLoopbackPort(port) {
- return filter.Accept, gro
- }
- case ipproto.UDP:
- if port := p.Dst.Port(); port != 53 && !ns.isLoopbackPort(port) {
- return filter.Accept, gro
- }
- }
+ // Traffic to the Tailscale service IP (100.100.100.100 /
+ // fd7a:115c:a1e0::53) is always terminated locally on this
+ // node; it must never be forwarded out over WireGuard to a
+ // peer. Netstack's TCP/UDP acceptors handle the ports we
+ // actually serve (UDP 53 MagicDNS, TCP 53/80/8080 for DNS,
+ // the web client, and Taildrive, plus any debug loopback
+ // port). Other ports are rejected cleanly by netstack: UDP
+ // closes the endpoint in acceptUDP, and TCP is RST'd by
+ // acceptTCP's hittingServiceIP guard.
+ //
+ // Previously we returned filter.Accept for TCP/UDP on any
+ // other port, which let the packet fall through to the ACL
+ // filter and ultimately wireguard-go, where no peer owns the
+ // quad-100 AllowedIP. That produced noisy "open-conn-track:
+ // timeout opening ...; no associated peer node" log lines
+ // (e.g. for stray traffic to 100.100.100.100:853 / DoT) and
+ // leaked quad-100 packets onto the tailnet.
+ //
+ // We now unconditionally absorb quad-100 into netstack here,
+ // regardless of IP protocol or port, so such traffic never
+ // reaches the conntrack / peer-routing layers.
case isVIPServiceIP:
// returns all active VIP services in a set, since the IPVIPServiceMap
// contains inactive service IPs when node hosts the service, we need to
@@ -1654,6 +1661,24 @@ func (ns *Impl) acceptTCP(r *tcp.ForwarderRequest) {
} else {
dialIP = ipv4Loopback
}
+ case hittingServiceIP:
+ // TCP to the Tailscale service IP on a port we don't serve
+ // (anything other than DNS/53, web client/80, Taildrive/8080,
+ // or the debug loopback port handled above). handleLocalPackets
+ // absorbs all quad-100 traffic into netstack to prevent it
+ // from leaking to WireGuard peers as noisy "open-conn-track:
+ // timeout opening ...; no associated peer node" log lines
+ // (see the comment there).
+ //
+ // Without this explicit guard, execution would fall through
+ // to the isTailscaleIP case below (quad-100 is in the
+ // tailscale IP range), rewriting the dial target to
+ // 127.0.0.1:<port> and forwardTCP'ing the connection onto
+ // whatever random service happens to be listening on the
+ // host's loopback at that port. Reject cleanly with a RST
+ // here instead.
+ r.Complete(true) // sends a RST
+ return
case isTailscaleIP:
dialIP = ipv4Loopback
}
diff --git a/wgengine/netstack/netstack_test.go b/wgengine/netstack/netstack_test.go
index 32289d13b..4f920c8e0 100644
--- a/wgengine/netstack/netstack_test.go
+++ b/wgengine/netstack/netstack_test.go
@@ -953,6 +953,7 @@ func TestHandleLocalPackets(t *testing.T) {
impl.lb.SetIPServiceMappingsForTest(IPServiceMap)
t.Run("ShouldHandleServiceIP", func(t *testing.T) {
+ t.Parallel()
pkt := &packet.Parsed{
IPVersion: 4,
IPProto: ipproto.TCP,
@@ -965,7 +966,94 @@ func TestHandleLocalPackets(t *testing.T) {
t.Errorf("got filter outcome %v, want filter.DropSilently", resp)
}
})
+ // Any port on the quad-100 service IP must be absorbed locally by
+ // netstack and never leak out to the WireGuard / peer-routing
+ // layers. Historically we only intercepted specific ports (UDP 53
+ // and TCP 53/80/8080), causing stray traffic to other ports such
+ // as 100.100.100.100:853 (DoT) to time out in wireguard-go and
+ // produce "open-conn-track: timeout opening ...; no associated
+ // peer node" log spam. See the handleLocalPackets comment.
+ quad100LeakCases := []struct {
+ name string
+ proto ipproto.Proto
+ dst string
+ }{
+ {"TCP-853-DoT-v4", ipproto.TCP, "100.100.100.100:853"},
+ {"TCP-443-DoH-v4", ipproto.TCP, "100.100.100.100:443"},
+ {"TCP-9000-stray-v4", ipproto.TCP, "100.100.100.100:9000"},
+ {"UDP-853-DoQ-v4", ipproto.UDP, "100.100.100.100:853"},
+ {"UDP-443-v4", ipproto.UDP, "100.100.100.100:443"},
+ {"TCP-853-DoT-v6", ipproto.TCP, "[fd7a:115c:a1e0::53]:853"},
+ {"UDP-443-v6", ipproto.UDP, "[fd7a:115c:a1e0::53]:443"},
+ }
+ for _, tc := range quad100LeakCases {
+ t.Run("ShouldNotLeakQuad100_"+tc.name, func(t *testing.T) {
+ t.Parallel()
+ dst := netip.MustParseAddrPort(tc.dst)
+ ipVersion := uint8(4)
+ if dst.Addr().Is6() {
+ ipVersion = 6
+ }
+ src := "127.0.0.1:9999"
+ if ipVersion == 6 {
+ src = "[::1]:9999"
+ }
+ pkt := &packet.Parsed{
+ IPVersion: ipVersion,
+ IPProto: tc.proto,
+ Src: netip.MustParseAddrPort(src),
+ Dst: dst,
+ }
+ if tc.proto == ipproto.TCP {
+ pkt.TCPFlags = packet.TCPSyn
+ }
+ resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
+ if resp != filter.DropSilently {
+ t.Errorf("quad-100 %s packet leaked: got filter outcome %v, want filter.DropSilently", tc.name, resp)
+ }
+ })
+ }
+ // Exhaustive sweep of all ports for both transport protocols and
+ // both IP versions, confirming no port leaks. The quad-100 branch
+ // of handleLocalPackets is port-independent by construction; this
+ // test serves as a regression guard against accidental port-based
+ // exemptions slipping back in.
+ t.Run("ShouldNotLeakQuad100_AllPorts", func(t *testing.T) {
+ t.Parallel()
+ protos := []ipproto.Proto{ipproto.TCP, ipproto.UDP}
+ dsts := []netip.Addr{
+ netip.MustParseAddr("100.100.100.100"),
+ netip.MustParseAddr("fd7a:115c:a1e0::53"),
+ }
+ for _, proto := range protos {
+ for _, dstAddr := range dsts {
+ ipVersion := uint8(4)
+ srcStr := "127.0.0.1:9999"
+ if dstAddr.Is6() {
+ ipVersion = 6
+ srcStr = "[::1]:9999"
+ }
+ src := netip.MustParseAddrPort(srcStr)
+ for port := 1; port <= 65535; port++ {
+ pkt := &packet.Parsed{
+ IPVersion: ipVersion,
+ IPProto: proto,
+ Src: src,
+ Dst: netip.AddrPortFrom(dstAddr, uint16(port)),
+ }
+ if proto == ipproto.TCP {
+ pkt.TCPFlags = packet.TCPSyn
+ }
+ resp, _ := impl.handleLocalPackets(pkt, impl.tundev, nil)
+ if resp != filter.DropSilently {
+ t.Fatalf("port=%d proto=%v dst=%v: got %v, want filter.DropSilently", port, proto, dstAddr, resp)
+ }
+ }
+ }
+ }
+ })
t.Run("ShouldHandle4via6", func(t *testing.T) {
+ t.Parallel()
pkt := &packet.Parsed{
IPVersion: 6,
IPProto: ipproto.TCP,
@@ -988,6 +1076,7 @@ func TestHandleLocalPackets(t *testing.T) {
}
})
t.Run("ShouldHandleLocalTailscaleServices", func(t *testing.T) {
+ t.Parallel()
pkt := &packet.Parsed{
IPVersion: 4,
IPProto: ipproto.TCP,
@@ -1001,6 +1090,7 @@ func TestHandleLocalPackets(t *testing.T) {
}
})
t.Run("OtherNonHandled", func(t *testing.T) {
+ t.Parallel()
pkt := &packet.Parsed{
IPVersion: 6,
IPProto: ipproto.TCP,
@@ -1023,6 +1113,100 @@ func TestHandleLocalPackets(t *testing.T) {
})
}
+// TestQuad100UnservedTCPPortDoesNotForward verifies that a TCP SYN to the
+// Tailscale service IP (100.100.100.100) on a port we don't serve is
+// absorbed by netstack and rejected cleanly, without triggering the
+// outbound forwardTCP dialer.
+//
+// handleLocalPackets now absorbs all quad-100 traffic regardless of
+// port to prevent it leaking to WireGuard peers (which produced noisy
+// "open-conn-track: timeout opening ...; no associated peer node" log
+// lines). That leaves acceptTCP responsible for rejecting connections
+// to ports we don't handle; without an explicit guard, execution would
+// fall through to the isTailscaleIP case (quad-100 is in the tailscale
+// range), rewriting the dial target to 127.0.0.1:<port> and forwarding
+// the connection to whatever random service happened to be listening
+// on the host's loopback at that port.
+//
+// This test asserts that the forward dialer is NOT invoked for quad-100
+// SYNs on unserved ports; the guard in acceptTCP must RST instead.
+func TestQuad100UnservedTCPPortDoesNotForward(t *testing.T) {
+ impl := makeNetstack(t, func(impl *Impl) {
+ impl.ProcessSubnets = false
+ impl.ProcessLocalIPs = false
+ impl.atomicIsLocalIPFunc.Store(looksLikeATailscaleSelfAddress)
+ })
+
+ dialFn, gotConn := makeHangDialer(t)
+ impl.forwardDialFunc = dialFn
+
+ // Use a client IP in the CGNAT range so shouldProcessInbound-adjacent
+ // code paths treat this as plausibly-peer-sourced traffic, matching
+ // what a real stray quad-100 probe from the host OS would look like.
+ client := netip.MustParseAddr("100.101.102.103")
+ quad100 := tsaddr.TailscaleServiceIP()
+
+ // 853 is DoT, the specific case called out in the original bug
+ // report ("conntrack error no peer found for 100.100.100.100:853").
+ // Before the fix, port 853 (and any non-{53,80,8080} port) leaked
+ // out to WireGuard; after the fix it is absorbed here and must NOT
+ // trigger forwardTCP.
+ pkt := tcp4syn(t, client, quad100, 1234, 853)
+ var parsed packet.Parsed
+ parsed.Decode(pkt)
+
+ resp, _ := impl.handleLocalPackets(&parsed, impl.tundev, nil)
+ if resp != filter.DropSilently {
+ t.Fatalf("handleLocalPackets for quad-100:853: got %v, want filter.DropSilently", resp)
+ }
+
+ // acceptTCP runs asynchronously in the gVisor TCP dispatcher after
+ // handleLocalPackets injects the packet into netstack. Use the
+ // in-flight connection counter as a deterministic synchronization
+ // point: wrapTCPProtocolHandler increments connsInFlightByClient
+ // when the dispatcher hands the connection off to acceptTCP, and
+ // acceptTCP's deferred decrementInFlightTCPForward decrements it
+ // on return.
+ //
+ // On the green path (RST guard fires), acceptTCP returns promptly
+ // and the counter reaches 0. On the red path (fall-through to
+ // forwardTCP), acceptTCP blocks inside the forwardDialFunc call —
+ // makeHangDialer signals gotConn on entry (buffered, non-blocking)
+ // and then blocks forever — so the counter never reaches 0 but
+ // gotConn fires synchronously from the dispatcher goroutine. A
+ // select on both races those outcomes without real-time padding.
+ //
+ // testing/synctest is not usable here: gVisor's sleep package calls
+ // the runtime's gopark directly rather than via the standard
+ // library, so synctest.Wait() cannot observe those goroutines
+ // becoming durably blocked and hangs indefinitely.
+ inFlightZero := make(chan struct{})
+ go func() {
+ for {
+ impl.mu.Lock()
+ n := impl.connsInFlightByClient[client]
+ impl.mu.Unlock()
+ if n == 0 {
+ close(inFlightZero)
+ return
+ }
+ time.Sleep(time.Millisecond)
+ }
+ }()
+
+ select {
+ case <-gotConn:
+ t.Fatalf("forwardDialFunc was called for quad-100:853; acceptTCP fell through to forwardTCP instead of sending RST. This means stray traffic to quad-100 on unserved ports is being redirected to the host's loopback at the same port.")
+ case <-inFlightZero:
+ // acceptTCP returned cleanly; the RST guard fired.
+ case <-time.After(5 * time.Second):
+ // Safety net so a regression in the in-flight counter plumbing
+ // doesn't hang the whole test run; both outcomes above should
+ // fire within milliseconds in practice.
+ t.Fatal("timed out waiting for acceptTCP to dispatch quad-100:853 SYN")
+ }
+}
+
func TestShouldSendToHost(t *testing.T) {
var (
selfIP4 = netip.MustParseAddr("100.64.1.2")