diff options
| author | Brad Fitzpatrick <bradfitz@tailscale.com> | 2026-04-13 20:31:35 +0000 |
|---|---|---|
| committer | Brad Fitzpatrick <brad@danga.com> | 2026-04-13 15:24:35 -0700 |
| commit | 50b8cfbde2fc548a6c5b33b3c7021997ca14a0d9 (patch) | |
| tree | 97c3fd3dbef9f1a96c60fc56de3a0ff0728eed2a | |
| parent | 6500d3c3f82b706c9164fb053ab1b22e913f7f13 (diff) | |
| download | tailscale-50b8cfbde2fc548a6c5b33b3c7021997ca14a0d9.tar.xz tailscale-50b8cfbde2fc548a6c5b33b3c7021997ca14a0d9.zip | |
wgengine/netstack: fix data race on in-flight connection test globals
The maxInFlightConnectionAttemptsForTest and
maxInFlightConnectionAttemptsPerClientForTest globals were plain ints
read by background gVisor TCP handler goroutines (via
wrapTCPProtocolHandler) and written by tstest.Replace cleanup in
TestTCPForwardLimits_PerClient. When a gVisor goroutine outlived the
test cleanup window, the race detector caught the unsynchronized
access.
The race-prone code was introduced in c5abbcd4b4d8 (2024-02-26,
"wgengine/netstack: add a per-client limit for in-flight TCP
forwards") which added both the plain int globals and the
TestTCPForwardLimits_PerClient test that writes them via
tstest.Replace. It is not obvious why this has only recently started
being detected as a data race; likely some combination of gVisor
version bumps, Go toolchain scheduler changes, and additional
TCP-injecting subtests (e.g. 03461ea7f, 2026-01-30) increased
goroutine churn enough to hit the window.
Change both globals to atomic.Int32 and replace tstest.Replace (which
does non-atomic *target = old on cleanup) with explicit Store/Cleanup
pairs.
Fixes #19118
Change-Id: Id26ba6fbfb2e4ade319976db80af8e16c7c8778e
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
| -rw-r--r-- | wgengine/netstack/netstack.go | 16 | ||||
| -rw-r--r-- | wgengine/netstack/netstack_test.go | 6 |
2 files changed, 13 insertions, 9 deletions
diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index 4da89e364..9f65b50c0 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -64,10 +64,12 @@ import ( const debugPackets = false // If non-zero, these override the values returned from the corresponding -// functions, below. +// functions, below. They are accessed atomically because background +// goroutines in the gVisor TCP stack read them while test cleanup +// goroutines may be restoring them concurrently. var ( - maxInFlightConnectionAttemptsForTest int - maxInFlightConnectionAttemptsPerClientForTest int + maxInFlightConnectionAttemptsForTest atomic.Int32 + maxInFlightConnectionAttemptsPerClientForTest atomic.Int32 ) // maxInFlightConnectionAttempts returns the global number of in-flight @@ -80,8 +82,8 @@ var ( // connection, so we want to ensure that we don't allow an unbounded number of // connections. func maxInFlightConnectionAttempts() int { - if n := maxInFlightConnectionAttemptsForTest; n > 0 { - return n + if n := maxInFlightConnectionAttemptsForTest.Load(); n > 0 { + return int(n) } if version.IsMobile() { @@ -106,8 +108,8 @@ func maxInFlightConnectionAttempts() int { // maxInFlightConnectionAttempts, but applies on a per-client basis // (i.e. keyed by the remote Tailscale IP). func maxInFlightConnectionAttemptsPerClient() int { - if n := maxInFlightConnectionAttemptsPerClientForTest; n > 0 { - return n + if n := maxInFlightConnectionAttemptsPerClientForTest.Load(); n > 0 { + return int(n) } // For now, allow each individual client at most 2/3rds of the global diff --git a/wgengine/netstack/netstack_test.go b/wgengine/netstack/netstack_test.go index e588fa47c..fd44741bc 100644 --- a/wgengine/netstack/netstack_test.go +++ b/wgengine/netstack/netstack_test.go @@ -814,8 +814,10 @@ func TestTCPForwardLimits_PerClient(t *testing.T) { envknob.Setenv("TS_DEBUG_NETSTACK", "true") // Set our test override limits during this test. - tstest.Replace(t, &maxInFlightConnectionAttemptsForTest, 2) - tstest.Replace(t, &maxInFlightConnectionAttemptsPerClientForTest, 1) + maxInFlightConnectionAttemptsForTest.Store(2) + t.Cleanup(func() { maxInFlightConnectionAttemptsForTest.Store(0) }) + maxInFlightConnectionAttemptsPerClientForTest.Store(1) + t.Cleanup(func() { maxInFlightConnectionAttemptsPerClientForTest.Store(0) }) impl := makeNetstack(t, func(impl *Impl) { impl.ProcessSubnets = true |
