summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorBrad Fitzpatrick <bradfitz@tailscale.com>2020-08-04 09:36:38 -0700
committerBrad Fitzpatrick <bradfitz@tailscale.com>2020-08-04 09:36:38 -0700
commitbfcb0aa0be710aa328f793a3a90d3daf2de9a19e (patch)
tree99ae6f6d6badd3a38416c370c0137873c23a31b9
parentc3467fbadb7e2f28c3f786dfabdc13c173e62055 (diff)
downloadtailscale-bfcb0aa0be710aa328f793a3a90d3daf2de9a19e.tar.xz
tailscale-bfcb0aa0be710aa328f793a3a90d3daf2de9a19e.zip
wgengine/magicsock: deflake tests, Close deadlock again
Better fix than 37903a9056d664ddbc52cce3324dc0610d81862b Fixes tailscale/corp#533
-rw-r--r--wgengine/magicsock/magicsock.go44
-rw-r--r--wgengine/magicsock/magicsock_test.go6
2 files changed, 35 insertions, 15 deletions
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 587f4f72d..4214e3d77 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -125,7 +125,8 @@ type Conn struct {
packetListener nettype.PacketListener
// ============================================================
- mu sync.Mutex // guards all following fields
+ mu sync.Mutex // guards all following fields
+ muCond *sync.Cond
// canCreateEPUnlocked tracks at one place whether mu is
// already held. It's then checked in CreateEndpoint to avoid
@@ -137,7 +138,6 @@ type Conn struct {
started bool // Start was called
closed bool // Close was called
- endpointsUpdateWaiter *sync.Cond
endpointsUpdateActive bool
wantEndpointsUpdate string // true if non-empty; string is reason
lastEndpoints []string
@@ -323,7 +323,7 @@ func newConn() *Conn {
sharedDiscoKey: make(map[tailcfg.DiscoKey]*[32]byte),
discoOfAddr: make(map[netaddr.IPPort]tailcfg.DiscoKey),
}
- c.endpointsUpdateWaiter = sync.NewCond(&c.mu)
+ c.muCond = sync.NewCond(&c.mu)
return c
}
@@ -395,7 +395,7 @@ func (c *Conn) updateEndpoints(why string) {
go c.updateEndpoints(why)
} else {
c.endpointsUpdateActive = false
- c.endpointsUpdateWaiter.Broadcast()
+ c.muCond.Broadcast()
}
}()
@@ -1079,6 +1079,7 @@ func (c *Conn) derpWriteChanOfAddr(addr netaddr.IPPort, peer key.Public) chan<-
go func() {
dc.Connect(ctx)
close(c.derpStarted)
+ c.muCond.Broadcast()
}()
}
@@ -1825,7 +1826,9 @@ func (c *Conn) SetPrivateKey(privateKey wgcfg.PrivateKey) error {
if oldKey.IsZero() {
c.everHadKey = true
c.logf("magicsock: SetPrivateKey called (init)")
- go c.ReSTUN("set-private-key")
+ if c.started {
+ go c.ReSTUN("set-private-key")
+ }
} else if newKey.IsZero() {
c.logf("magicsock: SetPrivateKey called (zeroed)")
c.closeAllDerpLocked("zero-private-key")
@@ -2087,6 +2090,21 @@ func (c *Conn) Close() error {
c.pconn6.Close()
}
err := c.pconn4.Close()
+
+ // Wait on goroutines updating right at the end, once everything is
+ // already closed. We want everything else in the Conn to be
+ // consistently in the closed state before we release mu to wait
+ // on the endpoint updater & derphttp.Connect.
+ for c.goroutinesRunningLocked() {
+ c.muCond.Wait()
+ }
+ return err
+}
+
+func (c *Conn) goroutinesRunningLocked() bool {
+ if c.endpointsUpdateActive {
+ return true
+ }
// The goroutine running dc.Connect in derpWriteChanOfAddr may linger
// and appear to leak, as observed in https://github.com/tailscale/tailscale/issues/554.
// This is despite the underlying context being cancelled by connCtxCancel above.
@@ -2096,16 +2114,14 @@ func (c *Conn) Close() error {
// on the first run, it sets firstDerp := true and spawns the aforementioned goroutine.
// To detect this, we check activeDerp, which is initialized to non-nil on the first run.
if c.activeDerp != nil {
- <-c.derpStarted
- }
- // Wait on endpoints updating right at the end, once everything is
- // already closed. We want everything else in the Conn to be
- // consistently in the closed state before we release mu to wait
- // on the endpoint updater.
- for c.endpointsUpdateActive {
- c.endpointsUpdateWaiter.Wait()
+ select {
+ case <-c.derpStarted:
+ break
+ default:
+ return true
+ }
}
- return err
+ return false
}
func maxIdleBeforeSTUNShutdown() time.Duration {
diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go
index 01ca63335..d42d837f7 100644
--- a/wgengine/magicsock/magicsock_test.go
+++ b/wgengine/magicsock/magicsock_test.go
@@ -807,13 +807,17 @@ func testActiveDiscovery(t *testing.T, d *devices) {
// from DERP.
mustDirect := func(m1, m2 *magicStack) {
+ lastLog := time.Now().Add(-time.Minute)
for deadline := time.Now().Add(5 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
pst := m1.Status().Peer[m2.Public()]
if pst.CurAddr != "" {
logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr)
return
}
- logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs)
+ if now := time.Now(); now.Sub(lastLog) > time.Second {
+ logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs)
+ lastLog = now
+ }
}
t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2)
}