summary | refs | log | tree | commit | diff | homepage
path: root/wgengine
diff options
context:
space:
mode:
author: Brad Fitzpatrick <bradfitz@tailscale.com> 2025-12-02 12:50:33 -0800
committer: Brad Fitzpatrick <bradfitz@tailscale.com> 2025-12-02 15:12:13 -0800
commit: 381de776c4878dd9af76b126cfa37bc80cad363f (patch)
tree: c3ddcbf9613db3074c9c6882bb757357cf5bdd0a /wgengine
parent: b8c58ca7c1a49fb772d095c65693cdab06488047 (diff)
download: tailscale-bradfitz/mutex_debug.tar.xz
download: tailscale-bradfitz/mutex_debug.zip
syncs: start working on mutex debugging, registration [branch: bradfitz/mutex_debug]
Updates #17852
Change-Id: Ib1b634eedd30cc4006bc1b39aa8d479d37c5f1f2
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Diffstat (limited to 'wgengine')
-rw-r--r-- wgengine/magicsock/magicsock.go 6
-rw-r--r-- wgengine/netlog/netlog.go 4
-rw-r--r-- wgengine/netlog/netlog_omit.go 1
-rw-r--r-- wgengine/userspace.go 8
-rw-r--r-- wgengine/watchdog.go 11
5 files changed, 24 insertions, 6 deletions
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 064838a2d..e4d1d3fbc 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -599,6 +599,11 @@ func newConn(logf logger.Logf) *Conn {
discoInfo: make(map[key.DiscoPublic]*discoInfo),
cloudInfo: newCloudInfo(logf),
}
+ syncs.RegisterMutex(&c.mu, "magicsock.Conn.mu")
+ syncs.RegisterMutex(&c.pconn4.mu, "magicsock.Conn.pconn4.mu")
+ syncs.RegisterMutex(&c.pconn6.mu, "magicsock.Conn.pconn6.mu")
+ syncs.RegisterMutex(&c.endpointTracker.mu, "magicsock.Conn.endpointTracker.mu")
+
c.discoAtomic.Set(discoPrivate)
c.bind = &connBind{Conn: c, closed: true}
c.receiveBatchPool = sync.Pool{New: func() any {
@@ -3145,6 +3150,7 @@ func (c *Conn) updateNodes(update NodeViewsUpdate) (peersChanged bool) {
heartbeatDisabled: flags.heartbeatDisabled,
isWireguardOnly: n.IsWireGuardOnly(),
}
+ syncs.RegisterMutex(&ep.mu, "magicsock.endpoint.mu")
switch runtime.GOOS {
case "ios", "android":
// Omit, to save memory. Prior to 2024-03-20 we used to limit it to
diff --git a/wgengine/netlog/netlog.go b/wgengine/netlog/netlog.go
index 12fe9c797..ba643944f 100644
--- a/wgengine/netlog/netlog.go
+++ b/wgengine/netlog/netlog.go
@@ -80,6 +80,10 @@ type Logger struct {
routePrefixes []netip.Prefix
}
+func (nl *Logger) RegisterMutex() {
+ syncs.RegisterMutex(&nl.mu, "netlog.Logger.mu")
+}
+
// Running reports whether the logger is running.
func (nl *Logger) Running() bool {
nl.mu.Lock()
diff --git a/wgengine/netlog/netlog_omit.go b/wgengine/netlog/netlog_omit.go
index 03610a1ef..3cefe978a 100644
--- a/wgengine/netlog/netlog_omit.go
+++ b/wgengine/netlog/netlog_omit.go
@@ -12,3 +12,4 @@ func (*Logger) Running() bool { return false }
func (*Logger) Shutdown(any) error { return nil }
func (*Logger) ReconfigNetworkMap(any) {}
func (*Logger) ReconfigRoutes(any) {}
+func (*Logger) RegisterMutex() {}
diff --git a/wgengine/userspace.go b/wgengine/userspace.go
index 1b8562d3f..7e48e3967 100644
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -17,7 +17,6 @@ import (
"runtime"
"slices"
"strings"
- "sync"
"time"
"github.com/tailscale/wireguard-go/device"
@@ -130,7 +129,7 @@ type userspaceEngine struct {
// is being routed over Tailscale.
isDNSIPOverTailscale syncs.AtomicValue[func(netip.Addr) bool]
- wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below
+ wgLock syncs.Mutex // serializes all wgdev operations; see lock order comment below
lastCfgFull wgcfg.Config
lastNMinPeers int
lastRouter *router.Config
@@ -145,7 +144,7 @@ type userspaceEngine struct {
lastStatusPollTime mono.Time // last time we polled the engine status
reconfigureVPN func() error // or nil
- mu sync.Mutex // guards following; see lock order comment below
+ mu syncs.Mutex // guards following; see lock order comment below
netMap *netmap.NetworkMap // or nil
closing bool // Close was called (even if we're still closing)
statusCallback StatusCallback
@@ -361,6 +360,9 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error)
reconfigureVPN: conf.ReconfigureVPN,
health: conf.HealthTracker,
}
+ syncs.RegisterMutex(&e.mu, "wgengine.userspaceEngine.mu")
+ syncs.RegisterMutex(&e.wgLock, "wgengine.userspaceEngine.wgLock")
+ e.networkLogger.RegisterMutex()
if e.birdClient != nil {
// Disable the protocol at start time.
diff --git a/wgengine/watchdog.go b/wgengine/watchdog.go
index 9cc4ed3b5..bad34a15f 100644
--- a/wgengine/watchdog.go
+++ b/wgengine/watchdog.go
@@ -19,6 +19,7 @@ import (
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/dns"
"tailscale.com/net/packet"
+ "tailscale.com/syncs"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/netmap"
@@ -81,9 +82,13 @@ func (e *watchdogEngine) watchdogErr(name string, fn func() error) error {
}()
errCh := make(chan error)
- go func() {
- errCh <- fn()
- }()
+ if syncs.MutexDebugging {
+ syncs.ForkJoinGo(func() { errCh <- fn() })
+ } else {
+ // Don't use ForkJoinGo to avoid the loss of "created by" in
+ // stack traces.
+ go func() { errCh <- fn() }()
+ }
t := time.NewTimer(e.maxWait)
select {
case err := <-errCh: