diff options
Diffstat (limited to 'wgengine/userspace.go')
| -rw-r--r-- | wgengine/userspace.go | 453 |
1 files changed, 61 insertions, 392 deletions
diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 76261d4d4..523407995 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -4,19 +4,16 @@ package wgengine import ( - "bufio" "context" crand "crypto/rand" "crypto/x509" "errors" "fmt" "io" - "maps" "math" "net/netip" "runtime" "slices" - "strings" "sync" "time" @@ -69,29 +66,6 @@ import ( "tailscale.com/wgengine/wglog" ) -// Lazy wireguard-go configuration parameters. -const ( - // lazyPeerIdleThreshold is the idle duration after - // which we remove a peer from the wireguard configuration. - // (This includes peers that have never been idle, which - // effectively have infinite idleness) - lazyPeerIdleThreshold = 5 * time.Minute - - // packetSendTimeUpdateFrequency controls how often we record - // the time that we wrote a packet to an IP address. - packetSendTimeUpdateFrequency = 10 * time.Second - - // packetSendRecheckWireguardThreshold controls how long we can go - // between packet sends to an IP before checking to see - // whether this IP address needs to be added back to the - // WireGuard peer oconfig. - packetSendRecheckWireguardThreshold = 1 * time.Minute -) - -// statusPollInterval is how often we ask wireguard-go for its engine -// status (as long as there's activity). See docs on its use below. -const statusPollInterval = 1 * time.Minute - // networkLoggerUploadTimeout is the maximum timeout to wait when // shutting down the network logger as it uploads the last network log messages. const networkLoggerUploadTimeout = 5 * time.Second @@ -133,21 +107,13 @@ type userspaceEngine struct { // is being routed over Tailscale. isDNSIPOverTailscale syncs.AtomicValue[func(netip.Addr) bool] - wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below - lastCfgFull wgcfg.Config - lastNMinPeers int - lastRouter *router.Config - lastEngineFull *wgcfg.Config // of full wireguard config, not trimmed - lastEngineInputs *maybeReconfigInputs - lastDNSConfig dns.ConfigView // or invalid if none - lastIsSubnetRouter bool // was the node a primary subnet router in the last run. - recvActivityAt map[key.NodePublic]mono.Time - trimmedNodes map[key.NodePublic]bool // set of node keys of peers currently excluded from wireguard config - sentActivityAt map[netip.Addr]*mono.Time // value is accessed atomically - destIPActivityFuncs map[netip.Addr]func() - lastStatusPollTime mono.Time // last time we polled the engine status - reconfigureVPN func() error // or nil - conn25PacketHooks Conn25PacketHooks // or nil + wgLock sync.Mutex // serializes all wgdev operations; see lock order comment below + lastCfgFull wgcfg.Config + lastRouter *router.Config + lastDNSConfig dns.ConfigView // or invalid if none + lastIsSubnetRouter bool // was the node a primary subnet router in the last run. + reconfigureVPN func() error // or nil + conn25PacketHooks Conn25PacketHooks // or nil mu sync.Mutex // guards following; see lock order comment below netMap *netmap.NetworkMap // or nil @@ -461,10 +427,6 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) ForceDiscoKey: conf.ForceDiscoKey, OnDERPRecv: conf.OnDERPRecv, } - if buildfeatures.HasLazyWG { - magicsockOpts.NoteRecvActivity = e.noteRecvActivity - } - var err error e.magicConn, err = magicsock.NewConn(magicsockOpts) if err != nil { @@ -691,163 +653,11 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) return filter.Accept } -var debugTrimWireguard = envknob.RegisterOptBool("TS_DEBUG_TRIM_WIREGUARD") - -// forceFullWireguardConfig reports whether we should give wireguard our full -// network map, even for inactive peers. -// -// TODO(bradfitz): remove this at some point. We had a TODO to do it before 1.0 -// but it's still there as of 1.30. Really we should not do this wireguard lazy -// peer config at all and just fix wireguard-go to not have so much extra memory -// usage per peer. That would simplify a lot of Tailscale code. OTOH, we have 50 -// MB of memory on iOS now instead of 15 MB, so the other option is to just give -// up on lazy wireguard config and blow the memory and hope for the best on iOS. -// That's sad too. Or we get rid of these knobs (lazy wireguard config has been -// stable!) but I'm worried that a future regression would be easier to debug -// with these knobs in place. -func (e *userspaceEngine) forceFullWireguardConfig(numPeers int) bool { - // Did the user explicitly enable trimming via the environment variable knob? - if b, ok := debugTrimWireguard().Get(); ok { - return !b - } - return e.controlKnobs != nil && e.controlKnobs.KeepFullWGConfig.Load() -} - -// isTrimmablePeer reports whether p is a peer that we can trim out of the -// network map. -// -// For implementation simplicity, we can only trim peers that have -// only non-subnet AllowedIPs (an IPv4 /32 or IPv6 /128), which is the -// common case for most peers. Subnet router nodes will just always be -// created in the wireguard-go config. -func (e *userspaceEngine) isTrimmablePeer(p *wgcfg.Peer, numPeers int) bool { - if e.forceFullWireguardConfig(numPeers) { - return false - } - - // AllowedIPs must all be single IPs, not subnets. - for _, aip := range p.AllowedIPs { - if !aip.IsSingleIP() { - return false - } - } - return true -} - -// noteRecvActivity is called by magicsock when a packet has been -// received for the peer with node key nk. Magicsock calls this no -// more than every 10 seconds for a given peer. -func (e *userspaceEngine) noteRecvActivity(nk key.NodePublic) { - e.wgLock.Lock() - defer e.wgLock.Unlock() - - if _, ok := e.recvActivityAt[nk]; !ok { - // Not a trimmable peer we care about tracking. (See isTrimmablePeer) - if e.trimmedNodes[nk] { - e.logf("wgengine: [unexpected] noteReceiveActivity called on idle node %v that's not in recvActivityAt", nk.ShortString()) - } - return - } - now := e.timeNow() - e.recvActivityAt[nk] = now - - // As long as there's activity, periodically poll the engine to get - // stats for the far away side effect of - // ipn/ipnlocal.LocalBackend.parseWgStatusLocked to log activity, for - // use in various admin dashboards. - // This particularly matters on platforms without a connected GUI, as - // the GUIs generally poll this enough to cause that logging. But - // tailscaled alone did not, hence this. - if e.lastStatusPollTime.IsZero() || now.Sub(e.lastStatusPollTime) >= statusPollInterval { - e.lastStatusPollTime = now - go e.RequestStatus() - } - - // If the last activity time jumped a bunch (say, at least - // half the idle timeout) then see if we need to reprogram - // WireGuard. This could probably be just - // lazyPeerIdleThreshold without the divide by 2, but - // maybeReconfigWireguardLocked is cheap enough to call every - // couple minutes (just not on every packet). - if e.trimmedNodes[nk] { - e.logf("wgengine: idle peer %v now active, reconfiguring WireGuard", nk.ShortString()) - e.maybeReconfigWireguardLocked(nil) - } -} - -// isActiveSinceLocked reports whether the peer identified by (nk, ip) -// has had a packet sent to or received from it since t. -// -// e.wgLock must be held. -func (e *userspaceEngine) isActiveSinceLocked(nk key.NodePublic, ip netip.Addr, t mono.Time) bool { - if e.recvActivityAt[nk].After(t) { - return true - } - timePtr, ok := e.sentActivityAt[ip] - if !ok { - return false - } - return timePtr.LoadAtomic().After(t) -} - -// maybeReconfigInputs holds the inputs to the maybeReconfigWireguardLocked -// function. If these things don't change between calls, there's nothing to do. -// -// If you add a field, update Equal and Clone, and add a case to -// TestMaybeReconfigInputsEqual. -type maybeReconfigInputs struct { - WGConfig *wgcfg.Config - TrimmedNodes map[key.NodePublic]bool - - // TrackNodes and TrackIPs are built in full.Peers iteration order, - // which is sorted by NodeID (via sortedPeers -> WGCfg). Equal uses - // order-dependent comparison, so any change to that ordering - // invariant must update the comparison logic. - TrackNodes views.Slice[key.NodePublic] - TrackIPs views.Slice[netip.Addr] -} - -func (i *maybeReconfigInputs) Equal(o *maybeReconfigInputs) bool { - if i == o { - return true - } - if i == nil || o == nil { - return false - } - if !i.WGConfig.Equal(o.WGConfig) { - return false - } - if len(i.TrimmedNodes) != len(o.TrimmedNodes) { - return false - } - for k := range i.TrimmedNodes { - if !o.TrimmedNodes[k] { - return false - } - } - if !views.SliceEqual(i.TrackNodes, o.TrackNodes) { - return false - } - return views.SliceEqual(i.TrackIPs, o.TrackIPs) -} - -func (i *maybeReconfigInputs) Clone() *maybeReconfigInputs { - if i == nil { - return nil - } - v := *i - v.WGConfig = i.WGConfig.Clone() - v.TrimmedNodes = maps.Clone(i.TrimmedNodes) - return &v -} - -// discoChanged are the set of peers whose disco keys have changed, implying they've restarted. -// If a peer is in this set and was previously in the live wireguard config, -// it needs to be first removed and then re-added to flush out its wireguard session key. -// If discoChanged is nil or empty, this extra removal step isn't done. +// maybeReconfigWireguardLocked reconfigures wireguard-go with the current +// full config, installing a PeerLookupFunc for on-demand peer creation. // // e.wgLock must be held. -func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.NodePublic]bool) error { +func (e *userspaceEngine) maybeReconfigWireguardLocked() error { if hook := e.testMaybeReconfigHook; hook != nil { hook() return nil @@ -856,181 +666,40 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked(discoChanged map[key.Node full := e.lastCfgFull e.wgLogger.SetPeers(full.Peers) - // Compute a minimal config to pass to wireguard-go - // based on the full config. Prune off all the peers - // and only add the active ones back. - min := full - min.Peers = make([]wgcfg.Peer, 0, e.lastNMinPeers) - - // We'll only keep a peer around if it's been active in - // the past 5 minutes. That's more than WireGuard's key - // rotation time anyway so it's no harm if we remove it - // later if it's been inactive. - var activeCutoff mono.Time - if buildfeatures.HasLazyWG { - activeCutoff = e.timeNow().Add(-lazyPeerIdleThreshold) - } - - // Not all peers can be trimmed from the network map (see - // isTrimmablePeer). For those that are trimmable, keep track of - // their NodeKey and Tailscale IPs. These are the ones we'll need - // to install tracking hooks for to watch their send/receive - // activity. - // - // trackNodes and trackIPs are appended in full.Peers order (sorted - // by NodeID). maybeReconfigInputs.Equal depends on this ordering; - // see the struct comment. - var trackNodes []key.NodePublic - var trackIPs []netip.Addr - if buildfeatures.HasLazyWG { - trackNodes = make([]key.NodePublic, 0, len(full.Peers)) - trackIPs = make([]netip.Addr, 0, len(full.Peers)) - } - - // Don't re-alloc the map; the Go compiler optimizes map clears as of - // Go 1.11, so we can re-use the existing + allocated map. - if e.trimmedNodes != nil { - clear(e.trimmedNodes) - } else { - e.trimmedNodes = make(map[key.NodePublic]bool) - } - - needRemoveStep := false - for i := range full.Peers { - p := &full.Peers[i] - nk := p.PublicKey - if !buildfeatures.HasLazyWG || !e.isTrimmablePeer(p, len(full.Peers)) { - min.Peers = append(min.Peers, *p) - if discoChanged[nk] { - needRemoveStep = true - } - continue - } - trackNodes = append(trackNodes, nk) - recentlyActive := false - for _, cidr := range p.AllowedIPs { - trackIPs = append(trackIPs, cidr.Addr()) - recentlyActive = recentlyActive || e.isActiveSinceLocked(nk, cidr.Addr(), activeCutoff) - } - if recentlyActive { - min.Peers = append(min.Peers, *p) - if discoChanged[nk] { - needRemoveStep = true - } - } else { - e.trimmedNodes[nk] = true - } - } - e.lastNMinPeers = len(min.Peers) - - if changed := checkchange.Update(&e.lastEngineInputs, &maybeReconfigInputs{ - WGConfig: &min, - TrimmedNodes: e.trimmedNodes, - TrackNodes: views.SliceOf(trackNodes), - TrackIPs: views.SliceOf(trackIPs), - }); !changed { - return nil - } - - if buildfeatures.HasLazyWG { - e.updateActivityMapsLocked(trackNodes, trackIPs) - } - - if needRemoveStep { - minner := min - minner.Peers = nil - numRemove := 0 - for _, p := range min.Peers { - if discoChanged[p.PublicKey] { - numRemove++ - continue - } - minner.Peers = append(minner.Peers, p) - } - if numRemove > 0 { - e.logf("wgengine: Reconfig: removing session keys for %d peers", numRemove) - if err := wgcfg.ReconfigDevice(e.wgdev, &minner, e.logf); err != nil { - e.logf("wgdev.Reconfig: %v", err) - return err - } - } - } - - e.logf("wgengine: Reconfig: configuring userspace WireGuard config (with %d/%d peers)", len(min.Peers), len(full.Peers)) - if err := wgcfg.ReconfigDevice(e.wgdev, &min, e.logf); err != nil { + e.logf("wgengine: Reconfig: configuring userspace WireGuard config (with %d peers)", len(full.Peers)) + if err := wgcfg.ReconfigDevice(e.wgdev, &full, e.logf); err != nil { e.logf("wgdev.Reconfig: %v", err) return err } return nil } -// updateActivityMapsLocked updates the data structures used for tracking the activity -// of wireguard peers that we might add/remove dynamically from the real config -// as given to wireguard-go. -// -// e.wgLock must be held. -func (e *userspaceEngine) updateActivityMapsLocked(trackNodes []key.NodePublic, trackIPs []netip.Addr) { - if !buildfeatures.HasLazyWG { - return - } - // Generate the new map of which nodekeys we want to track - // receive times for. - mr := map[key.NodePublic]mono.Time{} // TODO: only recreate this if set of keys changed - for _, nk := range trackNodes { - // Preserve old times in the new map, but also - // populate map entries for new trackNodes values with - // time.Time{} zero values. (Only entries in this map - // are tracked, so the Time zero values allow it to be - // tracked later) - mr[nk] = e.recvActivityAt[nk] - } - e.recvActivityAt = mr - - oldTime := e.sentActivityAt - e.sentActivityAt = make(map[netip.Addr]*mono.Time, len(oldTime)) - oldFunc := e.destIPActivityFuncs - e.destIPActivityFuncs = make(map[netip.Addr]func(), len(oldFunc)) - - updateFn := func(timePtr *mono.Time) func() { - return func() { - now := e.timeNow() - old := timePtr.LoadAtomic() - - // How long's it been since we last sent a packet? - elapsed := now.Sub(old) - if old == 0 { - // For our first packet, old is 0, which has indeterminate meaning. - // Set elapsed to a big number (four score and seven years). - elapsed = 762642 * time.Hour - } - - if elapsed >= packetSendTimeUpdateFrequency { - timePtr.StoreAtomic(now) - } - // On a big jump, assume we might no longer be in the wireguard - // config and go check. - if elapsed >= packetSendRecheckWireguardThreshold { - e.wgLock.Lock() - defer e.wgLock.Unlock() - e.maybeReconfigWireguardLocked(nil) - } +// SetPeerByIPPacketFunc installs a callback used by wireguard-go to look up +// which peer should handle an outbound packet by destination IP. +func (e *userspaceEngine) SetPeerByIPPacketFunc(fn func(netip.Addr) (_ key.NodePublic, ok bool)) { + e.wgdev.SetPeerByIPPacketFunc(func(_, dst netip.Addr, _ []byte) (device.NoisePublicKey, bool) { + // Fast path: exact IP match (node addresses). + if pk, ok := fn(dst); ok { + return pk.Raw32(), true } - } - - for _, ip := range trackIPs { - timePtr := oldTime[ip] - if timePtr == nil { - timePtr = new(mono.Time) + // Slow path: check AllowedIPs for subnet routes. + e.wgLock.Lock() + defer e.wgLock.Unlock() + var best netip.Prefix + var bestKey key.NodePublic + for _, p := range e.lastCfgFull.Peers { + for _, pfx := range p.AllowedIPs { + if pfx.Contains(dst) && (!best.IsValid() || pfx.Bits() > best.Bits()) { + best = pfx + bestKey = p.PublicKey + } + } } - e.sentActivityAt[ip] = timePtr - - fn := oldFunc[ip] - if fn == nil { - fn = updateFn(timePtr) + if best.IsValid() { + return bestKey.Raw32(), true } - e.destIPActivityFuncs[ip] = fn - } - e.tundev.SetDestIPActivityFuncs(e.destIPActivityFuncs) + return device.NoisePublicKey{}, false + }) } // hasOverlap checks if there is a IPPrefix which is common amongst the two @@ -1119,7 +788,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, } isSubnetRouterChanged := buildfeatures.HasAdvertiseRoutes && isSubnetRouter != e.lastIsSubnetRouter - engineChanged := checkchange.Update(&e.lastEngineFull, cfg) + engineChanged := !e.lastCfgFull.Equal(cfg) routerChanged := checkchange.Update(&e.lastRouter, routerCfg) dnsChanged := buildfeatures.HasDNS && !e.lastDNSConfig.Equal(dnsCfg.View()) if dnsChanged { @@ -1151,9 +820,8 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, } // See if any peers have changed disco keys, which means they've restarted. - // If so, we need to update the wireguard-go/device.Device in two phases: - // once without the node which has restarted, to clear its wireguard session key, - // and a second time with it. + // If so, remove the peer from wireguard-go to flush its session key, + // then let the PeerLookupFunc re-create it on demand. discoChanged := make(map[key.NodePublic]bool) { prevEP := make(map[key.NodePublic]key.DiscoPublic) @@ -1168,7 +836,6 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, continue } - // If the key changed, mark the connection for reconfiguration. pub := p.PublicKey if old, ok := prevEP[pub]; ok && old != p.DiscoKey { @@ -1177,34 +844,21 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, // connection. if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP { if discoTSMP == p.DiscoKey { - // Key matches, remove entry from map. e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey) delete(e.tsmpLearnedDisco, p.PublicKey) } else { - // The new disco key does not match what we received via - // TSMP for this peer. This is unexpected, so log it. - // If it does happen, overwrite the previously-saved - // disco key with the new one for now: We expect another - // update must be pending in that case, so keep the map - // entry. - // The reason why this should never happen is that only a single - // request is coming through the netmap pipeline at a time, and there - // should realistically ever only be a single entry in the map. This - // is really a belt and suspenders solution to find usage that is - // inconsistent with our expectations. e.logf("wgengine: [unexpected] Reconfig: using TSMP key for %s (control stale): tsmp=%q control=%q old=%q", pub.ShortString(), discoTSMP, p.DiscoKey, old) metricTSMPLearnedKeyMismatch.Add(1) p.DiscoKey = discoTSMP } - - // Skip session clear no matter what. continue } discoChanged[pub] = true e.logf("wgengine: Reconfig: %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey) + e.wgdev.RemovePeer(pub.Raw32()) } } } @@ -1214,8 +868,6 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, e.testDiscoChangedHook(discoChanged) } - e.lastCfgFull = *cfg.Clone() - // Tell magicsock about the new (or initial) private key // (which is needed by DERP) before wgdev gets it, as wgdev // will start trying to handshake, which we want to be able to @@ -1223,11 +875,24 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, if err := e.magicConn.SetPrivateKey(cfg.PrivateKey); err != nil { e.logf("wgengine: Reconfig: SetPrivateKey: %v", err) } + + // Set the wireguard-go private key only when it changes. + // Calling SetPrivateKey unconditionally acquires staticIdentity.Lock, + // which can deadlock with concurrent handshake goroutines that hold + // staticIdentity.RLock and are waiting for peers.Lock. + if !e.lastCfgFull.PrivateKey.Equal(cfg.PrivateKey) { + if err := e.wgdev.SetPrivateKey(key.NodePrivateAs[device.NoisePrivateKey](cfg.PrivateKey)); err != nil { + e.logf("wgengine: Reconfig: wgdev.SetPrivateKey: %v", err) + } + } + + e.lastCfgFull = *cfg.Clone() + e.magicConn.UpdatePeers(peerSet) e.magicConn.SetPreferredPort(listenPort) e.magicConn.UpdatePMTUD() - if err := e.maybeReconfigWireguardLocked(discoChanged); err != nil { + if err := e.maybeReconfigWireguardLocked(); err != nil { return err } @@ -1373,8 +1038,14 @@ func (e *userspaceEngine) PeerByKey(pubKey key.NodePublic) (_ wgint.Peer, ok boo if dev == nil { return wgint.Peer{}, false } - peer := dev.LookupPeer(pubKey.Raw32()) - if peer == nil { + // Use LookupActivePeer (not LookupPeer) to avoid triggering on-demand + // peer creation via PeerLookupFunc. PeerByKey is called from status + // polling paths (getStatus, getPeerStatusLite) which iterate every peer + // in the netmap; using LookupPeer would lazily create a wireguard-go + // peer for every single netmap peer on each status poll, leaking + // memory via per-peer queues and goroutines. + peer, ok := dev.LookupActivePeer(pubKey.Raw32()) + if !ok { return wgint.Peer{}, false } return wgint.PeerOf(peer), true @@ -1470,8 +1141,6 @@ func (e *userspaceEngine) Close() { e.closing = true e.mu.Unlock() - r := bufio.NewReader(strings.NewReader("")) - e.wgdev.IpcSetOperation(r) e.magicConn.Close() if e.netMonOwned { e.netMon.Close() |
