summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJordan Whited <jordan@tailscale.com>2025-07-02 19:31:03 -0700
committerJordan Whited <jordan@tailscale.com>2025-07-02 19:31:03 -0700
commitcf253057f740f8a5a0fc089622d06792d977f7cd (patch)
tree46d937c4dc79cae7ad290f59ce4d3e8520b4e7cb
parent77d19604f449ac65092e232c93d28f9e686df161 (diff)
downloadtailscale-jwhited/peer-verify-every-packet-batch.tar.xz
tailscale-jwhited/peer-verify-every-packet-batch.zip
wgengine/magicsock: always use Cryptokey Routing identificationjwhited/peer-verify-every-packet-batch
We only set [epAddr]s in the [peerMap] when wireguard-go tells us who they belong to. A node key can only have a single [epAddr] in the [peerMap]. We also clear an [epAddr] when wireguard-go tells us our mapping assumption between [epAddr] and peer is wrong (outdated). Signed-off-by: Jordan Whited <jordan@tailscale.com>
-rw-r--r--wgengine/magicsock/endpoint.go17
-rw-r--r--wgengine/magicsock/magicsock.go50
-rw-r--r--wgengine/magicsock/peermap.go73
3 files changed, 47 insertions, 93 deletions
diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go
index 0569341ff..5fb2b6316 100644
--- a/wgengine/magicsock/endpoint.go
+++ b/wgengine/magicsock/endpoint.go
@@ -128,6 +128,21 @@ func (de *endpoint) udpRelayEndpointReady(maybeBest addrQuality) {
de.trustBestAddrUntil = mono.Now().Add(trustUDPAddrDuration)
}
+// MaybePeer implements [conn.PeerVerifyEndpoint].
+func (de *endpoint) MaybePeer() [32]byte {
+ return de.publicKey.Raw32()
+}
+
+// FromPeer implements [conn.PeerAwareEndpoint].
+func (de *endpoint) FromPeer(peerPublicKey [32]byte) {
+ de.c.mu.Lock()
+ defer de.c.mu.Unlock()
+ if de.publicKey.Raw32() != peerPublicKey {
+ de.c.peerMap.clearEpAddrForNodeKey(de.publicKey)
+ de.c.logf("magicsock: clearing epAddr for node %v %v", de.nodeAddr, de.discoShort())
+ }
+}
+
func (de *endpoint) setBestAddrLocked(v addrQuality) {
if v.epAddr != de.bestAddr.epAddr {
de.probeUDPLifetime.resetCycleEndpointLocked()
@@ -1694,8 +1709,6 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src epAdd
return
}
- de.c.peerMap.setNodeKeyForEpAddr(src, de.publicKey)
-
st.addPongReplyLocked(pongReply{
latency: latency,
pongAt: now,
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index 174345a84..c17bace6d 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -1694,12 +1694,6 @@ func (c *Conn) receiveIP(b []byte, ipp netip.AddrPort, cache *epAddrEndpointCach
de, ok := c.peerMap.endpointForEpAddr(src)
c.mu.Unlock()
if !ok {
- if c.controlKnobs != nil && c.controlKnobs.DisableCryptorouting.Load() {
- // Note: UDP relay is dependent on cryptorouting enablement. We
- // only update Geneve-encapsulated [epAddr]s in the [peerMap]
- // via [lazyEndpoint].
- return nil, 0, false
- }
// TODO(jwhited): reuse [lazyEndpoint] across calls to receiveIP()
// for the same batch & [epAddr] src.
return &lazyEndpoint{c: c, src: src}, size, true
@@ -2242,45 +2236,13 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src epAddr, di *discoInfo, derpN
return
}
- var bestEpAddr epAddr
- var discoKey key.DiscoPublic
- ep, ok := c.peerMap.endpointForEpAddr(src)
- if ok {
- ep.mu.Lock()
- bestEpAddr = ep.bestAddr.epAddr
- ep.mu.Unlock()
- disco := ep.disco.Load()
- if disco != nil {
- discoKey = disco.key
- }
- }
-
- if src == bestEpAddr && discoKey == di.discoKey {
- // We have an associated endpoint with src as its bestAddr. Set
- // numNodes so we TX a pong further down.
- numNodes = 1
- } else {
- // We have no [endpoint] in the [peerMap] for this relay [epAddr]
- // using it as a bestAddr. [relayManager] might be in the middle of
- // probing it or attempting to set it as best via
- // [endpoint.udpRelayEndpointReady()]. Make [relayManager] aware.
- c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src)
- return
- }
+ // [relayManager] might be in the middle of probing src or attempting to
+ // set it as best via [endpoint.udpRelayEndpointReady()]. Make
+ // [relayManager] aware. [relayManager] is always responsible for
+ // handling all pings received over Geneve-encapsulated paths.
+ c.relayManager.handleGeneveEncapDiscoMsgNotBestAddr(c, dm, di, src)
+ return
default: // no VNI
- // If we can figure out with certainty which node key this disco
- // message is for, eagerly update our [epAddr]<>node and disco<>node
- // mappings to make p2p path discovery faster in simple
- // cases. Without this, disco would still work, but would be
- // reliant on DERP call-me-maybe to establish the disco<>node
- // mapping, and on subsequent disco handlePongConnLocked to establish
- // the IP:port<>disco mapping.
- if nk, ok := c.unambiguousNodeKeyOfPingLocked(dm, di.discoKey, derpNodeSrc); ok {
- if !isDerp {
- c.peerMap.setNodeKeyForEpAddr(src, nk)
- }
- }
-
// Remember this route if not present.
var dup bool
if isDerp {
diff --git a/wgengine/magicsock/peermap.go b/wgengine/magicsock/peermap.go
index 838905396..06334cd37 100644
--- a/wgengine/magicsock/peermap.go
+++ b/wgengine/magicsock/peermap.go
@@ -17,13 +17,12 @@ type peerInfo struct {
// that when we're deleting this node, we can rapidly find out the
// keys that need deleting from peerMap.byEpAddr without having to
// iterate over every epAddr known for any peer.
- epAddrs set.Set[epAddr]
+ epAddr epAddr
}
func newPeerInfo(ep *endpoint) *peerInfo {
return &peerInfo{
- ep: ep,
- epAddrs: set.Set[epAddr]{},
+ ep: ep,
}
}
@@ -36,18 +35,6 @@ type peerMap struct {
byEpAddr map[epAddr]*peerInfo
byNodeID map[tailcfg.NodeID]*peerInfo
- // relayEpAddrByNodeKey ensures we only hold a single relay
- // [epAddr] (vni.isSet()) for a given node key in byEpAddr, vs letting them
- // grow unbounded. Relay [epAddr]'s are dynamically created by
- // [relayManager] during path discovery, and are only useful to track in
- // peerMap so long as they are the endpoint.bestAddr. [relayManager] handles
- // all creation and initial probing responsibilities otherwise, and it does
- // not depend on [peerMap].
- //
- // Note: This doesn't address unbounded growth of non-relay epAddr's in
- // byEpAddr. That issue is being tracked in http://go/corp/29422.
- relayEpAddrByNodeKey map[key.NodePublic]epAddr
-
// nodesOfDisco contains the set of nodes that are using a
// DiscoKey. Usually those sets will be just one node.
nodesOfDisco map[key.DiscoPublic]set.Set[key.NodePublic]
@@ -55,11 +42,10 @@ type peerMap struct {
func newPeerMap() peerMap {
return peerMap{
- byNodeKey: map[key.NodePublic]*peerInfo{},
- byEpAddr: map[epAddr]*peerInfo{},
- byNodeID: map[tailcfg.NodeID]*peerInfo{},
- relayEpAddrByNodeKey: map[key.NodePublic]epAddr{},
- nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
+ byNodeKey: map[key.NodePublic]*peerInfo{},
+ byEpAddr: map[epAddr]*peerInfo{},
+ byNodeID: map[tailcfg.NodeID]*peerInfo{},
+ nodesOfDisco: map[key.DiscoPublic]set.Set[key.NodePublic]{},
}
}
@@ -154,16 +140,8 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
delete(m.nodesOfDisco[oldDiscoKey], ep.publicKey)
}
if ep.isWireguardOnly {
- // If the peer is a WireGuard only peer, add all of its endpoints.
-
- // TODO(raggi,catzkorn): this could mean that if a "isWireguardOnly"
- // peer has, say, 192.168.0.2 and so does a tailscale peer, the
- // wireguard one will win. That may not be the outcome that we want -
- // perhaps we should prefer bestAddr.epAddr.ap if it is set?
- // see tailscale/tailscale#7994
- for ipp := range ep.endpointState {
- m.setNodeKeyForEpAddr(epAddr{ap: ipp}, ep.publicKey)
- }
+ // If the peer is a WireGuard only peer, return early. There is no disco
+ // tracking for WireGuard peers.
return
}
discoSet := m.nodesOfDisco[epDisco.key]
@@ -174,6 +152,21 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
discoSet.Add(ep.publicKey)
}
+// clearEpAddrForNodeKey clears the [epAddr] associated with nk. This is
+// called by an [*endpoint] when wireguard-go signals a mismatch between
+// a Cryptokey Routing identification outcome and the peer we believe to be
+// associated with the packet.
+//
+// NATs (including UDP relay servers) can cause collisions of [epAddr]s across
+// peers. This function resolves such collisions when they occur. A subsequent
+// lookup via endpointForEpAddr() will fail, leading to resolution via
+// [*lazyEndpoint] Cryptokey Routing identification.
+func (m *peerMap) clearEpAddrForNodeKey(nk key.NodePublic) {
+ if pi := m.byNodeKey[nk]; pi != nil {
+ delete(m.byEpAddr, pi.epAddr)
+ }
+}
+
// setNodeKeyForEpAddr makes future peer lookups by addr return the
// same endpoint as a lookup by nk.
//
@@ -182,22 +175,11 @@ func (m *peerMap) upsertEndpoint(ep *endpoint, oldDiscoKey key.DiscoPublic) {
// WireGuard for packets received from addr.
func (m *peerMap) setNodeKeyForEpAddr(addr epAddr, nk key.NodePublic) {
if pi := m.byEpAddr[addr]; pi != nil {
- delete(pi.epAddrs, addr)
+ pi.epAddr = epAddr{}
delete(m.byEpAddr, addr)
- if addr.vni.isSet() {
- delete(m.relayEpAddrByNodeKey, pi.ep.publicKey)
- }
}
if pi, ok := m.byNodeKey[nk]; ok {
- if addr.vni.isSet() {
- relay, ok := m.relayEpAddrByNodeKey[nk]
- if ok {
- delete(pi.epAddrs, relay)
- delete(m.byEpAddr, relay)
- }
- m.relayEpAddrByNodeKey[nk] = addr
- }
- pi.epAddrs.Add(addr)
+ pi.epAddr = addr
m.byEpAddr[addr] = pi
}
}
@@ -225,8 +207,5 @@ func (m *peerMap) deleteEndpoint(ep *endpoint) {
// Unexpected. But no logger plumbed here to log so.
return
}
- for ip := range pi.epAddrs {
- delete(m.byEpAddr, ip)
- }
- delete(m.relayEpAddrByNodeKey, ep.publicKey)
+ delete(m.byEpAddr, pi.epAddr)
}