summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorClaus Lensbøl <claus@tailscale.com>2026-03-27 10:03:07 -0400
committerClaus Lensbøl <claus@tailscale.com>2026-03-27 10:05:23 -0400
commiteed936e4dd0f3273e03f300241d869252c59e115 (patch)
treef30e7e14f9979737f35d19166dd09af3335f4422
parent92da36dd01ab242aac08079a96eaf2b2ca5bbedf (diff)
downloadtailscale-cmol/natlab_tsmp_test.tar.xz
tailscale-cmol/natlab_tsmp_test.zip
tstest/{integration,natlab}: add test for async caching casecmol/natlab_tsmp_test
WIP — DO NOT MERGE. Adds a test for netmap caching that uses one node connected to control and one node that is not. Instead of looking at the ping metrics, the test verifies that the connection ended up being direct and that the expected client metric was incremented on the way there.
-rw-r--r--tstest/integration/nat/nat_test.go75
-rw-r--r--tstest/natlab/vnet/vnet.go3
-rw-r--r--wgengine/userspace.go4
3 files changed, 75 insertions, 7 deletions
diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go
index 2ac16bf58..4a1902faf 100644
--- a/tstest/integration/nat/nat_test.go
+++ b/tstest/integration/nat/nat_test.go
@@ -17,6 +17,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "strconv"
"strings"
"sync"
"testing"
@@ -44,6 +45,7 @@ type natTest struct {
tempDir string // for qcow2 images
vnet *vnet.Server
kernel string // linux kernel path
+ clients []*vnet.NodeAgentClient
gotRoute pingRoute
}
@@ -152,6 +154,21 @@ func easyNoControlDiscoRotate(c *vnet.Config) *vnet.Node {
vnet.RotateDisco, vnet.PreICMPPing, nw)
}
+// easyNetmapCacheEnabled adds a network using vnet.EasyNAT and a node on it
+// that carries the TailscaledEnv entry TS_USE_CACHED_NETMAP=true, which
+// enables netmap caching, including reading from the cache and
+// parsing/sending TSMP messages.
+func easyNetmapCacheEnabled(c *vnet.Config) *vnet.Node {
+	idx := c.NumNodes() + 1
+	publicIP := fmt.Sprintf("2.%d.%d.%d", idx, idx, idx)
+	lanCIDR := fmt.Sprintf("192.168.%d.1/24", idx)
+	network := c.AddNetwork(publicIP, lanCIDR, vnet.EasyNAT)
+	cacheEnv := vnet.TailscaledEnv{
+		Key:   "TS_USE_CACHED_NETMAP",
+		Value: "true",
+	}
+	return c.AddNode(cacheEnv, network)
+}
+
func v6AndBlackholedIPv4(c *vnet.Config) *vnet.Node {
n := c.NumNodes() + 1
nw := c.AddNetwork(
@@ -379,16 +396,15 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
- var clients []*vnet.NodeAgentClient
for _, n := range nodes {
client := nt.vnet.NodeAgentClient(n)
n.SetClient(client)
- clients = append(clients, client)
+ nt.clients = append(nt.clients, client)
}
sts := make([]*ipnstate.Status, len(nodes))
var eg errgroup.Group
- for i, c := range clients {
+ for i, c := range nt.clients {
eg.Go(func() error {
node := nodes[i]
t.Logf("%v calling Status...", node)
@@ -447,19 +463,31 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
// Should we send traffic across the nodes before starting disco?
// For nodes that rotated disco keys after control going away.
if preICMPPing {
- _, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingICMP)
+ res, err := ping(ctx, t, nt.clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingICMP)
if err != nil {
t.Fatalf("ICMP ping failure: %v", err)
}
+
+ t.Logf("ICMP ping route: %v", classifyPing(res))
}
- pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco)
+ pingRes, err := ping(ctx, t, nt.clients[0], sts[1].Self.TailscaleIPs[0], tailcfg.PingDisco)
+
if err != nil {
t.Fatalf("ping failure: %v", err)
}
nt.gotRoute = classifyPing(pingRes)
t.Logf("ping route: %v", nt.gotRoute)
+ // Capture client metrics for use in validating test success.
+ for _, client := range nt.clients {
+ if mb, err := client.DaemonMetrics(ctx); err == nil {
+ client.CapturedMetrics = mb
+ } else {
+ t.Logf("DaemonMetrics: %v (metrics assertions will be skipped)", err)
+ }
+ }
+
return nt.gotRoute
}
@@ -561,6 +589,35 @@ func (nt *natTest) want(r pingRoute) {
}
}
+// wantWGEngineTSMPResetAvoided reports a test error unless the metrics
+// captured on client contain wgengine_tsmp_disco_key_reset_avoided with a
+// value of at least 1.
+//
+// A nil client, or a client with no captured metrics, is reported as its own
+// error so that a missing capture is not mistaken for a zero counter.
+func (nt *natTest) wantWGEngineTSMPResetAvoided(client *vnet.NodeAgentClient) {
+	nt.tb.Helper()
+	const metric = "wgengine_tsmp_disco_key_reset_avoided"
+	if client == nil {
+		nt.tb.Error("wantWGEngineTSMPResetAvoided: client is nil")
+		return
+	}
+	if len(client.CapturedMetrics) == 0 {
+		nt.tb.Error("wantWGEngineTSMPResetAvoided: no metrics were captured for client")
+		return
+	}
+	if val := parseClientMetric(nt.tb, client.CapturedMetrics, metric); val < 1 {
+		nt.tb.Errorf("WG engine reset for disco not avoided: %s = %d, want >= 1", metric, val)
+	}
+}
+
+// parseClientMetric scans data line by line for an entry of the form
+// "<name> <value>" and returns the value parsed as a base-10 int64.
+//
+// Lines beginning with "#" are ignored, as are lines that do not consist of
+// exactly two whitespace-separated fields. It returns 0 when no line matches
+// name. A matching line whose value is not a valid int64 is reported through
+// tb.Errorf and also yields 0, so a malformed value is never silently read as
+// a real counter.
+func parseClientMetric(tb testing.TB, data []byte, name string) int64 {
+	tb.Helper()
+	for line := range strings.SplitSeq(string(data), "\n") {
+		if strings.HasPrefix(line, "#") {
+			continue
+		}
+		parts := strings.Fields(line)
+		if len(parts) != 2 || parts[0] != name {
+			continue
+		}
+		v, err := strconv.ParseInt(parts[1], 10, 64)
+		if err != nil {
+			tb.Errorf("parseClientMetric: metric %q has non-integer value %q: %v", name, parts[1], err)
+			return 0
+		}
+		return v
+	}
+	return 0
+}
+
func TestEasyEasy(t *testing.T) {
nt := newNatTest(t)
nt.runTest(easy, easy)
@@ -574,6 +631,14 @@ func TestTwoEasyNoControlDiscoRotate(t *testing.T) {
nt.want(routeDirect)
}
+// TestTSMPOneConnectedOneNot runs the NAT test with an
+// easyNoControlDiscoRotate node and an easyNetmapCacheEnabled node, then
+// checks the first client's captured metrics for an avoided WG engine reset
+// and expects a direct route. It is currently skipped; see the linked issue.
+func TestTSMPOneConnectedOneNot(t *testing.T) {
+	t.Skip("Test is not working and should be wired up for a cached netmap. https://github.com/tailscale/tailscale/issues/19141")
+
+	nt := newNatTest(t)
+	nt.runTest(easyNoControlDiscoRotate, easyNetmapCacheEnabled)
+
+	firstClient := nt.clients[0]
+	nt.wantWGEngineTSMPResetAvoided(firstClient)
+	nt.want(routeDirect)
+}
+
// Issue tailscale/corp#26438: use learned DERP route as send path of last
// resort
//
diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go
index 9eb81520c..9f3db11c2 100644
--- a/tstest/natlab/vnet/vnet.go
+++ b/tstest/natlab/vnet/vnet.go
@@ -2133,7 +2133,8 @@ func (s *Server) takeAgentConnOne(n *node) (_ *agentConn, ok bool) {
type NodeAgentClient struct {
*local.Client
- HTTPClient *http.Client
+ HTTPClient *http.Client
+ CapturedMetrics []byte
}
func (s *Server) NodeAgentDialer(n *Node) netx.DialFunc {
diff --git a/wgengine/userspace.go b/wgengine/userspace.go
index 364c70c9c..dced6d134 100644
--- a/wgengine/userspace.go
+++ b/wgengine/userspace.go
@@ -1140,7 +1140,8 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config,
if discoTSMP, okTSMP := e.tsmpLearnedDisco[p.PublicKey]; okTSMP &&
discoTSMP == p.DiscoKey {
delete(e.tsmpLearnedDisco, p.PublicKey)
- e.logf("wgengine: Skipping reconfig (TSMP key): %s changed from %q to %q", pub.ShortString(), old, p.DiscoKey)
+ e.logf("wgengine: Skipping reconfig (TSMP key) for: %s", pub.ShortString())
+ metricTSMPDiscoKeyResetAvoided.Add(1)
continue
}
@@ -1875,6 +1876,7 @@ var (
metricTSMPDiscoKeyAdvertisementSent = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_sent")
metricTSMPDiscoKeyAdvertisementError = clientmetric.NewCounter("magicsock_tsmp_disco_key_advertisement_error")
+ metricTSMPDiscoKeyResetAvoided = clientmetric.NewCounter("wgengine_tsmp_disco_key_reset_avoided")
)
func (e *userspaceEngine) InstallCaptureHook(cb packet.CaptureCallback) {