summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Fernando Serboncini <fserb@tailscale.com>, 2026-02-04 20:14:48 -0500
committer: Fernando Serboncini <fserb@tailscale.com>, 2026-02-05 16:16:29 -0500
commit: 357aa8b6b63484a838821aad150c199214e97340 (patch)
tree: c35c82dad02824e30d89b467f6e675984327b618
parent: 036b6a12621306da8368b167deb9858d4a8d6ce9 (diff)
download: tailscale-fserb/natlab-flaky.tar.xz
download: tailscale-fserb/natlab-flaky.zip
DO NOT SUBMIT: tsnet/natlab flaky investigation (ref: fserb/natlab-flaky)
Signed-off-by: Fernando Serboncini <fserb@tailscale.com>
-rw-r--r-- tstest/integration/nat/nat_test.go | 50
-rw-r--r-- tstest/natlab/vnet/conf.go | 40
-rw-r--r-- tstest/natlab/vnet/vnet.go | 14
3 files changed, 82 insertions, 22 deletions
diff --git a/tstest/integration/nat/nat_test.go b/tstest/integration/nat/nat_test.go
index 2aea7c296..98dd8f854 100644
--- a/tstest/integration/nat/nat_test.go
+++ b/tstest/integration/nat/nat_test.go
@@ -119,9 +119,13 @@ func v6cidr(n int) string {
func easy(c *vnet.Config) *vnet.Node {
n := c.NumNodes() + 1
- return c.AddNode(c.AddNetwork(
+ nw := c.AddNetwork(
fmt.Sprintf("2.%d.%d.%d", n, n, n), // public IP
- fmt.Sprintf("192.168.%d.1/24", n), vnet.EasyNAT))
+ fmt.Sprintf("192.168.%d.1/24", n), vnet.EasyNAT)
+ if n == 2 {
+ nw.SetDERPLatency(500 * time.Millisecond)
+ }
+ return c.AddNode(nw)
}
func easyAnd6(c *vnet.Config) *vnet.Node {
@@ -415,7 +419,7 @@ func (nt *natTest) runTest(addNode ...addNodeFunc) pingRoute {
return ""
}
- pingRes, err := ping(ctx, clients[0], sts[1].Self.TailscaleIPs[0])
+ pingRes, err := ping(ctx, t, clients[0], sts[1].Self.TailscaleIPs[0])
if err != nil {
t.Fatalf("ping failure: %v", err)
}
@@ -450,14 +454,16 @@ const (
routeNil pingRoute = "nil" // *ipnstate.PingResult is nil
)
-func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) {
+func ping_old(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) {
n := 0
var res *ipnstate.PingResult
anyPong := false
for n < 10 {
n++
+ t.Logf("ping_old attempt %d to %v ...", n, target)
pr, err := c.PingWithOpts(ctx, target, tailcfg.PingDisco, tailscale.PingOpts{})
if err != nil {
+ t.Logf("ping_old attempt %d error: %v", n, err)
if anyPong {
return res, nil
}
@@ -467,8 +473,10 @@ func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr) (*ipn
return nil, errors.New(pr.Err)
}
if pr.DERPRegionID == 0 {
+ t.Logf("ping_old attempt %d: direct (endpoint %v, latency %v)", n, pr.Endpoint, pr.LatencySeconds)
return pr, nil
}
+ t.Logf("ping_old attempt %d: via DERP region %d (latency %v)", n, pr.DERPRegionID, pr.LatencySeconds)
res = pr
select {
case <-ctx.Done():
@@ -481,6 +489,40 @@ func ping(ctx context.Context, c *vnet.NodeAgentClient, target netip.Addr) (*ipn
return res, nil
}
+func ping(ctx context.Context, t testing.TB, c *vnet.NodeAgentClient, target netip.Addr) (*ipnstate.PingResult, error) {
+ var lastRes *ipnstate.PingResult
+ for n := range 10 {
+ t.Logf("ping attempt %d to %v ...", n+1, target)
+ pingCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
+ pr, err := c.PingWithOpts(pingCtx, target, tailcfg.PingDisco, tailscale.PingOpts{})
+ cancel()
+ if err != nil {
+ t.Logf("ping attempt %d error: %v", n+1, err)
+ if ctx.Err() != nil {
+ break
+ }
+ continue
+ }
+ if pr.Err != "" {
+ return nil, errors.New(pr.Err)
+ }
+ t.Logf("ping attempt %d: derp=%d endpoint=%v latency=%v", n+1, pr.DERPRegionID, pr.Endpoint, pr.LatencySeconds)
+ if pr.DERPRegionID == 0 {
+ return pr, nil
+ }
+ lastRes = pr
+ select {
+ case <-ctx.Done():
+ return lastRes, nil
+ case <-time.After(time.Second):
+ }
+ }
+ if lastRes != nil {
+ return lastRes, nil
+ }
+ return nil, fmt.Errorf("no ping response (ctx: %v)", ctx.Err())
+}
+
func up(ctx context.Context, c *vnet.NodeAgentClient) error {
req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/up", nil)
if err != nil {
diff --git a/tstest/natlab/vnet/conf.go b/tstest/natlab/vnet/conf.go
index 3f83e35c0..ca9127cc3 100644
--- a/tstest/natlab/vnet/conf.go
+++ b/tstest/natlab/vnet/conf.go
@@ -282,8 +282,9 @@ type Network struct {
svcs set.Set[NetworkService]
- latency time.Duration // latency applied to interface writes
- lossRate float64 // chance of packet loss (0.0 to 1.0)
+ latency time.Duration // latency applied to interface writes
+ lossRate float64 // chance of packet loss (0.0 to 1.0)
+ derpLatency time.Duration // extra latency for DERP-related packets
// ...
err error // carried error
@@ -304,6 +305,10 @@ func (n *Network) SetPacketLoss(rate float64) {
n.lossRate = rate
}
+func (n *Network) SetDERPLatency(d time.Duration) {
+ n.derpLatency = d
+}
+
// SetBlackholedIPv4 sets whether the network should blackhole all IPv4 traffic
// out to the Internet. (DHCP etc continues to work on the LAN.)
func (n *Network) SetBlackholedIPv4(v bool) {
@@ -372,21 +377,22 @@ func (s *Server) initFromConfig(c *Config) error {
conf.lanIP4 = netip.MustParsePrefix("192.168.0.0/24")
}
n := &network{
- num: conf.num,
- s: s,
- mac: conf.mac,
- portmap: conf.svcs.Contains(NATPMP), // TODO: expand network.portmap
- wanIP6: conf.wanIP6,
- v4: conf.lanIP4.IsValid(),
- v6: conf.wanIP6.IsValid(),
- wanIP4: conf.wanIP4,
- lanIP4: conf.lanIP4,
- breakWAN4: conf.breakWAN4,
- latency: conf.latency,
- lossRate: conf.lossRate,
- nodesByIP4: map[netip.Addr]*node{},
- nodesByMAC: map[MAC]*node{},
- logf: logger.WithPrefix(s.logf, fmt.Sprintf("[net-%v] ", conf.mac)),
+ num: conf.num,
+ s: s,
+ mac: conf.mac,
+ portmap: conf.svcs.Contains(NATPMP), // TODO: expand network.portmap
+ wanIP6: conf.wanIP6,
+ v4: conf.lanIP4.IsValid(),
+ v6: conf.wanIP6.IsValid(),
+ wanIP4: conf.wanIP4,
+ lanIP4: conf.lanIP4,
+ breakWAN4: conf.breakWAN4,
+ latency: conf.latency,
+ lossRate: conf.lossRate,
+ derpLatency: conf.derpLatency,
+ nodesByIP4: map[netip.Addr]*node{},
+ nodesByMAC: map[MAC]*node{},
+ logf: logger.WithPrefix(s.logf, fmt.Sprintf("[net-%v] ", conf.mac)),
}
netOfConf[conf] = n
s.networks.Add(n)
diff --git a/tstest/natlab/vnet/vnet.go b/tstest/natlab/vnet/vnet.go
index 357fe213c..e01be9148 100644
--- a/tstest/natlab/vnet/vnet.go
+++ b/tstest/natlab/vnet/vnet.go
@@ -268,12 +268,22 @@ func (n *network) handleIPPacketFromGvisor(ipRaw []byte) {
return
}
if nw, ok := n.writers.Load(node.mac); ok {
- nw.write(resPkt)
+ if d := n.derpLatency; d > 0 && n.isDERPPacket(flow.src) {
+ pkt := make([]byte, len(resPkt))
+ copy(pkt, resPkt)
+ time.AfterFunc(d, func() { nw.write(pkt) })
+ } else {
+ nw.write(resPkt)
+ }
} else {
n.logf("gvisor write: no writeFunc for %v", node.mac)
}
}
+func (n *network) isDERPPacket(ip netip.Addr) bool {
+ return fakeDERP1.Match(ip) || fakeDERP2.Match(ip)
+}
+
func netaddrIPFromNetstackIP(s tcpip.Address) netip.Addr {
switch s.Len() {
case 4:
@@ -435,6 +445,7 @@ func (n *network) serveLogCatcherConn(clientRemoteIP netip.Addr, c net.Conn) {
for _, lg := range logs {
tStr := lg.Logtail.Client_Time.Round(time.Millisecond).Format(time.RFC3339Nano)
fmt.Fprintf(&node.logBuf, "[%v] %s\n", tStr, lg.Text)
+ n.s.logf("[%v] %s: %s", node, tStr, lg.Text)
}
}
})
@@ -520,6 +531,7 @@ type network struct {
breakWAN4 bool // break WAN IPv4 connectivity
latency time.Duration // latency applied to interface writes
lossRate float64 // probability of dropping a packet (0.0 to 1.0)
+ derpLatency time.Duration // extra latency for DERP-related packets
nodesByIP4 map[netip.Addr]*node // by LAN IPv4
nodesByMAC map[MAC]*node
logf func(format string, args ...any)