diff options
| author | Andrew Dunham <andrew@du.nham.ca> | 2024-04-04 16:35:52 -0400 |
|---|---|---|
| committer | Andrew Dunham <andrew@du.nham.ca> | 2024-04-04 16:35:52 -0400 |
| commit | b8f89c93acdf3ca99796bbc3d956dee882d5d125 (patch) | |
| tree | ac4945412f1e6f26cb812e9ad3bb5f73996f008e /control/controlclient/direct.go | |
| parent | 853e3e29a0a62107f274afb537c7972789a00d2a (diff) | |
| download | tailscale-andrew/controlclient-use-last-addr.tar.xz tailscale-andrew/controlclient-use-last-addr.zip | |
control/controlclient: try reconnecting to last successful addr (branch: andrew/controlclient-use-last-addr)
If we lose our connection to the control server (e.g. due to a restart,
or a network blip, etc), try reconnecting to the same address first
before going through the whole control dialplan and/or DNS flow.
This ensures that we're a bit "sticky", which makes load balancing
easier by improving the chances that this client will hit a server with
a warm cache. It also reduces the thundering herd of requests that hit
other servers after we restart a single one.
Updates #TODO
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I6c3ef0b088468a8888c05cf8e3813056118ec835
Diffstat (limited to 'control/controlclient/direct.go')
| -rw-r--r-- | control/controlclient/direct.go | 32 |
1 files changed, 24 insertions, 8 deletions
diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index f5d1f0410..8c501dc10 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -42,6 +42,7 @@ import ( "tailscale.com/net/tlsdial" "tailscale.com/net/tsdial" "tailscale.com/net/tshttpproxy" + "tailscale.com/syncs" "tailscale.com/tailcfg" "tailscale.com/tka" "tailscale.com/tstime" @@ -82,6 +83,11 @@ type Direct struct { dialPlan ControlDialPlanner // can be nil + // lastServerAddr is set to the most recent address that we + // successfully connected to. It is used to prioritize this address + // when reconnecting (e.g. when a control server restart happens). + lastServerAddr syncs.AtomicValue[netip.Addr] + mu sync.Mutex // mutex guards the following fields serverLegacyKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key; only used for signRegisterRequest on Windows now serverNoiseKey key.MachinePublic @@ -1428,6 +1434,8 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, d time.Duration, cl } } +var useLastAddr = envknob.RegisterBool("TS_CONTROLCLIENT_USE_LAST_ADDR") + // getNoiseClient returns the noise client, creating one if one doesn't exist. 
func (c *Direct) getNoiseClient() (*NoiseClient, error) { c.mu.Lock() @@ -1444,6 +1452,12 @@ func (c *Direct) getNoiseClient() (*NoiseClient, error) { if c.dialPlan != nil { dp = c.dialPlan.Load } + + var lastAddr *syncs.AtomicValue[netip.Addr] + if useLastAddr() { + lastAddr = &c.lastServerAddr + } + nc, err, _ := c.sfGroup.Do(struct{}{}, func() (*NoiseClient, error) { k, err := c.getMachinePrivKey() if err != nil { @@ -1451,18 +1465,20 @@ func (c *Direct) getNoiseClient() (*NoiseClient, error) { } c.logf("[v1] creating new noise client") nc, err := NewNoiseClient(NoiseOpts{ - PrivKey: k, - ServerPubKey: serverNoiseKey, - ServerURL: c.serverURL, - Dialer: c.dialer, - DNSCache: c.dnsCache, - Logf: c.logf, - NetMon: c.netMon, - DialPlan: dp, + PrivKey: k, + ServerPubKey: serverNoiseKey, + ServerURL: c.serverURL, + Dialer: c.dialer, + DNSCache: c.dnsCache, + Logf: c.logf, + NetMon: c.netMon, + DialPlan: dp, + LastServerAddr: lastAddr, }) if err != nil { return nil, err } + c.mu.Lock() defer c.mu.Unlock() c.noiseClient = nc |
