summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--tka/limits.go2
-rw-r--r--tka/state.go2
-rw-r--r--tka/sync.go35
-rw-r--r--tka/sync_test.go159
4 files changed, 115 insertions, 83 deletions
diff --git a/tka/limits.go b/tka/limits.go
index a644df061..0091075c8 100644
--- a/tka/limits.go
+++ b/tka/limits.go
@@ -21,7 +21,7 @@ const (
maxSyncIter = 2000
// Max iterations searching for a head intersection during the sync process.
- maxSyncHeadIntersectionIter = 400
+ maxSyncHeadIntersectionIter = 1000
// Limit on scanning AUM trees, chosen arbitrarily.
maxScanIterations = 2000
diff --git a/tka/state.go b/tka/state.go
index 69b3dbfeb..89f48009d 100644
--- a/tka/state.go
+++ b/tka/state.go
@@ -33,7 +33,7 @@ type State struct {
// possesses a valid DisablementSecret. These values are used during the
// Tailnet Lock deactivation process.
//
- // These are safe to share publicly or store in the clear. They cannot be
+ // These are safe to share publicly or store in the clear. They cannot be
// used to derive the original DisablementSecret.
DisablementValues [][]byte `cbor:"2,keyasint"`
diff --git a/tka/sync.go b/tka/sync.go
index 5cae9b45f..20a033864 100644
--- a/tka/sync.go
+++ b/tka/sync.go
@@ -60,18 +60,6 @@ func FromSyncOffer(offer SyncOffer) (head string, ancestors []string, err error)
return string(headBytes), ancestors, nil
}
-const (
- // The starting number of AUMs to skip when listing
- // ancestors in a SyncOffer.
- ancestorsSkipStart = 4
-
- // How many bits to advance the skip count when listing
- // ancestors in a SyncOffer.
- //
- // 2 bits, so (4<<2), so after skipping 4 it skips 16.
- ancestorsSkipShift = 2
-)
-
// SyncOffer returns an abbreviated description of the current AUM
// chain, which can be used to synchronize with another (untrusted)
// Authority instance.
@@ -92,20 +80,10 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
}
- // We send some subset of our ancestors to help the remote
- // find a more-recent 'head intersection'.
- // The number of AUMs between each ancestor entry gets
- // exponentially larger.
- var (
- skipAmount uint64 = ancestorsSkipStart
- curs AUMHash = a.Head()
- )
- for i := range uint64(maxSyncHeadIntersectionIter) {
- if i > 0 && (i%skipAmount) == 0 {
- out.Ancestors = append(out.Ancestors, curs)
- skipAmount = skipAmount << ancestorsSkipShift
- }
-
+ // We send all our checkpoints to help the remote find a
+ // more-recent 'head intersection'.
+ curs := a.Head()
+ for range maxSyncHeadIntersectionIter {
parent, err := storage.AUM(curs)
if err != nil {
if err != os.ErrNotExist {
@@ -118,6 +96,11 @@ func (a *Authority) SyncOffer(storage Chonk) (SyncOffer, error) {
if parent.Hash() == oldest {
break
}
+
+ if parent.MessageKind == AUMCheckpoint {
+ out.Ancestors = append(out.Ancestors, curs)
+ }
+
copy(curs[:], parent.PrevAUMHash)
}
diff --git a/tka/sync_test.go b/tka/sync_test.go
index 68f659ae5..c520138a8 100644
--- a/tka/sync_test.go
+++ b/tka/sync_test.go
@@ -5,44 +5,13 @@ package tka
import (
"bytes"
- "strconv"
"testing"
"github.com/google/go-cmp/cmp"
+ "tailscale.com/tstest"
+ "tailscale.com/util/must"
)
-func TestSyncOffer(t *testing.T) {
- c := newTestchain(t, `
- A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
- A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
- A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
- `)
- storage := c.Chonk()
- a, err := Open(storage)
- if err != nil {
- t.Fatal(err)
- }
- got, err := a.SyncOffer(storage)
- if err != nil {
- t.Fatal(err)
- }
-
- // A SyncOffer includes a selection of AUMs going backwards in the tree,
- // progressively skipping more and more each iteration.
- want := SyncOffer{
- Head: c.AUMHashes["A25"],
- Ancestors: []AUMHash{
- c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
- c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
- c.AUMHashes["A1"],
- },
- }
-
- if diff := cmp.Diff(want, got); diff != "" {
- t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
- }
-}
-
func TestComputeSyncIntersection_FastForward(t *testing.T) {
// Node 1 has: A1 -> A2
// Node 2 has: A1 -> A2 -> A3 -> A4
@@ -131,15 +100,6 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- if diff := cmp.Diff(SyncOffer{
- Head: c.AUMHashes["F1"],
- Ancestors: []AUMHash{
- c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
- c.AUMHashes["A1"],
- },
- }, offer1); diff != "" {
- t.Errorf("offer1 diff (-want, +got):\n%s", diff)
- }
chonk2 := c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10")
n2, err := Open(chonk2)
@@ -150,21 +110,12 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
if err != nil {
t.Fatal(err)
}
- if diff := cmp.Diff(SyncOffer{
- Head: c.AUMHashes["A10"],
- Ancestors: []AUMHash{
- c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
- c.AUMHashes["A1"],
- },
- }, offer2); diff != "" {
- t.Errorf("offer2 diff (-want, +got):\n%s", diff)
- }
// Node 1 only knows about the first eight nodes, so the head of n2 is
// alien to it.
t.Run("n1", func(t *testing.T) {
- // n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
- wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
+ // n2 has 10 nodes, so the first common ancestor is the genesis AUM
+ wantIntersection := c.AUMHashes["A1"]
got, err := computeSyncIntersection(chonk1, offer1, offer2)
if err != nil {
@@ -180,8 +131,8 @@ func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
// Node 2 knows about the full chain but doesn't recognize the head.
t.Run("n2", func(t *testing.T) {
- // n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
- wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
+ // n1 has 9 nodes, so the first common ancestor is the genesis AUM
+ wantIntersection := c.AUMHashes["A1"]
got, err := computeSyncIntersection(chonk2, offer2, offer1)
if err != nil {
@@ -375,3 +326,101 @@ func TestSyncSimpleE2E(t *testing.T) {
t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
}
}
+
+// TestSyncFromFarBehind checks that nodes with compacted state can still find
+// a common ancestor when the remote is significantly ahead.
+//
+// We simulate a node that has compacted its early history and is now ~500 AUMs
+// behind the control plane, a distance that previously caused exponential sampling
+// in SyncOffer to skip the node's entire local history.
+//
+// Regression test for http://go/corp/40404
+func TestSyncFromFarBehind(t *testing.T) {
+ pub1, priv1 := testingKey25519(t, 1)
+ pub2, _ := testingKey25519(t, 2)
+ signer1 := signer25519(priv1)
+
+ key1 := Key{Kind: Key25519, Public: pub1, Votes: 2}
+ key2 := Key{Kind: Key25519, Public: pub2, Votes: 2}
+
+ // Setup: persistentAuthority (control plane) vs compactingAuthority (client node).
+ state := State{
+ Keys: []Key{key1},
+ DisablementValues: [][]byte{DisablementKDF([]byte{1, 2, 3})},
+ }
+
+ persistentStorage, compactingStorage := ChonkMem(), ChonkMem()
+ persistentSize := func() int { return len(must.Get(persistentStorage.AllAUMs())) }
+ compactingSize := func() int { return len(must.Get(compactingStorage.AllAUMs())) }
+
+ persistentAuthority, genesisAUM := must.Get2(Create(persistentStorage, state, signer1))
+ compactingAuthority := must.Get(Bootstrap(compactingStorage, genesisAUM))
+
+ // Backdate the clock on the compactingStorage so all AUMs will be old enough
+ // to be considered for compacting.
+ clock := tstest.NewClock(tstest.ClockOpts{})
+ compactingStorage.SetClock(clock)
+
+ // 1. Generate enough history to trigger checkpoints.
+ for range checkpointEvery * 2 {
+ update := persistentAuthority.NewUpdater(signer1)
+
+ must.Do(update.AddKey(key2))
+ addKey := must.Get(update.Finalize(persistentStorage))
+ must.Do(persistentAuthority.Inform(persistentStorage, addKey))
+ must.Do(compactingAuthority.Inform(compactingStorage, addKey))
+
+ update = persistentAuthority.NewUpdater(signer1)
+ must.Do(update.RemoveKey(key2.MustID()))
+ removeKey := must.Get(update.Finalize(persistentStorage))
+ must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
+ must.Do(compactingAuthority.Inform(compactingStorage, removeKey))
+ }
+
+ t.Logf("genesis and first batch of AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())
+
+ // 2. Compact the node state.
+ //
+ // It now has a different 'oldestAncestor' than the control plane.
+ must.Do(compactingAuthority.Compact(compactingStorage, CompactionDefaults))
+
+ // 3. Advance the control plane far beyond the node.
+ //
+ // As of 2026-04-17, the largest TKA has ~750 AUMs.
+ //
+ // If you keep increasing this number, eventually the sync will fail because you
+ // hit the hard-coded limits on iteration during the sync process.
+ for persistentSize() < compactingSize()+800 {
+ b := persistentAuthority.NewUpdater(signer1)
+
+ must.Do(b.AddKey(key2))
+ addKey := must.Get(b.Finalize(persistentStorage))
+ must.Do(persistentAuthority.Inform(persistentStorage, addKey))
+
+ b = persistentAuthority.NewUpdater(signer1)
+ must.Do(b.RemoveKey(key2.MustID()))
+ removeKey := must.Get(b.Finalize(persistentStorage))
+ must.Do(persistentAuthority.Inform(persistentStorage, removeKey))
+ }
+
+ t.Logf("post-compacting and extra AUMs: persistent = %d, compacting = %d", persistentSize(), compactingSize())
+
+ // 4. Verify Intersection.
+ // The node should find an intersection even with a 500-AUM gap.
+ persistentOffer := must.Get(persistentAuthority.SyncOffer(persistentStorage))
+ compactingOffer := must.Get(compactingAuthority.SyncOffer(compactingStorage))
+
+ if _, err := compactingAuthority.MissingAUMs(compactingStorage, persistentOffer); err != nil {
+ t.Errorf("node failed to find intersection with far-ahead control plane: %v", err)
+ }
+
+ // 4. Check that the persistent authority can find an intersection with the
+ // compacting authority, and has missing AUMs to send it.
+ missing, err := persistentAuthority.MissingAUMs(persistentStorage, compactingOffer)
+ if len(missing) == 0 {
+ t.Errorf("control plane did not find any missing AUMs for node")
+ }
+ if err != nil {
+ t.Errorf("control plane failed to find missing AUMs for node: %v", err)
+ }
+}