summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorTom DNetto <tom@tailscale.com>2022-07-18 13:00:32 -0700
committerTom <twitchyliquid64@users.noreply.github.com>2022-07-19 09:58:36 -0700
commit393a229de98969d435ea3d403ebb992d3c9f8324 (patch)
tree588a0d2840c1bca1d5aad7b75f42657e68aea4a1
parent165c8f898e75e0c49cf0a5bef12be7d64534d205 (diff)
downloadtailscale-393a229de98969d435ea3d403ebb992d3c9f8324.tar.xz
tailscale-393a229de98969d435ea3d403ebb992d3c9f8324.zip
tka: implement synchronization mechanics
This PR implements the synchronization mechanics for TKA: generating a SyncOffer, processing a SyncOffer to find an intersection, and computing the set of AUMs that should be transmitted to effect convergence. This is the final PR implementing core mechanics for TKA. Signed-off-by: Tom DNetto <tom@tailscale.com>
-rw-r--r--tka/sync.go251
-rw-r--r--tka/sync_test.go373
2 files changed, 624 insertions, 0 deletions
diff --git a/tka/sync.go b/tka/sync.go
new file mode 100644
index 000000000..7d4443016
--- /dev/null
+++ b/tka/sync.go
@@ -0,0 +1,251 @@
+// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tka
+
+import (
+ "errors"
+ "fmt"
+ "os"
+)
+
+const (
+ // Max iterations searching for any intersection.
+ maxSyncIter = 2000
+ // Max iterations searching for a head intersection.
+ maxSyncHeadIntersectionIter = 400
+)
+
+// ErrNoIntersection is returned when a shared AUM could
+// not be determined when evaluating a remote sync offer.
+var ErrNoIntersection = errors.New("no intersection")
+
+// SyncOffer conveys information about the current head & ancestor AUMs,
+// for the purpose of synchronization with some remote end.
+//
+// Ancestors should contain a subset of the ancestors of the chain.
+// The last entry in that slice is the oldest-known AUM in the chain.
+type SyncOffer struct {
+ Head AUMHash
+ Ancestors []AUMHash
+}
+
+const (
+ // The starting number of AUMs to skip when listing
+ // ancestors in a SyncOffer.
+ ancestorsSkipStart = 4
+
+ // How many bits to advance the skip count when listing
+ // ancestors in a SyncOffer.
+ //
+ // 2 bits, so (4<<2), so after skipping 4 it skips 16.
+ ancestorsSkipShift = 2
+)
+
+func (a *Authority) syncOffer() (SyncOffer, error) {
+ oldest := a.oldestAncestor.Hash()
+
+ out := SyncOffer{
+ Head: a.Head(),
+ Ancestors: make([]AUMHash, 0, 6), // 6 chosen arbitrarily.
+ }
+
+ // We send some subset of our ancestors to help the remote
+ // find a more-recent 'head intersection'.
+ // The number of AUMs between each ancestor entry gets
+ // exponentially larger.
+ var (
+ skipAmount uint64 = ancestorsSkipStart
+ curs AUMHash = a.Head()
+ )
+ for i := uint64(0); i < maxSyncHeadIntersectionIter; i++ {
+ if i > 0 && (i%skipAmount) == 0 {
+ out.Ancestors = append(out.Ancestors, curs)
+ skipAmount = skipAmount << ancestorsSkipShift
+ }
+
+ parent, err := a.storage.AUM(curs)
+ if err != nil {
+ if err != os.ErrNotExist {
+ return SyncOffer{}, err
+ }
+ break
+ }
+
+ // We add the oldest later on, so don't duplicate.
+ if parent.Hash() == oldest {
+ break
+ }
+ copy(curs[:], parent.PrevAUMHash)
+ }
+
+ out.Ancestors = append(out.Ancestors, oldest)
+ return out, nil
+}
+
+// SyncOffer returns an abbreviated description of the current AUM
+// chain, which can be used to synchronize with another (untrusted)
+// Authority instance.
+//
+// The returned SyncOffer structure should be transmitted to the remote
+// Authority, which should call MissingAUMs() using it to determine
+// AUMs which need to be transmitted. This list of AUMs from the remote
+// can then be applied locally with Inform().
+//
+// This SyncOffer + AUM exchange should be performed by both ends,
+// because its possible that either end has AUMs that the other needs
+// to find out about.
+func (a *Authority) SyncOffer() (SyncOffer, error) {
+ return a.syncOffer()
+}
+
+// intersection describes how to synchronize AUMs with a remote
+// authority.
+type intersection struct {
+ // if true, no exchange of AUMs is needed.
+ upToDate bool
+
+ // headIntersection is the latest common AUM on the remote. In other
+ // words, we need to send all AUMs since this one.
+ headIntersection *AUMHash
+
+ // tailIntersection is the oldest common AUM on the remote. In other
+ // words, we diverge with the remote after this AUM, so we both need
+ // to transmit our AUM chain starting here.
+ tailIntersection *AUMHash
+}
+
+// computeSyncIntersection determines the common AUMs between a local and
+// remote SyncOffer. This intersection can be used to synchronize both
+// sides.
+func computeSyncIntersection(authority *Authority, localOffer, remoteOffer SyncOffer) (*intersection, error) {
+ // Simple case: up to date.
+ if remoteOffer.Head == localOffer.Head {
+ return &intersection{upToDate: true, headIntersection: &localOffer.Head}, nil
+ }
+
+ // Case: 'head intersection'
+ // If we have the remote's head, its more likely than not that
+ // we have updates that build on that head. To confirm this,
+ // we iterate backwards through our chain to see if the given
+ // head is an ancestor of our current chain.
+ //
+ // In other words:
+ // <Us> A -> B -> C
+ // <Them> A -> B
+ // ∴ their head intersects with our chain, we need to send C
+ var hasRemoteHead bool
+ _, err := authority.storage.AUM(remoteOffer.Head)
+ if err != nil {
+ if err != os.ErrNotExist {
+ return nil, err
+ }
+ } else {
+ hasRemoteHead = true
+ }
+
+ if hasRemoteHead {
+ curs := localOffer.Head
+ for i := 0; i < maxSyncHeadIntersectionIter; i++ {
+ parent, err := authority.storage.AUM(curs)
+ if err != nil {
+ if err != os.ErrNotExist {
+ return nil, err
+ }
+ break
+ }
+
+ if parent.Hash() == remoteOffer.Head {
+ h := parent.Hash()
+ return &intersection{headIntersection: &h}, nil
+ }
+
+ copy(curs[:], parent.PrevAUMHash)
+ }
+ }
+
+ // Case: 'tail intersection'
+ // So we don't have a clue what the remote's head is, but
+ // if one of the ancestors they gave us is part of our chain,
+ // then theres an intersection, which is a starting point for
+ // the remote to send us AUMs from.
+ //
+ // We iterate the list of ancestors in order because the remote
+ // ordered them such that the newer ones are earlier, so with
+ // a bit of luck we can use an earlier one and hence do less work /
+ // transmit fewer AUMs.
+ for _, a := range remoteOffer.Ancestors {
+ state, err := computeStateAt(authority.storage, maxSyncIter, a)
+ if err != nil {
+ if err != os.ErrNotExist {
+ return nil, fmt.Errorf("computeStateAt: %v", err)
+ }
+ continue
+ }
+
+ end, _, err := fastForward(authority.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+ return curs.Hash() == localOffer.Head
+ })
+ if err != nil {
+ return nil, err
+ }
+ // fastForward can terminate before the done condition if there are
+ // no more children left, so we check again before considering this
+ // an intersection.
+ if end.Hash() == localOffer.Head {
+ return &intersection{tailIntersection: &a}, nil
+ }
+ }
+
+ return nil, ErrNoIntersection
+}
+
+// MissingAUMs returns AUMs a remote may be missing based on the
+// remotes' SyncOffer.
+func (a *Authority) MissingAUMs(remoteOffer SyncOffer) ([]AUM, error) {
+ localOffer, err := a.syncOffer()
+ if err != nil {
+ return nil, fmt.Errorf("local syncOffer: %v", err)
+ }
+ intersection, err := computeSyncIntersection(a, localOffer, remoteOffer)
+ if err != nil {
+ return nil, fmt.Errorf("intersection: %v", err)
+ }
+ if intersection.upToDate {
+ return nil, nil
+ }
+ out := make([]AUM, 0, 12) // 12 chosen arbitrarily.
+
+ if intersection.headIntersection != nil {
+ state, err := computeStateAt(a.storage, maxSyncIter, *intersection.headIntersection)
+ if err != nil {
+ return nil, err
+ }
+
+ _, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+ if curs.Hash() != *intersection.headIntersection {
+ out = append(out, curs)
+ }
+ return false
+ })
+ return out, err
+ }
+
+ if intersection.tailIntersection != nil {
+ state, err := computeStateAt(a.storage, maxSyncIter, *intersection.tailIntersection)
+ if err != nil {
+ return nil, err
+ }
+
+ _, _, err = fastForward(a.storage, maxSyncIter, state, func(curs AUM, _ State) bool {
+ if curs.Hash() != *intersection.tailIntersection {
+ out = append(out, curs)
+ }
+ return false
+ })
+ return out, err
+ }
+
+ panic("unreachable")
+}
diff --git a/tka/sync_test.go b/tka/sync_test.go
new file mode 100644
index 000000000..65cba0ab8
--- /dev/null
+++ b/tka/sync_test.go
@@ -0,0 +1,373 @@
+// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package tka
+
+import (
+ "bytes"
+ "strconv"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+)
+
+func TestSyncOffer(t *testing.T) {
+ c := newTestchain(t, `
+ A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+ A10 -> A11 -> A12 -> A13 -> A14 -> A15 -> A16 -> A17 -> A18
+ A18 -> A19 -> A20 -> A21 -> A22 -> A23 -> A24 -> A25
+ `)
+ a, err := Open(c.Chonk())
+ if err != nil {
+ t.Fatal(err)
+ }
+ got, err := a.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // A SyncOffer includes a selection of AUMs going backwards in the tree,
+ // progressively skipping more and more each iteration.
+ want := SyncOffer{
+ Head: c.AUMHashes["A25"],
+ Ancestors: []AUMHash{
+ c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart)],
+ c.AUMHashes["A"+strconv.Itoa(25-ancestorsSkipStart<<ancestorsSkipShift)],
+ c.AUMHashes["A1"],
+ },
+ }
+
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("SyncOffer diff (-want, +got):\n%s", diff)
+ }
+}
+
+func TestComputeSyncIntersection_FastForward(t *testing.T) {
+ // Node 1 has: A1 -> A2
+ // Node 2 has: A1 -> A2 -> A3 -> A4
+ c := newTestchain(t, `
+ A1 -> A2 -> A3 -> A4
+ `)
+ a1H, a2H := c.AUMHashes["A1"], c.AUMHashes["A2"]
+
+ chonk1 := c.ChonkWith("A1", "A2")
+ n1, err := Open(chonk1)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer1, err := n1.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ chonk2 := c.Chonk() // All AUMs
+ n2, err := Open(chonk2)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer2, err := n2.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Node 1 only knows about the first two nodes, so the head of n2 is
+ // alien to it.
+ t.Run("n1", func(t *testing.T) {
+ got, err := computeSyncIntersection(n1, offer1, offer2)
+ if err != nil {
+ t.Fatalf("computeSyncIntersection() failed: %v", err)
+ }
+ want := &intersection{
+ tailIntersection: &a1H,
+ }
+ if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+ t.Errorf("intersection diff (-want, +got):\n%s", diff)
+ }
+ })
+
+ // Node 2 knows about the full chain, so it can see that the head of n1
+ // intersects with a subset of its chain (a Head Intersection).
+ t.Run("n2", func(t *testing.T) {
+ got, err := computeSyncIntersection(n2, offer2, offer1)
+ if err != nil {
+ t.Fatalf("computeSyncIntersection() failed: %v", err)
+ }
+ want := &intersection{
+ headIntersection: &a2H,
+ }
+ if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+ t.Errorf("intersection diff (-want, +got):\n%s", diff)
+ }
+ })
+}
+
+func TestComputeSyncIntersection_ForkSmallDiff(t *testing.T) {
+ // The number of nodes in the chain is longer than ancestorSkipStart,
+ // so that during sync both nodes are able to find a common ancestor
+ // which was later than A1.
+
+ c := newTestchain(t, `
+ A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+ | -> F1
+ // Make F1 different to A9.
+ // hashSeed is chosen such that the hash is higher than A9.
+ F1.hashSeed = 7
+ `)
+ // Node 1 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> F1
+ // Node 2 has: A1 -> A2 -> A3 -> A4 -> A5 -> A6 -> A7 -> A8 -> A9 -> A10
+ f1H, a9H := c.AUMHashes["F1"], c.AUMHashes["A9"]
+
+ if bytes.Compare(f1H[:], a9H[:]) < 0 {
+ t.Fatal("failed assert: h(a9) > h(f1H)\nTweak hashSeed till this passes")
+ }
+
+ n1, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "F1"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer1, err := n1.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if diff := cmp.Diff(SyncOffer{
+ Head: c.AUMHashes["F1"],
+ Ancestors: []AUMHash{
+ c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)],
+ c.AUMHashes["A1"],
+ },
+ }, offer1); diff != "" {
+ t.Errorf("offer1 diff (-want, +got):\n%s", diff)
+ }
+
+ n2, err := Open(c.ChonkWith("A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10"))
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer2, err := n2.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+ if diff := cmp.Diff(SyncOffer{
+ Head: c.AUMHashes["A10"],
+ Ancestors: []AUMHash{
+ c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)],
+ c.AUMHashes["A1"],
+ },
+ }, offer2); diff != "" {
+ t.Errorf("offer2 diff (-want, +got):\n%s", diff)
+ }
+
+ // Node 1 only knows about the first eight nodes, so the head of n2 is
+ // alien to it.
+ t.Run("n1", func(t *testing.T) {
+ // n2 has 10 nodes, so the first common ancestor should be 10-ancestorsSkipStart
+ wantIntersection := c.AUMHashes["A"+strconv.Itoa(10-ancestorsSkipStart)]
+
+ got, err := computeSyncIntersection(n1, offer1, offer2)
+ if err != nil {
+ t.Fatalf("computeSyncIntersection() failed: %v", err)
+ }
+ want := &intersection{
+ tailIntersection: &wantIntersection,
+ }
+ if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+ t.Errorf("intersection diff (-want, +got):\n%s", diff)
+ }
+ })
+
+ // Node 2 knows about the full chain but doesn't recognize the head.
+ t.Run("n2", func(t *testing.T) {
+ // n1 has 9 nodes, so the first common ancestor should be 9-ancestorsSkipStart
+ wantIntersection := c.AUMHashes["A"+strconv.Itoa(9-ancestorsSkipStart)]
+
+ got, err := computeSyncIntersection(n2, offer2, offer1)
+ if err != nil {
+ t.Fatalf("computeSyncIntersection() failed: %v", err)
+ }
+ want := &intersection{
+ tailIntersection: &wantIntersection,
+ }
+ if diff := cmp.Diff(want, got, cmp.AllowUnexported(intersection{})); diff != "" {
+ t.Errorf("intersection diff (-want, +got):\n%s", diff)
+ }
+ })
+}
+
+func TestMissingAUMs_FastForward(t *testing.T) {
+ // Node 1 has: A1 -> A2
+ // Node 2 has: A1 -> A2 -> A3 -> A4
+ c := newTestchain(t, `
+ A1 -> A2 -> A3 -> A4
+ A1.hashSeed = 1
+ A2.hashSeed = 2
+ A3.hashSeed = 3
+ A4.hashSeed = 4
+ `)
+
+ chonk1 := c.ChonkWith("A1", "A2")
+ n1, err := Open(chonk1)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer1, err := n1.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ chonk2 := c.Chonk() // All AUMs
+ n2, err := Open(chonk2)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer2, err := n2.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // Node 1 only knows about the first two nodes, so the head of n2 is
+ // alien to it. As such, it should send history from the newest ancestor,
+ // A1 (if the chain was longer there would be one in the middle).
+ t.Run("n1", func(t *testing.T) {
+ got, err := n1.MissingAUMs(offer2)
+ if err != nil {
+ t.Fatalf("MissingAUMs() failed: %v", err)
+ }
+
+ // Both sides have A1, so the only AUM that n2 might not have is
+ // A2.
+ want := []AUM{c.AUMs["A2"]}
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+ }
+ })
+
+ // Node 2 knows about the full chain, so it can see that the head of n1
+ // intersects with a subset of its chain (a Head Intersection).
+ t.Run("n2", func(t *testing.T) {
+ got, err := n2.MissingAUMs(offer1)
+ if err != nil {
+ t.Fatalf("MissingAUMs() failed: %v", err)
+ }
+
+ want := []AUM{
+ c.AUMs["A3"],
+ c.AUMs["A4"],
+ }
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+ }
+ })
+}
+
+func TestMissingAUMs_Fork(t *testing.T) {
+ // Node 1 has: A1 -> A2 -> A3 -> F1
+ // Node 2 has: A1 -> A2 -> A3 -> A4
+ c := newTestchain(t, `
+ A1 -> A2 -> A3 -> A4
+ | -> F1
+ A1.hashSeed = 1
+ A2.hashSeed = 2
+ A3.hashSeed = 3
+ A4.hashSeed = 4
+ `)
+
+ chonk1 := c.ChonkWith("A1", "A2", "A3", "F1")
+ n1, err := Open(chonk1)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer1, err := n1.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ chonk2 := c.ChonkWith("A1", "A2", "A3", "A4")
+ n2, err := Open(chonk2)
+ if err != nil {
+ t.Fatal(err)
+ }
+ offer2, err := n2.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ t.Run("n1", func(t *testing.T) {
+ got, err := n1.MissingAUMs(offer2)
+ if err != nil {
+ t.Fatalf("MissingAUMs() failed: %v", err)
+ }
+
+ // Both sides have A1, so n1 will send everything it knows from
+ // there to head.
+ want := []AUM{
+ c.AUMs["A2"],
+ c.AUMs["A3"],
+ c.AUMs["F1"],
+ }
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+ }
+ })
+
+ t.Run("n2", func(t *testing.T) {
+ got, err := n2.MissingAUMs(offer1)
+ if err != nil {
+ t.Fatalf("MissingAUMs() failed: %v", err)
+ }
+
+ // Both sides have A1, so n2 will send everything it knows from
+ // there to head.
+ want := []AUM{
+ c.AUMs["A2"],
+ c.AUMs["A3"],
+ c.AUMs["A4"],
+ }
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("MissingAUMs diff (-want, +got):\n%s", diff)
+ }
+ })
+}
+
+func TestSyncSimpleE2E(t *testing.T) {
+ pub, priv := testingKey25519(t, 1)
+ key := Key{Kind: Key25519, Public: pub, Votes: 2}
+
+ c := newTestchain(t, `
+ G1 -> L1 -> L2 -> L3
+ G1.template = genesis
+ `,
+ optTemplate("genesis", AUM{MessageKind: AUMCheckpoint, State: &State{
+ Keys: []Key{key},
+ DisablementSecrets: [][]byte{disablementKDF([]byte{1, 2, 3})},
+ }}),
+ optKey("key", key, priv),
+ optSignAllUsing("key"))
+
+ node, err := Bootstrap(&Mem{}, c.AUMs["G1"])
+ if err != nil {
+ t.Fatalf("node Bootstrap() failed: %v", err)
+ }
+ control, err := Open(c.Chonk())
+ if err != nil {
+ t.Fatalf("control Open() failed: %v", err)
+ }
+
+ // Control knows the full chain, node only knows the genesis. Lets see
+ // if they can sync.
+ nodeOffer, err := node.SyncOffer()
+ if err != nil {
+ t.Fatal(err)
+ }
+ controlAUMs, err := control.MissingAUMs(nodeOffer)
+ if err != nil {
+ t.Fatalf("control.MissingAUMs(%v) failed: %v", nodeOffer, err)
+ }
+ if err := node.Inform(controlAUMs); err != nil {
+ t.Fatalf("node.Inform(%v) failed: %v", controlAUMs, err)
+ }
+
+ if cHash, nHash := control.Head(), node.Head(); cHash != nHash {
+ t.Errorf("node & control are not synced: c=%x, n=%x", cHash, nHash)
+ }
+}