1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
|
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package derphttp
import (
"context"
"sync"
"time"
"tailscale.com/derp"
"tailscale.com/types/key"
"tailscale.com/types/logger"
)
var retryInterval = 5 * time.Second
// testHookWatchLookConnectResult, if non-nil for tests, is called by RunWatchConnectionLoop
// with the connect result. If it returns false, the loop ends.
var testHookWatchLookConnectResult func(connectError error, wasSelfConnect bool) (keepRunning bool)
// RunWatchConnectionLoop loops until ctx is done, sending
// WatchConnectionChanges and subscribing to connection changes.
//
// If the server's public key is ignoreServerKey, RunWatchConnectionLoop
// returns.
//
// Otherwise, the add and remove funcs are called as clients come & go.
// Note that add is called for every new connection and remove is only
// called for the final disconnection. See https://github.com/tailscale/tailscale/issues/13566.
// This behavior will likely change. Callers should do their own accounting
// and dup suppression as needed.
//
// If set the notifyError func is called with any error that occurs within the ctx
// main loop connection setup, or the inner loop receiving messages via RecvDetail.
//
// infoLogf, if non-nil, is the logger to write periodic status updates about
// how many peers are on the server. Error log output is set to the c's logger,
// regardless of infoLogf's value.
//
// To force RunWatchConnectionLoop to return quickly, its ctx needs to be
// closed, and c itself needs to be closed.
//
// It is a fatal error to call this on an already-started Client without having
// initialized Client.WatchConnectionChanges to true.
//
// If the DERP connection breaks and reconnects, remove will be called for all
// previously seen peers, with Reason type PeerGoneReasonMeshConnBroke. Those
// clients are likely still connected and their add message will appear after
// reconnect.
func (c *Client) RunWatchConnectionLoop(ctx context.Context, ignoreServerKey key.NodePublic, infoLogf logger.Logf,
add func(derp.PeerPresentMessage), remove func(derp.PeerGoneMessage), notifyError func(error)) {
if !c.WatchConnectionChanges {
if c.isStarted() {
panic("invalid use of RunWatchConnectionLoop on already-started Client without setting Client.RunWatchConnectionLoop")
}
c.WatchConnectionChanges = true
}
if infoLogf == nil {
infoLogf = logger.Discard
}
logf := c.logf
const statusInterval = 10 * time.Second
var (
mu sync.Mutex
present = map[key.NodePublic]bool{}
loggedConnected = false
)
clear := func() {
mu.Lock()
defer mu.Unlock()
if len(present) == 0 {
return
}
logf("reconnected; clearing %d forwarding mappings", len(present))
for k := range present {
remove(derp.PeerGoneMessage{Peer: k, Reason: derp.PeerGoneReasonMeshConnBroke})
}
present = map[key.NodePublic]bool{}
}
lastConnGen := 0
lastStatus := c.clock.Now()
logConnectedLocked := func() {
if loggedConnected {
return
}
infoLogf("connected; %d peers", len(present))
loggedConnected = true
}
const logConnectedDelay = 200 * time.Millisecond
timer := c.clock.AfterFunc(2*time.Second, func() {
mu.Lock()
defer mu.Unlock()
logConnectedLocked()
})
defer timer.Stop()
updatePeer := func(k key.NodePublic, isPresent bool) {
mu.Lock()
defer mu.Unlock()
if isPresent {
present[k] = true
if !loggedConnected {
timer.Reset(logConnectedDelay)
}
} else {
// If we got a peerGone message, that means the initial connection's
// flood of peerPresent messages is done, so we can log already:
logConnectedLocked()
delete(present, k)
}
}
sleep := func(d time.Duration) {
t, tChannel := c.clock.NewTimer(d)
select {
case <-ctx.Done():
t.Stop()
case <-tChannel:
}
}
for ctx.Err() == nil {
// Make sure we're connected before calling s.ServerPublicKey.
_, _, err := c.connect(ctx, "RunWatchConnectionLoop")
if err != nil {
logf("mesh connect: %v", err)
if notifyError != nil {
notifyError(err)
}
if f := testHookWatchLookConnectResult; f != nil && !f(err, false) {
return
}
logf("mesh connect: %v", err)
sleep(retryInterval)
continue
}
selfConnect := c.ServerPublicKey() == ignoreServerKey
if f := testHookWatchLookConnectResult; f != nil && !f(err, selfConnect) {
return
}
if selfConnect {
logf("detected self-connect; ignoring host")
return
}
for {
m, connGen, err := c.RecvDetail()
if err != nil {
clear()
logf("Recv: %v", err)
if notifyError != nil {
notifyError(err)
}
sleep(retryInterval)
break
}
if connGen != lastConnGen {
lastConnGen = connGen
clear()
}
switch m := m.(type) {
case derp.PeerPresentMessage:
add(m)
updatePeer(m.Key, true)
case derp.PeerGoneMessage:
switch m.Reason {
case derp.PeerGoneReasonDisconnected:
// Normal case, log nothing
case derp.PeerGoneReasonNotHere:
logf("Recv: peer %s not connected to %s",
key.NodePublic(m.Peer).ShortString(), c.ServerPublicKey().ShortString())
default:
logf("Recv: peer %s not at server %s for unknown reason %v",
key.NodePublic(m.Peer).ShortString(), c.ServerPublicKey().ShortString(), m.Reason)
}
remove(m)
updatePeer(m.Peer, false)
default:
continue
}
if now := c.clock.Now(); now.Sub(lastStatus) > statusInterval {
lastStatus = now
infoLogf("%d peers", len(present))
}
}
}
}
|