summaryrefslogtreecommitdiffhomepage
path: root/control/controlclient/noise.go
blob: 4bd8cfc25ee96397cce9855a11854819e023313c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause

package controlclient

import (
	"bytes"
	"cmp"
	"context"
	"encoding/json"
	"errors"
	"math"
	"net/http"
	"net/netip"
	"net/url"
	"sync"
	"time"

	"golang.org/x/net/http2"
	"tailscale.com/control/controlhttp"
	"tailscale.com/health"
	"tailscale.com/internal/noiseconn"
	"tailscale.com/net/dnscache"
	"tailscale.com/net/netmon"
	"tailscale.com/net/tsdial"
	"tailscale.com/tailcfg"
	"tailscale.com/tstime"
	"tailscale.com/types/key"
	"tailscale.com/types/logger"
	"tailscale.com/util/mak"
	"tailscale.com/util/multierr"
	"tailscale.com/util/singleflight"
)

// NoiseClient provides a http.Client to connect to tailcontrol over
// the ts2021 protocol.
type NoiseClient struct {
	// Client is an HTTP client to talk to the coordination server.
	// It automatically makes a new Noise connection as needed.
	// It does not support node key proofs. To do that, call
	// noiseClient.getConn instead to make a connection.
	*http.Client

	// h2t is the HTTP/2 transport we use a bit to create new
	// *http2.ClientConns. We don't use its connection pool and we don't use its
	// dialing. We use it for exactly one reason: its idle timeout that can only
	// be configured via the HTTP/1 config. And then we call NewClientConn (with
	// an existing Noise connection) on the http2.Transport which sets up an
	// http2.ClientConn using that idle timeout from an http1.Transport.
	h2t *http2.Transport

	// sfDial ensures that two concurrent requests for a noise connection only
	// produce one shared one between the two callers.
	sfDial singleflight.Group[struct{}, *noiseconn.Conn]

	dialer       *tsdial.Dialer
	dnsCache     *dnscache.Resolver
	privKey      key.MachinePrivate
	serverPubKey key.MachinePublic
	host         string // the host part of serverURL
	httpPort     string // the default port to dial
	httpsPort    string // the fallback Noise-over-https port or empty if none

	// dialPlan optionally returns a ControlDialPlan previously received
	// from the control server; either the function or the return value can
	// be nil.
	dialPlan func() *tailcfg.ControlDialPlan

	logf   logger.Logf
	netMon *netmon.Monitor
	health *health.Tracker

	// mu only protects the following variables.
	mu       sync.Mutex
	closed   bool
	last     *noiseconn.Conn // or nil
	nextID   int
	connPool map[int]*noiseconn.Conn // active connections not yet closed; see noiseconn.Conn.Close
}

// NoiseOpts contains options for the NewNoiseClient function. All fields are
// required unless otherwise specified.
type NoiseOpts struct {
	// PrivKey is this node's private key.
	PrivKey key.MachinePrivate
	// ServerPubKey is the public key of the server.
	ServerPubKey key.MachinePublic
	// ServerURL is the URL of the server to connect to.
	ServerURL string
	// Dialer's SystemDial function is used to connect to the server.
	Dialer *tsdial.Dialer
	// DNSCache is the caching Resolver to use to connect to the server.
	//
	// This field can be nil.
	DNSCache *dnscache.Resolver
	// Logf is the log function to use. This field can be nil.
	Logf logger.Logf
	// NetMon is the network monitor that, if set, will be used to get the
	// network interface state. This field can be nil; if so, the current
	// state will be looked up dynamically.
	NetMon *netmon.Monitor
	// HealthTracker, if non-nil, is the health tracker to use.
	HealthTracker *health.Tracker
	// DialPlan, if set, is a function that should return an explicit plan
	// on how to connect to the server.
	DialPlan func() *tailcfg.ControlDialPlan
}

// NewNoiseClient returns a new noiseClient for the provided server and machine key.
// serverURL is of the form https://<host>:<port> (no trailing slash).
//
// netMon may be nil, if non-nil it's used to do faster interface lookups.
// dialPlan may be nil
func NewNoiseClient(opts NoiseOpts) (*NoiseClient, error) {
	logf := opts.Logf
	u, err := url.Parse(opts.ServerURL)
	if err != nil {
		return nil, err
	}

	if u.Scheme != "http" && u.Scheme != "https" {
		return nil, errors.New("invalid ServerURL scheme, must be http or https")
	}

	var httpPort string
	var httpsPort string
	addr, _ := netip.ParseAddr(u.Hostname())
	isPrivateHost := addr.IsPrivate() || addr.IsLoopback() || u.Hostname() == "localhost"
	if port := u.Port(); port != "" {
		// If there is an explicit port specified, entirely rely on the scheme,
		// unless it's http with a private host in which case we never try using HTTPS.
		if u.Scheme == "https" {
			httpPort = ""
			httpsPort = port
		} else if u.Scheme == "http" {
			httpPort = port
			httpsPort = "443"
			if isPrivateHost {
				logf("setting empty HTTPS port with http scheme and private host %s", u.Hostname())
				httpsPort = ""
			}
		}
	} else if u.Scheme == "http" && isPrivateHost {
		// Whenever the scheme is http and the hostname is an IP address, do not set the HTTPS port,
		// as there cannot be a TLS certificate issued for an IP, unless it's a public IP.
		httpPort = "80"
		httpsPort = ""
	} else {
		// Otherwise, use the standard ports
		httpPort = "80"
		httpsPort = "443"
	}

	np := &NoiseClient{
		serverPubKey: opts.ServerPubKey,
		privKey:      opts.PrivKey,
		host:         u.Hostname(),
		httpPort:     httpPort,
		httpsPort:    httpsPort,
		dialer:       opts.Dialer,
		dnsCache:     opts.DNSCache,
		dialPlan:     opts.DialPlan,
		logf:         opts.Logf,
		netMon:       opts.NetMon,
		health:       opts.HealthTracker,
	}

	// Create the HTTP/2 Transport using a net/http.Transport
	// (which only does HTTP/1) because it's the only way to
	// configure certain properties on the http2.Transport. But we
	// never actually use the net/http.Transport for any HTTP/1
	// requests.
	h2Transport, err := http2.ConfigureTransports(&http.Transport{
		IdleConnTimeout: time.Minute,
	})
	if err != nil {
		return nil, err
	}
	np.h2t = h2Transport

	np.Client = &http.Client{Transport: np}
	return np, nil
}

// GetSingleUseRoundTripper returns a RoundTripper that can be only be used once
// (and must be used once) to make a single HTTP request over the noise channel
// to the coordination server.
//
// In addition to the RoundTripper, it returns the HTTP/2 channel's early noise
// payload, if any.
func (nc *NoiseClient) GetSingleUseRoundTripper(ctx context.Context) (http.RoundTripper, *tailcfg.EarlyNoise, error) {
	for tries := 0; tries < 3; tries++ {
		conn, err := nc.getConn(ctx)
		if err != nil {
			return nil, nil, err
		}
		ok, earlyPayloadMaybeNil, err := conn.ReserveNewRequest(ctx)
		if err != nil {
			return nil, nil, err
		}
		if ok {
			return conn, earlyPayloadMaybeNil, nil
		}
	}
	return nil, nil, errors.New("[unexpected] failed to reserve a request on a connection")
}

// contextErr is an error that wraps another error and is used to indicate that
// the error was because a context expired.
type contextErr struct {
	err error
}

func (e contextErr) Error() string {
	return e.err.Error()
}

func (e contextErr) Unwrap() error {
	return e.err
}

// getConn returns a noiseconn.Conn that can be used to make requests to the
// coordination server. It may return a cached connection or create a new one.
// Dials are singleflighted, so concurrent calls to getConn may only dial once.
// As such, context values may not be respected as there are no guarantees that
// the context passed to getConn is the same as the context passed to dial.
func (nc *NoiseClient) getConn(ctx context.Context) (*noiseconn.Conn, error) {
	nc.mu.Lock()
	if last := nc.last; last != nil && last.CanTakeNewRequest() {
		nc.mu.Unlock()
		return last, nil
	}
	nc.mu.Unlock()

	for {
		// We singeflight the dial to avoid making multiple connections, however
		// that means that we can't simply cancel the dial if the context is
		// canceled. Instead, we have to additionally check that the context
		// which was canceled is our context and retry if our context is still
		// valid.
		conn, err, _ := nc.sfDial.Do(struct{}{}, func() (*noiseconn.Conn, error) {
			c, err := nc.dial(ctx)
			if err != nil {
				if ctx.Err() != nil {
					return nil, contextErr{ctx.Err()}
				}
				return nil, err
			}
			return c, nil
		})
		var ce contextErr
		if err == nil || !errors.As(err, &ce) {
			return conn, err
		}
		if ctx.Err() == nil {
			// The dial failed because of a context error, but our context
			// is still valid. Retry.
			continue
		}
		// The dial failed because our context was canceled. Return the
		// underlying error.
		return nil, ce.Unwrap()
	}
}

func (nc *NoiseClient) RoundTrip(req *http.Request) (*http.Response, error) {
	ctx := req.Context()
	conn, err := nc.getConn(ctx)
	if err != nil {
		return nil, err
	}
	return conn.RoundTrip(req)
}

// connClosed removes the connection with the provided ID from the pool
// of active connections.
func (nc *NoiseClient) connClosed(id int) {
	nc.mu.Lock()
	defer nc.mu.Unlock()
	conn := nc.connPool[id]
	if conn != nil {
		delete(nc.connPool, id)
		if nc.last == conn {
			nc.last = nil
		}
	}
}

// Close closes all the underlying noise connections.
// It is a no-op and returns nil if the connection is already closed.
func (nc *NoiseClient) Close() error {
	nc.mu.Lock()
	nc.closed = true
	conns := nc.connPool
	nc.connPool = nil
	nc.mu.Unlock()

	var errors []error
	for _, c := range conns {
		if err := c.Close(); err != nil {
			errors = append(errors, err)
		}
	}
	return multierr.New(errors...)
}

// dial opens a new connection to tailcontrol, fetching the server noise key
// if not cached.
func (nc *NoiseClient) dial(ctx context.Context) (*noiseconn.Conn, error) {
	nc.mu.Lock()
	connID := nc.nextID
	nc.nextID++
	nc.mu.Unlock()

	if tailcfg.CurrentCapabilityVersion > math.MaxUint16 {
		// Panic, because a test should have started failing several
		// thousand version numbers before getting to this point.
		panic("capability version is too high to fit in the wire protocol")
	}

	var dialPlan *tailcfg.ControlDialPlan
	if nc.dialPlan != nil {
		dialPlan = nc.dialPlan()
	}

	// If we have a dial plan, then set our timeout as slightly longer than
	// the maximum amount of time contained therein; we assume that
	// explicit instructions on timeouts are more useful than a single
	// hard-coded timeout.
	//
	// The default value of 5 is chosen so that, when there's no dial plan,
	// we retain the previous behaviour of 10 seconds end-to-end timeout.
	timeoutSec := 5.0
	if dialPlan != nil {
		for _, c := range dialPlan.Candidates {
			if v := c.DialStartDelaySec + c.DialTimeoutSec; v > timeoutSec {
				timeoutSec = v
			}
		}
	}

	// After we establish a connection, we need some time to actually
	// upgrade it into a Noise connection. With a ballpark worst-case RTT
	// of 1000ms, give ourselves an extra 5 seconds to complete the
	// handshake.
	timeoutSec += 5

	// Be extremely defensive and ensure that the timeout is in the range
	// [5, 60] seconds (e.g. if we accidentally get a negative number).
	if timeoutSec > 60 {
		timeoutSec = 60
	} else if timeoutSec < 5 {
		timeoutSec = 5
	}

	timeout := time.Duration(timeoutSec * float64(time.Second))
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	clientConn, err := (&controlhttp.Dialer{
		Hostname:        nc.host,
		HTTPPort:        nc.httpPort,
		HTTPSPort:       cmp.Or(nc.httpsPort, controlhttp.NoPort),
		MachineKey:      nc.privKey,
		ControlKey:      nc.serverPubKey,
		ProtocolVersion: uint16(tailcfg.CurrentCapabilityVersion),
		Dialer:          nc.dialer.SystemDial,
		DNSCache:        nc.dnsCache,
		DialPlan:        dialPlan,
		Logf:            nc.logf,
		NetMon:          nc.netMon,
		HealthTracker:   nc.health,
		Clock:           tstime.StdClock{},
	}).Dial(ctx)
	if err != nil {
		return nil, err
	}

	ncc, err := noiseconn.New(clientConn.Conn, nc.h2t, connID, nc.connClosed)
	if err != nil {
		return nil, err
	}

	nc.mu.Lock()
	if nc.closed {
		nc.mu.Unlock()
		ncc.Close() // Needs to be called without holding the lock.
		return nil, errors.New("noise client closed")
	}
	defer nc.mu.Unlock()
	mak.Set(&nc.connPool, connID, ncc)
	nc.last = ncc
	return ncc, nil
}

// post does a POST to the control server at the given path, JSON-encoding body.
// The provided nodeKey is an optional load balancing hint.
func (nc *NoiseClient) post(ctx context.Context, path string, nodeKey key.NodePublic, body any) (*http.Response, error) {
	return nc.doWithBody(ctx, "POST", path, nodeKey, body)
}

func (nc *NoiseClient) doWithBody(ctx context.Context, method, path string, nodeKey key.NodePublic, body any) (*http.Response, error) {
	jbody, err := json.Marshal(body)
	if err != nil {
		return nil, err
	}
	req, err := http.NewRequestWithContext(ctx, method, "https://"+nc.host+path, bytes.NewReader(jbody))
	if err != nil {
		return nil, err
	}
	addLBHeader(req, nodeKey)
	req.Header.Set("Content-Type", "application/json")
	conn, err := nc.getConn(ctx)
	if err != nil {
		return nil, err
	}
	return conn.RoundTrip(req)
}