summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorBrian Palmer <brianp@tailscale.com>2025-03-03 13:13:16 -0700
committerBrian Palmer <brianp@tailscale.com>2025-03-03 13:13:16 -0700
commit67d9def2d1af633f5772652cf4c69b56cac45c52 (patch)
treeecf88d82b4b5f079a7b370d538b48d18fe6734f1
parentce6ce81311cc53df4498f2e8757b52be50801d64 (diff)
downloadtailscale-brianp/controlclient-timings.tar.xz
tailscale-brianp/controlclient-timings.zip
controlclient: add timing metrics for control requestsbrianp/controlclient-timings
Updates #26869
-rw-r--r--control/controlclient/direct.go8
-rw-r--r--control/controlclient/metrics.go39
-rw-r--r--control/controlclient/noise.go2
3 files changed, 49 insertions, 0 deletions
diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go
index 883a1a587..f9e27a725 100644
--- a/control/controlclient/direct.go
+++ b/control/controlclient/direct.go
@@ -676,7 +676,9 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
addLBHeader(req, request.OldNodeKey)
addLBHeader(req, request.NodeKey)
+ start := time.Now()
res, err := httpc.Do(req)
+ loginLatencies.Observe(time.Since(start).Seconds())
if err != nil {
return regen, opt.URL, nil, fmt.Errorf("register request: %w", err)
}
@@ -964,7 +966,9 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
}
addLBHeader(req, nodeKey)
+ start := time.Now()
res, err := httpc.Do(req)
+ initialMapRequestLatencies.Observe(time.Since(start).Seconds())
if err != nil {
vlogf("netmap: Do: %v", err)
return err
@@ -1322,6 +1326,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) {
c.logf("invalid PingRequest with no URL")
return
}
+ start := time.Now()
switch pr.Types {
case "":
answerHeadPing(c.logf, httpc, pr)
@@ -1334,6 +1339,7 @@ func (c *Direct) answerPing(pr *tailcfg.PingRequest) {
answerC2NPing(c.logf, c.c2nHandler, httpc, pr)
return
}
+ answerPingLatencies.Observe(time.Since(start).Seconds())
for _, t := range strings.Split(pr.Types, ",") {
switch pt := tailcfg.PingType(t); pt {
case tailcfg.PingTSMP, tailcfg.PingDisco, tailcfg.PingICMP, tailcfg.PingPeerAPI:
@@ -1638,7 +1644,9 @@ func (c *Direct) ReportHealthChange(w *health.Warnable, us *health.UnhealthyStat
// Best effort, no logging:
ctx, cancel := context.WithTimeout(c.closedCtx, 5*time.Second)
defer cancel()
+ start := time.Now()
res, err := np.post(ctx, "/machine/update-health", nodeKey, req)
+ updateHealthLatencies.Observe(time.Since(start).Seconds())
if err != nil {
return
}
diff --git a/control/controlclient/metrics.go b/control/controlclient/metrics.go
new file mode 100644
index 000000000..8f8bfe40e
--- /dev/null
+++ b/control/controlclient/metrics.go
@@ -0,0 +1,39 @@
+package controlclient
+
+import (
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var (
+ loginLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
+ Name: "controlclient_login_latency_seconds",
+ Help: "Control login time",
+ // 15 buckets from 10ms to 1m.
+ Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
+ )
+ dialLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
+ Name: "controlclient_dial_latency_seconds",
+ Help: "Control dial time",
+ // 15 buckets from 10ms to 1m.
+ Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
+ )
+ initialMapRequestLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
+ Name: "controlclient_initial_map_request_latency_seconds",
+ Help: "Initial map request/response time",
+ // 15 buckets from 10ms to 1m.
+ Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
+ )
+ updateHealthLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
+ Name: "controlclient_update_health_latency_seconds",
+ Help: "Update health request/response time",
+ // 15 buckets from 10ms to 1m.
+ Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
+ )
+ answerPingLatencies = promauto.NewHistogram(prometheus.HistogramOpts{
+ Name: "controlclient_answer_ping_latency_seconds",
+ Help: "Answer ping request/response time",
+ // 15 buckets from 10ms to 1m.
+ Buckets: prometheus.ExponentialBucketsRange(0.01, 60, 15)},
+ )
+)
diff --git a/control/controlclient/noise.go b/control/controlclient/noise.go
index 4bd8cfc25..88a4feddc 100644
--- a/control/controlclient/noise.go
+++ b/control/controlclient/noise.go
@@ -239,7 +239,9 @@ func (nc *NoiseClient) getConn(ctx context.Context) (*noiseconn.Conn, error) {
// which was canceled is our context and retry if our context is still
// valid.
conn, err, _ := nc.sfDial.Do(struct{}{}, func() (*noiseconn.Conn, error) {
+ start := time.Now()
c, err := nc.dial(ctx)
+ dialLatencies.Observe(time.Since(start).Seconds())
if err != nil {
if ctx.Err() != nil {
return nil, contextErr{ctx.Err()}