diff options
| author | James Tucker <james@tailscale.com> | 2026-04-22 12:40:18 -0700 |
|---|---|---|
| committer | James Tucker <james@tailscale.com> | 2026-04-22 12:41:43 -0700 |
| commit | 12c114b158a94137915e822226a4cc9b4a47079f (patch) | |
| tree | 277a5a06e5e710f351e4bf1437a07701e54a5455 /cmd/tailscaled | |
| parent | d7916d4369bd2b880281e38edac2f600d5ab2673 (diff) | |
| download | tailscale-raggi/startupprof.tar.xz tailscale-raggi/startupprof.zip | |
cmd/{startupprof,tailscaled}: add startup profiling toolsraggi/startupprof
I was running some end to end tests and decided I wanted to know why
tailscaled takes longer than I expect to startup even when talking to
solely local resources. This helped me get a few answers.
Diffstat (limited to 'cmd/tailscaled')
| -rw-r--r-- | cmd/tailscaled/startupprof.go | 269 | ||||
| -rw-r--r-- | cmd/tailscaled/tailscaled.go | 22 |
2 files changed, 291 insertions, 0 deletions
diff --git a/cmd/tailscaled/startupprof.go b/cmd/tailscaled/startupprof.go new file mode 100644 index 000000000..627e2e927 --- /dev/null +++ b/cmd/tailscaled/startupprof.go @@ -0,0 +1,269 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +//go:build go1.23 + +package main + +// This file implements opt-in, envknob-gated startup profiling for tailscaled. +// It is intended to be used by the cmd/startupprof harness (and anyone +// debugging tailscaled startup latency). +// +// It is safe to leave compiled in: when none of the TS_STARTUPPROF_* envknobs +// are set, everything here is a fast no-op. +// +// The controls are: +// +// TS_STARTUPPROF_TRACE=/path/to/trace.out +// Start runtime/trace.Start at process entry (as early as possible in +// main, before any other work) and write to the given path. The trace +// will be stopped automatically when the backend reaches ipn.Running, +// or after TS_STARTUPPROF_MAX_SECS (default 30s), whichever is first. +// +// TS_STARTUPPROF_CPUPROF=/path/to/cpu.pprof +// Same, but for runtime/pprof.StartCPUProfile. +// +// TS_STARTUPPROF_MEMPROF=/path/to/mem.pprof +// Write a heap profile when the trace is stopped. +// +// TS_STARTUPPROF_MAX_SECS=<int> +// Maximum time to leave profilers running. Default: 30. +// +// TS_STARTUPPROF_PHASES=/path/to/phases.txt +// Append wall-clock phase timings (measured from process entry) to +// this file. Machine-readable: "<nanoseconds>\t<phase name>\n". +// +// The phase marker API (markPhase) is also available without any envknob; +// when no phases file is configured, markPhase emits a runtime/trace.Log +// event (cheap; no-op if tracing disabled) and records nothing else. +// +// Design goal: zero dependencies on logtail, logger, or anything that is +// itself part of the startup cost being measured. + +import ( + "context" + "fmt" + "os" + "runtime" + "runtime/pprof" + "runtime/trace" + "sync" + "sync/atomic" + "time" + + "tailscale.com/ipn" + "tailscale.com/ipn/ipnlocal" +) + +// procStart is the earliest time we can record: the value of time.Now() at +// init() in this file. This captures cost incurred by other init funcs that +// run after ours, plus all of main. It is not perfect (Go init order is +// source-file lexical within a package) but is adequate for our purposes. +// +// NOTE: placing procStart in a var with an initializer at package scope +// guarantees it is assigned before any init() or main(). We also set it +// again in init() as a backstop. +var procStart = time.Now() + +// startupProf holds runtime state for the profiler. All fields are only +// accessed from the single startup goroutine + the state-change notifier. +var startupProf struct { + enabled atomic.Bool + stopOnce sync.Once + stoppedAt atomic.Int64 // unix nanos; 0 until stopped + traceFile *os.File + cpuFile *os.File + memPath string + phasesPath string + + // phases is a ring of recorded phases, appended to the phases file on stop. + phasesMu sync.Mutex + phases []phaseRecord +} + +type phaseRecord struct { + offset time.Duration + name string +} + +func init() { + // Backstop: ensure procStart reflects a very early moment even if the + // initializer above was bypassed for some reason. + if procStart.IsZero() { + procStart = time.Now() + } + // If TS_STARTUPPROF_DEBUG is set, log wall-clock timestamps (in ns + // since unix epoch) at each phase marker so we can correlate inside + // vs outside-the-process timings. + if os.Getenv("TS_STARTUPPROF_DEBUG") != "" { + startupProfDebug = true + } +} + +var startupProfDebug bool + +// startupProfileStart starts any configured profilers. Safe to call exactly +// once, as early in main as possible. It is not safe to call concurrently. +// +// It returns without error if no profilers are configured; callers should +// still call it unconditionally. +func startupProfileStart() { + tracePath := os.Getenv("TS_STARTUPPROF_TRACE") + cpuPath := os.Getenv("TS_STARTUPPROF_CPUPROF") + memPath := os.Getenv("TS_STARTUPPROF_MEMPROF") + phasesPath := os.Getenv("TS_STARTUPPROF_PHASES") + + if tracePath == "" && cpuPath == "" && memPath == "" && phasesPath == "" { + return + } + startupProf.enabled.Store(true) + startupProf.memPath = memPath + startupProf.phasesPath = phasesPath + + if tracePath != "" { + f, err := os.Create(tracePath) + if err != nil { + fmt.Fprintf(os.Stderr, "startupprof: create trace %q: %v\n", tracePath, err) + } else if err := trace.Start(f); err != nil { + fmt.Fprintf(os.Stderr, "startupprof: trace.Start: %v\n", err) + f.Close() + } else { + startupProf.traceFile = f + } + } + if cpuPath != "" { + f, err := os.Create(cpuPath) + if err != nil { + fmt.Fprintf(os.Stderr, "startupprof: create cpuprof %q: %v\n", cpuPath, err) + } else { + // Bump sample rate from the default 100Hz to 1000Hz since + // startup is typically <100ms and the default gives almost + // no samples. Must be called before StartCPUProfile. + runtime.SetCPUProfileRate(1000) + if err := pprof.StartCPUProfile(f); err != nil { + fmt.Fprintf(os.Stderr, "startupprof: StartCPUProfile: %v\n", err) + f.Close() + } else { + startupProf.cpuFile = f + } + } + } + + // Record process start as the first phase. + markPhase("startupprof_init") + + // Safety net: stop after TS_STARTUPPROF_MAX_SECS regardless of state. + maxSecs := 30 + if v := os.Getenv("TS_STARTUPPROF_MAX_SECS"); v != "" { + if n, err := parsePositiveInt(v); err == nil { + maxSecs = n + } + } + go func() { + time.Sleep(time.Duration(maxSecs) * time.Second) + startupProfileStop("max_time") + }() +} + +// markPhase records a named phase. It is extremely cheap when profiling is +// disabled (one atomic load). When profiling IS enabled, it emits a +// runtime/trace Log event and appends to the phases slice for later +// write-out. +func markPhase(name string) { + if startupProfDebug { + // unix-time ns for external correlation. + fmt.Fprintf(os.Stderr, "TSSPD %d %s\n", time.Now().UnixNano(), name) + } + if !startupProf.enabled.Load() { + return + } + // trace.Log requires a non-nil context. + trace.Log(context.Background(), "phase", name) + rec := phaseRecord{ + offset: time.Since(procStart), + name: name, + } + startupProf.phasesMu.Lock() + startupProf.phases = append(startupProf.phases, rec) + startupProf.phasesMu.Unlock() +} + +// startupProfileWatchLocalBackend spawns a goroutine that watches the given +// LocalBackend and stops all profilers when it reaches ipn.Running. +// +// Safe to call with a nil lb (no-op). Safe to call multiple times. +func startupProfileWatchLocalBackend(lb *ipnlocal.LocalBackend) { + if !startupProf.enabled.Load() || lb == nil { + return + } + go func() { + // WatchNotifications delivers an immediate NotifyInitialState. + lb.WatchNotifications(context.Background(), ipn.NotifyInitialState, nil, func(n *ipn.Notify) bool { + if n.State != nil && *n.State == ipn.Running { + markPhase("state_running") + startupProfileStop("state_running") + return false + } + return true + }) + }() +} + +// startupProfileStop stops all profilers, writes outputs, and is idempotent. +// reason is recorded as the final phase. +func startupProfileStop(reason string) { + startupProf.stopOnce.Do(func() { + markPhase("stop_" + reason) + startupProf.stoppedAt.Store(time.Now().UnixNano()) + + // Stop CPU profile BEFORE the trace, because trace.Stop itself + // does an expensive stack-dump pass that would otherwise be the + // dominant entry in the CPU profile. + if startupProf.cpuFile != nil { + pprof.StopCPUProfile() + startupProf.cpuFile.Close() + } + if startupProf.traceFile != nil { + trace.Stop() + startupProf.traceFile.Close() + } + if startupProf.memPath != "" { + if f, err := os.Create(startupProf.memPath); err == nil { + runtime.GC() + pprof.WriteHeapProfile(f) + f.Close() + } + } + if startupProf.phasesPath != "" { + writePhases(startupProf.phasesPath) + } + }) +} + +func writePhases(path string) { + f, err := os.Create(path) + if err != nil { + fmt.Fprintf(os.Stderr, "startupprof: create phases %q: %v\n", path, err) + return + } + defer f.Close() + startupProf.phasesMu.Lock() + defer startupProf.phasesMu.Unlock() + for _, p := range startupProf.phases { + fmt.Fprintf(f, "%d\t%s\n", p.offset.Nanoseconds(), p.name) + } +} + +func parsePositiveInt(s string) (int, error) { + n := 0 + if s == "" { + return 0, fmt.Errorf("empty") + } + for _, c := range s { + if c < '0' || c > '9' { + return 0, fmt.Errorf("bad digit %q", c) + } + n = n*10 + int(c-'0') + } + return n, nil +} diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index fe18731ae..0aa9dca45 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -179,6 +179,11 @@ var ( type proxyStartFunc = func(logf logger.Logf, dialer *tsdial.Dialer) func main() { + // Start the optional startup profiler as early as possible. This is a + // no-op unless TS_STARTUPPROF_* envknobs are set. See startupprof.go. + startupProfileStart() + markPhase("main_enter") + envknob.PanicIfAnyEnvCheckedInInit() if shouldRunCLI() { beCLI() @@ -399,11 +404,13 @@ var logPol *logpolicy.Policy // or nil if not used var debugMux *http.ServeMux func run() (err error) { + markPhase("run_enter") var logf logger.Logf = log.Printf // Install an event bus as early as possible, so that it's // available universally when setting up everything else. sys := tsd.NewSystem() + markPhase("tsd_new_system") sys.SocketPath = args.socketpath // Parse config, if specified, to fail early if it's invalid. @@ -425,6 +432,7 @@ func run() (err error) { } sys.Set(netMon) } + markPhase("netmon_new") var publicLogID logid.PublicID if buildfeatures.HasLogTail { @@ -445,6 +453,7 @@ func run() (err error) { pol.Shutdown(ctx) }() } + markPhase("logpolicy_new") if err := envknob.ApplyDiskConfigError(); err != nil { log.Printf("Error reading environment config: %v", err) @@ -473,6 +482,7 @@ func run() (err error) { // crashed, and would for example restore system DNS configuration. dns.CleanUp(logf, netMon, sys.Bus.Get(), sys.HealthTracker.Get(), args.tunname) router.CleanUp(logf, netMon, args.tunname) + markPhase("dns_router_cleanup") // If the cleanUp flag was passed, then exit. if args.cleanUp { return nil @@ -509,10 +519,12 @@ var sigPipe os.Signal // set by sigpipe.go // logID may be the zero value if logging is not in use. func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID, sys *tsd.System) error { + markPhase("startIPNServer_enter") ln, err := safesocket.Listen(args.socketpath) if err != nil { return fmt.Errorf("safesocket.Listen: %v", err) } + markPhase("safesocket_listen") ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -570,6 +582,7 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID, lb, err := getLocalBackend(ctx, logf, logID, sys) if err == nil { logf("got LocalBackend in %v", time.Since(t0).Round(time.Millisecond)) + markPhase("got_local_backend") if lb.Prefs().Valid() { if err := lb.Start(ipn.Options{}); err != nil { logf("LocalBackend.Start: %v", err) @@ -578,8 +591,11 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID, cancel() return } + markPhase("lb_start_returned") } srv.SetLocalBackend(lb) + markPhase("srv_set_local_backend") + startupProfileWatchLocalBackend(lb) close(wgEngineCreated) return } @@ -624,6 +640,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID if err != nil { return nil, fmt.Errorf("createEngine: %w", err) } + markPhase("createEngine_returned") if onlyNetstack && !buildfeatures.HasNetstack { return nil, errors.New("userspace-networking support is not compiled in to this binary") } @@ -641,6 +658,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID return nil, fmt.Errorf("newNetstack: %w", err) } } + markPhase("netstack_created") if startProxy != nil { go startProxy(logf, dialer) @@ -662,15 +680,18 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID ht.SetUnhealthy(ipn.StateStoreHealth, health.Args{health.ArgError: err.Error()}) } sys.Set(store) + markPhase("store_new") if w, ok := sys.Tun.GetOK(); ok { w.Start() } + markPhase("tun_start") lb, err := ipnlocal.NewLocalBackend(logf, logID, sys, opts.LoginFlags) if err != nil { return nil, fmt.Errorf("ipnlocal.NewLocalBackend: %w", err) } + markPhase("new_local_backend") lb.SetVarRoot(opts.VarRoot) if logPol != nil { lb.SetLogFlusher(logPol.Logtail.StartFlush) @@ -687,6 +708,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID log.Fatalf("failed to start netstack: %v", err) } } + markPhase("netstack_started") if buildfeatures.HasTPM && args.hardwareAttestation.v { lb.SetHardwareAttested() } |
