summaryrefslogtreecommitdiffhomepage
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r--cmd/startupprof/startupprof.go784
-rw-r--r--cmd/tailscaled/startupprof.go269
-rw-r--r--cmd/tailscaled/tailscaled.go22
3 files changed, 1075 insertions, 0 deletions
diff --git a/cmd/startupprof/startupprof.go b/cmd/startupprof/startupprof.go
new file mode 100644
index 000000000..55083ba8d
--- /dev/null
+++ b/cmd/startupprof/startupprof.go
@@ -0,0 +1,784 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+// Program startupprof is a harness for profiling tailscaled startup time.
+//
+// It spins up an in-process testcontrol server + DERP + STUN, and then
+// forks one or more tailscaled processes against it. One process ("peer")
+// joins first and is driven to the Running state; the peer exists only so
+// that the process we actually care about — the "target" — has a real peer
+// to disco with, to measure time-to-first-peer-communication.
+//
+// The target process is launched with TS_STARTUPPROF_* envknobs that cause
+// tailscaled to write a runtime/trace, CPU profile, heap profile, and a
+// machine-readable phase-timing file (see cmd/tailscaled/startupprof.go).
+//
+// The harness additionally enables GODEBUG=inittrace=1 so that Go's
+// per-package init() cost is logged to stderr, and summarizes the top
+// init-time offenders.
+//
+// Typical usage:
+//
+// go build -o /tmp/tailscaled ./cmd/tailscaled
+// go run ./cmd/startupprof -tailscaled=/tmp/tailscaled -out=/tmp/tsprof
+// go tool trace /tmp/tsprof/target-trace.out
+// go tool pprof -http=: /tmp/tsprof/target-cpu.pprof
+// cat /tmp/tsprof/target-phases.txt
+//
+// The report printed at the end shows wall-clock times for each phase and
+// the init-time breakdown parsed from GODEBUG=inittrace.
+package main
+
+import (
+ "bufio"
+ "context"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "log"
+ "net"
+ "net/http"
+ "net/http/httptest"
+ "net/netip"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+ "testing"
+ "time"
+
+ "tailscale.com/client/local"
+ "tailscale.com/ipn"
+ "tailscale.com/ipn/ipnstate"
+ "tailscale.com/tailcfg"
+ "tailscale.com/tstest/integration"
+ "tailscale.com/tstest/integration/testcontrol"
+ "tailscale.com/types/logger"
+)
+
+var (
+ flagTailscaled = flag.String("tailscaled", "", "path to tailscaled binary (required)")
+ flagOut = flag.String("out", "", "output directory for trace/pprof/phases files (required)")
+ flagSkipPeer = flag.Bool("skip-peer", false, "don't spin up a peer; measure only to BackendState=Running (no first-ping metric)")
+ flagKeepAlive = flag.Bool("keep-alive", false, "after measuring, leave the target process running so you can poke at it")
+ flagTimeout = flag.Duration("timeout", 60*time.Second, "overall timeout for a single run")
+ flagRuns = flag.Int("runs", 1, "number of back-to-back measurement runs")
+ flagVerbose = flag.Bool("v", false, "verbose: stream daemon stderr")
+ flagCachedNetmap = flag.Bool("cached-netmap", false, "test client-side netmap caching: testcontrol grants NodeAttrCacheNetworkMaps, target uses persistent state, first 'prime' run populates the cache, then -runs measurement runs reuse it.")
+)
+
+func main() {
+ flag.Parse()
+ if *flagTailscaled == "" || *flagOut == "" {
+ fmt.Fprintln(os.Stderr, "usage: startupprof -tailscaled=PATH -out=DIR [flags]")
+ flag.PrintDefaults()
+ os.Exit(2)
+ }
+ if _, err := os.Stat(*flagTailscaled); err != nil {
+ log.Fatalf("tailscaled not found at %q: %v", *flagTailscaled, err)
+ }
+ if err := os.MkdirAll(*flagOut, 0o755); err != nil {
+ log.Fatalf("mkdir out: %v", err)
+ }
+
+ tb := &fakeTB{}
+
+ // Spin up the fake control plane + DERP + STUN in-process.
+ derpMap := integration.RunDERPAndSTUN(tb, logger.Discard, "127.0.0.1")
+ control := &testcontrol.Server{
+ DERPMap: derpMap,
+ DNSConfig: &tailcfg.DNSConfig{
+ Proxied: true,
+ },
+ MagicDNSDomain: "tail-scale.ts.net",
+ }
+ if *flagCachedNetmap {
+ // Grant NodeAttrCacheNetworkMaps so the target writes + reads a
+ // disk netmap cache between runs.
+ // DefaultNodeCapabilities replaces the default cap map entirely.
+ caps := tailcfg.NodeCapMap{
+ tailcfg.NodeAttrCacheNetworkMaps: []tailcfg.RawMessage{},
+ tailcfg.CapabilityHTTPS: []tailcfg.RawMessage{},
+ tailcfg.NodeAttrFunnel: []tailcfg.RawMessage{},
+ tailcfg.CapabilityFileSharing: []tailcfg.RawMessage{},
+ tailcfg.CapabilityFunnelPorts + "?ports=8080,443": []tailcfg.RawMessage{},
+ }
+ control.DefaultNodeCapabilities = &caps
+ log.Printf("cached-netmap mode: testcontrol grants NodeAttrCacheNetworkMaps")
+ }
+ control.HTTPTestServer = httptest.NewUnstartedServer(control)
+ control.HTTPTestServer.Start()
+ defer control.HTTPTestServer.Close()
+ controlURL := control.HTTPTestServer.URL
+ log.Printf("testcontrol URL: %s", controlURL)
+ log.Printf("DERP: %+v", derpMap.Regions[1].Nodes[0])
+
+ // Optionally, bring up a peer tailscaled. This one is *not* profiled; it
+ // exists only so the target has a real peer to ping.
+ var peerIP netip.Addr
+ if !*flagSkipPeer {
+ p, err := launchDaemon(daemonOpts{
+ binary: *flagTailscaled,
+ outDir: *flagOut,
+ controlURL: controlURL,
+ name: "peer",
+ authKey: "peer-key",
+ verbose: *flagVerbose,
+ // peer is NOT profiled
+ })
+ if err != nil {
+ log.Fatalf("launch peer: %v", err)
+ }
+ defer p.shutdown()
+
+ ctx, cancel := context.WithTimeout(context.Background(), *flagTimeout)
+ defer cancel()
+ if err := p.waitReady(ctx); err != nil {
+ log.Fatalf("peer waitReady: %v", err)
+ }
+ st, err := p.lc.Status(ctx)
+ if err != nil {
+ log.Fatalf("peer status: %v", err)
+ }
+ if len(st.TailscaleIPs) == 0 {
+ log.Fatalf("peer has no tailscale IP")
+ }
+ peerIP = st.TailscaleIPs[0]
+ log.Printf("peer up at %v", peerIP)
+ }
+
+ // Target state dir.
+ // - In normal mode: fresh temp dir per run (ephemeral-style, though we
+ // use file state since cache needs varRoot; cache is simply absent).
+ // - In cached-netmap mode: single persistent state dir reused across
+ // runs. Prime run (run 0) populates the cache; runs 1..N measure
+ // reuse.
+ targetStateDir := ""
+ if *flagCachedNetmap {
+ targetStateDir = filepath.Join(*flagOut, "target-state")
+ if err := os.RemoveAll(targetStateDir); err != nil {
+ log.Fatalf("reset target state dir: %v", err)
+ }
+ if err := os.MkdirAll(targetStateDir, 0o700); err != nil {
+ log.Fatalf("create target state dir: %v", err)
+ }
+ log.Printf("cached-netmap mode: persistent target state dir: %s", targetStateDir)
+ log.Printf("---- prime run (not measured) ----")
+ if err := runOnce(controlURL, peerIP, "target-prime", targetStateDir, false); err != nil {
+ log.Fatalf("prime run: %v", err)
+ }
+ }
+
+ for run := 1; run <= *flagRuns; run++ {
+ log.Printf("==== run %d of %d ====", run, *flagRuns)
+ name := "target"
+ if *flagRuns > 1 {
+ name = fmt.Sprintf("target-run%d", run)
+ }
+ if err := runOnce(controlURL, peerIP, name, targetStateDir, true); err != nil {
+ log.Fatalf("run %d: %v", run, err)
+ }
+ }
+}
+
+// runOnce launches a target tailscaled and measures startup to Running + first
+// ping. If reusedStateDir is non-empty, it's used (persistent across runs);
+// otherwise a fresh temp dir is created. If measure is false, no report is
+// generated (used for the prime run in cached-netmap mode).
+func runOnce(controlURL string, peerIP netip.Addr, name, reusedStateDir string, measure bool) error {
+ opts := daemonOpts{
+ binary: *flagTailscaled,
+ outDir: *flagOut,
+ controlURL: controlURL,
+ name: name,
+ authKey: "target-key",
+ verbose: *flagVerbose,
+ profile: measure,
+ stateDir: reusedStateDir,
+ }
+ d, err := launchDaemon(opts)
+ if err != nil {
+ return fmt.Errorf("launch target: %w", err)
+ }
+ if !*flagKeepAlive {
+ defer d.shutdown()
+ }
+
+ t0 := d.startWall
+
+ // tSockUp: moment safesocket is listening.
+ tSockUp, err := d.waitSocket(*flagTimeout)
+ if err != nil {
+ return fmt.Errorf("waiting for socket: %w", err)
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), *flagTimeout)
+ defer cancel()
+
+ // tRunning: moment BackendState == Running (netmap applied, at least
+ // one live DERP or peer endpoint).
+ // tFirstNetMap: moment any non-nil NetMap has been applied internally
+ // (from disk cache OR from control). With netmap caching enabled, this
+ // should fire much earlier than tRunning.
+ tRunning, tFirstNetMap, err := d.waitRunningAndNetMap(ctx)
+ if err != nil {
+ return fmt.Errorf("waiting for Running: %w", err)
+ }
+
+ // tFirstPing: first successful disco ping to the peer.
+ var tFirstPing time.Time
+ if peerIP.IsValid() {
+ pr, err := pingUntilSuccess(ctx, d.lc, peerIP, 20*time.Second)
+ if err != nil {
+ return fmt.Errorf("ping %v: %w", peerIP, err)
+ }
+ tFirstPing = time.Now()
+ log.Printf("[%s] first ping to %v ok: via=%s latency=%v",
+ name, peerIP, pr.DERPRegionCode, pr.LatencySeconds)
+ }
+
+ if !measure {
+ // Cleanly shutdown to persist state for the next run.
+ d.shutdown()
+ return nil
+ }
+
+ // Give tailscaled a moment to flush the trace/pprof files (state_running
+ // triggers the stop, which writes out files and closes them).
+ waitForProfFiles(d.profilePaths, 2*time.Second)
+
+ report := buildReport(d, t0, tSockUp, tFirstNetMap, tRunning, tFirstPing)
+ fmt.Println(report)
+
+ summaryPath := filepath.Join(*flagOut, name+"-summary.txt")
+ if err := os.WriteFile(summaryPath, []byte(report), 0o644); err != nil {
+ return fmt.Errorf("write summary: %w", err)
+ }
+ log.Printf("wrote summary: %s", summaryPath)
+ log.Printf(" trace: %s", d.profilePaths.trace)
+ log.Printf(" cpuprof: %s", d.profilePaths.cpu)
+ log.Printf(" memprof: %s", d.profilePaths.mem)
+ log.Printf(" phases: %s", d.profilePaths.phases)
+ log.Printf(" stderr: %s", d.stderrPath)
+
+ if *flagKeepAlive {
+ log.Printf("--keep-alive set; leaving target running. Ctrl-C to exit.")
+ select {}
+ }
+ return nil
+}
+
+// -------- daemon management --------
+
+type daemonOpts struct {
+ binary string
+ outDir string
+ controlURL string
+ name string
+ authKey string
+ verbose bool
+ profile bool // if true, enable TS_STARTUPPROF_* envknobs
+ stateDir string // if non-empty, use this as --statedir (persistent across invocations); otherwise a fresh tempdir
+}
+
+type profilePaths struct {
+ trace string
+ cpu string
+ mem string
+ phases string
+}
+
+type daemon struct {
+ opts daemonOpts
+ cmd *exec.Cmd
+ stateDir string
+ sockPath string
+ stderrPath string
+ startWall time.Time
+ lc *local.Client
+ profilePaths profilePaths
+ stderrMu sync.Mutex
+ stderrLines []string // for inittrace capture
+ stderrFile *os.File
+ shutdownOnce sync.Once
+}
+
+func launchDaemon(opts daemonOpts) (*daemon, error) {
+ stateDir := opts.stateDir
+ persistent := stateDir != ""
+ if !persistent {
+ var err error
+ stateDir, err = os.MkdirTemp("", "tsprof-"+opts.name+"-")
+ if err != nil {
+ return nil, err
+ }
+ }
+ // Use a short socket path (unix sockets have length limits: 104 on
+ // macOS, 108 on Linux). Always put sockets under /tmp with a short
+ // name to stay under even the tightest limit.
+ sockPath := filepath.Join(os.TempDir(), "tsprof-"+opts.name+".sock")
+
+ args := []string{
+ "--tun=userspace-networking",
+ "--statedir=" + stateDir,
+ "--socket=" + sockPath,
+ "--socks5-server=localhost:0",
+ "--no-logs-no-support", // disable logtail upload
+ }
+ if !persistent {
+ // Ephemeral (random node key per invocation). Triggers
+ // controlclient.LoginEphemeral in cmd/tailscaled.
+ args = append(args, "--state=mem:")
+ } else {
+ // Persistent state; tailscaled defaults the state file to
+ // <statedir>/tailscaled.state when --state is unset. We want
+ // the cache directory (<statedir>/profile-data/...) to survive
+ // across invocations, so we don't use mem:.
+ }
+ cmd := exec.Command(opts.binary, args...)
+
+ env := os.Environ()
+ env = append(env,
+ "TS_CONTROL_URL="+opts.controlURL, // respected for initial prefs default? we set prefs explicitly below anyway
+ "TS_PANIC_IF_HIT_MAIN_CONTROL=1",
+ "TS_DISABLE_PORTMAPPER=1",
+ "TS_ASSUME_NETWORK_UP_FOR_TEST=1",
+ "GODEBUG=inittrace=1",
+ )
+
+ var paths profilePaths
+ if opts.profile {
+ paths = profilePaths{
+ trace: filepath.Join(opts.outDir, opts.name+"-trace.out"),
+ cpu: filepath.Join(opts.outDir, opts.name+"-cpu.pprof"),
+ mem: filepath.Join(opts.outDir, opts.name+"-mem.pprof"),
+ phases: filepath.Join(opts.outDir, opts.name+"-phases.txt"),
+ }
+ env = append(env,
+ "TS_STARTUPPROF_TRACE="+paths.trace,
+ "TS_STARTUPPROF_CPUPROF="+paths.cpu,
+ "TS_STARTUPPROF_MEMPROF="+paths.mem,
+ "TS_STARTUPPROF_PHASES="+paths.phases,
+ "TS_STARTUPPROF_MAX_SECS=60",
+ )
+ }
+ cmd.Env = env
+
+ // Capture stderr to file + a ring of recent lines for inittrace parsing.
+ stderrPath := filepath.Join(opts.outDir, opts.name+"-stderr.log")
+ stderrFile, err := os.Create(stderrPath)
+ if err != nil {
+ return nil, err
+ }
+ d := &daemon{
+ opts: opts,
+ cmd: cmd,
+ stateDir: stateDir,
+ sockPath: sockPath,
+ stderrPath: stderrPath,
+ profilePaths: paths,
+ stderrFile: stderrFile,
+ }
+ // Use an os.Pipe so cmd.Start() doesn't spawn its own copy goroutine
+ // (which adds scheduling latency before we first get control back).
+ pr, pw, err := os.Pipe()
+ if err != nil {
+ stderrFile.Close()
+ return nil, err
+ }
+ cmd.Stderr = pw
+ cmd.Stdout = pw
+ go d.pumpStderr(pr)
+
+ log.Printf("[%s] starting %s %s", opts.name, opts.binary, strings.Join(args, " "))
+ t0 := time.Now()
+ d.startWall = t0
+ if err := cmd.Start(); err != nil {
+ stderrFile.Close()
+ pw.Close()
+ return nil, err
+ }
+ // Close our copy of the write end: only the child holds it open now,
+ // so pumpStderr's reader will see EOF when the child exits.
+ pw.Close()
+ if opts.verbose {
+ log.Printf("[%s] cmd.Start() returned in %v", opts.name, time.Since(t0))
+ }
+
+ d.lc = &local.Client{
+ Socket: sockPath,
+ UseSocketOnly: true,
+ }
+ return d, nil
+}
+
+func (d *daemon) pumpStderr(r io.Reader) {
+ sc := bufio.NewScanner(r)
+ sc.Buffer(make([]byte, 64*1024), 1024*1024)
+ for sc.Scan() {
+ line := sc.Text()
+ d.stderrMu.Lock()
+ d.stderrLines = append(d.stderrLines, line)
+ d.stderrMu.Unlock()
+ fmt.Fprintln(d.stderrFile, line)
+ if d.opts.verbose {
+ fmt.Fprintf(os.Stderr, "[%s] %s\n", d.opts.name, line)
+ }
+ }
+}
+
+func (d *daemon) waitSocket(timeout time.Duration) (time.Time, error) {
+ // NOTE: we intentionally use net.Dial directly here and NOT
+ // safesocket.ConnectContext, because the latter has a 250ms backoff
+ // whenever the daemon appears to still be starting up (see
+ // safesocket/safesocket.go:62). For a latency benchmark that would
+ // artificially add 250ms to our measurements.
+ deadline := time.Now().Add(timeout)
+ attempts := 0
+ for {
+ attempts++
+ c, err := net.Dial("unix", d.sockPath)
+ if err == nil {
+ c.Close()
+ if d.opts.verbose {
+ log.Printf("[%s] socket up after %d attempts in %v", d.opts.name, attempts, time.Since(d.startWall))
+ }
+ return time.Now(), nil
+ }
+ if time.Now().After(deadline) {
+ return time.Time{}, fmt.Errorf("timeout waiting for socket %q: %v", d.sockPath, err)
+ }
+ time.Sleep(500 * time.Microsecond)
+ }
+}
+
+// waitReady calls Start() with prefs + authkey and waits until Running.
+func (d *daemon) waitReady(ctx context.Context) error {
+ if _, err := d.waitSocket(10 * time.Second); err != nil {
+ return err
+ }
+ _, _, err := d.waitRunningAndNetMap(ctx)
+ return err
+}
+
+// waitRunningAndNetMap drives the state machine by calling lc.Start()
+// (which is idempotent in effect) and watches the bus for both the first
+// non-nil NetMap notification AND the transition to BackendState=Running.
+//
+// The first-netmap moment is valuable because with client-side netmap
+// caching, it can occur *before* the first control round-trip completes
+// (the cached netmap is applied synchronously inside lc.Start). It's the
+// earliest point at which the node has peer information and can initiate
+// disco connections.
+//
+// Returns (tRunning, tFirstNetMap, err). Either time may be zero if the
+// corresponding event was not observed (e.g. we missed the netmap delivery
+// because it arrived before our WatchIPNBus subscription did).
+func (d *daemon) waitRunningAndNetMap(ctx context.Context) (tRunning, tFirstNetMap time.Time, err error) {
+ // Subscribe first so we catch the initial-state burst, which (with
+ // NotifyInitialNetMap) delivers the current netmap if one is already
+ // applied at the time of subscription. This covers the case where a
+ // cached netmap was loaded in lc.Start before we even started watching.
+ const mask = ipn.NotifyInitialState | ipn.NotifyInitialNetMap
+ watcher, werr := d.lc.WatchIPNBus(ctx, mask)
+ if werr != nil {
+ return time.Time{}, time.Time{}, fmt.Errorf("WatchIPNBus: %w", werr)
+ }
+ defer watcher.Close()
+
+ // Now call Start.
+ prefs := ipn.NewPrefs()
+ prefs.ControlURL = d.opts.controlURL
+ prefs.WantRunning = true
+ prefs.Hostname = d.opts.name
+ _ = d.lc.Start(ctx, ipn.Options{
+ UpdatePrefs: prefs,
+ AuthKey: d.opts.authKey,
+ })
+ // StartLoginInteractive kicks the control client's auth routine, which
+ // will use the AuthKey set above. Without this, a fresh tailscaled
+ // stays at NeedsLogin forever.
+ _ = d.lc.StartLoginInteractive(ctx)
+
+ for {
+ n, nerr := watcher.Next()
+ if nerr != nil {
+ return tRunning, tFirstNetMap, nerr
+ }
+ if n.ErrMessage != nil {
+ return tRunning, tFirstNetMap, errors.New(*n.ErrMessage)
+ }
+ if n.NetMap != nil && tFirstNetMap.IsZero() {
+ tFirstNetMap = time.Now()
+ }
+ if n.State != nil && *n.State == ipn.Running {
+ tRunning = time.Now()
+ return tRunning, tFirstNetMap, nil
+ }
+ }
+}
+
+func (d *daemon) shutdown() {
+ d.shutdownOnce.Do(func() {
+ if d.cmd.Process != nil {
+ _ = d.cmd.Process.Signal(syscall.SIGTERM)
+ done := make(chan struct{})
+ go func() { d.cmd.Wait(); close(done) }()
+ select {
+ case <-done:
+ case <-time.After(3 * time.Second):
+ _ = d.cmd.Process.Kill()
+ <-done
+ }
+ }
+ if d.stderrFile != nil {
+ d.stderrFile.Close()
+ }
+ })
+}
+
+func pingUntilSuccess(ctx context.Context, lc *local.Client, ip netip.Addr, maxWait time.Duration) (*ipnstate.PingResult, error) {
+ deadline := time.Now().Add(maxWait)
+ var lastErr error
+ for time.Now().Before(deadline) {
+ pingCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
+ pr, err := lc.Ping(pingCtx, ip, tailcfg.PingDisco)
+ cancel()
+ if err == nil && pr != nil && pr.Err == "" {
+ return pr, nil
+ }
+ if err != nil {
+ lastErr = err
+ } else if pr != nil && pr.Err != "" {
+ lastErr = errors.New(pr.Err)
+ }
+ time.Sleep(100 * time.Millisecond)
+ }
+ if lastErr == nil {
+ lastErr = errors.New("ping deadline")
+ }
+ return nil, lastErr
+}
+
+func waitForProfFiles(p profilePaths, max time.Duration) {
+ if p.phases == "" {
+ return
+ }
+ deadline := time.Now().Add(max)
+ for time.Now().Before(deadline) {
+ if fi, err := os.Stat(p.phases); err == nil && fi.Size() > 0 {
+ return
+ }
+ time.Sleep(50 * time.Millisecond)
+ }
+}
+
+// -------- reporting --------
+
+func buildReport(d *daemon, tFork, tSockUp, tFirstNetMap, tRunning, tFirstPing time.Time) string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "===== startupprof: %s =====\n", d.opts.name)
+ fmt.Fprintf(&b, "tailscaled: %s\n", d.opts.binary)
+ fmt.Fprintf(&b, "\nWall-clock milestones (from fork):\n")
+ fmt.Fprintf(&b, " %-28s %8s\n", "milestone", "ms")
+ row := func(name string, t time.Time) {
+ if t.IsZero() {
+ return
+ }
+ fmt.Fprintf(&b, " %-28s %8.1f\n", name, float64(t.Sub(tFork).Microseconds())/1000.0)
+ }
+ row("fork->socket_listening", tSockUp)
+ row("fork->first_netmap_applied", tFirstNetMap)
+ row("fork->backend_running", tRunning)
+ row("fork->first_peer_ping", tFirstPing)
+
+ // Phases from the traced process itself (measured from earliest possible
+ // time inside the process: startupprof.go's procStart).
+ if ph, err := readPhases(d.profilePaths.phases); err == nil && len(ph) > 0 {
+ fmt.Fprintf(&b, "\nIn-process phases (from procStart, earliest time.Now in process):\n")
+ fmt.Fprintf(&b, " %-28s %8s %8s\n", "phase", "ms@", "delta")
+ var prev time.Duration
+ for i, p := range ph {
+ var delta time.Duration
+ if i > 0 {
+ delta = p.offset - prev
+ }
+ fmt.Fprintf(&b, " %-28s %8.1f %8.1f\n", p.name,
+ float64(p.offset.Microseconds())/1000.0,
+ float64(delta.Microseconds())/1000.0)
+ prev = p.offset
+ }
+ }
+
+ // inittrace from GODEBUG=inittrace=1.
+ d.stderrMu.Lock()
+ stderrSnap := append([]string(nil), d.stderrLines...)
+ d.stderrMu.Unlock()
+ if itr, total := extractInitTrace(stderrSnap); len(itr) > 0 {
+ fmt.Fprintf(&b, "\nTop package init costs (GODEBUG=inittrace=1):\n")
+ fmt.Fprintf(&b, " %-50s %9s %10s %10s\n", "package", "own_ms", "cumul_ms", "alloc_KB")
+ for _, it := range itr {
+ fmt.Fprintf(&b, " %-50s %9.3f %10.2f %10d\n",
+ it.pkg, it.clockMs, it.cpuMs, it.allocBytes/1024)
+ }
+ fmt.Fprintf(&b, " %-50s %9.3f %10.2f %10d\n", total.pkg, total.clockMs, total.cpuMs, total.allocBytes/1024)
+ fmt.Fprintf(&b, " (own_ms = this init func's wall time; cumul_ms = elapsed since program start)\n")
+ }
+
+ return b.String()
+}
+
+type phase struct {
+ offset time.Duration
+ name string
+}
+
+func readPhases(path string) ([]phase, error) {
+ if path == "" {
+ return nil, errors.New("no path")
+ }
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ sc := bufio.NewScanner(f)
+ var out []phase
+ for sc.Scan() {
+ line := sc.Text()
+ tab := strings.IndexByte(line, '\t')
+ if tab < 0 {
+ continue
+ }
+ ns, err := strconv.ParseInt(line[:tab], 10, 64)
+ if err != nil {
+ continue
+ }
+ out = append(out, phase{offset: time.Duration(ns), name: line[tab+1:]})
+ }
+ return out, nil
+}
+
+type initRecord struct {
+ pkg string
+ clockMs float64
+ cpuMs float64
+ allocBytes int64
+}
+
+// extractInitTrace parses Go's GODEBUG=inittrace=1 lines out of captured
+// stderr. Each line looks like:
+//
+// init internal/bytealg @0.008 ms, 0.004 ms clock, 0 bytes, 0 allocs
+// init runtime @0.15 ms, 0.12 ms clock, 0 bytes, 0 allocs
+// init errors @0.47 ms, 0.003 ms clock, 0 bytes, 0 allocs
+//
+// Returns (topBy Y clock, totalAcrossAll). Each returned record's clockMs is
+// the init function's own wall time (Y), not the cumulative @X.
+func extractInitTrace(lines []string) (top []initRecord, total initRecord) {
+ var out []initRecord
+ var maxAt float64
+ for _, line := range lines {
+ const pfx = "init "
+ i := strings.Index(line, pfx)
+ if i < 0 {
+ continue
+ }
+ rest := line[i+len(pfx):]
+ // "pkg @X ms, Y ms clock, Z bytes, N allocs"
+ at := strings.Index(rest, " @")
+ if at < 0 {
+ continue
+ }
+ pkg := rest[:at]
+ tail := rest[at+2:]
+ // "X ms, Y ms clock, Z bytes, N allocs"
+ fields := strings.Split(tail, ",")
+ if len(fields) < 3 {
+ continue
+ }
+ atMs, ok := parseNumberWithSuffix(strings.TrimSpace(fields[0]), "ms")
+ if !ok {
+ continue
+ }
+ clockMs, ok := parseNumberWithSuffix(strings.TrimSpace(fields[1]), "ms clock")
+ if !ok {
+ continue
+ }
+ // fields[2] is "Z bytes"
+ alloc, _ := parseNumberWithSuffix(strings.TrimSpace(fields[2]), "bytes")
+ if atMs > maxAt {
+ maxAt = atMs
+ }
+ total.clockMs += clockMs
+ total.allocBytes += int64(alloc)
+ out = append(out, initRecord{
+ pkg: pkg,
+ clockMs: clockMs,
+ cpuMs: atMs, // repurposed: show @X ms (cumulative offset) for context
+ allocBytes: int64(alloc),
+ })
+ }
+ total.pkg = "TOTAL (all init funcs)"
+ total.cpuMs = maxAt
+ // Sort by clock time desc, then return top 30.
+ sort.Slice(out, func(i, j int) bool { return out[i].clockMs > out[j].clockMs })
+ if len(out) > 30 {
+ out = out[:30]
+ }
+ return out, total
+}
+
+func parseNumberWithSuffix(s, suffix string) (float64, bool) {
+ s = strings.TrimSuffix(s, suffix)
+ s = strings.TrimSpace(s)
+ v, err := strconv.ParseFloat(s, 64)
+ if err != nil {
+ return 0, false
+ }
+ return v, true
+}
+
+// -------- fakeTB (needed by integration.RunDERPAndSTUN) --------
+
+type fakeTB struct{ *testing.T }
+
+func (*fakeTB) Cleanup(_ func()) {}
+func (*fakeTB) Error(args ...any) {
+ log.Print(args...)
+}
+func (*fakeTB) Errorf(format string, args ...any) {
+ log.Printf(format, args...)
+}
+func (*fakeTB) Fail() { log.Fatal("fail") }
+func (*fakeTB) FailNow() { log.Fatal("failnow") }
+func (*fakeTB) Failed() bool { return false }
+func (*fakeTB) Fatal(args ...any) {
+ log.Fatal(args...)
+}
+func (*fakeTB) Fatalf(format string, args ...any) {
+ log.Fatalf(format, args...)
+}
+func (*fakeTB) Helper() {}
+func (*fakeTB) Log(args ...any) {
+ log.Print(args...)
+}
+func (*fakeTB) Logf(format string, args ...any) {
+ log.Printf(format, args...)
+}
+func (*fakeTB) Name() string { return "startupprof" }
+func (*fakeTB) Setenv(_, _ string) {}
+func (*fakeTB) Skip(_ ...any) {}
+func (*fakeTB) SkipNow() {}
+func (*fakeTB) Skipf(_ string, _ ...any) {}
+func (*fakeTB) Skipped() bool { return false }
+func (*fakeTB) TempDir() string { d, _ := os.MkdirTemp("", "tb"); return d }
+func (*fakeTB) Deadline() (time.Time, bool) { return time.Time{}, false }
+
+// unused but required for iface:
+var _ = http.StatusOK
diff --git a/cmd/tailscaled/startupprof.go b/cmd/tailscaled/startupprof.go
new file mode 100644
index 000000000..627e2e927
--- /dev/null
+++ b/cmd/tailscaled/startupprof.go
@@ -0,0 +1,269 @@
+// Copyright (c) Tailscale Inc & contributors
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build go1.23
+
+package main
+
+// This file implements opt-in, envknob-gated startup profiling for tailscaled.
+// It is intended to be used by the cmd/startupprof harness (and anyone
+// debugging tailscaled startup latency).
+//
+// It is safe to leave compiled in: when none of the TS_STARTUPPROF_* envknobs
+// are set, everything here is a fast no-op.
+//
+// The controls are:
+//
+// TS_STARTUPPROF_TRACE=/path/to/trace.out
+// Start runtime/trace.Start at process entry (as early as possible in
+// main, before any other work) and write to the given path. The trace
+// will be stopped automatically when the backend reaches ipn.Running,
+// or after TS_STARTUPPROF_MAX_SECS (default 30s), whichever is first.
+//
+// TS_STARTUPPROF_CPUPROF=/path/to/cpu.pprof
+// Same, but for runtime/pprof.StartCPUProfile.
+//
+// TS_STARTUPPROF_MEMPROF=/path/to/mem.pprof
+// Write a heap profile when the trace is stopped.
+//
+// TS_STARTUPPROF_MAX_SECS=<int>
+// Maximum time to leave profilers running. Default: 30.
+//
+// TS_STARTUPPROF_PHASES=/path/to/phases.txt
+// Append wall-clock phase timings (measured from process entry) to
+// this file. Machine-readable: "<nanoseconds>\t<phase name>\n".
+//
+// The phase marker API (markPhase) is also available without any envknob;
+// when no phases file is configured, markPhase emits a runtime/trace.Log
+// event (cheap; no-op if tracing disabled) and records nothing else.
+//
+// Design goal: zero dependencies on logtail, logger, or anything that is
+// itself part of the startup cost being measured.
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "runtime"
+ "runtime/pprof"
+ "runtime/trace"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "tailscale.com/ipn"
+ "tailscale.com/ipn/ipnlocal"
+)
+
+// procStart is the earliest time we can record: the value of time.Now() at
+// init() in this file. This captures cost incurred by other init funcs that
+// run after ours, plus all of main. It is not perfect (Go init order is
+// source-file lexical within a package) but is adequate for our purposes.
+//
+// NOTE: placing procStart in a var with an initializer at package scope
+// guarantees it is assigned before any init() or main(). We also set it
+// again in init() as a backstop.
+var procStart = time.Now()
+
+// startupProf holds runtime state for the profiler. All fields are only
+// accessed from the single startup goroutine + the state-change notifier.
+var startupProf struct {
+ enabled atomic.Bool
+ stopOnce sync.Once
+ stoppedAt atomic.Int64 // unix nanos; 0 until stopped
+ traceFile *os.File
+ cpuFile *os.File
+ memPath string
+ phasesPath string
+
+ // phases is a ring of recorded phases, appended to the phases file on stop.
+ phasesMu sync.Mutex
+ phases []phaseRecord
+}
+
+type phaseRecord struct {
+ offset time.Duration
+ name string
+}
+
+func init() {
+ // Backstop: ensure procStart reflects a very early moment even if the
+ // initializer above was bypassed for some reason.
+ if procStart.IsZero() {
+ procStart = time.Now()
+ }
+ // If TS_STARTUPPROF_DEBUG is set, log wall-clock timestamps (in ns
+ // since unix epoch) at each phase marker so we can correlate inside
+ // vs outside-the-process timings.
+ if os.Getenv("TS_STARTUPPROF_DEBUG") != "" {
+ startupProfDebug = true
+ }
+}
+
+var startupProfDebug bool
+
+// startupProfileStart starts any configured profilers. Safe to call exactly
+// once, as early in main as possible. It is not safe to call concurrently.
+//
+// It returns without error if no profilers are configured; callers should
+// still call it unconditionally.
+func startupProfileStart() {
+ tracePath := os.Getenv("TS_STARTUPPROF_TRACE")
+ cpuPath := os.Getenv("TS_STARTUPPROF_CPUPROF")
+ memPath := os.Getenv("TS_STARTUPPROF_MEMPROF")
+ phasesPath := os.Getenv("TS_STARTUPPROF_PHASES")
+
+ if tracePath == "" && cpuPath == "" && memPath == "" && phasesPath == "" {
+ return
+ }
+ startupProf.enabled.Store(true)
+ startupProf.memPath = memPath
+ startupProf.phasesPath = phasesPath
+
+ if tracePath != "" {
+ f, err := os.Create(tracePath)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "startupprof: create trace %q: %v\n", tracePath, err)
+ } else if err := trace.Start(f); err != nil {
+ fmt.Fprintf(os.Stderr, "startupprof: trace.Start: %v\n", err)
+ f.Close()
+ } else {
+ startupProf.traceFile = f
+ }
+ }
+ if cpuPath != "" {
+ f, err := os.Create(cpuPath)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "startupprof: create cpuprof %q: %v\n", cpuPath, err)
+ } else {
+ // Bump sample rate from the default 100Hz to 1000Hz since
+ // startup is typically <100ms and the default gives almost
+ // no samples. Must be called before StartCPUProfile.
+ runtime.SetCPUProfileRate(1000)
+ if err := pprof.StartCPUProfile(f); err != nil {
+ fmt.Fprintf(os.Stderr, "startupprof: StartCPUProfile: %v\n", err)
+ f.Close()
+ } else {
+ startupProf.cpuFile = f
+ }
+ }
+ }
+
+ // Record process start as the first phase.
+ markPhase("startupprof_init")
+
+ // Safety net: stop after TS_STARTUPPROF_MAX_SECS regardless of state.
+ maxSecs := 30
+ if v := os.Getenv("TS_STARTUPPROF_MAX_SECS"); v != "" {
+ if n, err := parsePositiveInt(v); err == nil {
+ maxSecs = n
+ }
+ }
+ go func() {
+ time.Sleep(time.Duration(maxSecs) * time.Second)
+ startupProfileStop("max_time")
+ }()
+}
+
+// markPhase records a named phase. It is extremely cheap when profiling is
+// disabled (one atomic load). When profiling IS enabled, it emits a
+// runtime/trace Log event and appends to the phases slice for later
+// write-out.
+func markPhase(name string) {
+ if startupProfDebug {
+ // unix-time ns for external correlation.
+ fmt.Fprintf(os.Stderr, "TSSPD %d %s\n", time.Now().UnixNano(), name)
+ }
+ if !startupProf.enabled.Load() {
+ return
+ }
+ // trace.Log requires a non-nil context.
+ trace.Log(context.Background(), "phase", name)
+ rec := phaseRecord{
+ offset: time.Since(procStart),
+ name: name,
+ }
+ startupProf.phasesMu.Lock()
+ startupProf.phases = append(startupProf.phases, rec)
+ startupProf.phasesMu.Unlock()
+}
+
+// startupProfileWatchLocalBackend spawns a goroutine that watches the given
+// LocalBackend and stops all profilers when it reaches ipn.Running.
+//
+// Safe to call with a nil lb (no-op). Safe to call multiple times.
+func startupProfileWatchLocalBackend(lb *ipnlocal.LocalBackend) {
+ if !startupProf.enabled.Load() || lb == nil {
+ return
+ }
+ go func() {
+ // WatchNotifications delivers an immediate NotifyInitialState.
+ lb.WatchNotifications(context.Background(), ipn.NotifyInitialState, nil, func(n *ipn.Notify) bool {
+ if n.State != nil && *n.State == ipn.Running {
+ markPhase("state_running")
+ startupProfileStop("state_running")
+ return false
+ }
+ return true
+ })
+ }()
+}
+
+// startupProfileStop stops all profilers, writes outputs, and is idempotent.
+// reason is recorded as the final phase.
+func startupProfileStop(reason string) {
+ startupProf.stopOnce.Do(func() {
+ markPhase("stop_" + reason)
+ startupProf.stoppedAt.Store(time.Now().UnixNano())
+
+ // Stop CPU profile BEFORE the trace, because trace.Stop itself
+ // does an expensive stack-dump pass that would otherwise be the
+ // dominant entry in the CPU profile.
+ if startupProf.cpuFile != nil {
+ pprof.StopCPUProfile()
+ startupProf.cpuFile.Close()
+ }
+ if startupProf.traceFile != nil {
+ trace.Stop()
+ startupProf.traceFile.Close()
+ }
+ if startupProf.memPath != "" {
+ if f, err := os.Create(startupProf.memPath); err == nil {
+ runtime.GC()
+ pprof.WriteHeapProfile(f)
+ f.Close()
+ }
+ }
+ if startupProf.phasesPath != "" {
+ writePhases(startupProf.phasesPath)
+ }
+ })
+}
+
+func writePhases(path string) {
+ f, err := os.Create(path)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "startupprof: create phases %q: %v\n", path, err)
+ return
+ }
+ defer f.Close()
+ startupProf.phasesMu.Lock()
+ defer startupProf.phasesMu.Unlock()
+ for _, p := range startupProf.phases {
+ fmt.Fprintf(f, "%d\t%s\n", p.offset.Nanoseconds(), p.name)
+ }
+}
+
+func parsePositiveInt(s string) (int, error) {
+ n := 0
+ if s == "" {
+ return 0, fmt.Errorf("empty")
+ }
+ for _, c := range s {
+ if c < '0' || c > '9' {
+ return 0, fmt.Errorf("bad digit %q", c)
+ }
+ n = n*10 + int(c-'0')
+ }
+ return n, nil
+}
diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go
index fe18731ae..0aa9dca45 100644
--- a/cmd/tailscaled/tailscaled.go
+++ b/cmd/tailscaled/tailscaled.go
@@ -179,6 +179,11 @@ var (
type proxyStartFunc = func(logf logger.Logf, dialer *tsdial.Dialer)
func main() {
+ // Start the optional startup profiler as early as possible. This is a
+ // no-op unless TS_STARTUPPROF_* envknobs are set. See startupprof.go.
+ startupProfileStart()
+ markPhase("main_enter")
+
envknob.PanicIfAnyEnvCheckedInInit()
if shouldRunCLI() {
beCLI()
@@ -399,11 +404,13 @@ var logPol *logpolicy.Policy // or nil if not used
var debugMux *http.ServeMux
func run() (err error) {
+ markPhase("run_enter")
var logf logger.Logf = log.Printf
// Install an event bus as early as possible, so that it's
// available universally when setting up everything else.
sys := tsd.NewSystem()
+ markPhase("tsd_new_system")
sys.SocketPath = args.socketpath
// Parse config, if specified, to fail early if it's invalid.
@@ -425,6 +432,7 @@ func run() (err error) {
}
sys.Set(netMon)
}
+ markPhase("netmon_new")
var publicLogID logid.PublicID
if buildfeatures.HasLogTail {
@@ -445,6 +453,7 @@ func run() (err error) {
pol.Shutdown(ctx)
}()
}
+ markPhase("logpolicy_new")
if err := envknob.ApplyDiskConfigError(); err != nil {
log.Printf("Error reading environment config: %v", err)
@@ -473,6 +482,7 @@ func run() (err error) {
// crashed, and would for example restore system DNS configuration.
dns.CleanUp(logf, netMon, sys.Bus.Get(), sys.HealthTracker.Get(), args.tunname)
router.CleanUp(logf, netMon, args.tunname)
+ markPhase("dns_router_cleanup")
// If the cleanUp flag was passed, then exit.
if args.cleanUp {
return nil
@@ -509,10 +519,12 @@ var sigPipe os.Signal // set by sigpipe.go
// logID may be the zero value if logging is not in use.
func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID, sys *tsd.System) error {
+ markPhase("startIPNServer_enter")
ln, err := safesocket.Listen(args.socketpath)
if err != nil {
return fmt.Errorf("safesocket.Listen: %v", err)
}
+ markPhase("safesocket_listen")
ctx, cancel := context.WithCancel(ctx)
defer cancel()
@@ -570,6 +582,7 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
lb, err := getLocalBackend(ctx, logf, logID, sys)
if err == nil {
logf("got LocalBackend in %v", time.Since(t0).Round(time.Millisecond))
+ markPhase("got_local_backend")
if lb.Prefs().Valid() {
if err := lb.Start(ipn.Options{}); err != nil {
logf("LocalBackend.Start: %v", err)
@@ -578,8 +591,11 @@ func startIPNServer(ctx context.Context, logf logger.Logf, logID logid.PublicID,
cancel()
return
}
+ markPhase("lb_start_returned")
}
srv.SetLocalBackend(lb)
+ markPhase("srv_set_local_backend")
+ startupProfileWatchLocalBackend(lb)
close(wgEngineCreated)
return
}
@@ -624,6 +640,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID
if err != nil {
return nil, fmt.Errorf("createEngine: %w", err)
}
+ markPhase("createEngine_returned")
if onlyNetstack && !buildfeatures.HasNetstack {
return nil, errors.New("userspace-networking support is not compiled in to this binary")
}
@@ -641,6 +658,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID
return nil, fmt.Errorf("newNetstack: %w", err)
}
}
+ markPhase("netstack_created")
if startProxy != nil {
go startProxy(logf, dialer)
@@ -662,15 +680,18 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID
ht.SetUnhealthy(ipn.StateStoreHealth, health.Args{health.ArgError: err.Error()})
}
sys.Set(store)
+ markPhase("store_new")
if w, ok := sys.Tun.GetOK(); ok {
w.Start()
}
+ markPhase("tun_start")
lb, err := ipnlocal.NewLocalBackend(logf, logID, sys, opts.LoginFlags)
if err != nil {
return nil, fmt.Errorf("ipnlocal.NewLocalBackend: %w", err)
}
+ markPhase("new_local_backend")
lb.SetVarRoot(opts.VarRoot)
if logPol != nil {
lb.SetLogFlusher(logPol.Logtail.StartFlush)
@@ -687,6 +708,7 @@ func getLocalBackend(ctx context.Context, logf logger.Logf, logID logid.PublicID
log.Fatalf("failed to start netstack: %v", err)
}
}
+ markPhase("netstack_started")
if buildfeatures.HasTPM && args.hardwareAttestation.v {
lb.SetHardwareAttested()
}