diff options
| author | kari-ts <kari@tailscale.com> | 2025-03-19 11:28:04 -0700 |
|---|---|---|
| committer | kari-ts <kari@tailscale.com> | 2025-04-04 14:24:56 -0700 |
| commit | 6d5c7b11913e09b061e863411ad488dc44a13870 (patch) | |
| tree | 9e1789b5080ae4a92523611e49920dcb1102604b /cmd/containerboot/certs.go | |
| parent | ca50599c95e0a4cb7b4aab179e866e202f10c0c4 (diff) | |
| parent | 3a2c92f08eac8cd8f50356ff288e40a28636ee42 (diff) | |
| download | tailscale-kari/taildropsaf.tar.xz tailscale-kari/taildropsaf.zip | |
TO DO:kari/taildropsaf
-check if Context.getExternalFilesDirs works as is for private dir
Diffstat (limited to 'cmd/containerboot/certs.go')
| -rw-r--r-- | cmd/containerboot/certs.go | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/cmd/containerboot/certs.go b/cmd/containerboot/certs.go new file mode 100644 index 000000000..7af0424a9 --- /dev/null +++ b/cmd/containerboot/certs.go @@ -0,0 +1,147 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build linux + +package main + +import ( + "context" + "fmt" + "log" + "net" + "sync" + "time" + + "tailscale.com/ipn" + "tailscale.com/util/goroutines" + "tailscale.com/util/mak" +) + +// certManager is responsible for issuing certificates for known domains and for +// maintaining a loop that re-attempts issuance daily. +// Currently cert manager logic is only run on ingress ProxyGroup replicas that are responsible for managing certs for +// HA Ingress HTTPS endpoints ('write' replicas). +type certManager struct { + lc localClient + tracker goroutines.Tracker // tracks running goroutines + mu sync.Mutex // guards the following + // certLoops contains a map of DNS names, for which we currently need to + // manage certs to cancel functions that allow stopping a goroutine when + // we no longer need to manage certs for the DNS name. + certLoops map[string]context.CancelFunc +} + +// ensureCertLoops ensures that, for all currently managed Service HTTPS +// endpoints, there is a cert loop responsible for issuing and ensuring the +// renewal of the TLS certs. +// ServeConfig must not be nil. +func (cm *certManager) ensureCertLoops(ctx context.Context, sc *ipn.ServeConfig) error { + if sc == nil { + return fmt.Errorf("[unexpected] ensureCertLoops called with nil ServeConfig") + } + currentDomains := make(map[string]bool) + const httpsPort = "443" + for _, service := range sc.Services { + for hostPort := range service.Web { + domain, port, err := net.SplitHostPort(string(hostPort)) + if err != nil { + return fmt.Errorf("[unexpected] unable to parse HostPort %s", hostPort) + } + if port != httpsPort { // HA Ingress' HTTP endpoint + continue + } + currentDomains[domain] = true + } + } + cm.mu.Lock() + defer cm.mu.Unlock() + for domain := range currentDomains { + if _, exists := cm.certLoops[domain]; !exists { + cancelCtx, cancel := context.WithCancel(ctx) + mak.Set(&cm.certLoops, domain, cancel) + cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) }) + } + } + + // Stop goroutines for domain names that are no longer in the config. + for domain, cancel := range cm.certLoops { + if !currentDomains[domain] { + cancel() + delete(cm.certLoops, domain) + } + } + return nil +} + +// runCertLoop: +// - calls localAPI certificate endpoint to ensure that certs are issued for the +// given domain name +// - calls localAPI certificate endpoint daily to ensure that certs are renewed +// - if certificate issuance failed retries after an exponential backoff period +// starting at 1 minute and capped at 24 hours. Reset the backoff once issuance succeeds. +// Note that renewal check also happens when the node receives an HTTPS request and it is possible that certs get +// renewed at that point. Renewal here is needed to prevent the shared certs from expiry in edge cases where the 'write' +// replica does not get any HTTPS requests. +// https://letsencrypt.org/docs/integration-guide/#retrying-failures +func (cm *certManager) runCertLoop(ctx context.Context, domain string) { + const ( + normalInterval = 24 * time.Hour // regular renewal check + initialRetry = 1 * time.Minute // initial backoff after a failure + maxRetryInterval = 24 * time.Hour // max backoff period + ) + timer := time.NewTimer(0) // fire off timer immediately + defer timer.Stop() + retryCount := 0 + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + // We call the certificate endpoint, but don't do anything + // with the returned certs here. + // The call to the certificate endpoint will ensure that + // certs are issued/renewed as needed and stored in the + // relevant state store. For example, for HA Ingress + // 'write' replica, the cert and key will be stored in a + // Kubernetes Secret named after the domain for which we + // are issuing. + // Note that renewals triggered by the call to the + // certificates endpoint here and by renewal check + // triggered during a call to node's HTTPS endpoint + // share the same state/renewal lock mechanism, so we + // should not run into redundant issuances during + // concurrent renewal checks. + // TODO(irbekrm): maybe it is worth adding a new + // issuance endpoint that explicitly only triggers + // issuance and stores certs in the relevant store, but + // does not return certs to the caller? + _, _, err := cm.lc.CertPair(ctx, domain) + if err != nil { + log.Printf("error refreshing certificate for %s: %v", domain, err) + } + var nextInterval time.Duration + // TODO(irbekrm): distinguish between LE rate limit + // errors and other error types like transient network + // errors. + if err == nil { + retryCount = 0 + nextInterval = normalInterval + } else { + retryCount++ + // Calculate backoff: initialRetry * 2^(retryCount-1) + // For retryCount=1: 1min * 2^0 = 1min + // For retryCount=2: 1min * 2^1 = 2min + // For retryCount=3: 1min * 2^2 = 4min + backoff := initialRetry * time.Duration(1<<(retryCount-1)) + if backoff > maxRetryInterval { + backoff = maxRetryInterval + } + nextInterval = backoff + log.Printf("Error refreshing certificate for %s (retry %d): %v. Will retry in %v\n", + domain, retryCount, err, nextInterval) + } + timer.Reset(nextInterval) + } + } +} |
