summaryrefslogtreecommitdiffhomepage
path: root/cmd
diff options
context:
space:
mode:
Diffstat (limited to 'cmd')
-rw-r--r--cmd/containerboot/certs.go147
-rw-r--r--cmd/containerboot/certs_test.go229
-rw-r--r--cmd/containerboot/main.go2
-rw-r--r--cmd/containerboot/serve.go24
-rw-r--r--cmd/containerboot/serve_test.go4
-rw-r--r--cmd/containerboot/settings.go17
-rw-r--r--cmd/containerboot/tailscaled.go16
-rw-r--r--cmd/derper/cert.go102
-rw-r--r--cmd/derper/cert_test.go73
-rw-r--r--cmd/derper/depaware.txt13
-rw-r--r--cmd/derper/derper.go59
-rw-r--r--cmd/derpprobe/derpprobe.go3
-rw-r--r--cmd/hello/hello.go5
-rw-r--r--cmd/k8s-operator/depaware.txt8
-rw-r--r--cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml2
-rw-r--r--cmd/k8s-operator/deploy/manifests/operator.yaml1
-rw-r--r--cmd/k8s-operator/dnsrecords_test.go9
-rw-r--r--cmd/k8s-operator/egress-pod-readiness.go6
-rw-r--r--cmd/k8s-operator/egress-pod-readiness_test.go6
-rw-r--r--cmd/k8s-operator/egress-services.go18
-rw-r--r--cmd/k8s-operator/ingress-for-pg.go817
-rw-r--r--cmd/k8s-operator/ingress-for-pg_test.go286
-rw-r--r--cmd/k8s-operator/ingress.go1
-rw-r--r--cmd/k8s-operator/metrics_resources.go3
-rw-r--r--cmd/k8s-operator/operator.go86
-rw-r--r--cmd/k8s-operator/operator_test.go8
-rw-r--r--cmd/k8s-operator/proxygroup.go49
-rw-r--r--cmd/k8s-operator/proxygroup_specs.go33
-rw-r--r--cmd/k8s-operator/proxygroup_test.go76
-rw-r--r--cmd/k8s-operator/sts.go4
-rw-r--r--cmd/k8s-operator/sts_test.go21
-rw-r--r--cmd/k8s-operator/svc.go8
-rw-r--r--cmd/k8s-operator/testutils_test.go9
-rw-r--r--cmd/k8s-operator/tsrecorder.go41
-rw-r--r--cmd/natc/natc.go22
-rw-r--r--cmd/proxy-to-grafana/proxy-to-grafana.go104
-rw-r--r--cmd/stund/depaware.txt13
-rw-r--r--cmd/stund/stund.go3
-rw-r--r--cmd/tailscale/depaware.txt6
-rw-r--r--cmd/tailscaled/depaware.txt9
-rw-r--r--cmd/testwrapper/testwrapper.go30
-rw-r--r--cmd/tsidp/Dockerfile41
-rw-r--r--cmd/tsidp/README.md100
-rw-r--r--cmd/tsidp/tsidp.go17
-rw-r--r--cmd/xdpderper/xdpderper.go3
45 files changed, 2145 insertions, 389 deletions
diff --git a/cmd/containerboot/certs.go b/cmd/containerboot/certs.go
new file mode 100644
index 000000000..7af0424a9
--- /dev/null
+++ b/cmd/containerboot/certs.go
@@ -0,0 +1,147 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package main
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "net"
+ "sync"
+ "time"
+
+ "tailscale.com/ipn"
+ "tailscale.com/util/goroutines"
+ "tailscale.com/util/mak"
+)
+
+// certManager issues certificates for known domains and runs one background
+// loop per domain that re-checks issuance/renewal daily (see runCertLoop).
+// Currently cert manager logic is only run on ingress ProxyGroup replicas that are responsible for managing certs for
+// HA Ingress HTTPS endpoints ('write' replicas).
+type certManager struct {
+ lc localClient // client used to call the certificate endpoint (CertPair)
+ tracker goroutines.Tracker // tracks running cert loop goroutines
+ mu sync.Mutex // guards the following
+ // certLoops maps each DNS name that we currently manage certs for to
+ // the cancel function that stops that name's cert loop goroutine once
+ // we no longer need to manage certs for the DNS name.
+ certLoops map[string]context.CancelFunc
+}
+
+// ensureCertLoops ensures that, for all currently managed Service HTTPS
+// endpoints (Web entries on port 443), there is a cert loop responsible for
+// issuing and ensuring the renewal of the TLS certs, and stops the loops of
+// domains that are no longer present in sc.
+// ServeConfig must not be nil.
+// It returns an error if sc is nil or if any Web HostPort cannot be parsed; in
+// both cases no cert loops are started or stopped.
+// Cert loops are bound to ctx: cancelling ctx stops all of them.
+func (cm *certManager) ensureCertLoops(ctx context.Context, sc *ipn.ServeConfig) error {
+	if sc == nil {
+		return fmt.Errorf("[unexpected] ensureCertLoops called with nil ServeConfig")
+	}
+	currentDomains := make(map[string]bool)
+	const httpsPort = "443"
+	for _, service := range sc.Services {
+		if service == nil {
+			// Services is a map of pointers; a nil entry has no
+			// endpoints and must not be dereferenced.
+			continue
+		}
+		for hostPort := range service.Web {
+			domain, port, err := net.SplitHostPort(string(hostPort))
+			if err != nil {
+				// Keep the underlying parse error so that the
+				// reason is visible to the caller.
+				return fmt.Errorf("[unexpected] unable to parse HostPort %s: %w", hostPort, err)
+			}
+			if port != httpsPort { // HA Ingress' HTTP endpoint
+				continue
+			}
+			currentDomains[domain] = true
+		}
+	}
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+	// Start a loop for every domain that does not have one yet.
+	for domain := range currentDomains {
+		if _, exists := cm.certLoops[domain]; !exists {
+			cancelCtx, cancel := context.WithCancel(ctx)
+			mak.Set(&cm.certLoops, domain, cancel)
+			cm.tracker.Go(func() { cm.runCertLoop(cancelCtx, domain) })
+		}
+	}
+
+	// Stop goroutines for domain names that are no longer in the config.
+	for domain, cancel := range cm.certLoops {
+		if !currentDomains[domain] {
+			cancel()
+			delete(cm.certLoops, domain)
+		}
+	}
+	return nil
+}
+
+// runCertLoop:
+//   - calls localAPI certificate endpoint to ensure that certs are issued for the
+//     given domain name
+//   - calls localAPI certificate endpoint daily to ensure that certs are renewed
+//   - if certificate issuance failed retries after an exponential backoff period
+//     starting at 1 minute and capped at 24 hours. Reset the backoff once issuance succeeds.
+//
+// Note that renewal check also happens when the node receives an HTTPS request and it is possible that certs get
+// renewed at that point. Renewal here is needed to prevent the shared certs from expiry in edge cases where the 'write'
+// replica does not get any HTTPS requests.
+// https://letsencrypt.org/docs/integration-guide/#retrying-failures
+//
+// The loop runs until ctx is cancelled.
+func (cm *certManager) runCertLoop(ctx context.Context, domain string) {
+	const (
+		normalInterval   = 24 * time.Hour  // regular renewal check
+		initialRetry     = 1 * time.Minute // initial backoff after a failure
+		maxRetryInterval = 24 * time.Hour  // max backoff period
+	)
+	timer := time.NewTimer(0) // fire off timer immediately
+	defer timer.Stop()
+	retryCount := 0
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-timer.C:
+			// We call the certificate endpoint, but don't do anything
+			// with the returned certs here.
+			// The call to the certificate endpoint will ensure that
+			// certs are issued/renewed as needed and stored in the
+			// relevant state store. For example, for HA Ingress
+			// 'write' replica, the cert and key will be stored in a
+			// Kubernetes Secret named after the domain for which we
+			// are issuing.
+			// Note that renewals triggered by the call to the
+			// certificates endpoint here and by renewal check
+			// triggered during a call to node's HTTPS endpoint
+			// share the same state/renewal lock mechanism, so we
+			// should not run into redundant issuances during
+			// concurrent renewal checks.
+			// TODO(irbekrm): maybe it is worth adding a new
+			// issuance endpoint that explicitly only triggers
+			// issuance and stores certs in the relevant store, but
+			// does not return certs to the caller?
+			_, _, err := cm.lc.CertPair(ctx, domain)
+			var nextInterval time.Duration
+			// TODO(irbekrm): distinguish between LE rate limit
+			// errors and other error types like transient network
+			// errors.
+			if err == nil {
+				retryCount = 0
+				nextInterval = normalInterval
+			} else {
+				retryCount++
+				// Calculate backoff: initialRetry * 2^(retryCount-1),
+				// capped at maxRetryInterval.
+				// For retryCount=1: 1min * 2^0 = 1min
+				// For retryCount=2: 1min * 2^1 = 2min
+				// For retryCount=3: 1min * 2^2 = 4min
+				// The value is built by repeated doubling that stops
+				// at the cap instead of shifting by retryCount-1:
+				// retryCount keeps growing while failures persist, and
+				// an unbounded shift overflows time.Duration after a
+				// few dozen consecutive failures, yielding a negative
+				// or zero interval that would make the timer fire
+				// immediately and retry in a tight loop.
+				backoff := initialRetry
+				for i := 1; i < retryCount && backoff < maxRetryInterval; i++ {
+					backoff *= 2
+				}
+				if backoff > maxRetryInterval {
+					backoff = maxRetryInterval
+				}
+				nextInterval = backoff
+				// Log each failure once, together with the retry
+				// count and the delay until the next attempt.
+				log.Printf("Error refreshing certificate for %s (retry %d): %v. Will retry in %v\n",
+					domain, retryCount, err, nextInterval)
+			}
+			timer.Reset(nextInterval)
+		}
+	}
+}
diff --git a/cmd/containerboot/certs_test.go b/cmd/containerboot/certs_test.go
new file mode 100644
index 000000000..577311ea3
--- /dev/null
+++ b/cmd/containerboot/certs_test.go
@@ -0,0 +1,229 @@
+// Copyright (c) Tailscale Inc & AUTHORS
+// SPDX-License-Identifier: BSD-3-Clause
+
+//go:build linux
+
+package main
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "tailscale.com/ipn"
+ "tailscale.com/tailcfg"
+)
+
+// TestEnsureCertLoops tests that the certManager correctly starts and stops
+// update loops for certs when the serve config changes. It tracks goroutine
+// count and uses that as a validator that the expected number of cert loops are
+// running.
+func TestEnsureCertLoops(t *testing.T) {
+ tests := []struct {
+ name string
+ initialConfig *ipn.ServeConfig
+ updatedConfig *ipn.ServeConfig
+ initialGoroutines int64 // after initial serve config is applied
+ updatedGoroutines int64 // after updated serve config is applied
+ wantErr bool
+ }{
+ {
+ name: "empty_serve_config",
+ initialConfig: &ipn.ServeConfig{},
+ initialGoroutines: 0,
+ },
+ {
+ name: "nil_serve_config",
+ initialConfig: nil,
+ initialGoroutines: 0,
+ wantErr: true,
+ },
+ {
+ name: "empty_to_one_service",
+ initialConfig: &ipn.ServeConfig{},
+ updatedConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 0,
+ updatedGoroutines: 1,
+ },
+ {
+ name: "single_service",
+ initialConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 1,
+ },
+ {
+ name: "multiple_services",
+ initialConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ "svc:my-other-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-other-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 2, // one loop per domain across all services
+ },
+ {
+ name: "ignore_non_https_ports",
+ initialConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ "my-app.tailnetxyz.ts.net:80": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 1, // only one loop for the 443 endpoint
+ },
+ {
+ name: "remove_domain",
+ initialConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ "svc:my-other-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-other-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ updatedConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 2, // initially two loops (one per service)
+ updatedGoroutines: 1, // one loop after removing service2
+ },
+ {
+ name: "add_domain",
+ initialConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ updatedConfig: &ipn.ServeConfig{
+ Services: map[tailcfg.ServiceName]*ipn.ServiceConfig{
+ "svc:my-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ "svc:my-other-app": {
+ Web: map[ipn.HostPort]*ipn.WebServerConfig{
+ "my-other-app.tailnetxyz.ts.net:443": {},
+ },
+ },
+ },
+ },
+ initialGoroutines: 1,
+ updatedGoroutines: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ cm := &certManager{
+ lc: &fakeLocalClient{},
+ certLoops: make(map[string]context.CancelFunc),
+ }
+
+ allDone := make(chan bool, 1)
+ defer cm.tracker.AddDoneCallback(func() {
+ cm.mu.Lock()
+ defer cm.mu.Unlock()
+ if cm.tracker.RunningGoroutines() > 0 {
+ return
+ }
+ select {
+ case allDone <- true:
+ default:
+ }
+ })()
+
+ err := cm.ensureCertLoops(ctx, tt.initialConfig)
+ if (err != nil) != tt.wantErr {
+ t.Fatalf("ensureCertLoops() error = %v", err)
+ }
+
+ if got := cm.tracker.RunningGoroutines(); got != tt.initialGoroutines {
+ t.Errorf("after initial config: got %d running goroutines, want %d", got, tt.initialGoroutines)
+ }
+
+ if tt.updatedConfig != nil {
+ if err := cm.ensureCertLoops(ctx, tt.updatedConfig); err != nil {
+ t.Fatalf("ensureCertLoops() error on update = %v", err)
+ }
+
+ // Although starting goroutines and cancelling
+ // their contexts happens in the main goroutine,
+ // the actual goroutine exit after a context is
+ // cancelled does not - so wait for a bit for the
+ // running goroutine count to reach the expected
+ // number.
+ deadline := time.After(5 * time.Second)
+ for {
+ if got := cm.tracker.RunningGoroutines(); got == tt.updatedGoroutines {
+ break
+ }
+ select {
+ case <-deadline:
+ t.Fatalf("timed out waiting for goroutine count to reach %d, currently at %d",
+ tt.updatedGoroutines, cm.tracker.RunningGoroutines())
+ case <-time.After(10 * time.Millisecond):
+ continue
+ }
+ }
+ }
+
+ if tt.updatedGoroutines == 0 {
+ return // no goroutines to wait for
+ }
+ // cancel context to make goroutines exit
+ cancel()
+ select {
+ case <-time.After(5 * time.Second):
+ t.Fatal("timed out waiting for goroutine to finish")
+ case <-allDone:
+ }
+ })
+ }
+}
diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index cf4bd8620..5f8052bb9 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -646,7 +646,7 @@ runLoop:
if cfg.ServeConfigPath != "" {
triggerWatchServeConfigChanges.Do(func() {
- go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client, kc)
+ go watchServeConfigChanges(ctx, certDomainChanged, certDomain, client, kc, cfg)
})
}
diff --git a/cmd/containerboot/serve.go b/cmd/containerboot/serve.go
index fbfaba64a..37fd49777 100644
--- a/cmd/containerboot/serve.go
+++ b/cmd/containerboot/serve.go
@@ -28,20 +28,23 @@ import (
// applies it to lc. It exits when ctx is canceled. cdChanged is a channel that
// is written to when the certDomain changes, causing the serve config to be
// re-read and applied.
-func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *local.Client, kc *kubeClient) {
+func watchServeConfigChanges(ctx context.Context, cdChanged <-chan bool, certDomainAtomic *atomic.Pointer[string], lc *local.Client, kc *kubeClient, cfg *settings) {
if certDomainAtomic == nil {
panic("certDomainAtomic must not be nil")
}
+
var tickChan <-chan time.Time
var eventChan <-chan fsnotify.Event
if w, err := fsnotify.NewWatcher(); err != nil {
+ // Creating a new fsnotify watcher would fail for example if inotify was not able to create a new file descriptor.
+ // See https://github.com/tailscale/tailscale/issues/15081
log.Printf("serve proxy: failed to create fsnotify watcher, timer-only mode: %v", err)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
tickChan = ticker.C
} else {
defer w.Close()
- if err := w.Add(filepath.Dir(path)); err != nil {
+ if err := w.Add(filepath.Dir(cfg.ServeConfigPath)); err != nil {
log.Fatalf("serve proxy: failed to add fsnotify watch: %v", err)
}
eventChan = w.Events
@@ -49,6 +52,12 @@ func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan
var certDomain string
var prevServeConfig *ipn.ServeConfig
+ var cm certManager
+ if cfg.CertShareMode == "rw" {
+ cm = certManager{
+ lc: lc,
+ }
+ }
for {
select {
case <-ctx.Done():
@@ -61,12 +70,12 @@ func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan
// k8s handles these mounts. So just re-read the file and apply it
// if it's changed.
}
- sc, err := readServeConfig(path, certDomain)
+ sc, err := readServeConfig(cfg.ServeConfigPath, certDomain)
if err != nil {
log.Fatalf("serve proxy: failed to read serve config: %v", err)
}
if sc == nil {
- log.Printf("serve proxy: no serve config at %q, skipping", path)
+ log.Printf("serve proxy: no serve config at %q, skipping", cfg.ServeConfigPath)
continue
}
if prevServeConfig != nil && reflect.DeepEqual(sc, prevServeConfig) {
@@ -81,6 +90,12 @@ func watchServeConfigChanges(ctx context.Context, path string, cdChanged <-chan
}
}
prevServeConfig = sc
+ if cfg.CertShareMode != "rw" {
+ continue
+ }
+ if err := cm.ensureCertLoops(ctx, sc); err != nil {
+ log.Fatalf("serve proxy: error ensuring cert loops: %v", err)
+ }
}
}
@@ -94,6 +109,7 @@ func certDomainFromNetmap(nm *netmap.NetworkMap) string {
// localClient is a subset of [local.Client] that can be mocked for testing.
type localClient interface {
SetServeConfig(context.Context, *ipn.ServeConfig) error
+ CertPair(context.Context, string) ([]byte, []byte, error)
}
func updateServeConfig(ctx context.Context, sc *ipn.ServeConfig, certDomain string, lc localClient) error {
diff --git a/cmd/containerboot/serve_test.go b/cmd/containerboot/serve_test.go
index eb92a8dc8..fc18f254d 100644
--- a/cmd/containerboot/serve_test.go
+++ b/cmd/containerboot/serve_test.go
@@ -206,6 +206,10 @@ func (m *fakeLocalClient) SetServeConfig(ctx context.Context, cfg *ipn.ServeConf
return nil
}
+func (m *fakeLocalClient) CertPair(ctx context.Context, domain string) (certPEM, keyPEM []byte, err error) {
+ return nil, nil, nil // fake: always reports success and returns no cert or key bytes
+}
+
func TestHasHTTPSEndpoint(t *testing.T) {
tests := []struct {
name string
diff --git a/cmd/containerboot/settings.go b/cmd/containerboot/settings.go
index 0da18e52c..c62db5340 100644
--- a/cmd/containerboot/settings.go
+++ b/cmd/containerboot/settings.go
@@ -74,6 +74,12 @@ type settings struct {
HealthCheckEnabled bool
DebugAddrPort string
EgressProxiesCfgPath string
+ // CertShareMode is set for Kubernetes Pods running cert share mode.
+ // Possible values are empty (containerboot doesn't run any certs
+ // logic), 'ro' (for Pods that should never attempt to issue/renew
+ // certs) and 'rw' for Pods that should manage the TLS certs shared
+ // amongst the replicas.
+ CertShareMode string
}
func configFromEnv() (*settings, error) {
@@ -128,6 +134,17 @@ func configFromEnv() (*settings, error) {
cfg.PodIPv6 = parsed.String()
}
}
+ // If cert share is enabled, set the replica as read or write. Only 0th
+ // replica should be able to write.
+ isInCertShareMode := defaultBool("TS_EXPERIMENTAL_CERT_SHARE", false)
+ if isInCertShareMode {
+ cfg.CertShareMode = "ro"
+ podName := os.Getenv("POD_NAME")
+ if strings.HasSuffix(podName, "-0") {
+ cfg.CertShareMode = "rw"
+ }
+ }
+
if err := cfg.validate(); err != nil {
return nil, fmt.Errorf("invalid configuration: %v", err)
}
diff --git a/cmd/containerboot/tailscaled.go b/cmd/containerboot/tailscaled.go
index e73a7e94d..654b34757 100644
--- a/cmd/containerboot/tailscaled.go
+++ b/cmd/containerboot/tailscaled.go
@@ -33,6 +33,9 @@ func startTailscaled(ctx context.Context, cfg *settings) (*local.Client, *os.Pro
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
+ if cfg.CertShareMode != "" {
+ cmd.Env = append(os.Environ(), "TS_CERT_SHARE_MODE="+cfg.CertShareMode)
+ }
log.Printf("Starting tailscaled")
if err := cmd.Start(); err != nil {
return nil, nil, fmt.Errorf("starting tailscaled failed: %v", err)
@@ -173,11 +176,14 @@ func tailscaleSet(ctx context.Context, cfg *settings) error {
func watchTailscaledConfigChanges(ctx context.Context, path string, lc *local.Client, errCh chan<- error) {
var (
tickChan <-chan time.Time
+ eventChan <-chan fsnotify.Event
+ errChan <-chan error
tailscaledCfgDir = filepath.Dir(path)
prevTailscaledCfg []byte
)
- w, err := fsnotify.NewWatcher()
- if err != nil {
+ if w, err := fsnotify.NewWatcher(); err != nil {
+ // Creating a new fsnotify watcher would fail for example if inotify was not able to create a new file descriptor.
+ // See https://github.com/tailscale/tailscale/issues/15081
log.Printf("tailscaled config watch: failed to create fsnotify watcher, timer-only mode: %v", err)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
@@ -188,6 +194,8 @@ func watchTailscaledConfigChanges(ctx context.Context, path string, lc *local.Cl
errCh <- fmt.Errorf("failed to add fsnotify watch: %w", err)
return
}
+ eventChan = w.Events
+ errChan = w.Errors
}
b, err := os.ReadFile(path)
if err != nil {
@@ -205,11 +213,11 @@ func watchTailscaledConfigChanges(ctx context.Context, path string, lc *local.Cl
select {
case <-ctx.Done():
return
- case err := <-w.Errors:
+ case err := <-errChan:
errCh <- fmt.Errorf("watcher error: %w", err)
return
case <-tickChan:
- case event := <-w.Events:
+ case event := <-eventChan:
if event.Name != toWatch {
continue
}
diff --git a/cmd/derper/cert.go b/cmd/derper/cert.go
index 623fa376f..b95755c64 100644
--- a/cmd/derper/cert.go
+++ b/cmd/derper/cert.go
@@ -4,16 +4,28 @@
package main
import (
+ "crypto/ecdsa"
+ "crypto/elliptic"
+ "crypto/rand"
+ "crypto/sha256"
"crypto/tls"
"crypto/x509"
+ "crypto/x509/pkix"
+ "encoding/json"
+ "encoding/pem"
"errors"
"fmt"
+ "log"
+ "math/big"
"net"
"net/http"
+ "os"
"path/filepath"
"regexp"
+ "time"
"golang.org/x/crypto/acme/autocert"
+ "tailscale.com/tailcfg"
)
var unsafeHostnameCharacters = regexp.MustCompile(`[^a-zA-Z0-9-\.]`)
@@ -65,8 +77,18 @@ func NewManualCertManager(certdir, hostname string) (certProvider, error) {
crtPath := filepath.Join(certdir, keyname+".crt")
keyPath := filepath.Join(certdir, keyname+".key")
cert, err := tls.LoadX509KeyPair(crtPath, keyPath)
+ hostnameIP := net.ParseIP(hostname) // or nil if hostname isn't an IP address
if err != nil {
- return nil, fmt.Errorf("can not load x509 key pair for hostname %q: %w", keyname, err)
+ // If the hostname is an IP address, automatically create a
+ // self-signed certificate for it.
+ var certp *tls.Certificate
+ if os.IsNotExist(err) && hostnameIP != nil {
+ certp, err = createSelfSignedIPCert(crtPath, keyPath, hostname)
+ }
+ if err != nil {
+ return nil, fmt.Errorf("can not load x509 key pair for hostname %q: %w", keyname, err)
+ }
+ cert = *certp
}
// ensure hostname matches with the certificate
x509Cert, err := x509.ParseCertificate(cert.Certificate[0])
@@ -76,6 +98,18 @@ func NewManualCertManager(certdir, hostname string) (certProvider, error) {
if err := x509Cert.VerifyHostname(hostname); err != nil {
return nil, fmt.Errorf("cert invalid for hostname %q: %w", hostname, err)
}
+ if hostnameIP != nil {
+ // If the hostname is an IP address, print out information on how to
+ // configure this in the derpmap.
+ dn := &tailcfg.DERPNode{
+ Name: "custom",
+ RegionID: 900,
+ HostName: hostname,
+ CertName: fmt.Sprintf("sha256-raw:%-02x", sha256.Sum256(x509Cert.Raw)),
+ }
+ dnJSON, _ := json.Marshal(dn)
+ log.Printf("Using self-signed certificate for IP address %q. Configure it in DERPMap using: (https://tailscale.com/s/custom-derp)\n %s", hostname, dnJSON)
+ }
return &manualCertManager{
cert: &cert,
hostname: hostname,
@@ -109,3 +143,69 @@ func (m *manualCertManager) getCertificate(hi *tls.ClientHelloInfo) (*tls.Certif
func (m *manualCertManager) HTTPHandler(fallback http.Handler) http.Handler {
return fallback
}
+
+// createSelfSignedIPCert generates a new ECDSA P-256 key and a self-signed
+// server certificate whose only SAN is the IP address ipStr, valid for one
+// year from now. The PEM-encoded certificate and key are written to crtPath
+// (mode 0644) and keyPath (mode 0600), creating crtPath's parent directory
+// (mode 0700) if needed, and the resulting key pair is returned.
+// It returns an error if ipStr is not a valid IP address or if any
+// generation, encoding or file write step fails.
+func createSelfSignedIPCert(crtPath, keyPath, ipStr string) (*tls.Certificate, error) {
+ ip := net.ParseIP(ipStr)
+ if ip == nil {
+ return nil, fmt.Errorf("invalid IP address: %s", ipStr)
+ }
+
+ priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
+ if err != nil {
+ return nil, fmt.Errorf("failed to generate EC private key: %v", err)
+ }
+
+ // Pick a random serial number in [0, 2^128).
+ serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128)
+ serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
+ if err != nil {
+ return nil, fmt.Errorf("failed to generate serial number: %v", err)
+ }
+
+ now := time.Now()
+ template := x509.Certificate{
+ SerialNumber: serialNumber,
+ Subject: pkix.Name{
+ CommonName: ipStr,
+ },
+ NotBefore: now,
+ NotAfter: now.AddDate(1, 0, 0), // expires in 1 year; a bit over that is rejected by macOS etc
+
+ KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
+ ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
+ BasicConstraintsValid: true,
+ }
+
+ // Set the IP as a SAN.
+ template.IPAddresses = []net.IP{ip}
+
+ // Create the self-signed certificate: the template is used as both the
+ // certificate and its parent, and it is signed with its own key.
+ derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create certificate: %v", err)
+ }
+
+ certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
+
+ keyBytes, err := x509.MarshalECPrivateKey(priv)
+ if err != nil {
+ return nil, fmt.Errorf("unable to marshal EC private key: %v", err)
+ }
+
+ keyPEM := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyBytes})
+
+ // Persist the pair to disk; the key file is written owner-only (0600).
+ if err := os.MkdirAll(filepath.Dir(crtPath), 0700); err != nil {
+ return nil, fmt.Errorf("failed to create directory for certificate: %v", err)
+ }
+ if err := os.WriteFile(crtPath, certPEM, 0644); err != nil {
+ return nil, fmt.Errorf("failed to write certificate to %s: %v", crtPath, err)
+ }
+ if err := os.WriteFile(keyPath, keyPEM, 0600); err != nil {
+ return nil, fmt.Errorf("failed to write key to %s: %v", keyPath, err)
+ }
+
+ // Build the returned tls.Certificate from the in-memory PEM rather than
+ // re-reading the files that were just written.
+ tlsCert, err := tls.X509KeyPair(certPEM, keyPEM)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create tls.Certificate: %v", err)
+ }
+ return &tlsCert, nil
+}
diff --git a/cmd/derper/cert_test.go b/cmd/derper/cert_test.go
index a379e5c04..2ec7b756e 100644
--- a/cmd/derper/cert_test.go
+++ b/cmd/derper/cert_test.go
@@ -4,19 +4,29 @@
package main
import (
+ "context"
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
+ "crypto/sha256"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
+ "fmt"
"math/big"
"net"
+ "net/http"
"os"
"path/filepath"
"testing"
"time"
+
+ "tailscale.com/derp"
+ "tailscale.com/derp/derphttp"
+ "tailscale.com/net/netmon"
+ "tailscale.com/tailcfg"
+ "tailscale.com/types/key"
)
// Verify that in --certmode=manual mode, we can use a bare IP address
@@ -95,3 +105,66 @@ func TestCertIP(t *testing.T) {
t.Fatalf("GetCertificate returned nil")
}
}
+
+// Test that we can dial a raw IP without using a hostname and without a WebPKI
+// cert, validating the cert against the signature of the cert in the DERP map's
+// DERPNode.
+//
+// See https://github.com/tailscale/tailscale/issues/11776.
+func TestPinnedCertRawIP(t *testing.T) {
+ // The temp dir starts out empty, so no cert/key files exist for the
+ // IP address when the cert manager is created.
+ td := t.TempDir()
+ cp, err := NewManualCertManager(td, "127.0.0.1")
+ if err != nil {
+ t.Fatalf("NewManualCertManager: %v", err)
+ }
+
+ cert, err := cp.TLSConfig().GetCertificate(&tls.ClientHelloInfo{
+ ServerName: "127.0.0.1",
+ })
+ if err != nil {
+ t.Fatalf("GetCertificate: %v", err)
+ }
+
+ // Listen on an ephemeral loopback port for the test DERP server.
+ ln, err := net.Listen("tcp", "127.0.0.1:0")
+ if err != nil {
+ t.Fatalf("Listen: %v", err)
+ }
+ defer ln.Close()
+
+ ds := derp.NewServer(key.NewNode(), t.Logf)
+
+ derpHandler := derphttp.Handler(ds)
+ mux := http.NewServeMux()
+ mux.Handle("/derp", derpHandler)
+
+ var hs http.Server
+ hs.Handler = mux
+ hs.TLSConfig = cp.TLSConfig()
+ // Empty cert/key file names: the certificate comes from hs.TLSConfig.
+ go hs.ServeTLS(ln, "", "")
+
+ lnPort := ln.Addr().(*net.TCPAddr).Port
+
+ reg := &tailcfg.DERPRegion{
+ RegionID: 900,
+ Nodes: []*tailcfg.DERPNode{
+ {
+ RegionID: 900,
+ HostName: "127.0.0.1",
+ // Pin the server cert by the SHA-256 of its raw DER bytes.
+ // NOTE(review): relies on cert.Leaf being populated by the cert provider - confirm.
+ CertName: fmt.Sprintf("sha256-raw:%-02x", sha256.Sum256(cert.Leaf.Raw)),
+ DERPPort: lnPort,
+ },
+ },
+ }
+
+ netMon := netmon.NewStatic()
+ dc := derphttp.NewRegionClient(key.NewNode(), t.Logf, netMon, func() *tailcfg.DERPRegion {
+ return reg
+ })
+ defer dc.Close()
+
+ // The test passes if dialing the region over TLS succeeds.
+ _, connClose, _, err := dc.DialRegionTLS(context.Background(), reg)
+ if err != nil {
+ t.Fatalf("DialRegionTLS: %v", err)
+ }
+ defer connClose.Close()
+}
diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt
index 1812a1a8d..5d375a515 100644
--- a/cmd/derper/depaware.txt
+++ b/cmd/derper/depaware.txt
@@ -96,6 +96,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/disco from tailscale.com/derp
tailscale.com/drive from tailscale.com/client/local+
tailscale.com/envknob from tailscale.com/client/local+
+ tailscale.com/feature from tailscale.com/tsweb
tailscale.com/health from tailscale.com/net/tlsdial+
tailscale.com/hostinfo from tailscale.com/net/netmon+
tailscale.com/ipn from tailscale.com/client/local
@@ -128,8 +129,8 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/tstime from tailscale.com/derp+
tailscale.com/tstime/mono from tailscale.com/tstime/rate
tailscale.com/tstime/rate from tailscale.com/derp
- tailscale.com/tsweb from tailscale.com/cmd/derper
- tailscale.com/tsweb/promvarz from tailscale.com/tsweb
+ tailscale.com/tsweb from tailscale.com/cmd/derper+
+ tailscale.com/tsweb/promvarz from tailscale.com/cmd/derper
tailscale.com/tsweb/varz from tailscale.com/tsweb+
tailscale.com/types/dnstype from tailscale.com/tailcfg+
tailscale.com/types/empty from tailscale.com/ipn
@@ -309,7 +310,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
html from net/http/pprof+
html/template from tailscale.com/cmd/derper
internal/abi from crypto/x509/internal/macos+
- internal/asan from syscall+
+ internal/asan from internal/runtime/maps+
internal/bisect from internal/godebug
internal/bytealg from bytes+
internal/byteorder from crypto/cipher+
@@ -319,12 +320,12 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
internal/filepathlite from os+
internal/fmtsort from fmt+
internal/goarch from crypto/internal/fips140deps/cpu+
- internal/godebug from crypto/tls+
+ internal/godebug from crypto/internal/fips140deps/godebug+
internal/godebugs from internal/godebug+
- internal/goexperiment from runtime+
+ internal/goexperiment from hash/maphash+
internal/goos from crypto/x509+
internal/itoa from internal/poll+
- internal/msan from syscall+
+ internal/msan from internal/runtime/maps+
internal/nettrace from net+
internal/oserror from io/fs+
internal/poll from net+
diff --git a/cmd/derper/derper.go b/cmd/derper/derper.go
index 980870847..3c6fda68c 100644
--- a/cmd/derper/derper.go
+++ b/cmd/derper/derper.go
@@ -49,6 +49,9 @@ import (
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/version"
+
+ // Support for prometheus varz in tsweb
+ _ "tailscale.com/tsweb/promvarz"
)
var (
@@ -63,6 +66,7 @@ var (
hostname = flag.String("hostname", "derp.tailscale.com", "LetsEncrypt host name, if addr's port is :443. When --certmode=manual, this can be an IP address to avoid SNI checks")
runSTUN = flag.Bool("stun", true, "whether to run a STUN server. It will bind to the same IP (if any) as the --addr flag value.")
runDERP = flag.Bool("derp", true, "whether to run a DERP server. The only reason to set this false is if you're decommissioning a server but want to keep its bootstrap DNS functionality still running.")
+ flagHome = flag.String("home", "", "what to serve at the root path. It may be left empty (the default, for a default homepage), \"blank\" for a blank page, or a URL to redirect to")
meshPSKFile = flag.String("mesh-psk-file", defaultMeshPSKFile(), "if non-empty, path to file containing the mesh pre-shared key file. It should contain some hex string; whitespace is trimmed.")
meshWith = flag.String("mesh-with", "", "optional comma-separated list of hostnames to mesh with; the server's own hostname can be in the list. If an entry contains a slash, the second part names a hostname to be used when dialing the target.")
@@ -71,10 +75,13 @@ var (
secretsCacheDir = flag.String("secrets-cache-dir", defaultSetecCacheDir(), "directory to cache setec secrets in (required if --secrets-url is set)")
bootstrapDNS = flag.String("bootstrap-dns-names", "", "optional comma-separated list of hostnames to make available at /bootstrap-dns")
unpublishedDNS = flag.String("unpublished-bootstrap-dns-names", "", "optional comma-separated list of hostnames to make available at /bootstrap-dns and not publish in the list. If an entry contains a slash, the second part names a DNS record to poll for its TXT record with a `0` to `100` value for rollout percentage.")
+
verifyClients = flag.Bool("verify-clients", false, "verify clients to this DERP server through a local tailscaled instance.")
verifyClientURL = flag.String("verify-client-url", "", "if non-empty, an admission controller URL for permitting client connections; see tailcfg.DERPAdmitClientRequest")
verifyFailOpen = flag.Bool("verify-client-url-fail-open", true, "whether we fail open if --verify-client-url is unreachable")
+ socket = flag.String("socket", "", "optional alternate path to tailscaled socket (only relevant when using --verify-clients)")
+
acceptConnLimit = flag.Float64("accept-connection-limit", math.Inf(+1), "rate limit for accepting new connection")
acceptConnBurst = flag.Int("accept-connection-burst", math.MaxInt, "burst limit for accepting new connection")
@@ -192,6 +199,7 @@ func main() {
s := derp.NewServer(cfg.PrivateKey, log.Printf)
s.SetVerifyClient(*verifyClients)
+ s.SetTailscaledSocketPath(*socket)
s.SetVerifyClientURL(*verifyClientURL)
s.SetVerifyClientURLFailOpen(*verifyFailOpen)
s.SetTCPWriteTimeout(*tcpWriteTimeout)
@@ -250,6 +258,11 @@ func main() {
}
expvar.Publish("derp", s.ExpVar())
+ handleHome, ok := getHomeHandler(*flagHome)
+ if !ok {
+ log.Fatalf("unknown --home value %q", *flagHome)
+ }
+
mux := http.NewServeMux()
if *runDERP {
derpHandler := derphttp.Handler(s)
@@ -270,19 +283,7 @@ func main() {
mux.HandleFunc("/bootstrap-dns", tsweb.BrowserHeaderHandlerFunc(handleBootstrapDNS))
mux.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
tsweb.AddBrowserHeaders(w)
- w.Header().Set("Content-Type", "text/html; charset=utf-8")
- w.WriteHeader(200)
- err := homePageTemplate.Execute(w, templateData{
- ShowAbuseInfo: validProdHostname.MatchString(*hostname),
- Disabled: !*runDERP,
- AllowDebug: tsweb.AllowDebugAccess(r),
- })
- if err != nil {
- if r.Context().Err() == nil {
- log.Printf("homePageTemplate.Execute: %v", err)
- }
- return
- }
+ handleHome.ServeHTTP(w, r)
}))
mux.Handle("/robots.txt", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
tsweb.AddBrowserHeaders(w)
@@ -575,3 +576,35 @@ var homePageTemplate = template.Must(template.New("home").Parse(`<html><body>
</body>
</html>
`))
+
+// getHomeHandler returns the handler to serve at the root path for val, the --home flag value: "" renders homePageTemplate, "blank" writes an empty 200 text/html response, and a value starting with http:// or https:// redirects to it with http.StatusFound.
+// For any other value it returns a nil handler and ok=false.
+func getHomeHandler(val string) (_ http.Handler, ok bool) {
+ if val == "" { // default: render the built-in homepage template
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ w.WriteHeader(200)
+ err := homePageTemplate.Execute(w, templateData{
+ ShowAbuseInfo: validProdHostname.MatchString(*hostname),
+ Disabled: !*runDERP,
+ AllowDebug: tsweb.AllowDebugAccess(r),
+ })
+ if err != nil {
+ if r.Context().Err() == nil { // skip logging when the request context has already ended
+ log.Printf("homePageTemplate.Execute: %v", err)
+ }
+ return
+ }
+ }), true
+ }
+ if val == "blank" { // headers and status only; no body is written
+ return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ w.WriteHeader(200)
+ }), true
+ }
+ if strings.HasPrefix(val, "http://") || strings.HasPrefix(val, "https://") {
+ return http.RedirectHandler(val, http.StatusFound), true
+ }
+ return nil, false // unrecognized --home value; the caller decides how to report it
+}
diff --git a/cmd/derpprobe/derpprobe.go b/cmd/derpprobe/derpprobe.go
index 6e8c603b9..899838462 100644
--- a/cmd/derpprobe/derpprobe.go
+++ b/cmd/derpprobe/derpprobe.go
@@ -15,6 +15,9 @@ import (
"tailscale.com/prober"
"tailscale.com/tsweb"
"tailscale.com/version"
+
+ // Support for prometheus varz in tsweb
+ _ "tailscale.com/tsweb/promvarz"
)
var (
diff --git a/cmd/hello/hello.go b/cmd/hello/hello.go
index 86f885f54..fa116b28b 100644
--- a/cmd/hello/hello.go
+++ b/cmd/hello/hello.go
@@ -20,6 +20,7 @@ import (
"tailscale.com/client/local"
"tailscale.com/client/tailscale/apitype"
+ "tailscale.com/tailcfg"
)
var (
@@ -134,6 +135,10 @@ func tailscaleIP(who *apitype.WhoIsResponse) string {
if who == nil {
return ""
}
+ vals, err := tailcfg.UnmarshalNodeCapJSON[string](who.Node.CapMap, tailcfg.NodeAttrNativeIPV4)
+ if err == nil && len(vals) > 0 {
+ return vals[0]
+ }
for _, nodeIP := range who.Node.Addresses {
if nodeIP.Addr().Is4() && nodeIP.IsSingleIP() {
return nodeIP.Addr().String()
diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt
index 54d9bd248..978744947 100644
--- a/cmd/k8s-operator/depaware.txt
+++ b/cmd/k8s-operator/depaware.txt
@@ -814,6 +814,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/internal/client/tailscale from tailscale.com/cmd/k8s-operator
tailscale.com/internal/noiseconn from tailscale.com/control/controlclient
tailscale.com/ipn from tailscale.com/client/local+
+ tailscale.com/ipn/auditlog from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn/conffile from tailscale.com/ipn/ipnlocal+
💣 tailscale.com/ipn/desktop from tailscale.com/ipn/ipnlocal+
💣 tailscale.com/ipn/ipnauth from tailscale.com/ipn/ipnlocal+
@@ -904,6 +905,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/tstime/rate from tailscale.com/derp+
tailscale.com/tsweb/varz from tailscale.com/util/usermetric
tailscale.com/types/appctype from tailscale.com/ipn/ipnlocal
+ tailscale.com/types/bools from tailscale.com/tsnet
tailscale.com/types/dnstype from tailscale.com/ipn/ipnlocal+
tailscale.com/types/empty from tailscale.com/ipn+
tailscale.com/types/ipproto from tailscale.com/net/flowtrack+
@@ -1149,7 +1151,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
html from html/template+
html/template from github.com/gorilla/csrf
internal/abi from crypto/x509/internal/macos+
- internal/asan from syscall+
+ internal/asan from internal/runtime/maps+
internal/bisect from internal/godebug
internal/bytealg from bytes+
internal/byteorder from crypto/cipher+
@@ -1161,11 +1163,11 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
internal/goarch from crypto/internal/fips140deps/cpu+
internal/godebug from archive/tar+
internal/godebugs from internal/godebug+
- internal/goexperiment from runtime+
+ internal/goexperiment from hash/maphash+
internal/goos from crypto/x509+
internal/itoa from internal/poll+
internal/lazyregexp from go/doc
- internal/msan from syscall+
+ internal/msan from internal/runtime/maps+
internal/nettrace from net+
internal/oserror from io/fs+
internal/poll from net+
diff --git a/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml b/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml
index 7056ef42f..5bf50617e 100644
--- a/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml
+++ b/cmd/k8s-operator/deploy/chart/templates/operator-rbac.yaml
@@ -75,7 +75,7 @@ rules:
verbs: ["get", "list", "watch", "create", "update", "deletecollection"]
- apiGroups: ["rbac.authorization.k8s.io"]
resources: ["roles", "rolebindings"]
- verbs: ["get", "create", "patch", "update", "list", "watch"]
+ verbs: ["get", "create", "patch", "update", "list", "watch", "deletecollection"]
- apiGroups: ["monitoring.coreos.com"]
resources: ["servicemonitors"]
verbs: ["get", "list", "update", "create", "delete"]
diff --git a/cmd/k8s-operator/deploy/manifests/operator.yaml b/cmd/k8s-operator/deploy/manifests/operator.yaml
index e966ef559..9ee3b441a 100644
--- a/cmd/k8s-operator/deploy/manifests/operator.yaml
+++ b/cmd/k8s-operator/deploy/manifests/operator.yaml
@@ -4898,6 +4898,7 @@ rules:
- update
- list
- watch
+ - deletecollection
- apiGroups:
- monitoring.coreos.com
resources:
diff --git a/cmd/k8s-operator/dnsrecords_test.go b/cmd/k8s-operator/dnsrecords_test.go
index 389461b85..4e73e6c9e 100644
--- a/cmd/k8s-operator/dnsrecords_test.go
+++ b/cmd/k8s-operator/dnsrecords_test.go
@@ -22,6 +22,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/fake"
operatorutils "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+ "tailscale.com/kube/kubetypes"
"tailscale.com/tstest"
"tailscale.com/types/ptr"
)
@@ -163,10 +164,10 @@ func headlessSvcForParent(o client.Object, typ string) *corev1.Service {
Name: o.GetName(),
Namespace: "tailscale",
Labels: map[string]string{
- LabelManaged: "true",
- LabelParentName: o.GetName(),
- LabelParentNamespace: o.GetNamespace(),
- LabelParentType: typ,
+ kubetypes.LabelManaged: "true",
+ LabelParentName: o.GetName(),
+ LabelParentNamespace: o.GetNamespace(),
+ LabelParentType: typ,
},
},
Spec: corev1.ServiceSpec{
diff --git a/cmd/k8s-operator/egress-pod-readiness.go b/cmd/k8s-operator/egress-pod-readiness.go
index a6c57bf9d..05cf1aa1a 100644
--- a/cmd/k8s-operator/egress-pod-readiness.go
+++ b/cmd/k8s-operator/egress-pod-readiness.go
@@ -112,9 +112,9 @@ func (er *egressPodsReconciler) Reconcile(ctx context.Context, req reconcile.Req
}
// Get all ClusterIP Services for all egress targets exposed to cluster via this ProxyGroup.
lbls := map[string]string{
- LabelManaged: "true",
- labelProxyGroup: proxyGroupName,
- labelSvcType: typeEgress,
+ kubetypes.LabelManaged: "true",
+ labelProxyGroup: proxyGroupName,
+ labelSvcType: typeEgress,
}
svcs := &corev1.ServiceList{}
if err := er.List(ctx, svcs, client.InNamespace(er.tsNamespace), client.MatchingLabels(lbls)); err != nil {
diff --git a/cmd/k8s-operator/egress-pod-readiness_test.go b/cmd/k8s-operator/egress-pod-readiness_test.go
index 5e6fa2bb4..3c35d9043 100644
--- a/cmd/k8s-operator/egress-pod-readiness_test.go
+++ b/cmd/k8s-operator/egress-pod-readiness_test.go
@@ -450,9 +450,9 @@ func newSvc(name string, port int32) (*corev1.Service, string) {
Namespace: "operator-ns",
Name: name,
Labels: map[string]string{
- LabelManaged: "true",
- labelProxyGroup: "dev",
- labelSvcType: typeEgress,
+ kubetypes.LabelManaged: "true",
+ labelProxyGroup: "dev",
+ labelSvcType: typeEgress,
},
},
Spec: corev1.ServiceSpec{},
diff --git a/cmd/k8s-operator/egress-services.go b/cmd/k8s-operator/egress-services.go
index cf218ba4f..7103205ac 100644
--- a/cmd/k8s-operator/egress-services.go
+++ b/cmd/k8s-operator/egress-services.go
@@ -630,7 +630,11 @@ func tailnetTargetFromSvc(svc *corev1.Service) egressservices.TailnetTarget {
func portMap(p corev1.ServicePort) egressservices.PortMap {
// TODO (irbekrm): out of bounds check?
- return egressservices.PortMap{Protocol: string(p.Protocol), MatchPort: uint16(p.TargetPort.IntVal), TargetPort: uint16(p.Port)}
+ return egressservices.PortMap{
+ Protocol: string(p.Protocol),
+ MatchPort: uint16(p.TargetPort.IntVal),
+ TargetPort: uint16(p.Port),
+ }
}
func isEgressSvcForProxyGroup(obj client.Object) bool {
@@ -676,12 +680,12 @@ func egressSvcsConfigs(ctx context.Context, cl client.Client, proxyGroupName, ts
// should probably validate and truncate (?) the names if they are too long.
func egressSvcChildResourceLabels(svc *corev1.Service) map[string]string {
return map[string]string{
- LabelManaged: "true",
- LabelParentType: "svc",
- LabelParentName: svc.Name,
- LabelParentNamespace: svc.Namespace,
- labelProxyGroup: svc.Annotations[AnnotationProxyGroup],
- labelSvcType: typeEgress,
+ kubetypes.LabelManaged: "true",
+ LabelParentType: "svc",
+ LabelParentName: svc.Name,
+ LabelParentNamespace: svc.Namespace,
+ labelProxyGroup: svc.Annotations[AnnotationProxyGroup],
+ labelSvcType: typeEgress,
}
}
diff --git a/cmd/k8s-operator/ingress-for-pg.go b/cmd/k8s-operator/ingress-for-pg.go
index 4fa0af2a2..dc74a86a5 100644
--- a/cmd/k8s-operator/ingress-for-pg.go
+++ b/cmd/k8s-operator/ingress-for-pg.go
@@ -15,10 +15,14 @@ import (
"slices"
"strings"
"sync"
+ "time"
+
+ "math/rand/v2"
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
+ rbacv1 "k8s.io/api/rbac/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -53,9 +57,9 @@ const (
var gaugePGIngressResources = clientmetric.NewGauge(kubetypes.MetricIngressPGResourceCount)
-// IngressPGReconciler is a controller that reconciles Tailscale Ingresses should be exposed on an ingress ProxyGroup
-// (in HA mode).
-type IngressPGReconciler struct {
+// HAIngressReconciler is a controller that reconciles Tailscale Ingresses that
+// should be exposed on an ingress ProxyGroup (in HA mode).
+type HAIngressReconciler struct {
client.Client
recorder record.EventRecorder
@@ -65,6 +69,7 @@ type IngressPGReconciler struct {
tsNamespace string
lc localClient
defaultTags []string
+ operatorID string // stableID of the operator's Tailscale device
mu sync.Mutex // protects following
// managedIngresses is a set of all ingress resources that we're currently
@@ -72,20 +77,29 @@ type IngressPGReconciler struct {
managedIngresses set.Slice[types.UID]
}
-// Reconcile reconciles Ingresses that should be exposed over Tailscale in HA mode (on a ProxyGroup). It looks at all
-// Ingresses with tailscale.com/proxy-group annotation. For each such Ingress, it ensures that a VIPService named after
-// the hostname of the Ingress exists and is up to date. It also ensures that the serve config for the ingress
-// ProxyGroup is updated to route traffic for the VIPService to the Ingress's backend Services.
-// When an Ingress is deleted or unexposed, the VIPService and the associated serve config are cleaned up.
-// Ingress hostname change also results in the VIPService for the previous hostname being cleaned up and a new VIPService
-// being created for the new hostname.
-func (a *IngressPGReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
- logger := a.logger.With("Ingress", req.NamespacedName)
+// Reconcile reconciles Ingresses that should be exposed over Tailscale in HA
+// mode (on a ProxyGroup). It looks at all Ingresses with
+// tailscale.com/proxy-group annotation. For each such Ingress, it ensures that
+// a VIPService named after the hostname of the Ingress exists and is up to
+// date. It also ensures that the serve config for the ingress ProxyGroup is
+// updated to route traffic for the VIPService to the Ingress's backend
+// Services. Ingress hostname change also results in the VIPService for the
+// previous hostname being cleaned up and a new VIPService being created for the
+// new hostname.
+// HA Ingresses support multi-cluster Ingress setup.
+// Each VIPService contains a list of owner references that uniquely identify
+// the Ingress resource and the operator. When an Ingress that acts as a
+// backend is being deleted, the corresponding VIPService is only deleted if the
+// only owner reference that it contains is for this Ingress. If other owner
+// references are found, then cleanup operation only removes this Ingress' owner
+// reference.
+func (r *HAIngressReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
+ logger := r.logger.With("Ingress", req.NamespacedName)
logger.Debugf("starting reconcile")
defer logger.Debugf("reconcile finished")
ing := new(networkingv1.Ingress)
- err = a.Get(ctx, req.NamespacedName, ing)
+ err = r.Get(ctx, req.NamespacedName, ing)
if apierrors.IsNotFound(err) {
// Request object not found, could have been deleted after reconcile request.
logger.Debugf("Ingress not found, assuming it was deleted")
@@ -99,57 +113,69 @@ func (a *IngressPGReconciler) Reconcile(ctx context.Context, req reconcile.Reque
hostname := hostnameForIngress(ing)
logger = logger.With("hostname", hostname)
- if !ing.DeletionTimestamp.IsZero() || !a.shouldExpose(ing) {
- return res, a.maybeCleanup(ctx, hostname, ing, logger)
+ // needsRequeue is set to true if the underlying VIPService has changed as a result of this reconcile. If that
+ // is the case, we reconcile the Ingress one more time to ensure that concurrent updates to the VIPService in a
+ // multi-cluster Ingress setup have not resulted in another actor overwriting our VIPService update.
+ needsRequeue := false
+ if !ing.DeletionTimestamp.IsZero() || !r.shouldExpose(ing) {
+ needsRequeue, err = r.maybeCleanup(ctx, hostname, ing, logger)
+ } else {
+ needsRequeue, err = r.maybeProvision(ctx, hostname, ing, logger)
}
-
- if err := a.maybeProvision(ctx, hostname, ing, logger); err != nil {
- return res, fmt.Errorf("failed to provision: %w", err)
+ if err != nil {
+ return res, err
+ }
+ if needsRequeue {
+ res = reconcile.Result{RequeueAfter: requeueInterval()}
}
return res, nil
}
-// maybeProvision ensures that the VIPService and serve config for the Ingress are created or updated.
-func (a *IngressPGReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) error {
- if err := validateIngressClass(ctx, a.Client); err != nil {
+// maybeProvision ensures that a VIPService for this Ingress exists and is up to date and that the serve config for the
+// corresponding ProxyGroup contains the Ingress backend's definition.
+// If a VIPService does not exist, it will be created.
+// If a VIPService exists, but only with owner references from other operator instances, an owner reference for this
+// operator instance is added.
+// If a VIPService exists, but does not have an owner reference from any operator, we error
+// out assuming that this is a VIPService created by an unknown actor.
+// Returns true if the operation resulted in a VIPService update.
+func (r *HAIngressReconciler) maybeProvision(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcsChanged bool, err error) {
+ if err := validateIngressClass(ctx, r.Client); err != nil {
logger.Infof("error validating tailscale IngressClass: %v.", err)
- return nil
+ return false, nil
}
-
// Get and validate ProxyGroup readiness
pgName := ing.Annotations[AnnotationProxyGroup]
if pgName == "" {
logger.Infof("[unexpected] no ProxyGroup annotation, skipping VIPService provisioning")
- return nil
+ return false, nil
}
+ logger = logger.With("ProxyGroup", pgName)
+
pg := &tsapi.ProxyGroup{}
- if err := a.Get(ctx, client.ObjectKey{Name: pgName}, pg); err != nil {
+ if err := r.Get(ctx, client.ObjectKey{Name: pgName}, pg); err != nil {
if apierrors.IsNotFound(err) {
- logger.Infof("ProxyGroup %q does not exist", pgName)
- return nil
+ logger.Infof("ProxyGroup does not exist")
+ return false, nil
}
- return fmt.Errorf("getting ProxyGroup %q: %w", pgName, err)
+ return false, fmt.Errorf("getting ProxyGroup %q: %w", pgName, err)
}
if !tsoperator.ProxyGroupIsReady(pg) {
- // TODO(irbekrm): we need to reconcile ProxyGroup Ingresses on ProxyGroup changes to not miss the status update
- // in this case.
- logger.Infof("ProxyGroup %q is not ready", pgName)
- return nil
+ logger.Infof("ProxyGroup is not (yet) ready")
+ return false, nil
}
// Validate Ingress configuration
- if err := a.validateIngress(ing, pg); err != nil {
+ if err := r.validateIngress(ctx, ing, pg); err != nil {
logger.Infof("invalid Ingress configuration: %v", err)
- a.recorder.Event(ing, corev1.EventTypeWarning, "InvalidIngressConfiguration", err.Error())
- return nil
+ r.recorder.Event(ing, corev1.EventTypeWarning, "InvalidIngressConfiguration", err.Error())
+ return false, nil
}
- if !IsHTTPSEnabledOnTailnet(a.tsnetServer) {
- a.recorder.Event(ing, corev1.EventTypeWarning, "HTTPSNotEnabled", "HTTPS is not enabled on the tailnet; ingress may not work")
+ if !IsHTTPSEnabledOnTailnet(r.tsnetServer) {
+ r.recorder.Event(ing, corev1.EventTypeWarning, "HTTPSNotEnabled", "HTTPS is not enabled on the tailnet; ingress may not work")
}
- logger = logger.With("proxy-group", pg)
-
if !slices.Contains(ing.Finalizers, FinalizerNamePG) {
// This log line is printed exactly once during initial provisioning,
// because once the finalizer is in place this block gets skipped. So,
@@ -157,64 +183,82 @@ func (a *IngressPGReconciler) maybeProvision(ctx context.Context, hostname strin
// multi-reconcile operation is underway.
logger.Infof("exposing Ingress over tailscale")
ing.Finalizers = append(ing.Finalizers, FinalizerNamePG)
- if err := a.Update(ctx, ing); err != nil {
- return fmt.Errorf("failed to add finalizer: %w", err)
+ if err := r.Update(ctx, ing); err != nil {
+ return false, fmt.Errorf("failed to add finalizer: %w", err)
}
- a.mu.Lock()
- a.managedIngresses.Add(ing.UID)
- gaugePGIngressResources.Set(int64(a.managedIngresses.Len()))
- a.mu.Unlock()
+ r.mu.Lock()
+ r.managedIngresses.Add(ing.UID)
+ gaugePGIngressResources.Set(int64(r.managedIngresses.Len()))
+ r.mu.Unlock()
}
- // 1. Ensure that if Ingress' hostname has changed, any VIPService resources corresponding to the old hostname
- // are cleaned up.
- // In practice, this function will ensure that any VIPServices that are associated with the provided ProxyGroup
- // and no longer owned by an Ingress are cleaned up. This is fine- it is not expensive and ensures that in edge
- // cases (a single update changed both hostname and removed ProxyGroup annotation) the VIPService is more likely
- // to be (eventually) removed.
- if err := a.maybeCleanupProxyGroup(ctx, pgName, logger); err != nil {
- return fmt.Errorf("failed to cleanup VIPService resources for ProxyGroup: %w", err)
+ // 1. Ensure that if Ingress' hostname has changed, any VIPService
+ // resources corresponding to the old hostname are cleaned up.
+ // In practice, this function will ensure that any VIPServices that are
+ // associated with the provided ProxyGroup and no longer owned by an
+ // Ingress are cleaned up. This is fine- it is not expensive and ensures
+ // that in edge cases (a single update changed both hostname and removed
+ // ProxyGroup annotation) the VIPService is more likely to be
+ // (eventually) removed.
+ svcsChanged, err = r.maybeCleanupProxyGroup(ctx, pgName, logger)
+ if err != nil {
+ return false, fmt.Errorf("failed to cleanup VIPService resources for ProxyGroup: %w", err)
}
- // 2. Ensure that there isn't a VIPService with the same hostname already created and not owned by this Ingress.
- // TODO(irbekrm): perhaps in future we could have record names being stored on VIPServices. I am not certain if
- // there might not be edge cases (custom domains, etc?) where attempting to determine the DNS name of the
- // VIPService in this way won't be incorrect.
- tcd, err := a.tailnetCertDomain(ctx)
+ // 2. Ensure that there isn't a VIPService with the same hostname
+ // already created and not owned by this Ingress.
+ // TODO(irbekrm): perhaps in future we could have record names being
+ // stored on VIPServices. I am not certain if there might not be edge
+ // cases (custom domains, etc?) where attempting to determine the DNS
+ // name of the VIPService in this way won't be incorrect.
+ tcd, err := r.tailnetCertDomain(ctx)
if err != nil {
- return fmt.Errorf("error determining DNS name base: %w", err)
+ return false, fmt.Errorf("error determining DNS name base: %w", err)
}
dnsName := hostname + "." + tcd
serviceName := tailcfg.ServiceName("svc:" + hostname)
- existingVIPSvc, err := a.tsClient.GetVIPService(ctx, serviceName)
- // TODO(irbekrm): here and when creating the VIPService, verify if the error is not terminal (and therefore
- // should not be reconciled). For example, if the hostname is already a hostname of a Tailscale node, the GET
- // here will fail.
+ existingVIPSvc, err := r.tsClient.GetVIPService(ctx, serviceName)
+ // TODO(irbekrm): here and when creating the VIPService, verify if the
+ // error is not terminal (and therefore should not be reconciled). For
+ // example, if the hostname is already a hostname of a Tailscale node,
+ // the GET here will fail.
if err != nil {
errResp := &tailscale.ErrResponse{}
if ok := errors.As(err, errResp); ok && errResp.Status != http.StatusNotFound {
- return fmt.Errorf("error getting VIPService %q: %w", hostname, err)
+ return false, fmt.Errorf("error getting VIPService %q: %w", hostname, err)
}
}
- if existingVIPSvc != nil && !isVIPServiceForIngress(existingVIPSvc, ing) {
- logger.Infof("VIPService %q for MagicDNS name %q already exists, but is not owned by this Ingress. Please delete it manually and recreate this Ingress to proceed or create an Ingress for a different MagicDNS name", hostname, dnsName)
- a.recorder.Event(ing, corev1.EventTypeWarning, "ConflictingVIPServiceExists", fmt.Sprintf("VIPService %q for MagicDNS name %q already exists, but is not owned by this Ingress. Please delete it manually to proceed or create an Ingress for a different MagicDNS name", hostname, dnsName))
- return nil
+ // Generate the VIPService comment for new or existing VIPService. This
+ // checks and ensures that VIPService's owner references are updated for
+ // this Ingress and errors if that is not possible (i.e. because it
+ // appears that the VIPService has been created by a non-operator
+ // actor).
+ svcComment, err := r.ownerRefsComment(existingVIPSvc)
+ if err != nil {
+ const instr = "To proceed, you can either manually delete the existing VIPService or choose a different MagicDNS name at `.spec.tls.hosts[0] in the Ingress definition"
+ msg := fmt.Sprintf("error ensuring ownership of VIPService %s: %v. %s", hostname, err, instr)
+ logger.Warn(msg)
+ r.recorder.Event(ing, corev1.EventTypeWarning, "InvalidVIPService", msg)
+ return false, nil
+ }
+ // 3. Ensure that TLS Secret and RBAC exists
+ if err := r.ensureCertResources(ctx, pgName, dnsName); err != nil {
+ return false, fmt.Errorf("error ensuring cert resources: %w", err)
}
- // 3. Ensure that the serve config for the ProxyGroup contains the VIPService
- cm, cfg, err := a.proxyGroupServeConfig(ctx, pgName)
+ // 4. Ensure that the serve config for the ProxyGroup contains the VIPService.
+ cm, cfg, err := r.proxyGroupServeConfig(ctx, pgName)
if err != nil {
- return fmt.Errorf("error getting Ingress serve config: %w", err)
+ return false, fmt.Errorf("error getting Ingress serve config: %w", err)
}
if cm == nil {
logger.Infof("no Ingress serve config ConfigMap found, unable to update serve config. Ensure that ProxyGroup is healthy.")
- return nil
+ return svcsChanged, nil
}
ep := ipn.HostPort(fmt.Sprintf("%s:443", dnsName))
- handlers, err := handlersForIngress(ctx, ing, a.Client, a.recorder, dnsName, logger)
+ handlers, err := handlersForIngress(ctx, ing, r.Client, r.recorder, dnsName, logger)
if err != nil {
- return fmt.Errorf("failed to get handlers for Ingress: %w", err)
+ return false, fmt.Errorf("failed to get handlers for Ingress: %w", err)
}
ingCfg := &ipn.ServiceConfig{
TCP: map[uint16]*ipn.TCPPortHandler{
@@ -250,16 +294,16 @@ func (a *IngressPGReconciler) maybeProvision(ctx context.Context, hostname strin
mak.Set(&cfg.Services, serviceName, ingCfg)
cfgBytes, err := json.Marshal(cfg)
if err != nil {
- return fmt.Errorf("error marshaling serve config: %w", err)
+ return false, fmt.Errorf("error marshaling serve config: %w", err)
}
mak.Set(&cm.BinaryData, serveConfigKey, cfgBytes)
- if err := a.Update(ctx, cm); err != nil {
- return fmt.Errorf("error updating serve config: %w", err)
+ if err := r.Update(ctx, cm); err != nil {
+ return false, fmt.Errorf("error updating serve config: %w", err)
}
}
// 4. Ensure that the VIPService exists and is up to date.
- tags := a.defaultTags
+ tags := r.defaultTags
if tstr, ok := ing.Annotations[AnnotationTags]; ok {
tags = strings.Split(tstr, ",")
}
@@ -273,66 +317,93 @@ func (a *IngressPGReconciler) maybeProvision(ctx context.Context, hostname strin
Name: serviceName,
Tags: tags,
Ports: vipPorts,
- Comment: fmt.Sprintf(VIPSvcOwnerRef, ing.UID),
+ Comment: svcComment,
}
if existingVIPSvc != nil {
vipSvc.Addrs = existingVIPSvc.Addrs
}
+ // TODO(irbekrm): right now if two Ingress resources attempt to apply different VIPService configs (different
+ // tags, or HTTP endpoint settings) we can end up reconciling those in a loop. We should detect when an Ingress
+ // with the same generation number has been reconciled ~more than N times and stop attempting to apply updates.
if existingVIPSvc == nil ||
!reflect.DeepEqual(vipSvc.Tags, existingVIPSvc.Tags) ||
- !reflect.DeepEqual(vipSvc.Ports, existingVIPSvc.Ports) {
- logger.Infof("Ensuring VIPService %q exists and is up to date", hostname)
- if err := a.tsClient.CreateOrUpdateVIPService(ctx, vipSvc); err != nil {
- logger.Infof("error creating VIPService: %v", err)
- return fmt.Errorf("error creating VIPService: %w", err)
+ !reflect.DeepEqual(vipSvc.Ports, existingVIPSvc.Ports) ||
+ !strings.EqualFold(vipSvc.Comment, existingVIPSvc.Comment) {
+ logger.Infof("Ensuring VIPService exists and is up to date")
+ if err := r.tsClient.CreateOrUpdateVIPService(ctx, vipSvc); err != nil {
+ return false, fmt.Errorf("error creating VIPService: %w", err)
}
}
- // 5. Update Ingress status
- oldStatus := ing.Status.DeepCopy()
- ports := []networkingv1.IngressPortStatus{
- {
- Protocol: "TCP",
- Port: 443,
- },
+ // 5. Update tailscaled's AdvertiseServices config, which should add the VIPService
+ // IPs to the ProxyGroup Pods' AllowedIPs in the next netmap update if approved.
+ if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg.Name, serviceName, true, logger); err != nil {
+ return false, fmt.Errorf("failed to update tailscaled config: %w", err)
}
- if isHTTPEndpointEnabled(ing) {
- ports = append(ports, networkingv1.IngressPortStatus{
- Protocol: "TCP",
- Port: 80,
- })
+
+ // 6. Update Ingress status if ProxyGroup Pods are ready.
+ count, err := r.numberPodsAdvertising(ctx, pg.Name, serviceName)
+ if err != nil {
+ return false, fmt.Errorf("failed to check if any Pods are configured: %w", err)
}
- ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
- {
- Hostname: dnsName,
- Ports: ports,
- },
+
+ oldStatus := ing.Status.DeepCopy()
+
+ switch count {
+ case 0:
+ ing.Status.LoadBalancer.Ingress = nil
+ default:
+ ports := []networkingv1.IngressPortStatus{
+ {
+ Protocol: "TCP",
+ Port: 443,
+ },
+ }
+ if isHTTPEndpointEnabled(ing) {
+ ports = append(ports, networkingv1.IngressPortStatus{
+ Protocol: "TCP",
+ Port: 80,
+ })
+ }
+ ing.Status.LoadBalancer.Ingress = []networkingv1.IngressLoadBalancerIngress{
+ {
+ Hostname: dnsName,
+ Ports: ports,
+ },
+ }
}
- if apiequality.Semantic.DeepEqual(oldStatus, ing.Status) {
- return nil
+ if apiequality.Semantic.DeepEqual(oldStatus, &ing.Status) {
+ return svcsChanged, nil
}
- if err := a.Status().Update(ctx, ing); err != nil {
- return fmt.Errorf("failed to update Ingress status: %w", err)
+
+ const prefix = "Updating Ingress status"
+ if count == 0 {
+ logger.Infof("%s. No Pods are advertising VIPService yet", prefix)
+ } else {
+ logger.Infof("%s. %d Pod(s) advertising VIPService", prefix, count)
}
- return nil
+
+ if err := r.Status().Update(ctx, ing); err != nil {
+ return false, fmt.Errorf("failed to update Ingress status: %w", err)
+ }
+ return svcsChanged, nil
}
-// maybeCleanupProxyGroup ensures that if an Ingress hostname has changed, any VIPService resources created for the
-// Ingress' ProxyGroup corresponding to the old hostname are cleaned up. A run of this function will ensure that any
-// VIPServices that are associated with the provided ProxyGroup and no longer owned by an Ingress are cleaned up.
-func (a *IngressPGReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) error {
+// maybeCleanupProxyGroup ensures that VIPServices that are associated with the provided ProxyGroup and are no longer managed by this operator's instance are deleted, if not owned by other operator instances, else the owner reference is cleaned up.
+// Returns true if the operation resulted in existing VIPService updates (owner reference removal).
+func (r *HAIngressReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger) (svcsChanged bool, err error) {
// Get serve config for the ProxyGroup
- cm, cfg, err := a.proxyGroupServeConfig(ctx, proxyGroupName)
+ cm, cfg, err := r.proxyGroupServeConfig(ctx, proxyGroupName)
if err != nil {
- return fmt.Errorf("getting serve config: %w", err)
+ return false, fmt.Errorf("getting serve config: %w", err)
}
if cfg == nil {
- return nil // ProxyGroup does not have any VIPServices
+ return false, nil // ProxyGroup does not have any VIPServices
}
ingList := &networkingv1.IngressList{}
- if err := a.List(ctx, ingList); err != nil {
- return fmt.Errorf("listing Ingresses: %w", err)
+ if err := r.List(ctx, ingList); err != nil {
+ return false, fmt.Errorf("listing Ingresses: %w", err)
}
serveConfigChanged := false
// For each VIPService in serve config...
@@ -349,101 +420,113 @@ func (a *IngressPGReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
if !found {
logger.Infof("VIPService %q is not owned by any Ingress, cleaning up", vipServiceName)
- svc, err := a.getVIPService(ctx, vipServiceName, logger)
+
+ // Delete the VIPService from control if necessary.
+ svcsChanged, err = r.cleanupVIPService(ctx, vipServiceName, logger)
if err != nil {
- errResp := &tailscale.ErrResponse{}
- if errors.As(err, &errResp) && errResp.Status == http.StatusNotFound {
- delete(cfg.Services, vipServiceName)
- serveConfigChanged = true
- continue
- }
- return err
+ return false, fmt.Errorf("deleting VIPService %q: %w", vipServiceName, err)
}
- if isVIPServiceForAnyIngress(svc) {
- logger.Infof("cleaning up orphaned VIPService %q", vipServiceName)
- if err := a.tsClient.DeleteVIPService(ctx, vipServiceName); err != nil {
- errResp := &tailscale.ErrResponse{}
- if !errors.As(err, &errResp) || errResp.Status != http.StatusNotFound {
- return fmt.Errorf("deleting VIPService %q: %w", vipServiceName, err)
- }
- }
+
+ // Make sure the VIPService is not advertised in tailscaled or serve config.
+ if err = r.maybeUpdateAdvertiseServicesConfig(ctx, proxyGroupName, vipServiceName, false, logger); err != nil {
+ return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
+ }
+ _, ok := cfg.Services[vipServiceName]
+ if ok {
+ logger.Infof("Removing VIPService %q from serve config", vipServiceName)
+ delete(cfg.Services, vipServiceName)
+ serveConfigChanged = true
+ }
+ if err := r.cleanupCertResources(ctx, proxyGroupName, vipServiceName); err != nil {
+ return false, fmt.Errorf("failed to clean up cert resources: %w", err)
}
- delete(cfg.Services, vipServiceName)
- serveConfigChanged = true
}
}
if serveConfigChanged {
cfgBytes, err := json.Marshal(cfg)
if err != nil {
- return fmt.Errorf("marshaling serve config: %w", err)
+ return false, fmt.Errorf("marshaling serve config: %w", err)
}
mak.Set(&cm.BinaryData, serveConfigKey, cfgBytes)
- if err := a.Update(ctx, cm); err != nil {
- return fmt.Errorf("updating serve config: %w", err)
+ if err := r.Update(ctx, cm); err != nil {
+ return false, fmt.Errorf("updating serve config: %w", err)
}
}
- return nil
+ return svcsChanged, nil
}
// maybeCleanup ensures that any resources, such as a VIPService created for this Ingress, are cleaned up when the
-// Ingress is being deleted or is unexposed.
-func (a *IngressPGReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) error {
+// Ingress is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup: the VIPService is only
+// deleted if it does not contain any other owner references. If it does, the cleanup only removes the owner reference
+// of this operator instance.
+// It returns true if an existing VIPService was updated (owner reference removed) instead of being deleted.
+// If the cleanup succeeded, the Ingress finalizer is removed as the last step and any error from doing so is returned.
+func (r *HAIngressReconciler) maybeCleanup(ctx context.Context, hostname string, ing *networkingv1.Ingress, logger *zap.SugaredLogger) (svcChanged bool, err error) {
logger.Debugf("Ensuring any resources for Ingress are cleaned up")
ix := slices.Index(ing.Finalizers, FinalizerNamePG)
if ix < 0 {
logger.Debugf("no finalizer, nothing to do")
- a.mu.Lock()
- defer a.mu.Unlock()
- a.managedIngresses.Remove(ing.UID)
- gaugePGIngressResources.Set(int64(a.managedIngresses.Len()))
- return nil
+ return false, nil
}
+ logger.Infof("Ensuring that VIPService %q configuration is cleaned up", hostname)
- // 1. Check if there is a VIPService created for this Ingress.
+ // Ensure that if cleanup succeeded Ingress finalizers are removed.
+ defer func() {
+ if err != nil {
+ return
+ }
+ // Check the error returned by deleteFinalizer itself (e), not the outer err, which is known to be nil here.
+ if e := r.deleteFinalizer(ctx, ing, logger); e != nil {
+ err = errors.Join(err, e)
+ }
+ }()
+
+ // 1. Check if there is a VIPService associated with this Ingress.
pg := ing.Annotations[AnnotationProxyGroup]
- cm, cfg, err := a.proxyGroupServeConfig(ctx, pg)
+ cm, cfg, err := r.proxyGroupServeConfig(ctx, pg)
if err != nil {
- return fmt.Errorf("error getting ProxyGroup serve config: %w", err)
+ return false, fmt.Errorf("error getting ProxyGroup serve config: %w", err)
}
serviceName := tailcfg.ServiceName("svc:" + hostname)
+
// VIPService is always first added to serve config and only then created in the Tailscale API, so if it is not
- // found in the serve config, we can assume that there is no VIPService. TODO(irbekrm): once we have ingress
- // ProxyGroup, we will probably add currently exposed VIPServices to its status. At that point, we can use the
- // status rather than checking the serve config each time.
- if cfg == nil || cfg.Services == nil || cfg.Services[serviceName] == nil {
- return nil
+ // found in the serve config, we can assume that there is no VIPService. (If the serve config does not exist at
+ // all, it is possible that the ProxyGroup has been deleted before cleaning up the Ingress, so carry on with
+ // cleanup).
+ if cfg != nil && cfg.Services != nil && cfg.Services[serviceName] == nil {
+ return false, nil
+ }
+
+ // 2. Clean up the VIPService resources.
+ svcChanged, err = r.cleanupVIPService(ctx, serviceName, logger)
+ if err != nil {
+ return false, fmt.Errorf("error deleting VIPService: %w", err)
+ }
+
+ // 3. Clean up any cluster resources
+ if err := r.cleanupCertResources(ctx, pg, serviceName); err != nil {
+ return false, fmt.Errorf("failed to clean up cert resources: %w", err)
}
- logger.Infof("Ensuring that VIPService %q configuration is cleaned up", hostname)
- // 2. Delete the VIPService.
- if err := a.deleteVIPServiceIfExists(ctx, serviceName, ing, logger); err != nil {
- return fmt.Errorf("error deleting VIPService: %w", err)
+ if cfg == nil || cfg.Services == nil { // user probably deleted the ProxyGroup
+ return svcChanged, nil
}
- // 3. Remove the VIPService from the serve config for the ProxyGroup.
+ // 4. Unadvertise the VIPService in tailscaled config.
+ if err = r.maybeUpdateAdvertiseServicesConfig(ctx, pg, serviceName, false, logger); err != nil {
+ return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
+ }
+
+ // 5. Remove the VIPService from the serve config for the ProxyGroup.
logger.Infof("Removing VIPService %q from serve config for ProxyGroup %q", hostname, pg)
delete(cfg.Services, serviceName)
cfgBytes, err := json.Marshal(cfg)
if err != nil {
- return fmt.Errorf("error marshaling serve config: %w", err)
+ return false, fmt.Errorf("error marshaling serve config: %w", err)
}
mak.Set(&cm.BinaryData, serveConfigKey, cfgBytes)
- if err := a.Update(ctx, cm); err != nil {
- return fmt.Errorf("error updating ConfigMap %q: %w", cm.Name, err)
- }
-
- if err := a.deleteFinalizer(ctx, ing, logger); err != nil {
- return fmt.Errorf("failed to remove finalizer: %w", err)
- }
- a.mu.Lock()
- defer a.mu.Unlock()
- a.managedIngresses.Remove(ing.UID)
- gaugePGIngressResources.Set(int64(a.managedIngresses.Len()))
- return nil
+ return svcChanged, r.Update(ctx, cm)
}
-func (a *IngressPGReconciler) deleteFinalizer(ctx context.Context, ing *networkingv1.Ingress, logger *zap.SugaredLogger) error {
+func (r *HAIngressReconciler) deleteFinalizer(ctx context.Context, ing *networkingv1.Ingress, logger *zap.SugaredLogger) error {
found := false
ing.Finalizers = slices.DeleteFunc(ing.Finalizers, func(f string) bool {
found = true
@@ -454,9 +537,13 @@ func (a *IngressPGReconciler) deleteFinalizer(ctx context.Context, ing *networki
}
logger.Debug("ensure %q finalizer is removed", FinalizerNamePG)
- if err := a.Update(ctx, ing); err != nil {
+ if err := r.Update(ctx, ing); err != nil {
return fmt.Errorf("failed to remove finalizer %q: %w", FinalizerNamePG, err)
}
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ r.managedIngresses.Remove(ing.UID)
+ gaugePGIngressResources.Set(int64(r.managedIngresses.Len()))
return nil
}
@@ -464,15 +551,15 @@ func pgIngressCMName(pg string) string {
return fmt.Sprintf("%s-ingress-config", pg)
}
-func (a *IngressPGReconciler) proxyGroupServeConfig(ctx context.Context, pg string) (cm *corev1.ConfigMap, cfg *ipn.ServeConfig, err error) {
+func (r *HAIngressReconciler) proxyGroupServeConfig(ctx context.Context, pg string) (cm *corev1.ConfigMap, cfg *ipn.ServeConfig, err error) {
name := pgIngressCMName(pg)
cm = &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: name,
- Namespace: a.tsNamespace,
+ Namespace: r.tsNamespace,
},
}
- if err := a.Get(ctx, client.ObjectKeyFromObject(cm), cm); err != nil && !apierrors.IsNotFound(err) {
+ if err := r.Get(ctx, client.ObjectKeyFromObject(cm), cm); err != nil && !apierrors.IsNotFound(err) {
return nil, nil, fmt.Errorf("error retrieving ingress serve config ConfigMap %s: %v", name, err)
}
if apierrors.IsNotFound(err) {
@@ -492,16 +579,16 @@ type localClient interface {
}
// tailnetCertDomain returns the base domain (TCD) of the current tailnet.
+// The value is the MagicDNSSuffix of the current tailnet as reported by the local client's status (queried without
+// peers). An error is returned if the status cannot be retrieved.
-func (a *IngressPGReconciler) tailnetCertDomain(ctx context.Context) (string, error) {
- st, err := a.lc.StatusWithoutPeers(ctx)
+func (r *HAIngressReconciler) tailnetCertDomain(ctx context.Context) (string, error) {
+ st, err := r.lc.StatusWithoutPeers(ctx)
if err != nil {
return "", fmt.Errorf("error getting tailscale status: %w", err)
}
+ // NOTE(review): assumes st.CurrentTailnet is always populated here - confirm it cannot be nil.
return st.CurrentTailnet.MagicDNSSuffix, nil
}
-// shouldExpose returns true if the Ingress should be exposed over Tailscale in HA mode (on a ProxyGroup)
-func (a *IngressPGReconciler) shouldExpose(ing *networkingv1.Ingress) bool {
+// shouldExpose returns true if the Ingress should be exposed over Tailscale in HA mode (on a ProxyGroup).
+func (r *HAIngressReconciler) shouldExpose(ing *networkingv1.Ingress) bool {
isTSIngress := ing != nil &&
ing.Spec.IngressClassName != nil &&
*ing.Spec.IngressClassName == tailscaleIngressClassName
@@ -509,39 +596,13 @@ func (a *IngressPGReconciler) shouldExpose(ing *networkingv1.Ingress) bool {
return isTSIngress && pgAnnot != ""
}
-func (a *IngressPGReconciler) getVIPService(ctx context.Context, name tailcfg.ServiceName, logger *zap.SugaredLogger) (*tailscale.VIPService, error) {
- svc, err := a.tsClient.GetVIPService(ctx, name)
- if err != nil {
- errResp := &tailscale.ErrResponse{}
- if ok := errors.As(err, errResp); ok && errResp.Status != http.StatusNotFound {
- logger.Infof("error getting VIPService %q: %v", name, err)
- return nil, fmt.Errorf("error getting VIPService %q: %w", name, err)
- }
- }
- return svc, nil
-}
-
-func isVIPServiceForIngress(svc *tailscale.VIPService, ing *networkingv1.Ingress) bool {
- if svc == nil || ing == nil {
- return false
- }
- return strings.EqualFold(svc.Comment, fmt.Sprintf(VIPSvcOwnerRef, ing.UID))
-}
-
-func isVIPServiceForAnyIngress(svc *tailscale.VIPService) bool {
- if svc == nil {
- return false
- }
- return strings.HasPrefix(svc.Comment, "tailscale.com/k8s-operator:owned-by:")
-}
-
// validateIngress validates that the Ingress is properly configured.
// Currently validates:
// - Any tags provided via tailscale.com/tags annotation are valid Tailscale ACL tags
// - The derived hostname is a valid DNS label
// - The referenced ProxyGroup exists and is of type 'ingress'
// - Ingress' TLS block is invalid
-func (a *IngressPGReconciler) validateIngress(ing *networkingv1.Ingress, pg *tsapi.ProxyGroup) error {
+func (r *HAIngressReconciler) validateIngress(ctx context.Context, ing *networkingv1.Ingress, pg *tsapi.ProxyGroup) error {
var errs []error
// Validate tags if present
@@ -577,26 +638,66 @@ func (a *IngressPGReconciler) validateIngress(ing *networkingv1.Ingress, pg *tsa
errs = append(errs, fmt.Errorf("ProxyGroup %q is not ready", pg.Name))
}
+ // It is invalid to have multiple Ingress resources for the same VIPService in one cluster.
+ ingList := &networkingv1.IngressList{}
+ if err := r.List(ctx, ingList); err != nil {
+ errs = append(errs, fmt.Errorf("[unexpected] error listing Ingresses: %w", err))
+ return errors.Join(errs...)
+ }
+ for _, i := range ingList.Items {
+ if r.shouldExpose(&i) && hostnameForIngress(&i) == hostname && i.Name != ing.Name {
+ errs = append(errs, fmt.Errorf("found duplicate Ingress %q for hostname %q - multiple Ingresses for the same hostname in the same cluster are not allowed", i.Name, hostname))
+ }
+ }
return errors.Join(errs...)
}
-// deleteVIPServiceIfExists attempts to delete the VIPService if it exists and is owned by the given Ingress.
-func (a *IngressPGReconciler) deleteVIPServiceIfExists(ctx context.Context, name tailcfg.ServiceName, ing *networkingv1.Ingress, logger *zap.SugaredLogger) error {
- svc, err := a.getVIPService(ctx, name, logger)
+// cleanupVIPService deletes any VIPService by the provided name if it is not owned by operator instances other than this one.
+// If a VIPService is found, but contains other owner references, only removes this operator's owner reference.
+// If a VIPService by the given name is not found or does not contain this operator's owner reference, do nothing.
+// It returns true if an existing VIPService was updated to remove owner reference, as well as any error that occurred.
+func (r *HAIngressReconciler) cleanupVIPService(ctx context.Context, name tailcfg.ServiceName, logger *zap.SugaredLogger) (updated bool, _ error) {
+ svc, err := r.tsClient.GetVIPService(ctx, name)
if err != nil {
- return fmt.Errorf("error getting VIPService: %w", err)
- }
+ // A 404 from the API means there is nothing to clean up.
+ errResp := &tailscale.ErrResponse{}
+ if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
+ return false, nil
+ }
- // isVIPServiceForIngress handles nil svc, so we don't need to check it here
- if !isVIPServiceForIngress(svc, ing) {
- return nil
+ return false, fmt.Errorf("error getting VIPService: %w", err)
}
-
+ if svc == nil {
+ return false, nil
+ }
+ c, err := parseComment(svc)
+ if err != nil {
+ return false, fmt.Errorf("error parsing VIPService comment: %w", err)
+ }
+ if c == nil || len(c.OwnerRefs) == 0 {
+ return false, nil
+ }
+ // Comparing with the operatorID only means that we will not be able to
+ // clean up VIPServices in cases where the operator was deleted from the
+ // cluster before deleting the Ingress. Perhaps the comparison could be
+ // 'if or.OperatorID == r.operatorID || or.ingressUID == r.ingressUID'.
+ ix := slices.IndexFunc(c.OwnerRefs, func(or OwnerRef) bool {
+ return or.OperatorID == r.operatorID
+ })
+ if ix == -1 {
+ return false, nil
+ }
+ // This operator is the only owner: delete the VIPService outright.
+ if len(c.OwnerRefs) == 1 {
+ logger.Infof("Deleting VIPService %q", name)
+ return false, r.tsClient.DeleteVIPService(ctx, name)
+ }
+ // Other owners remain: keep the VIPService and only drop this operator's owner reference.
+ c.OwnerRefs = slices.Delete(c.OwnerRefs, ix, ix+1)
- logger.Infof("Deleting VIPService %q", name)
- if err = a.tsClient.DeleteVIPService(ctx, name); err != nil {
- return fmt.Errorf("error deleting VIPService: %w", err)
+ logger.Infof("VIPService %q is owned by other operator instances, removing only this operator's owner reference", name)
+ commentBytes, err := json.Marshal(c)
+ if err != nil {
+ return false, fmt.Errorf("error marshalling updated VIPService owner reference: %w", err)
}
- return nil
+ svc.Comment = string(commentBytes)
+ return true, r.tsClient.CreateOrUpdateVIPService(ctx, svc)
}
// isHTTPEndpointEnabled returns true if the Ingress has been configured to expose an HTTP endpoint to tailnet.
@@ -606,3 +707,283 @@ func isHTTPEndpointEnabled(ing *networkingv1.Ingress) bool {
}
return ing.Annotations[annotationHTTPEndpoint] == "enabled"
}
+
+// maybeUpdateAdvertiseServicesConfig walks every tailscaled config file held
+// in the config Secrets of the given ProxyGroup and makes sure that
+// serviceName is present in (shouldBeAdvertised == true) or absent from
+// (shouldBeAdvertised == false) its AdvertiseServices list. A Secret is only
+// written back if at least one of its config files was modified.
+func (r *HAIngressReconciler) maybeUpdateAdvertiseServicesConfig(ctx context.Context, pgName string, serviceName tailcfg.ServiceName, shouldBeAdvertised bool, logger *zap.SugaredLogger) (err error) {
+	logger.Debugf("Updating ProxyGroup tailscaled configs to advertise service %q: %v", serviceName, shouldBeAdvertised)
+
+	// Look up the config Secrets that belong to this ProxyGroup.
+	cfgSecrets := &corev1.SecretList{}
+	if err := r.List(ctx, cfgSecrets, client.InNamespace(r.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, "config"))); err != nil {
+		return fmt.Errorf("failed to list config Secrets: %w", err)
+	}
+
+	svc := serviceName.String()
+	for _, secret := range cfgSecrets.Items {
+		dirty := false
+		for fileName, raw := range secret.Data {
+			var conf ipn.ConfigVAlpha
+			if err := json.Unmarshal(raw, &conf); err != nil {
+				return fmt.Errorf("error unmarshalling ProxyGroup config: %w", err)
+			}
+
+			// Nothing to do if the file already reflects the desired state.
+			idx := slices.Index(conf.AdvertiseServices, svc)
+			if (idx >= 0) == shouldBeAdvertised {
+				continue
+			}
+			if shouldBeAdvertised {
+				// Not advertised yet: add it.
+				conf.AdvertiseServices = append(conf.AdvertiseServices, svc)
+			} else {
+				// Advertised, but should not be: remove it.
+				conf.AdvertiseServices = slices.Delete(conf.AdvertiseServices, idx, idx+1)
+			}
+
+			// Store the re-encoded config back into the Secret.
+			encoded, err := json.Marshal(conf)
+			if err != nil {
+				return fmt.Errorf("error marshalling ProxyGroup config: %w", err)
+			}
+			mak.Set(&secret.Data, fileName, encoded)
+			dirty = true
+		}
+
+		if !dirty {
+			continue
+		}
+		if err := r.Update(ctx, &secret); err != nil {
+			return fmt.Errorf("error updating ProxyGroup config Secret: %w", err)
+		}
+	}
+
+	return nil
+}
+
+func (a *HAIngressReconciler) numberPodsAdvertising(ctx context.Context, pgName string, serviceName tailcfg.ServiceName) (int, error) {
+ // Get all state Secrets for this ProxyGroup.
+ secrets := &corev1.SecretList{}
+ if err := a.List(ctx, secrets, client.InNamespace(a.tsNamespace), client.MatchingLabels(pgSecretLabels(pgName, "state"))); err != nil {
+ return 0, fmt.Errorf("failed to list ProxyGroup %q state Secrets: %w", pgName, err)
+ }
+
+ var count int
+ for _, secret := range secrets.Items {
+ prefs, ok, err := getDevicePrefs(&secret)
+ if err != nil {
+ return 0, fmt.Errorf("error getting node metadata: %w", err)
+ }
+ if !ok {
+ continue
+ }
+ if slices.Contains(prefs.AdvertiseServices, serviceName.String()) {
+ count++
+ }
+ }
+
+ return count, nil
+}
+
+// OwnerRef is an owner reference that uniquely identifies a Tailscale
+// Kubernetes operator instance. It is JSON-encoded as an element of
+// comment.OwnerRefs.
+type OwnerRef struct {
+ // OperatorID is the stable ID of the operator's Tailscale device.
+ // It is omitted from the JSON encoding when empty.
+ OperatorID string `json:"operatorID,omitempty"`
+}
+
+// comment is the content of the VIPService.Comment field. It is stored in
+// that field in its JSON encoding.
+type comment struct {
+ // OwnerRefs is a list of owner references that identify all operator
+ // instances that manage this VIPService. It is omitted from the JSON
+ // encoding when the list is empty.
+ OwnerRefs []OwnerRef `json:"ownerRefs,omitempty"`
+}
+
+// ownerRefsComment returns the VIPService Comment value that includes an
+// owner reference for this operator instance.
+//
+//   - If svc is nil, a new comment holding only this operator's owner
+//     reference is returned.
+//   - If svc already lists this operator as an owner, its existing Comment is
+//     returned unchanged.
+//   - If svc lists other owners only, this operator's owner reference is
+//     appended to them.
+//   - If svc exists but has no comment or no owner references, an error is
+//     returned, as this likely means that the VIPService was created by
+//     something other than a Tailscale Kubernetes operator.
+func (r *HAIngressReconciler) ownerRefsComment(svc *tailscale.VIPService) (string, error) {
+	self := OwnerRef{OperatorID: r.operatorID}
+
+	// No VIPService yet: start a fresh comment owned by this operator only.
+	if svc == nil {
+		encoded, err := json.Marshal(&comment{OwnerRefs: []OwnerRef{self}})
+		if err != nil {
+			return "", fmt.Errorf("[unexpected] unable to marshal VIPService comment contents: %w, please report this", err)
+		}
+		return string(encoded), nil
+	}
+
+	existing, err := parseComment(svc)
+	if err != nil {
+		return "", fmt.Errorf("error parsing existing VIPService comment: %w", err)
+	}
+	if existing == nil || len(existing.OwnerRefs) == 0 {
+		return "", fmt.Errorf("VIPService %s exists, but does not contain Comment field with owner references- not proceeding as this is likely a resource created by something other than a Tailscale Kubernetes Operator", svc.Name)
+	}
+
+	// Already an owner: keep the stored comment as is.
+	if slices.Contains(existing.OwnerRefs, self) {
+		return svc.Comment, nil
+	}
+
+	existing.OwnerRefs = append(existing.OwnerRefs, self)
+	encoded, err := json.Marshal(existing)
+	if err != nil {
+		return "", fmt.Errorf("error marshalling updated owner references: %w", err)
+	}
+	return string(encoded), nil
+}
+
+// ensureCertResources creates or updates, in the operator namespace, the TLS
+// Secret for an HA Ingress together with the Role and RoleBinding that allow
+// proxies to manage that Secret. The objects are applied in the order Secret,
+// Role, RoleBinding and the first failure is returned.
+// Note that Tailscale VIPService name validation matches Kubernetes
+// resource name validation, so we can be certain that the VIPService name
+// (domain) is a valid Kubernetes resource name.
+// https://github.com/tailscale/tailscale/blob/8b1e7f646ee4730ad06c9b70c13e7861b964949b/util/dnsname/dnsname.go#L99
+// https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names
+func (r *HAIngressReconciler) ensureCertResources(ctx context.Context, pgName, domain string) error {
+	ns := r.tsNamespace
+
+	tlsSecret := certSecret(pgName, ns, domain)
+	if _, err := createOrUpdate(ctx, r.Client, ns, tlsSecret, nil); err != nil {
+		return fmt.Errorf("failed to create or update Secret %s: %w", tlsSecret.Name, err)
+	}
+
+	secretRole := certSecretRole(pgName, ns, domain)
+	if _, err := createOrUpdate(ctx, r.Client, ns, secretRole, nil); err != nil {
+		return fmt.Errorf("failed to create or update Role %s: %w", secretRole.Name, err)
+	}
+
+	binding := certSecretRoleBinding(pgName, ns, domain)
+	if _, err := createOrUpdate(ctx, r.Client, ns, binding, nil); err != nil {
+		return fmt.Errorf("failed to create or update RoleBinding %s: %w", binding.Name, err)
+	}
+
+	return nil
+}
+
+// cleanupCertResources deletes the TLS Secret for the given VIPService and
+// the Role and RoleBinding that allow proxies to read/write it. The objects
+// are selected in the operator namespace by the cert resource labels for the
+// ProxyGroup and the VIPService's DNS name, and are deleted in the order
+// RoleBinding, Role, Secret.
+func (r *HAIngressReconciler) cleanupCertResources(ctx context.Context, pgName string, name tailcfg.ServiceName) error {
+	domainName, err := r.dnsNameForService(ctx, name)
+	if err != nil {
+		return fmt.Errorf("error getting DNS name for VIPService %s: %w", name, err)
+	}
+
+	// The same selector is used for all three resource kinds.
+	selector := []client.DeleteAllOfOption{
+		client.InNamespace(r.tsNamespace),
+		client.MatchingLabels(certResourceLabels(pgName, domainName)),
+	}
+
+	if err := r.DeleteAllOf(ctx, &rbacv1.RoleBinding{}, selector...); err != nil {
+		return fmt.Errorf("error deleting RoleBinding for domain name %s: %w", domainName, err)
+	}
+	if err := r.DeleteAllOf(ctx, &rbacv1.Role{}, selector...); err != nil {
+		return fmt.Errorf("error deleting Role for domain name %s: %w", domainName, err)
+	}
+	if err := r.DeleteAllOf(ctx, &corev1.Secret{}, selector...); err != nil {
+		return fmt.Errorf("error deleting Secret for domain name %s: %w", domainName, err)
+	}
+	return nil
+}
+
+// parseComment decodes the JSON stored in the Comment field of the given
+// VIPService. It returns (nil, nil) if vipSvc is nil or its Comment is empty,
+// and an error if the Comment cannot be unmarshalled into a comment.
+func parseComment(vipSvc *tailscale.VIPService) (*comment, error) {
+	// Treat a missing VIPService the same as one without a comment instead
+	// of dereferencing a nil pointer.
+	if vipSvc == nil || vipSvc.Comment == "" {
+		return nil, nil
+	}
+	c := &comment{}
+	if err := json.Unmarshal([]byte(vipSvc.Comment), c); err != nil {
+		return nil, fmt.Errorf("error parsing VIPService Comment field %q: %w", vipSvc.Comment, err)
+	}
+	return c, nil
+}
+
+// requeueInterval returns a time duration between 5 and 10 minutes, which is
+// the period of time after which an HA Ingress, whose VIPService has been newly
+// created or changed, needs to be requeued. This is to protect against
+// VIPService owner references being overwritten as a result of concurrent
+// updates during multi-clutster Ingress create/update operations.
+func requeueInterval() time.Duration {
+ return time.Duration(rand.N(5)+5) * time.Minute
+}
+
+// certSecretRole builds the Role that grants get, list, patch and update on
+// the single Secret named after the given domain, so that proxies can manage
+// the TLS Secret for that domain. Domain must be a valid Kubernetes resource
+// name, as it is used as the name of both the Role and the Secret.
+func certSecretRole(pgName, namespace, domain string) *rbacv1.Role {
+	meta := metav1.ObjectMeta{
+		Name:      domain,
+		Namespace: namespace,
+		Labels:    certResourceLabels(pgName, domain),
+	}
+	// Access is restricted to the one Secret holding this domain's cert.
+	secretAccess := rbacv1.PolicyRule{
+		APIGroups:     []string{""},
+		Resources:     []string{"secrets"},
+		ResourceNames: []string{domain},
+		Verbs:         []string{"get", "list", "patch", "update"},
+	}
+	return &rbacv1.Role{
+		ObjectMeta: meta,
+		Rules:      []rbacv1.PolicyRule{secretAccess},
+	}
+}
+
+// certSecretRoleBinding builds the RoleBinding that binds the Role named
+// after the given domain to the ServiceAccount named pgName in the given
+// namespace, so that proxies can manage the TLS Secret for that domain.
+// Domain must be a valid Kubernetes resource name.
+func certSecretRoleBinding(pgName, namespace, domain string) *rbacv1.RoleBinding {
+	proxySA := rbacv1.Subject{
+		Kind:      "ServiceAccount",
+		Name:      pgName,
+		Namespace: namespace,
+	}
+	binding := &rbacv1.RoleBinding{
+		Subjects: []rbacv1.Subject{proxySA},
+		RoleRef: rbacv1.RoleRef{
+			Kind: "Role",
+			Name: domain,
+		},
+	}
+	binding.ObjectMeta = metav1.ObjectMeta{
+		Name:      domain,
+		Namespace: namespace,
+		Labels:    certResourceLabels(pgName, domain),
+	}
+	return binding
+}
+
+// certSecret builds the kubernetes.io/tls Secret, named after the given
+// domain, that will store the TLS certificate and private key for that
+// domain. Both data keys are present but empty. On top of the common cert
+// resource labels the Secret carries the secret type label set to "certs".
+// Domain must be a valid Kubernetes resource name.
+func certSecret(pgName, namespace, domain string) *corev1.Secret {
+	secretLabels := certResourceLabels(pgName, domain)
+	secretLabels[kubetypes.LabelSecretType] = "certs"
+
+	tlsSecret := &corev1.Secret{Type: corev1.SecretTypeTLS}
+	tlsSecret.TypeMeta = metav1.TypeMeta{
+		APIVersion: "v1",
+		Kind:       "Secret",
+	}
+	tlsSecret.ObjectMeta = metav1.ObjectMeta{
+		Name:      domain,
+		Namespace: namespace,
+		Labels:    secretLabels,
+	}
+	// Placeholders only; the cert and key are not populated here.
+	tlsSecret.Data = map[string][]byte{
+		corev1.TLSCertKey:       nil,
+		corev1.TLSPrivateKeyKey: nil,
+	}
+	return tlsSecret
+}
+
+// certResourceLabels returns a fresh label set shared by the cert Secret,
+// Role and RoleBinding of a domain: the managed label plus the ProxyGroup
+// name and the domain.
+func certResourceLabels(pgName, domain string) map[string]string {
+	labels := make(map[string]string, 3)
+	labels[kubetypes.LabelManaged] = "true"
+	labels["tailscale.com/proxy-group"] = pgName
+	labels["tailscale.com/domain"] = domain
+	return labels
+}
+
+// dnsNameForService returns the DNS name for the given VIPService name: the
+// service name without its prefix, followed by "." and the tailnet's cert
+// domain. An error is returned if the cert domain cannot be determined.
+func (r *HAIngressReconciler) dnsNameForService(ctx context.Context, svc tailcfg.ServiceName) (string, error) {
+	baseDomain, err := r.tailnetCertDomain(ctx)
+	if err != nil {
+		return "", fmt.Errorf("error determining DNS name base: %w", err)
+	}
+	host := svc.WithoutPrefix()
+	return host + "." + baseDomain, nil
+}
diff --git a/cmd/k8s-operator/ingress-for-pg_test.go b/cmd/k8s-operator/ingress-for-pg_test.go
index c432eb7e1..5716c0bbf 100644
--- a/cmd/k8s-operator/ingress-for-pg_test.go
+++ b/cmd/k8s-operator/ingress-for-pg_test.go
@@ -8,7 +8,10 @@ package main
import (
"context"
"encoding/json"
+ "errors"
+ "fmt"
"maps"
+ "net/http"
"reflect"
"testing"
@@ -17,13 +20,16 @@ import (
"go.uber.org/zap"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
+ rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
+ "tailscale.com/internal/client/tailscale"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
+ tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/tailcfg"
"tailscale.com/types/ptr"
@@ -63,6 +69,12 @@ func TestIngressPGReconciler(t *testing.T) {
expectReconciled(t, ingPGR, "default", "test-ingress")
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyVIPService(t, ft, "svc:my-svc", []string{"443"})
+ verifyTailscaledConfig(t, fc, []string{"svc:my-svc"})
+
+ // Verify cert resources were created for the first Ingress
+ expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-svc.ts.net"))
+ expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-svc.ts.net"))
+ expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-svc.ts.net"))
mustUpdate(t, fc, "default", "test-ingress", func(ing *networkingv1.Ingress) {
ing.Annotations["tailscale.com/tags"] = "tag:custom,tag:test"
@@ -118,10 +130,17 @@ func TestIngressPGReconciler(t *testing.T) {
verifyServeConfig(t, fc, "svc:my-other-svc", false)
verifyVIPService(t, ft, "svc:my-other-svc", []string{"443"})
+ // Verify cert resources were created for the second Ingress
+ expectEqual(t, fc, certSecret("test-pg", "operator-ns", "my-other-svc.ts.net"))
+ expectEqual(t, fc, certSecretRole("test-pg", "operator-ns", "my-other-svc.ts.net"))
+ expectEqual(t, fc, certSecretRoleBinding("test-pg", "operator-ns", "my-other-svc.ts.net"))
+
// Verify first Ingress is still working
verifyServeConfig(t, fc, "svc:my-svc", false)
verifyVIPService(t, ft, "svc:my-svc", []string{"443"})
+ verifyTailscaledConfig(t, fc, []string{"svc:my-svc", "svc:my-other-svc"})
+
// Delete second Ingress
if err := fc.Delete(context.Background(), ing2); err != nil {
t.Fatalf("deleting second Ingress: %v", err)
@@ -151,6 +170,11 @@ func TestIngressPGReconciler(t *testing.T) {
t.Error("second Ingress service config was not cleaned up")
}
+ verifyTailscaledConfig(t, fc, []string{"svc:my-svc"})
+ expectMissing[corev1.Secret](t, fc, "operator-ns", "my-other-svc.ts.net")
+ expectMissing[rbacv1.Role](t, fc, "operator-ns", "my-other-svc.ts.net")
+ expectMissing[rbacv1.RoleBinding](t, fc, "operator-ns", "my-other-svc.ts.net")
+
// Delete the first Ingress and verify cleanup
if err := fc.Delete(context.Background(), ing); err != nil {
t.Fatalf("deleting Ingress: %v", err)
@@ -175,6 +199,67 @@ func TestIngressPGReconciler(t *testing.T) {
if len(cfg.Services) > 0 {
t.Error("serve config not cleaned up")
}
+ verifyTailscaledConfig(t, fc, nil)
+
+ // Add verification that cert resources were cleaned up
+ expectMissing[corev1.Secret](t, fc, "operator-ns", "my-svc.ts.net")
+ expectMissing[rbacv1.Role](t, fc, "operator-ns", "my-svc.ts.net")
+ expectMissing[rbacv1.RoleBinding](t, fc, "operator-ns", "my-svc.ts.net")
+}
+
+// TestIngressPGReconciler_UpdateIngressHostname checks that changing the TLS
+// host of an HA Ingress sets up the VIPService, serve config and tailscaled
+// config for the new hostname and removes the VIPService of the old hostname.
+func TestIngressPGReconciler_UpdateIngressHostname(t *testing.T) {
+ ingPGR, fc, ft := setupIngressTest(t)
+
+ // Ingress exposed on ProxyGroup "test-pg" with TLS host "my-svc".
+ ing := &networkingv1.Ingress{
+ TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"},
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-ingress",
+ Namespace: "default",
+ UID: types.UID("1234-UID"),
+ Annotations: map[string]string{
+ "tailscale.com/proxy-group": "test-pg",
+ },
+ },
+ Spec: networkingv1.IngressSpec{
+ IngressClassName: ptr.To("tailscale"),
+ DefaultBackend: &networkingv1.IngressBackend{
+ Service: &networkingv1.IngressServiceBackend{
+ Name: "test",
+ Port: networkingv1.ServiceBackendPort{
+ Number: 8080,
+ },
+ },
+ },
+ TLS: []networkingv1.IngressTLS{
+ {Hosts: []string{"my-svc.tailnetxyz.ts.net"}},
+ },
+ },
+ }
+ mustCreate(t, fc, ing)
+
+ // Verify initial reconciliation
+ expectReconciled(t, ingPGR, "default", "test-ingress")
+ verifyServeConfig(t, fc, "svc:my-svc", false)
+ verifyVIPService(t, ft, "svc:my-svc", []string{"443"})
+ verifyTailscaledConfig(t, fc, []string{"svc:my-svc"})
+
+ // Update the Ingress hostname and make sure the original VIPService is deleted.
+ mustUpdate(t, fc, "default", "test-ingress", func(ing *networkingv1.Ingress) {
+ ing.Spec.TLS[0].Hosts[0] = "updated-svc.tailnetxyz.ts.net"
+ })
+ expectReconciled(t, ingPGR, "default", "test-ingress")
+ verifyServeConfig(t, fc, "svc:updated-svc", false)
+ verifyVIPService(t, ft, "svc:updated-svc", []string{"443"})
+ verifyTailscaledConfig(t, fc, []string{"svc:updated-svc"})
+
+ // Looking up the old VIPService must now fail with a 404 ErrResponse;
+ // success or any other error fails the test.
+ _, err := ft.GetVIPService(context.Background(), tailcfg.ServiceName("svc:my-svc"))
+ if err == nil {
+ t.Fatalf("svc:my-svc not cleaned up")
+ }
+ var errResp *tailscale.ErrResponse
+ if !errors.As(err, &errResp) || errResp.Status != http.StatusNotFound {
+ t.Fatalf("unexpected error: %v", err)
+ }
+}
func TestValidateIngress(t *testing.T) {
@@ -182,6 +267,15 @@ func TestValidateIngress(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Name: "test-ingress",
Namespace: "default",
+ Annotations: map[string]string{
+ AnnotationProxyGroup: "test-pg",
+ },
+ },
+ Spec: networkingv1.IngressSpec{
+ IngressClassName: ptr.To("tailscale"),
+ TLS: []networkingv1.IngressTLS{
+ {Hosts: []string{"test"}},
+ },
},
}
@@ -205,10 +299,11 @@ func TestValidateIngress(t *testing.T) {
}
tests := []struct {
- name string
- ing *networkingv1.Ingress
- pg *tsapi.ProxyGroup
- wantErr string
+ name string
+ ing *networkingv1.Ingress
+ pg *tsapi.ProxyGroup
+ existingIngs []networkingv1.Ingress
+ wantErr string
}{
{
name: "valid_ingress_with_hostname",
@@ -298,12 +393,38 @@ func TestValidateIngress(t *testing.T) {
},
wantErr: "ProxyGroup \"test-pg\" is not ready",
},
+ {
+ name: "duplicate_hostname",
+ ing: baseIngress,
+ pg: readyProxyGroup,
+ existingIngs: []networkingv1.Ingress{{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "existing-ingress",
+ Namespace: "default",
+ Annotations: map[string]string{
+ AnnotationProxyGroup: "test-pg",
+ },
+ },
+ Spec: networkingv1.IngressSpec{
+ IngressClassName: ptr.To("tailscale"),
+ TLS: []networkingv1.IngressTLS{
+ {Hosts: []string{"test"}},
+ },
+ },
+ }},
+ wantErr: `found duplicate Ingress "existing-ingress" for hostname "test" - multiple Ingresses for the same hostname in the same cluster are not allowed`,
+ },
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- r := &IngressPGReconciler{}
- err := r.validateIngress(tt.ing, tt.pg)
+ fc := fake.NewClientBuilder().
+ WithScheme(tsapi.GlobalScheme).
+ WithObjects(tt.ing).
+ WithLists(&networkingv1.IngressList{Items: tt.existingIngs}).
+ Build()
+ r := &HAIngressReconciler{Client: fc}
+ err := r.validateIngress(context.Background(), tt.ing, tt.pg)
if (err == nil && tt.wantErr != "") || (err != nil && err.Error() != tt.wantErr) {
t.Errorf("validateIngress() error = %v, wantErr %v", err, tt.wantErr)
}
@@ -359,6 +480,31 @@ func TestIngressPGReconciler_HTTPEndpoint(t *testing.T) {
t.Fatal(err)
}
+ // Status will be empty until the VIPService shows up in prefs.
+ if !reflect.DeepEqual(ing.Status.LoadBalancer.Ingress, []networkingv1.IngressLoadBalancerIngress(nil)) {
+ t.Errorf("incorrect Ingress status: got %v, want empty",
+ ing.Status.LoadBalancer.Ingress)
+ }
+
+ // Add the VIPService to prefs to have the Ingress recognised as ready.
+ mustCreate(t, fc, &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-pg-0",
+ Namespace: "operator-ns",
+ Labels: pgSecretLabels("test-pg", "state"),
+ },
+ Data: map[string][]byte{
+ "_current-profile": []byte("profile-foo"),
+ "profile-foo": []byte(`{"AdvertiseServices":["svc:my-svc"],"Config":{"NodeID":"node-foo"}}`),
+ },
+ })
+
+ // Reconcile and re-fetch Ingress.
+ expectReconciled(t, ingPGR, "default", "test-ingress")
+ if err := fc.Get(context.Background(), client.ObjectKeyFromObject(ing), ing); err != nil {
+ t.Fatal(err)
+ }
+
wantStatus := []networkingv1.IngressPortStatus{
{Port: 443, Protocol: "TCP"},
{Port: 80, Protocol: "TCP"},
@@ -464,8 +610,28 @@ func verifyServeConfig(t *testing.T, fc client.Client, serviceName string, wantH
}
}
-func setupIngressTest(t *testing.T) (*IngressPGReconciler, client.Client, *fakeTSClient) {
- t.Helper()
+// verifyTailscaledConfig asserts that the config Secret for replica 0 of the
+// "test-pg" ProxyGroup in the "operator-ns" namespace contains, under the
+// capability version 106 config key, a tailscaled config with an empty Version
+// and AdvertiseServices equal to expectedServices. A nil expectedServices means
+// the AdvertiseServices field is expected to be absent from the config.
+func verifyTailscaledConfig(t *testing.T, fc client.Client, expectedServices []string) {
+	// Mark as a helper so that failures are reported at the caller's line.
+	t.Helper()
+	var expected string
+	if expectedServices != nil {
+		expectedServicesJSON, err := json.Marshal(expectedServices)
+		if err != nil {
+			t.Fatalf("marshaling expected services: %v", err)
+		}
+		// Spliced into the JSON object below, hence the leading comma.
+		expected = fmt.Sprintf(`,"AdvertiseServices":%s`, expectedServicesJSON)
+	}
+	expectEqual(t, fc, &corev1.Secret{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      pgConfigSecretName("test-pg", 0),
+			Namespace: "operator-ns",
+			Labels:    pgSecretLabels("test-pg", "config"),
+		},
+		Data: map[string][]byte{
+			tsoperator.TailscaledConfigFileName(106): []byte(fmt.Sprintf(`{"Version":""%s}`, expected)),
+		},
+	})
+}
+
+func setupIngressTest(t *testing.T) (*HAIngressReconciler, client.Client, *fakeTSClient) {
tsIngressClass := &networkingv1.IngressClass{
ObjectMeta: metav1.ObjectMeta{Name: "tailscale"},
@@ -494,9 +660,21 @@ func setupIngressTest(t *testing.T) (*IngressPGReconciler, client.Client, *fakeT
},
}
+ // Pre-create a config Secret for the ProxyGroup
+ pgCfgSecret := &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: pgConfigSecretName("test-pg", 0),
+ Namespace: "operator-ns",
+ Labels: pgSecretLabels("test-pg", "config"),
+ },
+ Data: map[string][]byte{
+ tsoperator.TailscaledConfigFileName(106): []byte("{}"),
+ },
+ }
+
fc := fake.NewClientBuilder().
WithScheme(tsapi.GlobalScheme).
- WithObjects(pg, pgConfigMap, tsIngressClass).
+ WithObjects(pg, pgCfgSecret, pgConfigMap, tsIngressClass).
WithStatusSubresource(pg).
Build()
@@ -511,9 +689,9 @@ func setupIngressTest(t *testing.T) (*IngressPGReconciler, client.Client, *fakeT
if err := fc.Status().Update(context.Background(), pg); err != nil {
t.Fatal(err)
}
+ fakeTsnetServer := &fakeTSNetServer{certDomains: []string{"foo.com"}}
ft := &fakeTSClient{}
- fakeTsnetServer := &fakeTSNetServer{certDomains: []string{"foo.com"}}
zl, err := zap.NewDevelopment()
if err != nil {
t.Fatal(err)
@@ -527,12 +705,12 @@ func setupIngressTest(t *testing.T) (*IngressPGReconciler, client.Client, *fakeT
},
}
- ingPGR := &IngressPGReconciler{
+ ingPGR := &HAIngressReconciler{
Client: fc,
tsClient: ft,
- tsnetServer: fakeTsnetServer,
defaultTags: []string{"tag:k8s"},
tsNamespace: "operator-ns",
+ tsnetServer: fakeTsnetServer,
logger: zl.Sugar(),
recorder: record.NewFakeRecorder(10),
lc: lc,
@@ -540,3 +718,87 @@ func setupIngressTest(t *testing.T) (*IngressPGReconciler, client.Client, *fakeT
return ingPGR, fc, ft
}
+
+// TestIngressPGReconciler_MultiCluster checks how the reconciler treats a
+// VIPService that already carries an owner reference from a different operator.
+// The fake Tailscale client is seeded with "svc:my-svc" whose comment lists
+// "operator-2" as owner; after reconciling an Ingress for hostname "my-svc" as
+// "operator-1" the comment must list both operators, and after the Ingress is
+// deleted the VIPService must still exist with only "operator-2" left.
+func TestIngressPGReconciler_MultiCluster(t *testing.T) {
+ ingPGR, fc, ft := setupIngressTest(t)
+ ingPGR.operatorID = "operator-1"
+
+ // Create initial Ingress
+ ing := &networkingv1.Ingress{
+ TypeMeta: metav1.TypeMeta{Kind: "Ingress", APIVersion: "networking.k8s.io/v1"},
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-ingress",
+ Namespace: "default",
+ UID: types.UID("1234-UID"),
+ Annotations: map[string]string{
+ "tailscale.com/proxy-group": "test-pg",
+ },
+ },
+ Spec: networkingv1.IngressSpec{
+ IngressClassName: ptr.To("tailscale"),
+ TLS: []networkingv1.IngressTLS{
+ {Hosts: []string{"my-svc"}},
+ },
+ },
+ }
+ mustCreate(t, fc, ing)
+
+ // Simulate existing VIPService from another cluster
+ existingVIPSvc := &tailscale.VIPService{
+ Name: "svc:my-svc",
+ Comment: `{"ownerrefs":[{"operatorID":"operator-2"}]}`,
+ }
+ ft.vipServices = map[tailcfg.ServiceName]*tailscale.VIPService{
+ "svc:my-svc": existingVIPSvc,
+ }
+
+ // Verify reconciliation adds our operator reference
+ expectReconciled(t, ingPGR, "default", "test-ingress")
+
+ vipSvc, err := ft.GetVIPService(context.Background(), "svc:my-svc")
+ if err != nil {
+ t.Fatalf("getting VIPService: %v", err)
+ }
+ if vipSvc == nil {
+ t.Fatal("VIPService not found")
+ }
+
+ c := &comment{}
+ if err := json.Unmarshal([]byte(vipSvc.Comment), c); err != nil {
+ t.Fatalf("parsing comment: %v", err)
+ }
+
+ // The pre-existing owner is expected first, followed by this operator.
+ wantOwnerRefs := []OwnerRef{
+ {OperatorID: "operator-2"},
+ {OperatorID: "operator-1"},
+ }
+ if !reflect.DeepEqual(c.OwnerRefs, wantOwnerRefs) {
+ t.Errorf("incorrect owner refs\ngot: %+v\nwant: %+v", c.OwnerRefs, wantOwnerRefs)
+ }
+
+ // Delete the Ingress and verify VIPService still exists with one owner ref
+ if err := fc.Delete(context.Background(), ing); err != nil {
+ t.Fatalf("deleting Ingress: %v", err)
+ }
+ expectRequeue(t, ingPGR, "default", "test-ingress")
+
+ vipSvc, err = ft.GetVIPService(context.Background(), "svc:my-svc")
+ if err != nil {
+ t.Fatalf("getting VIPService after deletion: %v", err)
+ }
+ if vipSvc == nil {
+ t.Fatal("VIPService was incorrectly deleted")
+ }
+
+ c = &comment{}
+ if err := json.Unmarshal([]byte(vipSvc.Comment), c); err != nil {
+ t.Fatalf("parsing comment after deletion: %v", err)
+ }
+
+ wantOwnerRefs = []OwnerRef{
+ {OperatorID: "operator-2"},
+ }
+ if !reflect.DeepEqual(c.OwnerRefs, wantOwnerRefs) {
+ t.Errorf("incorrect owner refs after deletion\ngot: %+v\nwant: %+v", c.OwnerRefs, wantOwnerRefs)
+ }
+}
diff --git a/cmd/k8s-operator/ingress.go b/cmd/k8s-operator/ingress.go
index 7cadaecc4..8c19a5e05 100644
--- a/cmd/k8s-operator/ingress.go
+++ b/cmd/k8s-operator/ingress.go
@@ -73,6 +73,7 @@ func (a *IngressReconciler) Reconcile(ctx context.Context, req reconcile.Request
return reconcile.Result{}, fmt.Errorf("failed to get ing: %w", err)
}
if !ing.DeletionTimestamp.IsZero() || !a.shouldExpose(ing) {
+ // TODO(irbekrm): this message is confusing if the Ingress is an HA Ingress
logger.Debugf("ingress is being deleted or should not be exposed, cleaning up")
return reconcile.Result{}, a.maybeCleanup(ctx, logger, ing)
}
diff --git a/cmd/k8s-operator/metrics_resources.go b/cmd/k8s-operator/metrics_resources.go
index 8516cf8be..0579e3466 100644
--- a/cmd/k8s-operator/metrics_resources.go
+++ b/cmd/k8s-operator/metrics_resources.go
@@ -19,6 +19,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+ "tailscale.com/kube/kubetypes"
)
const (
@@ -222,7 +223,7 @@ func metricsResourceName(stsName string) string {
// proxy.
func metricsResourceLabels(opts *metricsOpts) map[string]string {
lbls := map[string]string{
- LabelManaged: "true",
+ kubetypes.LabelManaged: "true",
labelMetricsTarget: opts.proxyStsName,
labelPromProxyType: opts.proxyType,
labelPromProxyParentName: opts.proxyLabels[LabelParentName],
diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go
index 37e37a96e..b0f0b3576 100644
--- a/cmd/k8s-operator/operator.go
+++ b/cmd/k8s-operator/operator.go
@@ -9,6 +9,7 @@ package main
import (
"context"
+ "fmt"
"net/http"
"os"
"regexp"
@@ -39,6 +40,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
+ "tailscale.com/client/local"
"tailscale.com/client/tailscale"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
@@ -335,14 +337,19 @@ func runReconcilers(opts reconcilerOpts) {
if err != nil {
startlog.Fatalf("could not get local client: %v", err)
}
+ id, err := id(context.Background(), lc)
+ if err != nil {
+ startlog.Fatalf("error determining stable ID of the operator's Tailscale device: %v", err)
+ }
ingressProxyGroupFilter := handler.EnqueueRequestsFromMapFunc(ingressesFromIngressProxyGroup(mgr.GetClient(), opts.log))
err = builder.
ControllerManagedBy(mgr).
For(&networkingv1.Ingress{}).
Named("ingress-pg-reconciler").
Watches(&corev1.Service{}, handler.EnqueueRequestsFromMapFunc(serviceHandlerForIngressPG(mgr.GetClient(), startlog))).
+ Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(ingressesFromPGStateSecret(mgr.GetClient(), startlog))).
Watches(&tsapi.ProxyGroup{}, ingressProxyGroupFilter).
- Complete(&IngressPGReconciler{
+ Complete(&HAIngressReconciler{
recorder: eventRecorder,
tsClient: opts.tsClient,
tsnetServer: opts.tsServer,
@@ -350,6 +357,7 @@ func runReconcilers(opts reconcilerOpts) {
Client: mgr.GetClient(),
logger: opts.log.Named("ingress-pg-reconciler"),
lc: lc,
+ operatorID: id,
tsNamespace: opts.tailscaleNamespace,
})
if err != nil {
@@ -629,8 +637,8 @@ func enqueueAllIngressEgressProxySvcsInNS(ns string, cl client.Client, logger *z
// Get all headless Services for proxies configured using Service.
svcProxyLabels := map[string]string{
- LabelManaged: "true",
- LabelParentType: "svc",
+ kubetypes.LabelManaged: "true",
+ LabelParentType: "svc",
}
svcHeadlessSvcList := &corev1.ServiceList{}
if err := cl.List(ctx, svcHeadlessSvcList, client.InNamespace(ns), client.MatchingLabels(svcProxyLabels)); err != nil {
@@ -643,8 +651,8 @@ func enqueueAllIngressEgressProxySvcsInNS(ns string, cl client.Client, logger *z
// Get all headless Services for proxies configured using Ingress.
ingProxyLabels := map[string]string{
- LabelManaged: "true",
- LabelParentType: "ingress",
+ kubetypes.LabelManaged: "true",
+ LabelParentType: "ingress",
}
ingHeadlessSvcList := &corev1.ServiceList{}
if err := cl.List(ctx, ingHeadlessSvcList, client.InNamespace(ns), client.MatchingLabels(ingProxyLabels)); err != nil {
@@ -711,7 +719,7 @@ func dnsRecordsReconcilerIngressHandler(ns string, isDefaultLoadBalancer bool, c
func isManagedResource(o client.Object) bool {
ls := o.GetLabels()
- return ls[LabelManaged] == "true"
+ return ls[kubetypes.LabelManaged] == "true"
}
func isManagedByType(o client.Object, typ string) bool {
@@ -948,7 +956,7 @@ func egressPodsHandler(_ context.Context, o client.Object) []reconcile.Request {
// returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
return func(_ context.Context, o client.Object) []reconcile.Request {
- if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
+ if v, ok := o.GetLabels()[kubetypes.LabelManaged]; !ok || v != "true" {
return nil
}
// TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
@@ -968,15 +976,13 @@ func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
// returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
func egressEpsFromPGStateSecrets(cl client.Client, ns string) handler.MapFunc {
return func(_ context.Context, o client.Object) []reconcile.Request {
- if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
+ if v, ok := o.GetLabels()[kubetypes.LabelManaged]; !ok || v != "true" {
return nil
}
- // TODO(irbekrm): for now this is good enough as all ProxyGroups are egress. Add a type check once we
- // have ingress ProxyGroups.
if parentType := o.GetLabels()[LabelParentType]; parentType != "proxygroup" {
return nil
}
- if secretType := o.GetLabels()[labelSecretType]; secretType != "state" {
+ if secretType := o.GetLabels()[kubetypes.LabelSecretType]; secretType != "state" {
return nil
}
pg, ok := o.GetLabels()[LabelParentName]
@@ -993,7 +999,7 @@ func egressSvcFromEps(_ context.Context, o client.Object) []reconcile.Request {
if typ := o.GetLabels()[labelSvcType]; typ != typeEgress {
return nil
}
- if v, ok := o.GetLabels()[LabelManaged]; !ok || v != "true" {
+ if v, ok := o.GetLabels()[kubetypes.LabelManaged]; !ok || v != "true" {
return nil
}
svcName, ok := o.GetLabels()[LabelParentName]
@@ -1033,6 +1039,45 @@ func reconcileRequestsForPG(pg string, cl client.Client, ns string) []reconcile.
return reqs
}
+// ingressesFromPGStateSecret returns an event handler for Secrets. For a Secret
+// that is labelled as a managed ProxyGroup state Secret, it returns reconcile
+// requests for every Ingress that the indexIngressProxyGroup field index
+// associates with the ProxyGroup named in the Secret's parent-name label.
+// Any other object, or a failure to list Ingresses, results in no requests.
+func ingressesFromPGStateSecret(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
+	return func(ctx context.Context, o client.Object) []reconcile.Request {
+		secret, ok := o.(*corev1.Secret)
+		if !ok {
+			logger.Infof("[unexpected] Secret handler triggered for an object that is not a Secret")
+			return nil
+		}
+		// Only state Secrets of managed ProxyGroup proxies are relevant.
+		if secret.ObjectMeta.Labels[kubetypes.LabelManaged] != "true" {
+			return nil
+		}
+		if secret.ObjectMeta.Labels[LabelParentType] != "proxygroup" {
+			return nil
+		}
+		if secret.ObjectMeta.Labels[kubetypes.LabelSecretType] != "state" {
+			return nil
+		}
+		pgName, ok := secret.ObjectMeta.Labels[LabelParentName]
+		if !ok {
+			return nil
+		}
+
+		ingList := &networkingv1.IngressList{}
+		if err := cl.List(ctx, ingList, client.MatchingFields{indexIngressProxyGroup: pgName}); err != nil {
+			logger.Infof("error listing Ingresses, skipping a reconcile for event on Secret %s: %v", secret.Name, err)
+			return nil
+		}
+		// The number of requests is known up front, so allocate once.
+		reqs := make([]reconcile.Request, 0, len(ingList.Items))
+		for _, ing := range ingList.Items {
+			reqs = append(reqs, reconcile.Request{
+				NamespacedName: types.NamespacedName{
+					Namespace: ing.Namespace,
+					Name:      ing.Name,
+				},
+			})
+		}
+		return reqs
+	}
+}
+
// egressSvcsFromEgressProxyGroup is an event handler for egress ProxyGroups. It returns reconcile requests for all
// user-created ExternalName Services that should be exposed on this ProxyGroup.
func egressSvcsFromEgressProxyGroup(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {
@@ -1138,9 +1183,9 @@ func podsFromEgressEps(cl client.Client, logger *zap.SugaredLogger, ns string) h
return nil
}
podLabels := map[string]string{
- LabelManaged: "true",
- LabelParentType: "proxygroup",
- LabelParentName: eps.Labels[labelProxyGroup],
+ kubetypes.LabelManaged: "true",
+ LabelParentType: "proxygroup",
+ LabelParentName: eps.Labels[labelProxyGroup],
}
podList := &corev1.PodList{}
if err := cl.List(ctx, podList, client.InNamespace(ns),
@@ -1262,3 +1307,14 @@ func hasProxyGroupAnnotation(obj client.Object) bool {
ing := obj.(*networkingv1.Ingress)
return ing.Annotations[AnnotationProxyGroup] != ""
}
+
+// id queries the local Tailscale client for the device status (without peers)
+// and returns the ID of the device's own node as a string. It returns an error
+// if the status cannot be retrieved or carries no information about the node
+// itself.
+func id(ctx context.Context, lc *local.Client) (string, error) {
+	status, err := lc.StatusWithoutPeers(ctx)
+	switch {
+	case err != nil:
+		return "", fmt.Errorf("error getting tailscale status: %w", err)
+	case status.Self == nil:
+		return "", fmt.Errorf("unexpected: device's status does not contain node's metadata")
+	}
+	return string(status.Self.ID), nil
+}
diff --git a/cmd/k8s-operator/operator_test.go b/cmd/k8s-operator/operator_test.go
index 73c795bb3..175003ac7 100644
--- a/cmd/k8s-operator/operator_test.go
+++ b/cmd/k8s-operator/operator_test.go
@@ -1387,10 +1387,10 @@ func Test_serviceHandlerForIngress(t *testing.T) {
Name: "headless-1",
Namespace: "tailscale",
Labels: map[string]string{
- LabelManaged: "true",
- LabelParentName: "ing-1",
- LabelParentNamespace: "ns-1",
- LabelParentType: "ingress",
+ kubetypes.LabelManaged: "true",
+ LabelParentName: "ing-1",
+ LabelParentNamespace: "ns-1",
+ LabelParentType: "ingress",
},
},
}
diff --git a/cmd/k8s-operator/proxygroup.go b/cmd/k8s-operator/proxygroup.go
index 4b17d3470..112e5e2b0 100644
--- a/cmd/k8s-operator/proxygroup.go
+++ b/cmd/k8s-operator/proxygroup.go
@@ -452,7 +452,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
for i := range pgReplicas(pg) {
cfgSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
- Name: fmt.Sprintf("%s-%d-config", pg.Name, i),
+ Name: pgConfigSecretName(pg.Name, i),
Namespace: r.tsNamespace,
Labels: pgSecretLabels(pg.Name, "config"),
OwnerReferences: pgOwnerReference(pg),
@@ -461,7 +461,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
var existingCfgSecret *corev1.Secret // unmodified copy of secret
if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
- logger.Debugf("secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
+ logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
existingCfgSecret = cfgSecret.DeepCopy()
} else if !apierrors.IsNotFound(err) {
return "", err
@@ -469,7 +469,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
var authKey string
if existingCfgSecret == nil {
- logger.Debugf("creating authkey for new ProxyGroup proxy")
+ logger.Debugf("Creating authkey for new ProxyGroup proxy")
tags := pg.Spec.Tags.Stringify()
if len(tags) == 0 {
tags = r.defaultTags
@@ -490,7 +490,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
if err != nil {
return "", fmt.Errorf("error marshalling tailscaled config: %w", err)
}
- mak.Set(&cfgSecret.StringData, tsoperator.TailscaledConfigFileName(cap), string(cfgJSON))
+ mak.Set(&cfgSecret.Data, tsoperator.TailscaledConfigFileName(cap), cfgJSON)
}
// The config sha256 sum is a value for a hash annotation used to trigger
@@ -520,12 +520,14 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
}
if existingCfgSecret != nil {
- logger.Debugf("patching the existing ProxyGroup config Secret %s", cfgSecret.Name)
- if err := r.Patch(ctx, cfgSecret, client.MergeFrom(existingCfgSecret)); err != nil {
- return "", err
+ if !apiequality.Semantic.DeepEqual(existingCfgSecret, cfgSecret) {
+ logger.Debugf("Updating the existing ProxyGroup config Secret %s", cfgSecret.Name)
+ if err := r.Update(ctx, cfgSecret); err != nil {
+ return "", err
+ }
}
} else {
- logger.Debugf("creating a new config Secret %s for the ProxyGroup", cfgSecret.Name)
+ logger.Debugf("Creating a new config Secret %s for the ProxyGroup", cfgSecret.Name)
if err := r.Create(ctx, cfgSecret); err != nil {
return "", err
}
@@ -596,10 +598,35 @@ func pgTailscaledConfig(pg *tsapi.ProxyGroup, class *tsapi.ProxyClass, idx int32
conf.AuthKey = key
}
capVerConfigs := make(map[tailcfg.CapabilityVersion]ipn.ConfigVAlpha)
+
+ // AdvertiseServices config is set by ingress-pg-reconciler, so make sure we
+ // don't overwrite it here.
+ if err := copyAdvertiseServicesConfig(conf, oldSecret, 106); err != nil {
+ return nil, err
+ }
capVerConfigs[106] = *conf
return capVerConfigs, nil
}
+// copyAdvertiseServicesConfig copies the AdvertiseServices value from the
+// tailscaled config stored in oldSecret, under the config file key for capVer,
+// into conf. It is a no-op when oldSecret is nil or holds no data for that key,
+// and returns an error when the stored config cannot be unmarshalled.
+func copyAdvertiseServicesConfig(conf *ipn.ConfigVAlpha, oldSecret *corev1.Secret, capVer tailcfg.CapabilityVersion) error {
+	var previousRaw []byte
+	if oldSecret != nil {
+		previousRaw = oldSecret.Data[tsoperator.TailscaledConfigFileName(capVer)]
+	}
+	if len(previousRaw) == 0 {
+		// Nothing was stored previously, so there is nothing to carry over.
+		return nil
+	}
+
+	previous := ipn.ConfigVAlpha{}
+	if err := json.Unmarshal(previousRaw, &previous); err != nil {
+		return fmt.Errorf("error unmarshalling existing config: %w", err)
+	}
+
+	conf.AdvertiseServices = previous.AdvertiseServices
+	return nil
+}
+
func (r *ProxyGroupReconciler) validate(_ *tsapi.ProxyGroup) error {
return nil
}
@@ -620,7 +647,7 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
return nil, fmt.Errorf("unexpected secret %s was labelled as owned by the ProxyGroup %s: %w", secret.Name, pg.Name, err)
}
- id, dnsName, ok, err := getNodeMetadata(ctx, &secret)
+ prefs, ok, err := getDevicePrefs(&secret)
if err != nil {
return nil, err
}
@@ -631,8 +658,8 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
nm := nodeMetadata{
ordinal: ordinal,
stateSecret: &secret,
- tsID: id,
- dnsName: dnsName,
+ tsID: prefs.Config.NodeID,
+ dnsName: prefs.Config.UserProfile.LoginName,
}
pod := &corev1.Pod{}
if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: secret.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
diff --git a/cmd/k8s-operator/proxygroup_specs.go b/cmd/k8s-operator/proxygroup_specs.go
index 1ea91004b..16deea278 100644
--- a/cmd/k8s-operator/proxygroup_specs.go
+++ b/cmd/k8s-operator/proxygroup_specs.go
@@ -73,7 +73,7 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string
Name: fmt.Sprintf("tailscaledconfig-%d", i),
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
- SecretName: fmt.Sprintf("%s-%d-config", pg.Name, i),
+ SecretName: pgConfigSecretName(pg.Name, i),
},
},
})
@@ -178,7 +178,15 @@ func pgStatefulSet(pg *tsapi.ProxyGroup, namespace, image, tsFirewallMode string
corev1.EnvVar{
Name: "TS_SERVE_CONFIG",
Value: fmt.Sprintf("/etc/proxies/%s", serveConfigKey),
- })
+ },
+ corev1.EnvVar{
+ // Run proxies in cert share mode to
+ // ensure that only one TLS cert is
+ // issued for an HA Ingress.
+ Name: "TS_EXPERIMENTAL_CERT_SHARE",
+ Value: "true",
+ },
+ )
}
return append(c.Env, envs...)
}()
@@ -229,6 +237,13 @@ func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role {
APIGroups: []string{""},
Resources: []string{"secrets"},
Verbs: []string{
+ "list",
+ },
+ },
+ {
+ APIGroups: []string{""},
+ Resources: []string{"secrets"},
+ Verbs: []string{
"get",
"patch",
"update",
@@ -236,8 +251,8 @@ func pgRole(pg *tsapi.ProxyGroup, namespace string) *rbacv1.Role {
ResourceNames: func() (secrets []string) {
for i := range pgReplicas(pg) {
secrets = append(secrets,
- fmt.Sprintf("%s-%d-config", pg.Name, i), // Config with auth key.
- fmt.Sprintf("%s-%d", pg.Name, i), // State.
+ pgConfigSecretName(pg.Name, i), // Config with auth key.
+ fmt.Sprintf("%s-%d", pg.Name, i), // State.
)
}
return secrets
@@ -318,9 +333,9 @@ func pgIngressCM(pg *tsapi.ProxyGroup, namespace string) *corev1.ConfigMap {
}
}
-func pgSecretLabels(pgName, typ string) map[string]string {
+func pgSecretLabels(pgName, secretType string) map[string]string {
return pgLabels(pgName, map[string]string{
- labelSecretType: typ, // "config" or "state".
+ kubetypes.LabelSecretType: secretType, // "config" or "state".
})
}
@@ -330,7 +345,7 @@ func pgLabels(pgName string, customLabels map[string]string) map[string]string {
l[k] = v
}
- l[LabelManaged] = "true"
+ l[kubetypes.LabelManaged] = "true"
l[LabelParentType] = "proxygroup"
l[LabelParentName] = pgName
@@ -349,6 +364,10 @@ func pgReplicas(pg *tsapi.ProxyGroup) int32 {
return 2
}
+// pgConfigSecretName returns the name of the config Secret for the replica
+// with ordinal i of the ProxyGroup named pgName, in the form
+// "<pgName>-<i>-config".
+func pgConfigSecretName(pgName string, i int32) string {
+	const nameFormat = "%s-%d-config"
+	return fmt.Sprintf(nameFormat, pgName, i)
+}
+
func pgEgressCMName(pg string) string {
return fmt.Sprintf("%s-egress-config", pg)
}
diff --git a/cmd/k8s-operator/proxygroup_test.go b/cmd/k8s-operator/proxygroup_test.go
index 29100de1d..1f1a39ab0 100644
--- a/cmd/k8s-operator/proxygroup_test.go
+++ b/cmd/k8s-operator/proxygroup_test.go
@@ -24,6 +24,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"tailscale.com/client/tailscale"
+ "tailscale.com/ipn"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/kube/kubetypes"
@@ -246,7 +247,6 @@ func TestProxyGroup(t *testing.T) {
// The fake client does not clean up objects whose owner has been
// deleted, so we can't test for the owned resources getting deleted.
})
-
}
func TestProxyGroupTypes(t *testing.T) {
@@ -416,6 +416,7 @@ func TestProxyGroupTypes(t *testing.T) {
}
verifyEnvVar(t, sts, "TS_INTERNAL_APP", kubetypes.AppProxyGroupIngress)
verifyEnvVar(t, sts, "TS_SERVE_CONFIG", "/etc/proxies/serve-config.json")
+ verifyEnvVar(t, sts, "TS_EXPERIMENTAL_CERT_SHARE", "true")
// Verify ConfigMap volume mount
cmName := fmt.Sprintf("%s-ingress-config", pg.Name)
@@ -446,6 +447,77 @@ func TestProxyGroupTypes(t *testing.T) {
})
}
+// TestIngressAdvertiseServicesConfigPreserved checks that reconciling an
+// ingress ProxyGroup keeps the AdvertiseServices value already present in the
+// replica's config Secret while rewriting the other config fields. The Secret
+// is pre-created with AdvertiseServices ["svc1", "svc2"] and a placeholder
+// Version; after one reconcile the stored config must contain the same
+// AdvertiseServices alongside the Version, AcceptDNS, AcceptRoutes, Locked and
+// Hostname values listed in expectedConfigBytes below.
+func TestIngressAdvertiseServicesConfigPreserved(t *testing.T) {
+ fc := fake.NewClientBuilder().
+ WithScheme(tsapi.GlobalScheme).
+ Build()
+ reconciler := &ProxyGroupReconciler{
+ tsNamespace: tsNamespace,
+ proxyImage: testProxyImage,
+ Client: fc,
+ l: zap.Must(zap.NewDevelopment()).Sugar(),
+ tsClient: &fakeTSClient{},
+ clock: tstest.NewClock(tstest.ClockOpts{}),
+ }
+
+ existingServices := []string{"svc1", "svc2"}
+ existingConfigBytes, err := json.Marshal(ipn.ConfigVAlpha{
+ AdvertiseServices: existingServices,
+ Version: "should-get-overwritten",
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ const pgName = "test-ingress"
+ mustCreate(t, fc, &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: pgConfigSecretName(pgName, 0),
+ Namespace: tsNamespace,
+ },
+ Data: map[string][]byte{
+ tsoperator.TailscaledConfigFileName(106): existingConfigBytes,
+ },
+ })
+
+ mustCreate(t, fc, &tsapi.ProxyGroup{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: pgName,
+ UID: "test-ingress-uid",
+ },
+ Spec: tsapi.ProxyGroupSpec{
+ Type: tsapi.ProxyGroupTypeIngress,
+ Replicas: ptr.To[int32](1),
+ },
+ })
+ expectReconciled(t, reconciler, "", pgName)
+
+ expectedConfigBytes, err := json.Marshal(ipn.ConfigVAlpha{
+ // Preserved.
+ AdvertiseServices: existingServices,
+
+ // Everything else got updated in the reconcile:
+ Version: "alpha0",
+ AcceptDNS: "false",
+ AcceptRoutes: "false",
+ Locked: "false",
+ Hostname: ptr.To(fmt.Sprintf("%s-%d", pgName, 0)),
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ // ResourceVersion "2" is the version expected on the Secret after the
+ // reconcile above.
+ expectEqual(t, fc, &corev1.Secret{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: pgConfigSecretName(pgName, 0),
+ Namespace: tsNamespace,
+ ResourceVersion: "2",
+ },
+ Data: map[string][]byte{
+ tsoperator.TailscaledConfigFileName(106): expectedConfigBytes,
+ },
+ })
+}
+
func verifyProxyGroupCounts(t *testing.T, r *ProxyGroupReconciler, wantIngress, wantEgress int) {
t.Helper()
if r.ingressProxyGroups.Len() != wantIngress {
@@ -501,7 +573,7 @@ func expectProxyGroupResources(t *testing.T, fc client.WithWatch, pg *tsapi.Prox
for i := range pgReplicas(pg) {
expectedSecrets = append(expectedSecrets,
fmt.Sprintf("%s-%d", pg.Name, i),
- fmt.Sprintf("%s-%d-config", pg.Name, i),
+ pgConfigSecretName(pg.Name, i),
)
}
}
diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index 0bc9d6fb9..6327a073b 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -44,11 +44,9 @@ const (
// Labels that the operator sets on StatefulSets and Pods. If you add a
// new label here, do also add it to tailscaleManagedLabels var to
// ensure that it does not get overwritten by ProxyClass configuration.
- LabelManaged = "tailscale.com/managed"
LabelParentType = "tailscale.com/parent-resource-type"
LabelParentName = "tailscale.com/parent-resource"
LabelParentNamespace = "tailscale.com/parent-resource-ns"
- labelSecretType = "tailscale.com/secret-type" // "config" or "state".
// LabelProxyClass can be set by users on tailscale Ingresses and Services that define cluster ingress or
// cluster egress, to specify that configuration in this ProxyClass should be applied to resources created for
@@ -108,7 +106,7 @@ const (
var (
// tailscaleManagedLabels are label keys that tailscale operator sets on StatefulSets and Pods.
- tailscaleManagedLabels = []string{LabelManaged, LabelParentType, LabelParentName, LabelParentNamespace, "app"}
+ tailscaleManagedLabels = []string{kubetypes.LabelManaged, LabelParentType, LabelParentName, LabelParentNamespace, "app"}
// tailscaleManagedAnnotations are annotation keys that tailscale operator sets on StatefulSets and Pods.
tailscaleManagedAnnotations = []string{podAnnotationLastSetClusterIP, podAnnotationLastSetTailnetTargetIP, podAnnotationLastSetTailnetTargetFQDN, podAnnotationLastSetConfigFileHash}
)
diff --git a/cmd/k8s-operator/sts_test.go b/cmd/k8s-operator/sts_test.go
index 3d0cecc04..35c512c8c 100644
--- a/cmd/k8s-operator/sts_test.go
+++ b/cmd/k8s-operator/sts_test.go
@@ -21,6 +21,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/yaml"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+ "tailscale.com/kube/kubetypes"
"tailscale.com/types/ptr"
)
@@ -156,8 +157,8 @@ func Test_applyProxyClassToStatefulSet(t *testing.T) {
// Set a couple additional fields so we can test that we don't
// mistakenly override those.
labels := map[string]string{
- LabelManaged: "true",
- LabelParentName: "foo",
+ kubetypes.LabelManaged: "true",
+ LabelParentName: "foo",
}
annots := map[string]string{
podAnnotationLastSetClusterIP: "1.2.3.4",
@@ -303,28 +304,28 @@ func Test_mergeStatefulSetLabelsOrAnnots(t *testing.T) {
}{
{
name: "no custom labels specified and none present in current labels, return current labels",
- current: map[string]string{LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
- want: map[string]string{LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ current: map[string]string{kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ want: map[string]string{kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
managed: tailscaleManagedLabels,
},
{
name: "no custom labels specified, but some present in current labels, return tailscale managed labels only from the current labels",
- current: map[string]string{"foo": "bar", "something.io/foo": "bar", LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
- want: map[string]string{LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ current: map[string]string{"foo": "bar", "something.io/foo": "bar", kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ want: map[string]string{kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
managed: tailscaleManagedLabels,
},
{
name: "custom labels specified, current labels only contain tailscale managed labels, return a union of both",
- current: map[string]string{LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ current: map[string]string{kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
custom: map[string]string{"foo": "bar", "something.io/foo": "bar"},
- want: map[string]string{"foo": "bar", "something.io/foo": "bar", LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ want: map[string]string{"foo": "bar", "something.io/foo": "bar", kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
managed: tailscaleManagedLabels,
},
{
name: "custom labels specified, current labels contain tailscale managed labels and custom labels, some of which re not present in the new custom labels, return a union of managed labels and the desired custom labels",
- current: map[string]string{"foo": "bar", "bar": "baz", "app": "1234", LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ current: map[string]string{"foo": "bar", "bar": "baz", "app": "1234", kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
custom: map[string]string{"foo": "bar", "something.io/foo": "bar"},
- want: map[string]string{"foo": "bar", "something.io/foo": "bar", "app": "1234", LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
+ want: map[string]string{"foo": "bar", "something.io/foo": "bar", "app": "1234", kubetypes.LabelManaged: "true", LabelParentName: "foo", LabelParentType: "svc", LabelParentNamespace: "foo"},
managed: tailscaleManagedLabels,
},
{
diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go
index 70c810b25..d6a6f440f 100644
--- a/cmd/k8s-operator/svc.go
+++ b/cmd/k8s-operator/svc.go
@@ -84,10 +84,10 @@ func childResourceLabels(name, ns, typ string) map[string]string {
// proxying. Instead, we have to do our own filtering and tracking with
// labels.
return map[string]string{
- LabelManaged: "true",
- LabelParentName: name,
- LabelParentNamespace: ns,
- LabelParentType: typ,
+ kubetypes.LabelManaged: "true",
+ LabelParentName: name,
+ LabelParentNamespace: ns,
+ LabelParentType: typ,
}
}
diff --git a/cmd/k8s-operator/testutils_test.go b/cmd/k8s-operator/testutils_test.go
index 6b1a4f85b..f47f96e44 100644
--- a/cmd/k8s-operator/testutils_test.go
+++ b/cmd/k8s-operator/testutils_test.go
@@ -32,6 +32,7 @@ import (
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
+ "tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/types/ptr"
"tailscale.com/util/mak"
@@ -563,10 +564,10 @@ func expectedSecret(t *testing.T, cl client.Client, opts configOpts) *corev1.Sec
func findGenName(t *testing.T, client client.Client, ns, name, typ string) (full, noSuffix string) {
t.Helper()
labels := map[string]string{
- LabelManaged: "true",
- LabelParentName: name,
- LabelParentNamespace: ns,
- LabelParentType: typ,
+ kubetypes.LabelManaged: "true",
+ LabelParentName: name,
+ LabelParentNamespace: ns,
+ LabelParentType: typ,
}
s, err := getSingleObject[corev1.Secret](context.Background(), client, "operator-ns", labels)
if err != nil {
diff --git a/cmd/k8s-operator/tsrecorder.go b/cmd/k8s-operator/tsrecorder.go
index 44ce731fe..e9e6b2c6c 100644
--- a/cmd/k8s-operator/tsrecorder.go
+++ b/cmd/k8s-operator/tsrecorder.go
@@ -230,7 +230,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsr *tsapi.Reco
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder) (bool, error) {
logger := r.logger(tsr.Name)
- id, _, ok, err := r.getNodeMetadata(ctx, tsr.Name)
+ prefs, ok, err := r.getDevicePrefs(ctx, tsr.Name)
if err != nil {
return false, err
}
@@ -243,6 +243,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
return true, nil
}
+ id := string(prefs.Config.NodeID)
logger.Debugf("deleting device %s from control", string(id))
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
errResp := &tailscale.ErrResponse{}
@@ -327,34 +328,33 @@ func (r *RecorderReconciler) getStateSecret(ctx context.Context, tsrName string)
return secret, nil
}
-func (r *RecorderReconciler) getNodeMetadata(ctx context.Context, tsrName string) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) {
+func (r *RecorderReconciler) getDevicePrefs(ctx context.Context, tsrName string) (prefs prefs, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName)
if err != nil || secret == nil {
- return "", "", false, err
+ return prefs, false, err
}
- return getNodeMetadata(ctx, secret)
+ return getDevicePrefs(secret)
}
-// getNodeMetadata returns 'ok == true' iff the node ID is found. The dnsName
+// getDevicePrefs returns 'ok == true' iff the node ID is found. The dnsName (prefs.Config.UserProfile.LoginName)
// is expected to always be non-empty if the node ID is, but not required.
-func getNodeMetadata(ctx context.Context, secret *corev1.Secret) (id tailcfg.StableNodeID, dnsName string, ok bool, err error) {
+func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
// TODO(tomhjp): Should maybe use ipn to parse the following info instead.
currentProfile, ok := secret.Data[currentProfileKey]
if !ok {
- return "", "", false, nil
+ return prefs, false, nil
}
profileBytes, ok := secret.Data[string(currentProfile)]
if !ok {
- return "", "", false, nil
+ return prefs, false, nil
}
- var profile profile
- if err := json.Unmarshal(profileBytes, &profile); err != nil {
- return "", "", false, fmt.Errorf("failed to extract node profile info from state Secret %s: %w", secret.Name, err)
+ if err := json.Unmarshal(profileBytes, &prefs); err != nil {
+ return prefs, false, fmt.Errorf("failed to extract node profile info from state Secret %s: %w", secret.Name, err)
}
- ok = profile.Config.NodeID != ""
- return tailcfg.StableNodeID(profile.Config.NodeID), profile.Config.UserProfile.LoginName, ok, nil
+ ok = prefs.Config.NodeID != ""
+ return prefs, ok, nil
}
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
@@ -367,14 +367,14 @@ func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsrName string)
}
func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
- nodeID, dnsName, ok, err := getNodeMetadata(ctx, secret)
+ prefs, ok, err := getDevicePrefs(secret)
if !ok || err != nil {
return tsapi.RecorderTailnetDevice{}, false, err
}
// TODO(tomhjp): The profile info doesn't include addresses, which is why we
// need the API. Should we instead update the profile to include addresses?
- device, err := tsClient.Device(ctx, string(nodeID), nil)
+ device, err := tsClient.Device(ctx, string(prefs.Config.NodeID), nil)
if err != nil {
return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err)
}
@@ -383,20 +383,25 @@ func getDeviceInfo(ctx context.Context, tsClient tsClient, secret *corev1.Secret
Hostname: device.Hostname,
TailnetIPs: device.Addresses,
}
- if dnsName != "" {
+ if dnsName := prefs.Config.UserProfile.LoginName; dnsName != "" {
d.URL = fmt.Sprintf("https://%s", dnsName)
}
return d, true, nil
}
-type profile struct {
+// prefs is a subset of the ipn.Prefs struct used for extracting information
+// from the state Secret of Tailscale devices.
+type prefs struct {
Config struct {
- NodeID string `json:"NodeID"`
+ NodeID tailcfg.StableNodeID `json:"NodeID"`
UserProfile struct {
+ // LoginName is the MagicDNS name of the device, e.g. foo.tail-scale.ts.net.
LoginName string `json:"LoginName"`
} `json:"UserProfile"`
} `json:"Config"`
+
+ AdvertiseServices []string `json:"AdvertiseServices"`
}
func markedForDeletion(obj metav1.Object) bool {
diff --git a/cmd/natc/natc.go b/cmd/natc/natc.go
index 956d2455e..73ba116ff 100644
--- a/cmd/natc/natc.go
+++ b/cmd/natc/natc.go
@@ -41,6 +41,8 @@ import (
"tailscale.com/wgengine/netstack"
)
+var ErrNoIPsAvailable = errors.New("no IPs available")
+
func main() {
hostinfo.SetApp("natc")
if !envknob.UseWIPCode() {
@@ -277,14 +279,14 @@ func (c *connector) handleDNS(pc net.PacketConn, buf []byte, remoteAddr *net.UDP
defer cancel()
who, err := c.lc.WhoIs(ctx, remoteAddr.String())
if err != nil {
- log.Printf("HandleDNS: WhoIs failed: %v\n", err)
+ log.Printf("HandleDNS(remote=%s): WhoIs failed: %v\n", remoteAddr.String(), err)
return
}
var msg dnsmessage.Message
err = msg.Unpack(buf)
if err != nil {
- log.Printf("HandleDNS: dnsmessage unpack failed: %v\n ", err)
+ log.Printf("HandleDNS(remote=%s): dnsmessage unpack failed: %v\n", remoteAddr.String(), err)
return
}
@@ -297,19 +299,19 @@ func (c *connector) handleDNS(pc net.PacketConn, buf []byte, remoteAddr *net.UDP
case dnsmessage.TypeAAAA, dnsmessage.TypeA:
dstAddrs, err := lookupDestinationIP(q.Name.String())
if err != nil {
- log.Printf("HandleDNS: lookup destination failed: %v\n ", err)
+ log.Printf("HandleDNS(remote=%s): lookup destination failed: %v\n", remoteAddr.String(), err)
return
}
if c.ignoreDestination(dstAddrs) {
bs, err := dnsResponse(&msg, dstAddrs)
// TODO (fran): treat as SERVFAIL
if err != nil {
- log.Printf("HandleDNS: generate ignore response failed: %v\n", err)
+ log.Printf("HandleDNS(remote=%s): generate ignore response failed: %v\n", remoteAddr.String(), err)
return
}
_, err = pc.WriteTo(bs, remoteAddr)
if err != nil {
- log.Printf("HandleDNS: write failed: %v\n", err)
+ log.Printf("HandleDNS(remote=%s): write failed: %v\n", remoteAddr.String(), err)
}
return
}
@@ -322,7 +324,7 @@ func (c *connector) handleDNS(pc net.PacketConn, buf []byte, remoteAddr *net.UDP
resp, err := c.generateDNSResponse(&msg, who.Node.ID)
// TODO (fran): treat as SERVFAIL
if err != nil {
- log.Printf("HandleDNS: connector handling failed: %v\n", err)
+ log.Printf("HandleDNS(remote=%s): connector handling failed: %v\n", remoteAddr.String(), err)
return
}
// TODO (fran): treat as NXDOMAIN
@@ -332,7 +334,7 @@ func (c *connector) handleDNS(pc net.PacketConn, buf []byte, remoteAddr *net.UDP
// This connector handled the DNS request
_, err = pc.WriteTo(resp, remoteAddr)
if err != nil {
- log.Printf("HandleDNS: write failed: %v\n", err)
+ log.Printf("HandleDNS(remote=%s): write failed: %v\n", remoteAddr.String(), err)
}
}
@@ -529,6 +531,9 @@ func (ps *perPeerState) ipForDomain(domain string) ([]netip.Addr, error) {
return addrs, nil
}
addrs := ps.assignAddrsLocked(domain)
+ if addrs == nil {
+ return nil, ErrNoIPsAvailable
+ }
return addrs, nil
}
@@ -575,6 +580,9 @@ func (ps *perPeerState) assignAddrsLocked(domain string) []netip.Addr {
ps.addrToDomain = &bart.Table[string]{}
}
v4 := ps.unusedIPv4Locked()
+ if !v4.IsValid() {
+ return nil
+ }
as16 := ps.c.v6ULA.Addr().As16()
as4 := v4.As4()
copy(as16[12:], as4[:])
diff --git a/cmd/proxy-to-grafana/proxy-to-grafana.go b/cmd/proxy-to-grafana/proxy-to-grafana.go
index 849d184c6..bdabd650f 100644
--- a/cmd/proxy-to-grafana/proxy-to-grafana.go
+++ b/cmd/proxy-to-grafana/proxy-to-grafana.go
@@ -19,8 +19,25 @@
// header_property = username
// auto_sign_up = true
// whitelist = 127.0.0.1
-// headers = Name:X-WEBAUTH-NAME
+// headers = Email:X-Webauth-User, Name:X-Webauth-Name, Role:X-Webauth-Role
// enable_login_token = true
+//
+// You can use grants in Tailscale ACL to give users different roles in Grafana.
+// For example, to give group:eng the Editor role, add the following to your ACLs:
+//
+// "grants": [
+// {
+// "src": ["group:eng"],
+// "dst": ["tag:grafana"],
+// "app": {
+// "tailscale.com/cap/proxy-to-grafana": [{
+// "role": "editor",
+// }],
+// },
+// },
+// ],
+//
+// If multiple roles are specified, the most permissive role is used.
package main
import (
@@ -49,6 +66,57 @@ var (
loginServer = flag.String("login-server", "", "URL to alternative control server. If empty, the default Tailscale control is used.")
)
+// aclCap is the Tailscale ACL capability used to configure proxy-to-grafana.
+const aclCap tailcfg.PeerCapability = "tailscale.com/cap/proxy-to-grafana"
+
+// aclGrant is an access control rule that assigns Grafana permissions
+// while provisioning a user.
+type aclGrant struct {
+ // Role is one of: "viewer", "editor", "admin".
+ Role string `json:"role"`
+}
+
+// grafanaRole defines possible Grafana roles.
+type grafanaRole int
+
+const (
+ // Roles are ordered by their permissions, with the least permissive role first.
+ // If a user has multiple roles, the most permissive role is used.
+ ViewerRole grafanaRole = iota
+ EditorRole
+ AdminRole
+)
+
+// String returns the capitalized role name ("Viewer", "Editor" or "Admin").
+// It is used as the X-Webauth-Role header value in the HTTP request to Grafana.
+func (r grafanaRole) String() string {
+ switch r {
+ case ViewerRole:
+ return "Viewer"
+ case EditorRole:
+ return "Editor"
+ case AdminRole:
+ return "Admin"
+ default:
+ // Unknown values fall back to the least permissive role.
+ return "Viewer"
+ }
+}
+
+// roleFromString parses a role name from an ACL grant, ignoring case. Any name
+// other than "viewer", "editor" or "admin" yields ViewerRole and a non-nil error.
+func roleFromString(s string) (grafanaRole, error) {
+ switch strings.ToLower(s) {
+ case "viewer":
+ return ViewerRole, nil
+ case "editor":
+ return EditorRole, nil
+ case "admin":
+ return AdminRole, nil
+ }
+ return ViewerRole, fmt.Errorf("unknown role: %q", s)
+}
+
func main() {
flag.Parse()
if *hostname == "" || strings.Contains(*hostname, ".") {
@@ -134,7 +202,15 @@ func modifyRequest(req *http.Request, localClient *local.Client) {
return
}
- user, err := getTailscaleUser(req.Context(), localClient, req.RemoteAddr)
+ // Delete any existing X-Webauth-* headers to prevent possible spoofing
+ // if getting Tailnet identity fails.
+ for h := range req.Header {
+ if strings.HasPrefix(h, "X-Webauth-") {
+ req.Header.Del(h)
+ }
+ }
+
+ user, role, err := getTailscaleIdentity(req.Context(), localClient, req.RemoteAddr)
if err != nil {
log.Printf("error getting Tailscale user: %v", err)
return
@@ -142,19 +218,33 @@ func modifyRequest(req *http.Request, localClient *local.Client) {
req.Header.Set("X-Webauth-User", user.LoginName)
req.Header.Set("X-Webauth-Name", user.DisplayName)
+ req.Header.Set("X-Webauth-Role", role.String())
}
-func getTailscaleUser(ctx context.Context, localClient *local.Client, ipPort string) (*tailcfg.UserProfile, error) {
+func getTailscaleIdentity(ctx context.Context, localClient *local.Client, ipPort string) (*tailcfg.UserProfile, grafanaRole, error) {
whois, err := localClient.WhoIs(ctx, ipPort)
if err != nil {
- return nil, fmt.Errorf("failed to identify remote host: %w", err)
+ return nil, ViewerRole, fmt.Errorf("failed to identify remote host: %w", err)
}
if whois.Node.IsTagged() {
- return nil, fmt.Errorf("tagged nodes are not users")
+ return nil, ViewerRole, fmt.Errorf("tagged nodes are not users")
}
if whois.UserProfile == nil || whois.UserProfile.LoginName == "" {
- return nil, fmt.Errorf("failed to identify remote user")
+ return nil, ViewerRole, fmt.Errorf("failed to identify remote user")
+ }
+
+ role := ViewerRole
+ grants, err := tailcfg.UnmarshalCapJSON[aclGrant](whois.CapMap, aclCap)
+ if err != nil {
+ return nil, ViewerRole, fmt.Errorf("failed to unmarshal ACL grants: %w", err)
+ }
+ for _, g := range grants {
+ r, err := roleFromString(g.Role)
+ if err != nil {
+ return nil, ViewerRole, fmt.Errorf("failed to parse role: %w", err)
+ }
+ role = max(role, r)
}
- return whois.UserProfile, nil
+ return whois.UserProfile, role, nil
}
diff --git a/cmd/stund/depaware.txt b/cmd/stund/depaware.txt
index 1d0a093c4..2326e3a24 100644
--- a/cmd/stund/depaware.txt
+++ b/cmd/stund/depaware.txt
@@ -49,6 +49,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
google.golang.org/protobuf/types/known/timestamppb from github.com/prometheus/client_golang/prometheus+
tailscale.com from tailscale.com/version
tailscale.com/envknob from tailscale.com/tsweb+
+ tailscale.com/feature from tailscale.com/tsweb
tailscale.com/kube/kubetypes from tailscale.com/envknob
tailscale.com/metrics from tailscale.com/net/stunserver+
tailscale.com/net/netaddr from tailscale.com/net/tsaddr
@@ -57,8 +58,8 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
tailscale.com/net/tsaddr from tailscale.com/tsweb
tailscale.com/syncs from tailscale.com/metrics
tailscale.com/tailcfg from tailscale.com/version
- tailscale.com/tsweb from tailscale.com/cmd/stund
- tailscale.com/tsweb/promvarz from tailscale.com/tsweb
+ tailscale.com/tsweb from tailscale.com/cmd/stund+
+ tailscale.com/tsweb/promvarz from tailscale.com/cmd/stund
tailscale.com/tsweb/varz from tailscale.com/tsweb+
tailscale.com/types/dnstype from tailscale.com/tailcfg
tailscale.com/types/ipproto from tailscale.com/tailcfg
@@ -194,7 +195,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
hash/maphash from go4.org/mem
html from net/http/pprof+
internal/abi from crypto/x509/internal/macos+
- internal/asan from syscall+
+ internal/asan from internal/runtime/maps+
internal/bisect from internal/godebug
internal/bytealg from bytes+
internal/byteorder from crypto/cipher+
@@ -204,12 +205,12 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
internal/filepathlite from os+
internal/fmtsort from fmt
internal/goarch from crypto/internal/fips140deps/cpu+
- internal/godebug from crypto/tls+
+ internal/godebug from crypto/internal/fips140deps/godebug+
internal/godebugs from internal/godebug+
- internal/goexperiment from runtime+
+ internal/goexperiment from hash/maphash+
internal/goos from crypto/x509+
internal/itoa from internal/poll+
- internal/msan from syscall+
+ internal/msan from internal/runtime/maps+
internal/nettrace from net+
internal/oserror from io/fs+
internal/poll from net+
diff --git a/cmd/stund/stund.go b/cmd/stund/stund.go
index c38429169..1055d966f 100644
--- a/cmd/stund/stund.go
+++ b/cmd/stund/stund.go
@@ -15,6 +15,9 @@ import (
"tailscale.com/net/stunserver"
"tailscale.com/tsweb"
+
+ // Support for prometheus varz in tsweb
+ _ "tailscale.com/tsweb/promvarz"
)
var (
diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt
index afe62165c..431bf7b71 100644
--- a/cmd/tailscale/depaware.txt
+++ b/cmd/tailscale/depaware.txt
@@ -333,7 +333,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
image/color from github.com/skip2/go-qrcode+
image/png from github.com/skip2/go-qrcode
internal/abi from crypto/x509/internal/macos+
- internal/asan from syscall+
+ internal/asan from internal/runtime/maps+
internal/bisect from internal/godebug
internal/bytealg from bytes+
internal/byteorder from crypto/cipher+
@@ -345,10 +345,10 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
internal/goarch from crypto/internal/fips140deps/cpu+
internal/godebug from archive/tar+
internal/godebugs from internal/godebug+
- internal/goexperiment from runtime+
+ internal/goexperiment from hash/maphash+
internal/goos from crypto/x509+
internal/itoa from internal/poll+
- internal/msan from syscall+
+ internal/msan from internal/runtime/maps+
internal/nettrace from net+
internal/oserror from io/fs+
internal/poll from net+
diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt
index c0f592ea1..0a9c46831 100644
--- a/cmd/tailscaled/depaware.txt
+++ b/cmd/tailscaled/depaware.txt
@@ -271,6 +271,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/hostinfo from tailscale.com/client/web+
tailscale.com/internal/noiseconn from tailscale.com/control/controlclient
tailscale.com/ipn from tailscale.com/client/local+
+ tailscale.com/ipn/auditlog from tailscale.com/ipn/ipnlocal+
tailscale.com/ipn/conffile from tailscale.com/cmd/tailscaled+
💣 tailscale.com/ipn/desktop from tailscale.com/cmd/tailscaled+
💣 tailscale.com/ipn/ipnauth from tailscale.com/ipn/ipnlocal+
@@ -285,7 +286,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/ipn/store/mem from tailscale.com/ipn/ipnlocal+
L tailscale.com/kube/kubeapi from tailscale.com/ipn/store/kubestore+
L tailscale.com/kube/kubeclient from tailscale.com/ipn/store/kubestore
- tailscale.com/kube/kubetypes from tailscale.com/envknob
+ tailscale.com/kube/kubetypes from tailscale.com/envknob+
tailscale.com/licenses from tailscale.com/client/web
tailscale.com/log/filelogger from tailscale.com/logpolicy
tailscale.com/log/sockstatlog from tailscale.com/ipn/ipnlocal
@@ -588,7 +589,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
html from html/template+
html/template from github.com/gorilla/csrf
internal/abi from crypto/x509/internal/macos+
- internal/asan from syscall+
+ internal/asan from internal/runtime/maps+
internal/bisect from internal/godebug
internal/bytealg from bytes+
internal/byteorder from crypto/cipher+
@@ -600,10 +601,10 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
internal/goarch from crypto/internal/fips140deps/cpu+
internal/godebug from archive/tar+
internal/godebugs from internal/godebug+
- internal/goexperiment from runtime+
+ internal/goexperiment from hash/maphash+
internal/goos from crypto/x509+
internal/itoa from internal/poll+
- internal/msan from syscall+
+ internal/msan from internal/runtime/maps+
internal/nettrace from net+
internal/oserror from io/fs+
internal/poll from net+
diff --git a/cmd/testwrapper/testwrapper.go b/cmd/testwrapper/testwrapper.go
index 1df1ef11f..53c1b1d05 100644
--- a/cmd/testwrapper/testwrapper.go
+++ b/cmd/testwrapper/testwrapper.go
@@ -141,7 +141,7 @@ func runTests(ctx context.Context, attempt int, pt *packageTests, goTestArgs, te
}
outcome := goOutput.Action
if outcome == "build-fail" {
- outcome = "FAIL"
+ outcome = "fail"
}
pkgTests[""].logs.WriteString(goOutput.Output)
ch <- &testAttempt{
@@ -152,7 +152,15 @@ func runTests(ctx context.Context, attempt int, pt *packageTests, goTestArgs, te
logs: pkgTests[""].logs,
pkgFinished: true,
}
+ case "output":
+ // Capture all output from the package except for the final
+ // "FAIL tailscale.io/control 0.684s" line, as
+ // printPkgOutcome will output a similar line
+ if !strings.HasPrefix(goOutput.Output, fmt.Sprintf("FAIL\t%s\t", goOutput.Package)) {
+ pkgTests[""].logs.WriteString(goOutput.Output)
+ }
}
+
continue
}
testName := goOutput.Test
@@ -251,6 +259,7 @@ func main() {
fmt.Printf("\n\nAttempt #%d: Retrying flaky tests:\n\nflakytest failures JSON: %s\n\n", thisRun.attempt, j)
}
+ fatalFailures := make(map[string]struct{}) // pkg.Test key
toRetry := make(map[string][]*testAttempt) // pkg -> tests to retry
for _, pt := range thisRun.tests {
ch := make(chan *testAttempt)
@@ -276,7 +285,11 @@ func main() {
// when a package times out.
failed = true
}
- os.Stdout.ReadFrom(&tr.logs)
+ if testingVerbose || tr.outcome == "fail" {
+ // Print package-level output, which is where e.g.
+ // panics outside of tests are reported.
+ io.Copy(os.Stdout, &tr.logs)
+ }
printPkgOutcome(tr.pkg, tr.outcome, thisRun.attempt, tr.end.Sub(tr.start))
continue
}
@@ -289,11 +302,24 @@ func main() {
if tr.isMarkedFlaky {
toRetry[tr.pkg] = append(toRetry[tr.pkg], tr)
} else {
+ fatalFailures[tr.pkg+"."+tr.testName] = struct{}{}
failed = true
}
}
if failed {
fmt.Println("\n\nNot retrying flaky tests because non-flaky tests failed.")
+
+ // Print the list of non-flakytest failures.
+ // We will later analyze the retried GitHub Action runs to see
+ // if non-flakytest failures succeeded upon retry. This will
+ // highlight tests which are flaky but not yet flagged as such.
+ if len(fatalFailures) > 0 {
+ tests := slicesx.MapKeys(fatalFailures)
+ sort.Strings(tests)
+ j, _ := json.Marshal(tests)
+ fmt.Printf("non-flakytest failures: %s\n", j)
+ }
+ fmt.Println()
os.Exit(1)
}
diff --git a/cmd/tsidp/Dockerfile b/cmd/tsidp/Dockerfile
new file mode 100644
index 000000000..605a7ba2e
--- /dev/null
+++ b/cmd/tsidp/Dockerfile
@@ -0,0 +1,41 @@
+# Build stage
+FROM golang:alpine AS builder
+
+# Install build dependencies
+RUN apk add --no-cache git
+
+# Set working directory
+WORKDIR /src
+
+# Copy only go.mod and go.sum first to leverage Docker caching
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Copy the entire repository
+COPY . .
+
+# Build the tsidp binary
+RUN go build -o /bin/tsidp ./cmd/tsidp
+
+# Final stage
+FROM alpine:latest
+
+# Create necessary directories
+RUN mkdir -p /var/lib/tsidp
+
+# Copy binary from builder stage
+COPY --from=builder /bin/tsidp /app/tsidp
+
+# Set working directory
+WORKDIR /app
+
+# Environment variables
+ENV TAILSCALE_USE_WIP_CODE=1 \
+ TS_HOSTNAME=tsidp \
+ TS_STATE_DIR=/var/lib/tsidp
+
+# Expose the default port
+EXPOSE 443
+
+# Run the application
+ENTRYPOINT ["/app/tsidp"] \ No newline at end of file
diff --git a/cmd/tsidp/README.md b/cmd/tsidp/README.md
new file mode 100644
index 000000000..d51138b6d
--- /dev/null
+++ b/cmd/tsidp/README.md
@@ -0,0 +1,100 @@
+# `tsidp` - Tailscale OpenID Connect (OIDC) Identity Provider
+
+[![status: experimental](https://img.shields.io/badge/status-experimental-blue)](https://tailscale.com/kb/1167/release-stages/#experimental)
+
+`tsidp` is an OIDC Identity Provider (IdP) server that integrates with your Tailscale network. It allows you to use Tailscale identities for authentication in applications that support OpenID Connect, enabling single sign-on (SSO) capabilities within your tailnet.
+
+## Prerequisites
+
+- A Tailscale network (tailnet) with MagicDNS and HTTPS enabled
+- A Tailscale authentication key from your tailnet
+- Docker installed on your system
+
+## Installation using Docker
+
+1. **Build the Docker Image**
+
+ The Dockerfile uses a multi-stage build process to:
+ - Build the `tsidp` binary from source
+ - Create a minimal Alpine-based image with just the necessary components
+
+ ```bash
+ # Clone the Tailscale repository
+ git clone https://github.com/tailscale/tailscale.git
+ cd tailscale
+ ```
+
+ ```bash
+ # Build the Docker image
+ docker build -t tsidp:latest -f cmd/tsidp/Dockerfile .
+ ```
+
+2. **Run the Container**
+
+ Replace `YOUR_TAILSCALE_AUTHKEY` with your Tailscale authentication key.
+
+ ```bash
+ docker run -d \
+ --name tsidp \
+ -p 443:443 \
+ -e TS_AUTHKEY=YOUR_TAILSCALE_AUTHKEY \
+ -e TS_HOSTNAME=tsidp \
+ -v tsidp-data:/var/lib/tsidp \
+ tsidp:latest
+ ```
+
+3. **Verify Installation**
+ ```bash
+ docker logs tsidp
+ ```
+
+ Visit `https://tsidp.tailnet.ts.net` (replacing `tailnet` with your tailnet's name) to confirm the service is running.
+
+## Usage Example: Proxmox Integration
+
+Here's how to configure Proxmox to use `tsidp` for authentication:
+
+1. In Proxmox, navigate to Datacenter > Realms > Add OpenID Connect Server
+
+2. Configure the following settings:
+ - Issuer URL: `https://idp.velociraptor.ts.net` (replace with the URL of your own `tsidp` server)
+ - Realm: `tailscale` (or your preferred name)
+ - Client ID: `unused`
+ - Client Key: `unused`
+ - Default: `true`
+ - Autocreate users: `true`
+ - Username claim: `email`
+
+3. Set up user permissions:
+ - Go to Datacenter > Permissions > Groups
+ - Create a new group (e.g., "tsadmins")
+ - Click Permissions in the sidebar
+ - Add Group Permission
+ - Set Path to `/` for full admin access or scope as needed
+ - Set the group and role
+ - Add Tailscale-authenticated users to the group
+
+## Configuration Options
+
+The `tsidp` server supports several command-line flags:
+
+- `--verbose`: Enable verbose logging
+- `--port`: Port to listen on (default: 443)
+- `--local-port`: Allow requests from localhost
+- `--use-local-tailscaled`: Use local tailscaled instead of tsnet
+- `--dir`: tsnet state directory
+
+## Environment Variables
+
+- `TS_AUTHKEY`: Your Tailscale authentication key (required)
+- `TS_HOSTNAME`: Hostname for the `tsidp` server (default: "idp"; the provided Dockerfile sets it to "tsidp")
+- `TS_STATE_DIR`: State directory (default: "/var/lib/tsidp")
+- `TAILSCALE_USE_WIP_CODE`: Enable work-in-progress code (default: "1")
+
+## Support
+
+This is an [experimental](https://tailscale.com/kb/1167/release-stages#experimental), work-in-progress feature. For issues or questions, open an issue on the [GitHub repository](https://github.com/tailscale/tailscale).
+
+## License
+
+BSD-3-Clause License. See [LICENSE](../../LICENSE) for details. \ No newline at end of file
diff --git a/cmd/tsidp/tsidp.go b/cmd/tsidp/tsidp.go
index 3eabef245..95ab2b2eb 100644
--- a/cmd/tsidp/tsidp.go
+++ b/cmd/tsidp/tsidp.go
@@ -11,6 +11,7 @@ import (
"context"
crand "crypto/rand"
"crypto/rsa"
+ "crypto/subtle"
"crypto/tls"
"crypto/x509"
"encoding/base64"
@@ -345,7 +346,9 @@ func (ar *authRequest) allowRelyingParty(r *http.Request, lc *local.Client) erro
clientID = r.FormValue("client_id")
clientSecret = r.FormValue("client_secret")
}
- if ar.funnelRP.ID != clientID || ar.funnelRP.Secret != clientSecret {
+ clientIDcmp := subtle.ConstantTimeCompare([]byte(clientID), []byte(ar.funnelRP.ID))
+ clientSecretcmp := subtle.ConstantTimeCompare([]byte(clientSecret), []byte(ar.funnelRP.Secret))
+ if clientIDcmp != 1 || clientSecretcmp != 1 {
return fmt.Errorf("tsidp: invalid client credentials")
}
return nil
@@ -762,6 +765,18 @@ var (
)
func (s *idpServer) serveOpenIDConfig(w http.ResponseWriter, r *http.Request) {
+ h := w.Header()
+ h.Set("Access-Control-Allow-Origin", "*")
+ h.Set("Access-Control-Allow-Method", "GET, OPTIONS")
+ // allow all to prevent errors from client sending their own bespoke headers
+ // and having the server reject the request.
+ h.Set("Access-Control-Allow-Headers", "*")
+
+ // early return for pre-flight OPTIONS requests.
+ if r.Method == "OPTIONS" {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
if r.URL.Path != oidcConfigPath {
http.Error(w, "tsidp: not found", http.StatusNotFound)
return
diff --git a/cmd/xdpderper/xdpderper.go b/cmd/xdpderper/xdpderper.go
index 599034ae7..c127baf54 100644
--- a/cmd/xdpderper/xdpderper.go
+++ b/cmd/xdpderper/xdpderper.go
@@ -18,6 +18,9 @@ import (
"tailscale.com/derp/xdp"
"tailscale.com/net/netutil"
"tailscale.com/tsweb"
+
+ // Support for prometheus varz in tsweb
+ _ "tailscale.com/tsweb/promvarz"
)
var (