summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorIrbe Krumina <irbe@tailscale.com>2023-09-06 19:33:52 +0100
committerIrbe Krumina <irbe@tailscale.com>2023-09-06 19:33:52 +0100
commit34820394bb60d17388eb3efd56caee7b63a321cb (patch)
tree4538e629554463071a24c7b87e750b04d0575485
parent1262df057838bba248ec8e1ace0c148b3f7a3463 (diff)
downloadtailscale-irbekrm/k8s-nftables.tar.xz
tailscale-irbekrm/k8s-nftables.zip
WIP:cmd/k8s-operator: use nftables for proxyirbekrm/k8s-nftables
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
-rw-r--r--Makefile2
-rw-r--r--cmd/containerboot/main.go107
-rw-r--r--cmd/containerboot/main_test.go1
-rw-r--r--cmd/k8s-operator/manifests/operator.yaml2
-rw-r--r--cmd/k8s-operator/manifests/proxy.yaml2
-rw-r--r--cmd/k8s-operator/operator.go6
-rw-r--r--cmd/k8s-operator/sts.go5
7 files changed, 92 insertions, 33 deletions
diff --git a/Makefile b/Makefile
index c4391b90a..a395b46c8 100644
--- a/Makefile
+++ b/Makefile
@@ -67,7 +67,7 @@ publishdevimage: ## Build and publish tailscale image to location specified by $
@test "${REPO}" != "ghcr.io/tailscale/tailscale" || (echo "REPO=... must not be ghcr.io/tailscale/tailscale" && exit 1)
@test "${REPO}" != "tailscale/k8s-operator" || (echo "REPO=... must not be tailscale/k8s-operator" && exit 1)
@test "${REPO}" != "ghcr.io/tailscale/k8s-operator" || (echo "REPO=... must not be ghcr.io/tailscale/k8s-operator" && exit 1)
- TAGS=latest REPOS=${REPO} PUSH=true TARGET=client ./build_docker.sh
+ TAGS=latest REPOS=${REPO} PUSH=true TARGET=client BASE="gcr.io/csi-test-290908/alpine:3.18" ./build_docker.sh
publishdevoperator: ## Build and publish k8s-operator image to location specified by ${REPO}
@test -n "${REPO}" || (echo "REPO=... required; e.g. REPO=ghcr.io/${USER}/tailscale" && exit 1)
diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index 3676db02c..755e5cd89 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -658,9 +658,9 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net
if err != nil {
return err
}
- argv0 := "iptables"
+ family := "ip"
if dst.Is6() {
- argv0 = "ip6tables"
+ family = "ip6"
}
var local string
for _, pfx := range tsIPs {
@@ -676,26 +676,54 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net
if local == "" {
return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
}
- // Technically, if the control server ever changes the IPs assigned to this
- // node, we'll slowly accumulate iptables rules. This shouldn't happen, so
- // for now we'll live with it.
- // Set up a rule that ensures that all packets
- // except for those received on tailscale0 interface is forwarded to
- // destination address
- cmdDNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "!", "-i", "tailscale0", "-j", "DNAT", "--to-destination", dstStr)
- cmdDNAT.Stdout = os.Stdout
- cmdDNAT.Stderr = os.Stderr
- if err := cmdDNAT.Run(); err != nil {
- return fmt.Errorf("executing iptables failed: %w", err)
+
+ // Ensure a nat table exists. If tailscaled is running in nftables mode
+ // nat table and POSTROUTING chain will already exist. This is fine as
+ // creating those is an idempotent operation.
+ nft := "nft"
+ natTable := "nat"
+ cmdAddTable := exec.CommandContext(ctx, nft, "add", "table", family, natTable)
+ cmdAddTable.Stdout = os.Stdout
+ cmdAddTable.Stderr = os.Stderr
+ if err := cmdAddTable.Run(); err != nil {
+ return fmt.Errorf("adding nat table failed: %w", err)
+ }
+ preroutingChain := "PREROUTING"
+ cmdCreatePreroutingChain := exec.CommandContext(ctx, nft, "--", "add", "chain", family, natTable, preroutingChain, "{ type nat hook prerouting priority -100; policy accept; }")
+ cmdCreatePreroutingChain.Stdout = os.Stdout
+ cmdCreatePreroutingChain.Stderr = os.Stderr
+ if err := cmdCreatePreroutingChain.Run(); err != nil {
+ return fmt.Errorf("adding prerouting chain failed: %w", err)
+ }
+
+ // running this multiple times will result in multiple rules, but that
+ // won't affect the functionality and hopefully we'll never run in that
+ // often that it becomes a performance problem
+ cmdCreateDNATRule := exec.CommandContext(ctx, nft, "insert", "rule", family, natTable, preroutingChain, "iifname", "!=", "tailscale0*", "dnat", "to", dstStr)
+ cmdCreateDNATRule.Stdout = os.Stdout
+ cmdCreateDNATRule.Stderr = os.Stderr
+ if err := cmdCreateDNATRule.Run(); err != nil {
+ return fmt.Errorf("adding DNAT rule failed: %w", err)
}
- // Set up a rule that ensures that all packets sent to the destination
- // address will have the proxy's IP set as source IP
- cmdSNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "POSTROUTING", "1", "--destination", dstStr, "-j", "SNAT", "--to-source", local)
- cmdSNAT.Stdout = os.Stdout
- cmdSNAT.Stderr = os.Stderr
- if err := cmdSNAT.Run(); err != nil {
- return fmt.Errorf("setting up SNAT via iptables failed: %w", err)
+
+ postRoutingChain := "POSTROUTING"
+ cmdCreatePostRoutingChain := exec.CommandContext(ctx, nft, "add", "chain", family, natTable, postRoutingChain, "{ type nat hook postrouting priority srcnat; policy accept; }")
+ cmdCreatePostRoutingChain.Stdout = os.Stdout
+ cmdCreatePostRoutingChain.Stderr = os.Stderr
+ if err := cmdCreatePostRoutingChain.Run(); err != nil {
+ return fmt.Errorf("adding postrouting chain failed: %w", err)
+ }
+
+ // running this multiple times will result in multiple rules, but that
+ // won't affect the functionality and hopefully we'll never run in that
+ // often that it becomes a performance problem
+ cmdCreateSNATRule := exec.CommandContext(ctx, nft, "insert", "rule", family, natTable, postRoutingChain, "ip", "daddr", dstStr, "snat", "to", local)
+ cmdCreateSNATRule.Stdout = os.Stdout
+ cmdCreateSNATRule.Stderr = os.Stderr
+ if err := cmdCreateSNATRule.Run(); err != nil {
+ return fmt.Errorf("adding SNAT rule failed: %w", err)
}
+
return nil
}
@@ -704,9 +732,9 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
if err != nil {
return err
}
- argv0 := "iptables"
+ family := "ip"
if dst.Is6() {
- argv0 = "ip6tables"
+ family = "ip6"
}
var local string
for _, pfx := range tsIPs {
@@ -722,15 +750,34 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
if local == "" {
return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
}
- // Technically, if the control server ever changes the IPs assigned to this
- // node, we'll slowly accumulate iptables rules. This shouldn't happen, so
- // for now we'll live with it.
- cmd := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "-d", local, "-j", "DNAT", "--to-destination", dstStr)
- cmd.Stdout = os.Stdout
- cmd.Stderr = os.Stderr
- if err := cmd.Run(); err != nil {
- return fmt.Errorf("executing iptables failed: %w", err)
+ // Create a nat table
+ nft := "nft"
+ natTable := "nat"
+ // adding a table and a chain are idempotent operations
+ cmdAddTable := exec.CommandContext(ctx, nft, "add", "table", family, natTable)
+ cmdAddTable.Stdout = os.Stdout
+ cmdAddTable.Stderr = os.Stderr
+ if err := cmdAddTable.Run(); err != nil {
+ return fmt.Errorf("adding nat table failed: %w", err)
+ }
+ preroutingChain := "PREROUTING"
+ cmdCreatePreroutingChain := exec.CommandContext(ctx, nft, "--", "add", "chain", family, natTable, preroutingChain, "{ type nat hook prerouting priority -100; policy accept; }")
+ cmdCreatePreroutingChain.Stdout = os.Stdout
+ cmdCreatePreroutingChain.Stderr = os.Stderr
+ if err := cmdAddTable.Run(); err != nil {
+ return fmt.Errorf("adding prerouting chain failed: %w", err)
}
+
+ // running this multiple times will result in multiple rules, but that
+ // won't affect the functionality and hopefully we'll never run in that
+ // often that it becomes a performance problem
+ cmdCreateDNATRule := exec.CommandContext(ctx, nft, "insert", "rule", family, natTable, preroutingChain, "ip", "daddr", local, "dnat", "to", dstStr)
+ cmdCreateDNATRule.Stdout = os.Stdout
+ cmdCreateDNATRule.Stderr = os.Stderr
+ if err := cmdAddTable.Run(); err != nil {
+ return fmt.Errorf("adding DNAT rule failed: %w", err)
+ }
+
return nil
}
diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go
index e1353a8e6..95c30bef0 100644
--- a/cmd/containerboot/main_test.go
+++ b/cmd/containerboot/main_test.go
@@ -69,6 +69,7 @@ func TestContainerBoot(t *testing.T) {
"usr/bin/tailscaled": fakeTailscaled,
"usr/bin/tailscale": fakeTailscale,
"usr/bin/iptables": fakeTailscale,
+ "usr/bin/nft": fakeTailscale,
"usr/bin/ip6tables": fakeTailscale,
"dev/net/tun": []byte(""),
"proc/sys/net/ipv4/ip_forward": []byte("0"),
diff --git a/cmd/k8s-operator/manifests/operator.yaml b/cmd/k8s-operator/manifests/operator.yaml
index 4da7a4dae..40b30ba08 100644
--- a/cmd/k8s-operator/manifests/operator.yaml
+++ b/cmd/k8s-operator/manifests/operator.yaml
@@ -139,6 +139,8 @@ spec:
value: operator
- name: OPERATOR_LOGGING
value: info
+ - name: RUN_IN_RESTRICTED_ENV
+ value: "true"
- name: OPERATOR_NAMESPACE
valueFrom:
fieldRef:
diff --git a/cmd/k8s-operator/manifests/proxy.yaml b/cmd/k8s-operator/manifests/proxy.yaml
index 361af8910..5f8cd2417 100644
--- a/cmd/k8s-operator/manifests/proxy.yaml
+++ b/cmd/k8s-operator/manifests/proxy.yaml
@@ -31,6 +31,8 @@ spec:
value: "false"
- name: TS_AUTH_ONCE
value: "true"
+ - name: TS_DEBUG_FIREWALL_MODE
+ value: "nftables"
securityContext:
capabilities:
add:
diff --git a/cmd/k8s-operator/operator.go b/cmd/k8s-operator/operator.go
index 5d78f4c25..177393c33 100644
--- a/cmd/k8s-operator/operator.go
+++ b/cmd/k8s-operator/operator.go
@@ -53,6 +53,7 @@ func main() {
priorityClassName = defaultEnv("PROXY_PRIORITY_CLASS_NAME", "")
tags = defaultEnv("PROXY_TAGS", "tag:k8s")
shouldRunAuthProxy = defaultBool("AUTH_PROXY", false)
+ runInRestrictedEnv = defaultBool("RUN_IN_RESTRICTED_ENV", false)
)
var opts []kzap.Opts
@@ -73,7 +74,7 @@ func main() {
if shouldRunAuthProxy {
launchAuthProxy(zlog, restConfig, s)
}
- startReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags)
+ startReconcilers(zlog, s, tsNamespace, restConfig, tsClient, image, priorityClassName, tags, runInRestrictedEnv)
}
// initTSNet initializes the tsnet.Server and logs in to Tailscale. It uses the
@@ -182,7 +183,7 @@ waitOnline:
// startReconcilers starts the controller-runtime manager and registers the
// ServiceReconciler.
-func startReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags string) {
+func startReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace string, restConfig *rest.Config, tsClient *tailscale.Client, image, priorityClassName, tags string, runInRestrictedEnv bool) {
var (
isDefaultLoadBalancer = defaultBool("OPERATOR_DEFAULT_LOAD_BALANCER", false)
)
@@ -231,6 +232,7 @@ func startReconcilers(zlog *zap.SugaredLogger, s *tsnet.Server, tsNamespace stri
operatorNamespace: tsNamespace,
proxyImage: image,
proxyPriorityClassName: priorityClassName,
+ runInRestrictedEnv: runInRestrictedEnv,
}
err = builder.
ControllerManagedBy(mgr).
diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index 31aa6bad7..c5d95e1f2 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -78,6 +78,7 @@ type tailscaleSTSReconciler struct {
operatorNamespace string
proxyImage string
proxyPriorityClassName string
+ runInRestrictedEnv bool
}
// IsHTTPSEnabledOnTailnet reports whether HTTPS is enabled on the tailnet.
@@ -352,6 +353,10 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
},
})
}
+ if a.runInRestrictedEnv {
+ // in run in restricted env remove the sysctl init container
+ ss.Spec.Template.Spec.InitContainers = nil
+ }
ss.ObjectMeta = metav1.ObjectMeta{
Name: headlessSvc.Name,
Namespace: a.operatorNamespace,