summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorIrbe Krumina <irbe@tailscale.com>2024-04-18 10:33:51 +0100
committerIrbe Krumina <irbe@tailscale.com>2024-04-18 20:34:44 +0100
commitd37f2f508509c6c35ad724fd75a27685b90b575b (patch)
tree6ff719daa60e1e02ad1c21b8aa6d8bbe1a464083
parent03d5d1f0f951b7964f2aa2c74ea395e511aff605 (diff)
downloadtailscale-irbekrm/extsvcnftableslb.tar.xz
tailscale-irbekrm/extsvcnftableslb.zip
Signed-off-by: Irbe Krumina <irbe@tailscale.com>
-rw-r--r--cmd/containerboot/main.go185
-rw-r--r--cmd/k8s-operator/sts.go11
-rw-r--r--cmd/k8s-operator/svc.go15
-rw-r--r--util/linuxfw/iptables_runner.go24
-rw-r--r--util/linuxfw/nftables_runner.go97
5 files changed, 311 insertions, 21 deletions
diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go
index 5d14826d8..a7c438a33 100644
--- a/cmd/containerboot/main.go
+++ b/cmd/containerboot/main.go
@@ -18,7 +18,9 @@
// previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
// in --accept-routes.
// - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
-// destination.
+// destination defined by an IP address.
+// - TS_DEST_DNS_NAME: proxy all incoming Tailscale traffic to the given
+// destination defined by a DNS name. The DNS name will be periodically resolved and firewall rules updated accordingly.
// - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
// destination defined by an IP.
// - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
@@ -82,12 +84,14 @@ import (
"fmt"
"io/fs"
"log"
+ "net"
"net/netip"
"os"
"os/exec"
"os/signal"
"path/filepath"
"reflect"
+ "slices"
"strconv"
"strings"
"sync"
@@ -122,7 +126,8 @@ func main() {
Hostname: defaultEnv("TS_HOSTNAME", ""),
Routes: defaultEnvStringPointer("TS_ROUTES"),
ServeConfigPath: defaultEnv("TS_SERVE_CONFIG", ""),
- ProxyTo: defaultEnv("TS_DEST_IP", ""),
+ ProxyTargetIP: defaultEnv("TS_DEST_IP", ""),
+ ProxyTargetDNSName: defaultEnv("TS_DEST_DNS_NAME", ""),
TailnetTargetIP: defaultEnv("TS_TAILNET_TARGET_IP", ""),
TailnetTargetFQDN: defaultEnv("TS_TAILNET_TARGET_FQDN", ""),
DaemonExtraArgs: defaultEnv("TS_TAILSCALED_EXTRA_ARGS", ""),
@@ -150,8 +155,8 @@ func main() {
if err := ensureTunFile(cfg.Root); err != nil {
log.Fatalf("Unable to create tuntap device file: %v", err)
}
- if cfg.ProxyTo != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
- if err := ensureIPForwarding(cfg.Root, cfg.ProxyTo, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
+ if cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
+ if err := ensureIPForwarding(cfg.Root, cfg.ProxyTargetIP, cfg.ProxyTargetDNSName, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
log.Printf("Failed to enable IP forwarding: %v", err)
log.Printf("To run tailscale as a proxy or router container, IP forwarding must be enabled.")
if cfg.InKubernetes {
@@ -341,7 +346,7 @@ authLoop:
}
var (
- wantProxy = cfg.ProxyTo != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
+ wantProxy = cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
wantDeviceInfo = cfg.InKubernetes && cfg.KubeSecret != "" && cfg.KubernetesCanPatch
startupTasksDone = false
currentIPs deephash.Sum // tailscale IPs assigned to device
@@ -349,6 +354,9 @@ authLoop:
currentEgressIPs deephash.Sum
+ addrs []netip.Prefix
+ backendAddrs []net.IP
+
certDomain = new(atomic.Pointer[string])
certDomainChanged = make(chan bool, 1)
)
@@ -362,6 +370,16 @@ authLoop:
log.Fatalf("error creating new netfilter runner: %v", err)
}
}
+
+ // If we are proxying to a target specified by a DNS name, periodically
+ // resolve the DNS name and update firewall rules if the backend IPs
+ // have changed.
+ const proxyTargetIPsResolvePeriod = time.Minute * 10
+ var ts time.Ticker
+ if cfg.ProxyTargetDNSName != "" {
+ ts = *time.NewTicker(proxyTargetIPsResolvePeriod)
+ }
+
notifyChan := make(chan ipn.Notify)
errChan := make(chan error)
go func() {
@@ -399,7 +417,7 @@ runLoop:
log.Fatalf("tailscaled left running state (now in state %q), exiting", *n.State)
}
if n.NetMap != nil {
- addrs := n.NetMap.SelfNode.Addresses().AsSlice()
+ addrs = n.NetMap.SelfNode.Addresses().AsSlice()
newCurrentIPs := deephash.Hash(&addrs)
ipsHaveChanged := newCurrentIPs != currentIPs
@@ -441,12 +459,32 @@ runLoop:
}
currentEgressIPs = newCurentEgressIPs
}
- if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged {
+ if cfg.ProxyTargetIP != "" && len(addrs) > 0 && ipsHaveChanged {
log.Printf("Installing proxy rules")
- if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil {
+ if err := installIngressForwardingRule(ctx, cfg.ProxyTargetIP, addrs, nfr); err != nil {
log.Fatalf("installing ingress proxy rules: %v", err)
}
}
+ if cfg.ProxyTargetDNSName != "" {
+ newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
+ if err != nil {
+ log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod)
+ continue
+ }
+ backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
+ if ip1.Equal(ip2) {
+ return 0
+ }
+ return -1
+ })
+ if len(addrs) > 0 && (backendsHaveChanged != 0 || ipsHaveChanged) && len(newBackendAddrs) > 0 {
+ log.Printf("installing ingresss proxy rules for backends %v", newBackendAddrs)
+ if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
+ log.Fatalf("error installing ingress proxy rules: %v", err)
+ }
+ }
+ backendAddrs = newBackendAddrs
+ }
if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) > 0 {
cd := n.NetMap.DNS.CertDomains[0]
prev := certDomain.Swap(ptr.To(cd))
@@ -511,12 +549,31 @@ runLoop:
os.Exit(0)
}
}
-
}
wg.Add(1)
go reaper()
}
}
+ case <-ts.C:
+ newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
+ if err != nil {
+ log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod.String())
+ continue
+ }
+ backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
+ if ip1.Equal(ip2) {
+ return 0
+ }
+ return -1
+ })
+ if backendsHaveChanged != 0 && len(newBackendAddrs) != 0 && len(addrs) != 0 {
+ log.Printf("Backend address change detected, installing proxy rules for backends %v", newBackendAddrs)
+ if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
+ log.Fatalf("installing ingress proxy rules for DNS target %s: %v", cfg.ProxyTargetDNSName, err)
+ }
+ }
+ backendAddrs = newBackendAddrs
+
}
}
wg.Wait()
@@ -757,12 +814,12 @@ func ensureTunFile(root string) error {
}
// ensureIPForwarding enables IPv4/IPv6 forwarding for the container.
-func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
+func ensureIPForwarding(root, clusterProxyTargetIP, clusterProxyTargetDNSName, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
var (
v4Forwarding, v6Forwarding bool
)
- if clusterProxyTarget != "" {
- proxyIP, err := netip.ParseAddr(clusterProxyTarget)
+ if clusterProxyTargetIP != "" {
+ proxyIP, err := netip.ParseAddr(clusterProxyTargetIP)
if err != nil {
return fmt.Errorf("invalid cluster destination IP: %v", err)
}
@@ -772,6 +829,26 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTarget
v6Forwarding = true
}
}
+ if clusterProxyTargetDNSName != "" {
+ ips, err := resolveDNS(context.Background(), clusterProxyTargetDNSName)
+ if err != nil {
+ return fmt.Errorf("error resolving DNS name %s: %w", clusterProxyTargetDNSName, err)
+ }
+ for _, ip := range ips {
+ if ip.To4() != nil {
+ v4Forwarding = true
+ if v6Forwarding {
+ break
+ }
+ }
+ if ip.To16() != nil {
+ v6Forwarding = true
+ if v4Forwarding {
+ break
+ }
+ }
+ }
+ }
if tailnetTargetiP != "" {
proxyIP, err := netip.ParseAddr(tailnetTargetiP)
if err != nil {
@@ -918,15 +995,77 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
return nil
}
+// installIngressForwardingRuleExternalNameService installs firewall rules that
+// forward traffic addressed to this node's Tailscale IPs (tsIPs) to the
+// resolved backend addresses (backendAddrs). IPv4 backends are served via the
+// node's IPv4 Tailscale address and IPv6 backends via its IPv6 Tailscale
+// address.
+//
+// An address family is logged and skipped, rather than treated as an error,
+// when the node has no single-IP Tailscale address of that family or, for
+// IPv6, when nfr reports that IPv6 NAT is not available. An error is returned
+// only if installing the DNAT or MSS clamping rules fails.
+//
+// ctx is currently unused.
+func installIngressForwardingRuleExternalNameService(ctx context.Context, backendAddrs []net.IP, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
+	var (
+		tsv4       netip.Addr
+		tsv6       netip.Addr
+		v4Backends []netip.Addr
+		v6Backends []netip.Addr
+	)
+	for _, pfx := range tsIPs {
+		if !pfx.IsSingleIP() {
+			continue
+		}
+		switch addr := pfx.Addr(); {
+		case addr.Is4():
+			tsv4 = addr
+		case addr.Is6():
+			tsv6 = addr
+		}
+	}
+	for _, ip := range backendAddrs {
+		addr, ok := netip.AddrFromSlice(ip)
+		if !ok {
+			// Neither a 4-byte nor a 16-byte address; nothing to forward to.
+			continue
+		}
+		// net.IP frequently holds IPv4 addresses in 16-byte form, so unmap
+		// before classifying. Each backend must land in exactly one family:
+		// an IPv4 backend must not also be treated as an IPv6 one.
+		if addr = addr.Unmap(); addr.Is4() {
+			v4Backends = append(v4Backends, addr)
+		} else {
+			v6Backends = append(v6Backends, addr)
+		}
+	}
+
+	updateFirewall := func(dst netip.Addr, backendTargets []netip.Addr) error {
+		if err := nfr.DNATWithLoadBalancer(dst, backendTargets); err != nil {
+			return fmt.Errorf("installing DNAT rules for ingress backends %+#v: %w", backendTargets, err)
+		}
+		// The backend might advertise MSS higher than that of the
+		// tailscale interfaces. Clamp MSS of packets going out via
+		// tailscale0 interface to its MTU to prevent broken connections
+		// in environments where path MTU discovery is not working.
+		if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
+			return fmt.Errorf("adding rule to clamp traffic via tailscale0: %v", err)
+		}
+		return nil
+	}
+
+	if len(v4Backends) != 0 {
+		if !tsv4.IsValid() {
+			log.Printf("backend targets %v contain at least one IPv4 address, but this node's Tailscale IPs do not contain a valid IPv4 address: %v", backendAddrs, tsIPs)
+		} else if err := updateFirewall(tsv4, v4Backends); err != nil {
+			return fmt.Errorf("Installing IPv4 firewall rules: %w", err)
+		}
+	}
+	// Only the presence of IPv6 backends gates this section; whether the node
+	// can actually serve them is decided by the cases below.
+	if len(v6Backends) != 0 {
+		switch {
+		case !tsv6.IsValid():
+			log.Printf("backend targets %v contain at least one IPv6 address, but this node's Tailscale IPs do not contain a valid IPv6 address: %v", backendAddrs, tsIPs)
+		case !nfr.HasIPV6NAT():
+			log.Printf("backend targets %v contain at least one IPv6 address, but the chosen firewall mode does not support IPv6 NAT", backendAddrs)
+		default:
+			if err := updateFirewall(tsv6, v6Backends); err != nil {
+				return fmt.Errorf("Installing IPv6 firewall rules: %w", err)
+			}
+		}
+	}
+	return nil
+}
+
// settings is all the configuration for containerboot.
type settings struct {
AuthKey string
Hostname string
Routes *string
- // ProxyTo is the destination IP to which all incoming
+ // ProxyTargetIP is the destination IP to which all incoming
// Tailscale traffic should be proxied. If empty, no proxying
// is done. This is typically a locally reachable IP.
- ProxyTo string
+ ProxyTargetIP string
+ // ProxyTargetDNSName is a DNS name whose backing IP addresses all
+ // incoming Tailscale traffic should be proxied to.
+ ProxyTargetDNSName string
// TailnetTargetIP is the destination IP to which all incoming
// non-Tailscale traffic should be proxied. This is typically a
// Tailscale IP.
@@ -966,9 +1105,15 @@ func (s *settings) validate() error {
return fmt.Errorf("error validating tailscaled configfile contents: %w", err)
}
}
- if s.ProxyTo != "" && s.UserspaceMode {
+ if s.ProxyTargetIP != "" && s.UserspaceMode {
return errors.New("TS_DEST_IP is not supported with TS_USERSPACE")
}
+ if s.ProxyTargetDNSName != "" && s.UserspaceMode {
+ return errors.New("TS_DEST_DNS_NAME is not supported with TS_USERSPACE")
+ }
+ if s.ProxyTargetDNSName != "" && s.ProxyTargetIP != "" {
+ return errors.New("TS_DEST_DNS_NAME and TS_DEST_IP cannot both be set")
+ }
if s.TailnetTargetIP != "" && s.UserspaceMode {
return errors.New("TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE")
}
@@ -993,6 +1138,16 @@ func (s *settings) validate() error {
return nil
}
+// resolveDNS looks up the IPv4 and IPv6 addresses for name using the default
+// resolver and logs the result. The lookup is bound to ctx, so cancelling ctx
+// or letting its deadline expire aborts the lookup. On failure it returns a
+// nil slice and an error that wraps the resolver's error.
+func resolveDNS(ctx context.Context, name string) ([]net.IP, error) {
+	ips, err := net.DefaultResolver.LookupIP(ctx, "ip", name)
+	if err != nil {
+		return nil, fmt.Errorf("error looking up IPs for DNS name %s: %w", name, err)
+	}
+	log.Printf("%s resolved to %v", name, ips)
+
+	return ips, nil
+}
+
// defaultEnv returns the value of the given envvar name, or defVal if
// unset.
func defaultEnv(name, defVal string) string {
diff --git a/cmd/k8s-operator/sts.go b/cmd/k8s-operator/sts.go
index 4c800cdbb..114eb30e7 100644
--- a/cmd/k8s-operator/sts.go
+++ b/cmd/k8s-operator/sts.go
@@ -109,8 +109,9 @@ type tailscaleSTSConfig struct {
ParentResourceUID string
ChildResourceLabels map[string]string
- ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
- ClusterTargetIP string // ingress target
+ ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
+ ClusterTargetIP string // ingress target IP
+ ClusterTargetDNSName string // ingress target DNS name
// If set to true, operator should configure containerboot to forward
// cluster traffic via the proxy set up for Kubernetes Ingress.
ForwardClusterTrafficViaL7IngressProxy bool
@@ -536,6 +537,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
Value: sts.ClusterTargetIP,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
+ } else if sts.ClusterTargetDNSName != "" {
+ container.Env = append(container.Env, corev1.EnvVar{
+ Name: "TS_DEST_DNS_NAME",
+ Value: sts.ClusterTargetDNSName,
+ })
+ mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
} else if sts.TailnetTargetIP != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_TAILNET_TARGET_IP",
diff --git a/cmd/k8s-operator/svc.go b/cmd/k8s-operator/svc.go
index 8820a3554..136c68d09 100644
--- a/cmd/k8s-operator/svc.go
+++ b/cmd/k8s-operator/svc.go
@@ -200,10 +200,14 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
}
a.mu.Lock()
- if a.shouldExpose(svc) {
+ if a.shouldExposeClusterIP(svc) {
sts.ClusterTargetIP = svc.Spec.ClusterIP
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
+ } else if a.shouldExposeDNSName(svc) {
+ sts.ClusterTargetDNSName = svc.Spec.ExternalName
+ a.managedIngressProxies.Add(svc.UID)
+ gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
} else if ip := a.tailnetTargetAnnotation(svc); ip != "" {
sts.TailnetTargetIP = ip
a.managedEgressProxies.Add(svc.UID)
@@ -297,15 +301,22 @@ func validateService(svc *corev1.Service) []string {
}
func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool {
+ return a.shouldExposeClusterIP(svc) || a.shouldExposeDNSName(svc)
+}
+
+func (a *ServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool {
// Headless services can't be exposed, since there is no ClusterIP to
// forward to.
if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" {
return false
}
-
return a.hasLoadBalancerClass(svc) || a.hasExposeAnnotation(svc)
}
+// shouldExposeDNSName reports whether svc carries the expose annotation and is
+// an ExternalName Service with a non-empty external name.
+func (a *ServiceReconciler) shouldExposeDNSName(svc *corev1.Service) bool {
+	if !a.hasExposeAnnotation(svc) {
+		return false
+	}
+	isExternalName := svc.Spec.Type == corev1.ServiceTypeExternalName
+	return isExternalName && svc.Spec.ExternalName != ""
+}
+
func (a *ServiceReconciler) hasLoadBalancerClass(svc *corev1.Service) bool {
return svc != nil &&
svc.Spec.Type == corev1.ServiceTypeLoadBalancer &&
diff --git a/util/linuxfw/iptables_runner.go b/util/linuxfw/iptables_runner.go
index 83c069af4..f688e82da 100644
--- a/util/linuxfw/iptables_runner.go
+++ b/util/linuxfw/iptables_runner.go
@@ -373,6 +373,30 @@ func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) err
return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())
}
+// DNATWithLoadBalancer adds rules to the nat PREROUTING chain that DNAT
+// traffic destined for origDst to the addresses in dsts, distributing it
+// across them in round robin fashion using the iptables statistic match.
+// The iptables table (IPv4 or IPv6) is selected based on dsts[0].
+// It returns an error if dsts is empty.
+// NB: this function clears the nat PREROUTING chain on start, so it is only
+// safe to use on systems where Tailscale is the only process that uses this
+// chain (i.e containers).
+func (i *iptablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
+	if len(dsts) == 0 {
+		return fmt.Errorf("no destination addresses provided for DNAT of %s", origDst)
+	}
+	table := i.getIPTByAddr(dsts[0])
+	if err := table.ClearChain("nat", "PREROUTING"); err != nil && !isErrChainNotExist(err) {
+		// If clearing the PREROUTING chain fails, fail the whole operation. This
+		// rule is currently only used in Kubernetes containers where a
+		// failed container gets restarted which should hopefully fix things.
+		return fmt.Errorf("error clearing nat PREROUTING chain: %w", err)
+	}
+	// For n counting down from len(dsts) to 2, add a rule that sends every
+	// nth match to dsts[n-1]. Anything that matches none of these rules falls
+	// through to the unconditional rule for dsts[0] appended last.
+	for n := len(dsts); n >= 2; n-- {
+		dst := dsts[n-1] // the order in which rules for addrs are installed does not matter
+		if err := table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-m", "statistic", "--mode", "nth", "--every", fmt.Sprint(n), "--packet", "0", "-j", "DNAT", "--to-destination", dst.String()); err != nil {
+			return fmt.Errorf("error adding DNAT rule for %s: %w", dst.String(), err)
+		}
+	}
+	// If the packet falls through to this rule, we route to the first destination in the list unconditionally.
+	return table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dsts[0].String())
+}
+
func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
table := i.getIPTByAddr(addr)
return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go
index 144a9d942..61d5992e3 100644
--- a/util/linuxfw/nftables_runner.go
+++ b/util/linuxfw/nftables_runner.go
@@ -16,6 +16,7 @@ import (
"strings"
"github.com/google/nftables"
+ "github.com/google/nftables/binaryutil"
"github.com/google/nftables/expr"
"golang.org/x/sys/unix"
"tailscale.com/net/tsaddr"
@@ -114,7 +115,6 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
dadderLen = 16
fam = unix.NFPROTO_IPV6
}
-
dnatRule := &nftables.Rule{
Table: nat,
Chain: preroutingCh,
@@ -145,6 +145,91 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
return n.conn.Flush()
}
+// DNATWithLoadBalancer inserts a rule into the nat PREROUTING chain that
+// DNATs traffic destined for origDst to one of dsts. The backend is picked by
+// an incrementing number generator, taken modulo len(dsts), that indexes an
+// anonymous map from integer to backend address. The address family is taken
+// from dsts[0]. It returns an error if dsts is empty.
+//
+// TODO (irbekrm): it was observed that if the same client makes frequent
+// connections, the connections are frequently dropped; investigate why.
+// NOTE(review): the map keys were previously encoded big-endian although the
+// set declares native-endian keys; presumably the number generator's register
+// value is native-endian too, so on little-endian hosts only key 0 could
+// match. Keys are now native-endian - confirm this resolves the drops.
+// NOTE(review): each call inserts a new rule without removing rules installed
+// by earlier calls - confirm whether stale rules need cleaning up.
+func (n *nftablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
+	if len(dsts) == 0 {
+		return fmt.Errorf("no destination addresses provided for DNAT of %s", origDst)
+	}
+	nat, preroutingCh, err := n.ensurePreroutingChain(dsts[0])
+	if err != nil {
+		return fmt.Errorf("error ensuring PREROUTING chain in nat table: %w", err)
+	}
+
+	// Figure out if we are dealing with IPv4 or IPv6 addresses and set
+	// parameters accordingly. The offset and length locate the destination
+	// address field within the IPv4/IPv6 header.
+	var (
+		dstsMapValType               = nftables.TypeIPAddr
+		origDstIPHeaderOffset uint32 = 16
+		origDstIPHeaderLen    uint32 = 4
+		fam                          = nftables.TableFamilyIPv4
+	)
+	if dsts[0].Is6() {
+		dstsMapValType = nftables.TypeIP6Addr
+		origDstIPHeaderOffset = 24
+		origDstIPHeaderLen = 16
+		fam = nftables.TableFamilyIPv6
+	}
+
+	// Map element i maps the generated number i to backend dsts[i]. The key
+	// encoding must agree with the KeyByteOrder declared on the set below.
+	mapElements := make([]nftables.SetElement, len(dsts))
+	for i, addr := range dsts {
+		mapElements[i] = nftables.SetElement{
+			Key: binaryutil.NativeEndian.PutUint32(uint32(i)),
+			Val: addr.AsSlice(),
+		}
+	}
+	dstsMap := &nftables.Set{
+		Table:        nat,
+		KeyByteOrder: binaryutil.NativeEndian,
+		KeyType:      nftables.TypeInteger,
+		DataType:     dstsMapValType,
+		IsMap:        true,
+		Anonymous:    true,
+		Constant:     true, // Anonymous sets must be constant (unmodifiable)
+	}
+	if err := n.conn.AddSet(dstsMap, mapElements); err != nil {
+		return fmt.Errorf("error creating a new map: %w", err)
+	}
+
+	dnatRule := &nftables.Rule{
+		Table: nat,
+		Chain: preroutingCh,
+		Exprs: []expr.Any{
+			// Load the packet's destination address into register 1...
+			&expr.Payload{
+				DestRegister: 1,
+				Base:         expr.PayloadBaseNetworkHeader,
+				Offset:       origDstIPHeaderOffset,
+				Len:          origDstIPHeaderLen,
+			},
+			// ...and only continue if it equals origDst.
+			&expr.Cmp{
+				Op:       expr.CmpOpEq,
+				Register: 1,
+				Data:     origDst.AsSlice(),
+			},
+			// Generate the next number in 0..len(dsts)-1 into register 1.
+			&expr.Numgen{
+				Register: 1,
+				Type:     unix.NFT_NG_INCREMENTAL,
+				Modulus:  uint32(len(dsts)),
+				Offset:   0,
+			},
+			// Look the number up in the map; the backend address lands in
+			// register 2.
+			&expr.Lookup{
+				SourceRegister: 1,
+				DestRegister:   2,
+				SetName:        dstsMap.Name,
+				SetID:          dstsMap.ID,
+				IsDestRegSet:   true,
+			},
+			// DNAT to the address held in register 2.
+			&expr.NAT{
+				Type:       expr.NATTypeDestNAT,
+				Family:     uint32(fam),
+				RegAddrMin: 2,
+			},
+		},
+	}
+	n.conn.InsertRule(dnatRule)
+	return n.conn.Flush()
+}
+
func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
nat, preroutingCh, err := n.ensurePreroutingChain(dst)
if err != nil {
@@ -524,6 +609,14 @@ type NetfilterRunner interface {
// to the provided destination, as used in the Kubernetes ingress proxies.
AddDNATRule(origDst, dst netip.Addr) error
+ // DNATWithLoadBalancer adds a rule to the nat/PREROUTING chain to DNAT
+ // traffic destined for the given original destination to the given new
+ // destination(s) using round robin to load balance if more than one
+ // destination is provided. This is used to forward all traffic destined
+ // for the Tailscale interface to the provided destination(s), as used
+ // in the Kubernetes ingress proxies.
+ DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error
+
// AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT
// traffic destined for dst to src.
// This is used to forward traffic destined for the local machine over
@@ -533,7 +626,7 @@ type NetfilterRunner interface {
// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
// all traffic inbound from any interface except exemptInterface to dst.
// This is used to forward traffic destined for the local machine over
- // the Tailscale interface, as used in the Kubernetes egress proxies.//
+ // the Tailscale interface, as used in the Kubernetes egress proxies.
DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error
// ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for