summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJames Tucker <james@tailscale.com>2023-06-13 16:28:58 -0700
committerJames Tucker <james@tailscale.com>2023-06-13 16:28:58 -0700
commita7c1ee19ae105d54985f2056a927ea130e8ef89f (patch)
treece477b011667dff17dab6c8eaad83a811404936a
parent99f17a7135647af6b7ecee2c1bbbbfaf8921a56e (diff)
downloadtailscale-raggi/derp-probe-stun-loss.tar.xz
tailscale-raggi/derp-probe-stun-loss.zip
prober: make DERP probes fail when stun requests failraggi/derp-probe-stun-loss
The client treats a single lost stun request as a failure in netcheck and will bounce regions as a result. The prober results should represent this if it is happening regularly in a region we need to know about that. The fist step is to report the data with more precision; the likely immediate next step is to adjust alerting to soften the blow. Updates tailscale/corp#11492 Signed-off-by: James Tucker <james@tailscale.com>
-rw-r--r--prober/derp.go72
1 files changed, 35 insertions, 37 deletions
diff --git a/prober/derp.go b/prober/derp.go
index cd267e27e..e0de5f803 100644
--- a/prober/derp.go
+++ b/prober/derp.go
@@ -13,6 +13,7 @@ import (
"log"
"net"
"net/http"
+ "net/netip"
"strconv"
"strings"
"sync"
@@ -209,47 +210,44 @@ func derpProbeUDP(ctx context.Context, ipStr string, port int) (latency time.Dur
defer pc.Close()
uc := pc.(*net.UDPConn)
- tx := stun.NewTxID()
- req := stun.Request(tx)
-
+ ip, err := netip.ParseAddr(ipStr)
+ if err != nil {
+ return 0, err
+ }
if port == 0 {
port = 3478
}
- for {
- ip := net.ParseIP(ipStr)
- _, err := uc.WriteToUDP(req, &net.UDPAddr{IP: ip, Port: port})
- if err != nil {
- return 0, err
- }
- // Binding requests and responses are fairly small (~40 bytes),
- // but in practice a STUN response can be up to the size of the
- // path MTU, so we use a jumbo frame size buffer here.
- buf := make([]byte, 9000)
- uc.SetReadDeadline(time.Now().Add(2 * time.Second))
- t0 := time.Now()
- n, _, err := uc.ReadFromUDP(buf)
- d := time.Since(t0)
- if err != nil {
- if ctx.Err() != nil {
- return 0, fmt.Errorf("timeout reading from %v: %v", ip, err)
- }
- if d < time.Second {
- return 0, fmt.Errorf("error reading from %v: %v", ip, err)
- }
- time.Sleep(100 * time.Millisecond)
- continue
- }
- txBack, _, err := stun.ParseResponse(buf[:n])
- if err != nil {
- return 0, fmt.Errorf("parsing STUN response from %v: %v", ip, err)
- }
- if txBack != tx {
- return 0, fmt.Errorf("read wrong tx back from %v", ip)
- }
- if latency == 0 || d < latency {
- latency = d
+ addr := netip.AddrPortFrom(ip, uint16(port))
+
+ // Binding requests and responses are fairly small (~40 bytes),
+ // but in practice a STUN response can be up to the size of the
+ // path MTU, so we use a jumbo frame size buffer here.
+ buf := make([]byte, 9000)
+
+ tx := stun.NewTxID()
+ req := stun.Request(tx)
+ _, err = uc.WriteToUDPAddrPort(req, addr)
+ if err != nil {
+ return 0, err
+ }
+ t0 := time.Now()
+ n, _, err := uc.ReadFromUDP(buf)
+ d := time.Since(t0)
+ if err != nil {
+ if ctx.Err() != nil || err == context.DeadlineExceeded || d > time.Second {
+ return 0, fmt.Errorf("timeout reading from %s: (%s) %w", addr, d, err)
}
- break
+ return 0, fmt.Errorf("error reading from %s: (%s) %w", addr, d, err)
+ }
+ txBack, _, err := stun.ParseResponse(buf[:n])
+ if err != nil {
+ return 0, fmt.Errorf("parsing STUN response from %s: %v", addr, err)
+ }
+ if txBack != tx {
+ return 0, fmt.Errorf("read wrong tx back from %s", addr)
+ }
+ if latency == 0 || d < latency {
+ latency = d
}
return latency, nil
}