Skip to content

Commit aa04f61

Browse files
committed
net/netcheck: adjust HTTPS latency check to connection time and avoid data race
The go-httpstat package has a data race when used with connections that are performing happy-eyeballs connection setups as we are in the DERP client. There is a long-stale PR upstream to address this, however revisiting the purpose of this code suggests we don't really need httpstat here. The code populates a latency table that may be used to compare to STUN latency, which is a lightweight RTT check. Switching out the reported timing here to simply the request HTTP request RTT avoids the problematic package. Fixes tailscale/corp#25095 Signed-off-by: James Tucker <james@tailscale.com>
1 parent 73128e2 commit aa04f61

File tree

4 files changed

+19
-12
lines changed

4 files changed

+19
-12
lines changed

cmd/k8s-operator/depaware.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,6 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
225225
github.com/tailscale/wireguard-go/rwcancel from github.com/tailscale/wireguard-go/device+
226226
github.com/tailscale/wireguard-go/tai64n from github.com/tailscale/wireguard-go/device
227227
💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+
228-
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
229228
L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4
230229
L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+
231230
L github.com/vishvananda/netns from github.com/tailscale/netlink+

cmd/tailscale/depaware.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
5858
L 💣 github.com/tailscale/netlink from tailscale.com/util/linuxfw
5959
L 💣 github.com/tailscale/netlink/nl from github.com/tailscale/netlink
6060
github.com/tailscale/web-client-prebuilt from tailscale.com/client/web
61-
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
6261
github.com/toqueteos/webbrowser from tailscale.com/cmd/tailscale/cli
6362
L github.com/vishvananda/netns from github.com/tailscale/netlink+
6463
github.com/x448/float16 from github.com/fxamacker/cbor/v2
@@ -306,7 +305,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
306305
net from crypto/tls+
307306
net/http from expvar+
308307
net/http/cgi from tailscale.com/cmd/tailscale/cli
309-
net/http/httptrace from github.com/tcnksm/go-httpstat+
308+
net/http/httptrace from golang.org/x/net/http2+
310309
net/http/httputil from tailscale.com/client/web+
311310
net/http/internal from net/http+
312311
net/netip from go4.org/netipx+

cmd/tailscaled/depaware.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
181181
💣 github.com/tailscale/wireguard-go/tun from github.com/tailscale/wireguard-go/device+
182182
github.com/tailscale/xnet/webdav from tailscale.com/drive/driveimpl+
183183
github.com/tailscale/xnet/webdav/internal/xml from github.com/tailscale/xnet/webdav
184-
github.com/tcnksm/go-httpstat from tailscale.com/net/netcheck
185184
LD github.com/u-root/u-root/pkg/termios from tailscale.com/ssh/tailssh
186185
L github.com/u-root/uio/rand from github.com/insomniacslk/dhcp/dhcpv4
187186
L github.com/u-root/uio/uio from github.com/insomniacslk/dhcp/dhcpv4+
@@ -553,7 +552,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
553552
net from crypto/tls+
554553
net/http from expvar+
555554
net/http/httptest from tailscale.com/control/controlclient
556-
net/http/httptrace from github.com/tcnksm/go-httpstat+
555+
net/http/httptrace from github.com/prometheus-community/pro-bing+
557556
net/http/httputil from github.com/aws/smithy-go/transport/http+
558557
net/http/internal from net/http+
559558
net/http/pprof from tailscale.com/cmd/tailscaled+

net/netcheck/netcheck.go

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"syscall"
2424
"time"
2525

26-
"github.com/tcnksm/go-httpstat"
2726
"tailscale.com/derp/derphttp"
2827
"tailscale.com/envknob"
2928
"tailscale.com/net/captivedetection"
@@ -1110,17 +1109,20 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
11101109
return nil
11111110
}
11121111

1112+
// measureHTTPSLatency measures HTTP request latency to the DERP region, but
1113+
// only returns success if an HTTPS request to the region succeeds.
11131114
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netip.Addr, error) {
11141115
metricHTTPSend.Add(1)
1115-
var result httpstat.Result
1116-
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), httpsProbeTimeout)
1116+
ctx, cancel := context.WithTimeout(ctx, httpsProbeTimeout)
11171117
defer cancel()
11181118

11191119
var ip netip.Addr
11201120

11211121
dc := derphttp.NewNetcheckClient(c.logf, c.NetMon)
11221122
defer dc.Close()
11231123

1124+
// DialRegionTLS may dial multiple times if a node is not available, as such
1125+
// it does not have stable timing to measure.
11241126
tlsConn, tcpConn, node, err := dc.DialRegionTLS(ctx, reg)
11251127
if err != nil {
11261128
return 0, ip, err
@@ -1138,6 +1140,8 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
11381140
connc := make(chan *tls.Conn, 1)
11391141
connc <- tlsConn
11401142

1143+
// make an HTTP request to measure, as this enables us to account for MITM
1144+
// overhead in e.g. corp environments that have HTTP MITM in front of DERP.
11411145
tr := &http.Transport{
11421146
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
11431147
return nil, errors.New("unexpected DialContext dial")
@@ -1153,12 +1157,17 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
11531157
}
11541158
hc := &http.Client{Transport: tr}
11551159

1160+
// This is the request that will be measured, the request and response
1161+
// should be small enough to fit into a single packet each way unless the
1162+
// connection has already become unstable.
11561163
req, err := http.NewRequestWithContext(ctx, "GET", "https://"+node.HostName+"/derp/latency-check", nil)
11571164
if err != nil {
11581165
return 0, ip, err
11591166
}
11601167

1168+
startTime := c.timeNow()
11611169
resp, err := hc.Do(req)
1170+
reqDur := c.timeNow().Sub(startTime)
11621171
if err != nil {
11631172
return 0, ip, err
11641173
}
@@ -1175,11 +1184,12 @@ func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegio
11751184
if err != nil {
11761185
return 0, ip, err
11771186
}
1178-
result.End(c.timeNow())
11791187

1180-
// TODO: decide best timing heuristic here.
1181-
// Maybe the server should return the tcpinfo_rtt?
1182-
return result.ServerProcessing, ip, nil
1188+
// return the connection duration, not the request duration, as this is the
1189+
// best approximation of the RTT latency to the node. Note that the
1190+
// connection setup performs happy-eyeballs and TLS so there are additional
1191+
// overheads.
1192+
return reqDur, ip, nil
11831193
}
11841194

11851195
func (c *Client) measureAllICMPLatency(ctx context.Context, rs *reportState, need []*tailcfg.DERPRegion) error {

0 commit comments

Comments
 (0)