Skip to content

Commit c76a6e5

Browse files
committed
derp: track client-advertised non-ideal DERP connections in more places
In f77821f (released in v1.72.0), we made the client tell a DERP server when the connection was not its ideal choice (the first node in its region). But we didn't do anything with that information until now.

This adds a metric about how many such connections are on a given derper, and also adds a bit to the PeerPresentFlags bitmask so watchers can identify (and rebalance) them.

Updates tailscale/corp#372

Change-Id: Ief8af448750aa6d598e5939a57c062f4e55962be
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
1 parent fd77965 commit c76a6e5

File tree

5 files changed

+36
-7
lines changed

5 files changed

+36
-7
lines changed

cmd/tailscale/depaware.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
155155
tailscale.com/util/clientmetric from tailscale.com/net/netcheck+
156156
tailscale.com/util/cloudenv from tailscale.com/net/dnscache+
157157
tailscale.com/util/cmpver from tailscale.com/net/tshttpproxy+
158-
tailscale.com/util/ctxkey from tailscale.com/types/logger
158+
tailscale.com/util/ctxkey from tailscale.com/types/logger+
159159
💣 tailscale.com/util/deephash from tailscale.com/util/syspolicy/setting
160160
L 💣 tailscale.com/util/dirwalk from tailscale.com/metrics
161161
tailscale.com/util/dnsname from tailscale.com/cmd/tailscale/cli+

derp/derp.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ const (
147147
PeerPresentIsRegular = 1 << 0
148148
PeerPresentIsMeshPeer = 1 << 1
149149
PeerPresentIsProber = 1 << 2
150+
PeerPresentNotIdeal = 1 << 3 // client said derp server is not its Region.Nodes[0] ideal node
150151
)
151152

152153
var bin = binary.BigEndian

derp/derp_server.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import (
4747
"tailscale.com/tstime/rate"
4848
"tailscale.com/types/key"
4949
"tailscale.com/types/logger"
50+
"tailscale.com/util/ctxkey"
5051
"tailscale.com/util/mak"
5152
"tailscale.com/util/set"
5253
"tailscale.com/util/slicesx"
@@ -57,6 +58,16 @@ import (
5758
// verbosely log whenever DERP drops a packet.
5859
var verboseDropKeys = map[key.NodePublic]bool{}
5960

61+
// IdealNodeHeader is the HTTP request header sent on DERP HTTP client requests
62+
// to indicate that they're not connecting to their ideal (Region.Nodes[0]) node.
63+
// The HTTP header value is the name of the node they wish they were connected
64+
// to. This is an optional header.
65+
const IdealNodeHeader = "Ideal-Node"
66+
67+
// IdealNodeContextKey is the context key used to pass the IdealNodeHeader value
68+
// from the HTTP handler to the DERP server's Accept method.
69+
var IdealNodeContextKey = ctxkey.New[string]("ideal-node", "")
70+
6071
func init() {
6172
keys := envknob.String("TS_DEBUG_VERBOSE_DROPS")
6273
if keys == "" {
@@ -133,6 +144,7 @@ type Server struct {
133144
sentPong expvar.Int // number of pong frames enqueued to client
134145
accepts expvar.Int
135146
curClients expvar.Int
147+
curClientsNotIdeal expvar.Int
136148
curHomeClients expvar.Int // ones with preferred
137149
dupClientKeys expvar.Int // current number of public keys we have 2+ connections for
138150
dupClientConns expvar.Int // current number of connections sharing a public key
@@ -603,6 +615,9 @@ func (s *Server) registerClient(c *sclient) {
603615
}
604616
s.keyOfAddr[c.remoteIPPort] = c.key
605617
s.curClients.Add(1)
618+
if c.isNotIdealConn {
619+
s.curClientsNotIdeal.Add(1)
620+
}
606621
s.broadcastPeerStateChangeLocked(c.key, c.remoteIPPort, c.presentFlags(), true)
607622
}
608623

@@ -693,6 +708,9 @@ func (s *Server) unregisterClient(c *sclient) {
693708
if c.preferred {
694709
s.curHomeClients.Add(-1)
695710
}
711+
if c.isNotIdealConn {
712+
s.curClientsNotIdeal.Add(-1)
713+
}
696714
}
697715

698716
// addPeerGoneFromRegionWatcher adds a function to be called when peer is gone
@@ -809,8 +827,8 @@ func (s *Server) accept(ctx context.Context, nc Conn, brw *bufio.ReadWriter, rem
809827
return fmt.Errorf("receive client key: %v", err)
810828
}
811829

812-
clientAP, _ := netip.ParseAddrPort(remoteAddr)
813-
if err := s.verifyClient(ctx, clientKey, clientInfo, clientAP.Addr()); err != nil {
830+
remoteIPPort, _ := netip.ParseAddrPort(remoteAddr)
831+
if err := s.verifyClient(ctx, clientKey, clientInfo, remoteIPPort.Addr()); err != nil {
814832
return fmt.Errorf("client %v rejected: %v", clientKey, err)
815833
}
816834

@@ -820,8 +838,6 @@ func (s *Server) accept(ctx context.Context, nc Conn, brw *bufio.ReadWriter, rem
820838
ctx, cancel := context.WithCancel(ctx)
821839
defer cancel()
822840

823-
remoteIPPort, _ := netip.ParseAddrPort(remoteAddr)
824-
825841
c := &sclient{
826842
connNum: connNum,
827843
s: s,
@@ -838,6 +854,7 @@ func (s *Server) accept(ctx context.Context, nc Conn, brw *bufio.ReadWriter, rem
838854
sendPongCh: make(chan [8]byte, 1),
839855
peerGone: make(chan peerGoneMsg),
840856
canMesh: s.isMeshPeer(clientInfo),
857+
isNotIdealConn: IdealNodeContextKey.Value(ctx) != "",
841858
peerGoneLim: rate.NewLimiter(rate.Every(time.Second), 3),
842859
}
843860

@@ -1511,6 +1528,7 @@ type sclient struct {
15111528
peerGone chan peerGoneMsg // write request that a peer is not at this server (not used by mesh peers)
15121529
meshUpdate chan struct{} // write request to write peerStateChange
15131530
canMesh bool // clientInfo had correct mesh token for inter-region routing
1531+
isNotIdealConn bool // client indicated it is not its ideal node in the region
15141532
isDup atomic.Bool // whether more than 1 sclient for key is connected
15151533
isDisabled atomic.Bool // whether sends to this peer are disabled due to active/active dups
15161534
debug bool // turn on for verbose logging
@@ -1546,6 +1564,9 @@ func (c *sclient) presentFlags() PeerPresentFlags {
15461564
if c.canMesh {
15471565
f |= PeerPresentIsMeshPeer
15481566
}
1567+
if c.isNotIdealConn {
1568+
f |= PeerPresentNotIdeal
1569+
}
15491570
if f == 0 {
15501571
return PeerPresentIsRegular
15511572
}
@@ -2051,6 +2072,7 @@ func (s *Server) ExpVar() expvar.Var {
20512072
m.Set("gauge_current_file_descriptors", expvar.Func(func() any { return metrics.CurrentFDs() }))
20522073
m.Set("gauge_current_connections", &s.curClients)
20532074
m.Set("gauge_current_home_connections", &s.curHomeClients)
2075+
m.Set("gauge_current_notideal_connections", &s.curClientsNotIdeal)
20542076
m.Set("gauge_clients_total", expvar.Func(func() any { return len(s.clientsMesh) }))
20552077
m.Set("gauge_clients_local", expvar.Func(func() any { return len(s.clients) }))
20562078
m.Set("gauge_clients_remote", expvar.Func(func() any { return len(s.clientsMesh) - len(s.clients) }))

derp/derphttp/derphttp_client.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
498498
req.Header.Set("Connection", "Upgrade")
499499
if !idealNodeInRegion && reg != nil {
500500
// This is purely informative for now (2024-07-06) for stats:
501-
req.Header.Set("Ideal-Node", reg.Nodes[0].Name)
501+
req.Header.Set(derp.IdealNodeHeader, reg.Nodes[0].Name)
502502
// TODO(bradfitz,raggi): start a time.AfterFunc for 30m-1h or so to
503503
// dialNode(reg.Nodes[0]) and see if we can even TCP connect to it. If
504504
// so, TLS handshake it as well (which is mixed up in this massive

derp/derphttp/derphttp_server.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ const fastStartHeader = "Derp-Fast-Start"
2121
// Handler returns an http.Handler to be mounted at /derp, serving s.
2222
func Handler(s *derp.Server) http.Handler {
2323
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
24+
ctx := r.Context()
25+
2426
// These are installed both here and in cmd/derper. The check here
2527
// catches both cmd/derper run with DERP disabled (STUN only mode) as
2628
// well as DERP being run in tests with derphttp.Handler directly,
@@ -66,7 +68,11 @@ func Handler(s *derp.Server) http.Handler {
6668
pubKey.UntypedHexString())
6769
}
6870

69-
s.Accept(r.Context(), netConn, conn, netConn.RemoteAddr().String())
71+
if v := r.Header.Get(derp.IdealNodeHeader); v != "" {
72+
ctx = derp.IdealNodeContextKey.WithValue(ctx, v)
73+
}
74+
75+
s.Accept(ctx, netConn, conn, netConn.RemoteAddr().String())
7076
})
7177
}
7278

0 commit comments

Comments
 (0)