kubernetes · ttetyanka · Aug 28, 2025 · Sep 23, 2025 · Sep 23, 2025 · Sep 23, 2025
diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go
@@ -349,6 +349,8 @@ type AutoscalingOptions struct {
 	CapacitybufferControllerEnabled bool
 	// CapacitybufferPodInjectionEnabled tells if CA should injects fake pods for capacity buffers that are ready for provisioning
 	CapacitybufferPodInjectionEnabled bool
+	// NodeRemovalLatencyTrackingEnabled is used to enable/disable node removal latency tracking.
+	NodeRemovalLatencyTrackingEnabled bool
 }
 
 // KubeClientOptions specify options for kube client

diff --git a/cluster-autoscaler/config/flags/flags.go b/cluster-autoscaler/config/flags/flags.go
@@ -230,6 +230,7 @@ var (
 	nodeDeletionCandidateTTL                     = flag.Duration("node-deletion-candidate-ttl", time.Duration(0), "Maximum time a node can be marked as removable before the marking becomes stale. This sets the TTL of Cluster-Autoscaler's state if the Cluste-Autoscaler deployment becomes inactive")
 	capacitybufferControllerEnabled              = flag.Bool("capacity-buffer-controller-enabled", false, "Whether to enable the default controller for capacity buffers or not")
 	capacitybufferPodInjectionEnabled            = flag.Bool("capacity-buffer-pod-injection-enabled", false, "Whether to enable pod list processor that processes ready capacity buffers and injects fake pods accordingly")
+	nodeRemovalLatencyTrackingEnabled            = flag.Bool("enable-node-removal-latency-tracking", false, "Whether to track latency from when a node is marked unneeded until it is removed or needed again.")
 
 	// Deprecated flags
 	ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)")
@@ -414,6 +415,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		NodeDeletionCandidateTTL:                     *nodeDeletionCandidateTTL,
 		CapacitybufferControllerEnabled:              *capacitybufferControllerEnabled,
 		CapacitybufferPodInjectionEnabled:            *capacitybufferPodInjectionEnabled,
+		NodeRemovalLatencyTrackingEnabled:            *nodeRemovalLatencyTrackingEnabled,
 	}
 }
 

diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator.go b/cluster-autoscaler/core/scaledown/actuation/actuator.go
@@ -27,6 +27,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/budgets"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
+	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/latencytracker"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
 	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
 	"k8s.io/autoscaler/cluster-autoscaler/core/utils"
@@ -58,6 +59,7 @@ const (
 type Actuator struct {
 	ctx                   *context.AutoscalingContext
 	nodeDeletionTracker   *deletiontracker.NodeDeletionTracker
+	nodeLatencyTracker    latencytracker.LatencyTracker
 	nodeDeletionScheduler *GroupDeletionScheduler
 	deleteOptions         options.NodeDeleteOptions
 	drainabilityRules     rules.Rules
@@ -78,7 +80,7 @@ type actuatorNodeGroupConfigGetter interface {
 }
 
 // NewActuator returns a new instance of Actuator.
-func NewActuator(ctx *context.AutoscalingContext, scaleStateNotifier nodegroupchange.NodeGroupChangeObserver, ndt *deletiontracker.NodeDeletionTracker, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, configGetter actuatorNodeGroupConfigGetter) *Actuator {
+func NewActuator(ctx *context.AutoscalingContext, scaleStateNotifier nodegroupchange.NodeGroupChangeObserver, ndt *deletiontracker.NodeDeletionTracker, nlt latencytracker.LatencyTracker, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, configGetter actuatorNodeGroupConfigGetter) *Actuator {
 	ndb := NewNodeDeletionBatcher(ctx, scaleStateNotifier, ndt, ctx.NodeDeletionBatcherInterval)
 	legacyFlagDrainConfig := SingleRuleDrainConfig(ctx.MaxGracefulTerminationSec)
 	var evictor Evictor
@@ -90,6 +92,7 @@ func NewActuator(ctx *context.AutoscalingContext, scaleStateNotifier nodegroupch
 	return &Actuator{
 		ctx:                       ctx,
 		nodeDeletionTracker:       ndt,
+		nodeLatencyTracker:        nlt,
 		nodeDeletionScheduler:     NewGroupDeletionScheduler(ctx, ndt, ndb, evictor),
 		budgetProcessor:           budgets.NewScaleDownBudgetProcessor(ctx),
 		deleteOptions:             deleteOptions,
@@ -346,10 +349,16 @@ func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, nodeGroup cloudprovider
 
 		if force {
 			go a.nodeDeletionScheduler.scheduleForceDeletion(nodeInfo, nodeGroup, batchSize, drain)
+			if a.nodeLatencyTracker != nil {
+				a.nodeLatencyTracker.ObserveDeletion(node.Name, time.Now())
+			}
 			continue
 		}
 
 		go a.nodeDeletionScheduler.ScheduleDeletion(nodeInfo, nodeGroup, batchSize, drain)
+		if a.nodeLatencyTracker != nil {
+			a.nodeLatencyTracker.ObserveDeletion(node.Name, time.Now())
+		}
 	}
 }
 

diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go
@@ -18,6 +18,7 @@ package actuation
 
 import (
 	"fmt"
+	"strings"
 	"sync"
 	"testing"
 	"time"
@@ -89,6 +90,21 @@ type startDeletionTestCase struct {
 	wantNodeDeleteResults map[string]status.NodeDeleteResult
 }
 
+// FakeLatencyTracker implements the same interface as NodeLatencyTracker
+type fakeLatencyTracker struct {
+	ObservedNodes []string
+}
+
+// ObserveDeletion simply records the node name
+func (f *fakeLatencyTracker) ObserveDeletion(nodeName string, timestamp time.Time) {
+	f.ObservedNodes = append(f.ObservedNodes, nodeName)
+}
+func (f *fakeLatencyTracker) UpdateStateWithUnneededList(list []*apiv1.Node, currentlyInDeletion map[string]bool, timestamp time.Time) {
+}
+func (f *fakeLatencyTracker) UpdateThreshold(nodeName string, threshold time.Duration) {}
+
+func (f *fakeLatencyTracker) GetTrackedNodes() []string { return nil }
+
 func getStartDeletionTestCases(ignoreDaemonSetsUtilization bool, force bool, suffix string) map[string]startDeletionTestCase {
 	toBeDeletedTaint := apiv1.Taint{Key: taints.ToBeDeletedTaint, Effect: apiv1.TaintEffectNoSchedule}
 
@@ -1274,11 +1290,13 @@ func runStartDeletionTest(t *testing.T, tc startDeletionTestCase, force bool) {
 	ndb := NewNodeDeletionBatcher(&ctx, scaleStateNotifier, ndt, 0*time.Second)
 	legacyFlagDrainConfig := SingleRuleDrainConfig(ctx.MaxGracefulTerminationSec)
 	evictor := Evictor{EvictionRetryTime: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom, shutdownGracePeriodByPodPriority: legacyFlagDrainConfig, fullDsEviction: force}
+	fakeNodeLatencyTracker := &fakeLatencyTracker{}
 	actuator := Actuator{
 		ctx: &ctx, nodeDeletionTracker: ndt,
 		nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, evictor),
 		budgetProcessor:       budgets.NewScaleDownBudgetProcessor(&ctx),
 		configGetter:          nodegroupconfig.NewDefaultNodeGroupConfigProcessor(ctx.NodeGroupDefaults),
+		nodeLatencyTracker:    fakeNodeLatencyTracker,
 	}
 
 	var gotResult status.ScaleDownResult
@@ -1375,6 +1393,19 @@ taintsLoop:
 	if diff := cmp.Diff(tc.wantNodeDeleteResults, nodeDeleteResults, cmpopts.EquateEmpty(), cmpopts.EquateErrors()); diff != "" {
 		t.Errorf("NodeDeleteResults diff (-want +got):\n%s", diff)
 	}
+	// Verify ObserveDeletion was called for all nodes that were actually deleted
+	for _, expectedNode := range tc.wantDeletedNodes {
+		found := false
+		for _, observed := range fakeNodeLatencyTracker.ObservedNodes {
+			if observed == expectedNode {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Errorf("Expected ObserveDeletion to be called for node %s, but it wasn't", expectedNode)
+		}
+	}
 }
 
 func TestStartDeletion(t *testing.T) {
@@ -1553,10 +1584,12 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
 			ndb := NewNodeDeletionBatcher(&ctx, scaleStateNotifier, ndt, deleteInterval)
 			legacyFlagDrainConfig := SingleRuleDrainConfig(ctx.MaxGracefulTerminationSec)
 			evictor := Evictor{EvictionRetryTime: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom, shutdownGracePeriodByPodPriority: legacyFlagDrainConfig}
+			fakeNodeLatencyTracker := &fakeLatencyTracker{}
 			actuator := Actuator{
 				ctx: &ctx, nodeDeletionTracker: ndt,
 				nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, evictor),
 				budgetProcessor:       budgets.NewScaleDownBudgetProcessor(&ctx),
+				nodeLatencyTracker:    fakeNodeLatencyTracker,
 			}
 
 			for _, nodes := range deleteNodes {
@@ -1584,6 +1617,33 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
 			if diff := cmp.Diff(test.wantSuccessfulDeletion, gotDeletedNodes); diff != "" {
 				t.Errorf("Successful deleteions per node group diff (-want +got):\n%s", diff)
 			}
+			for _, nodes := range deleteNodes {
+				for _, node := range nodes {
+					// Extract node group from node name
+					parts := strings.Split(node.Name, "-")
+					if len(parts) < 3 {
+						continue
+					}
+					ngName := strings.Join(parts[:2], "-")
+
+					// Skip check if no successful deletions expected for this group
+					if test.wantSuccessfulDeletion[ngName] == 0 {
+						continue
+					}
+
+					// Verify ObserveDeletion was called
+					found := false
+					for _, observedNode := range fakeNodeLatencyTracker.ObservedNodes {
+						if observedNode == node.Name {
+							found = true
+							break
+						}
+					}
+					if !found {
+						t.Errorf("Expected ObserveDeletion to be called for node %s", node.Name)
+					}
+				}
+			}
 		})
 	}
 }

diff --git a/cluster-autoscaler/core/scaledown/latencytracker/node_latency_tracker.go b/cluster-autoscaler/core/scaledown/latencytracker/node_latency_tracker.go
@@ -0,0 +1,120 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package latencytracker
+
+import (
+	"time"
+
+	apiv1 "k8s.io/api/core/v1"
+	"k8s.io/autoscaler/cluster-autoscaler/metrics"
+	"k8s.io/klog/v2"
+)
+
+// LatencyTracker defines the interface for tracking node removal latency.
+// Implementations record when nodes become unneeded, observe deletion events,
+// and expose thresholds for measuring node removal duration.
+type LatencyTracker interface {
+	ObserveDeletion(nodeName string, timestamp time.Time)
+	UpdateStateWithUnneededList(list []*apiv1.Node, currentlyInDeletion map[string]bool, timestamp time.Time)
+	UpdateThreshold(nodeName string, threshold time.Duration)
+	GetTrackedNodes() []string
+}
+type nodeInfo struct {
+	unneededSince time.Time
+	threshold     time.Duration
+}
+
+// NodeLatencyTracker is a concrete implementation of LatencyTracker.
+// It keeps track of nodes that are marked as unneeded, when they became unneeded,
+// and thresholds to adjust node removal latency metrics.
+type NodeLatencyTracker struct {
+	nodes map[string]nodeInfo
+}
+
+// NewNodeLatencyTracker creates a new tracker.
+func NewNodeLatencyTracker() *NodeLatencyTracker {
+	return &NodeLatencyTracker{
+		nodes: make(map[string]nodeInfo),
+	}
+}
+
+// UpdateStateWithUnneededList records unneeded nodes and handles missing ones.
+func (t *NodeLatencyTracker) UpdateStateWithUnneededList(
+	list []*apiv1.Node,
+	currentlyInDeletion map[string]bool,
+	timestamp time.Time,
+) {
+	currentSet := make(map[string]struct{}, len(list))
+	for _, node := range list {
+		currentSet[node.Name] = struct{}{}
+
+		if _, exists := t.nodes[node.Name]; !exists {
+			t.nodes[node.Name] = nodeInfo{
+				unneededSince: timestamp,
+				threshold:     0,
+			}
+			klog.V(4).Infof("Started tracking unneeded node %s at %v", node.Name, timestamp)
+		}
+	}
+
+	for name, info := range t.nodes {
+		if _, stillUnneeded := currentSet[name]; !stillUnneeded {
+			if _, inDeletion := currentlyInDeletion[name]; !inDeletion {
+				duration := timestamp.Sub(info.unneededSince)
+				metrics.UpdateScaleDownNodeRemovalLatency(false, duration-info.threshold)
+				delete(t.nodes, name)
+				klog.V(4).Infof("Node %q reported as deleted/missing (unneeded for %s, threshold %s)",
+					name, duration, info.threshold)
+			}
+		}
+	}
+}
+
+// ObserveDeletion is called by the actuator just before node deletion.
+func (t *NodeLatencyTracker) ObserveDeletion(nodeName string, timestamp time.Time) {
+	if info, exists := t.nodes[nodeName]; exists {
+		duration := timestamp.Sub(info.unneededSince)
+
+		klog.V(4).Infof(
+			"Observing deletion for node %s, unneeded for %s (threshold was %s).",
+			nodeName, duration, info.threshold,
+		)
+
+		metrics.UpdateScaleDownNodeRemovalLatency(true, duration-info.threshold)
+		delete(t.nodes, nodeName)
+	}
+}
+
+// UpdateThreshold updates the scale-down threshold for a tracked node.
+func (t *NodeLatencyTracker) UpdateThreshold(nodeName string, threshold time.Duration) {
+	if info, exists := t.nodes[nodeName]; exists {
+		info.threshold = threshold
+		t.nodes[nodeName] = info
+		klog.V(4).Infof("Updated threshold for node %q to %s", nodeName, threshold)
+	} else {
+		klog.Warningf("Attempted to update threshold for unknown node %q", nodeName)
+	}
+}
+
+// GetTrackedNodes returns the names of all nodes currently tracked as unneeded.
+func (t *NodeLatencyTracker) GetTrackedNodes() []string {
+	names := make([]string, 0, len(t.nodes))
+	for name := range t.nodes {
+		names = append(names, name)
+	}
+	return names
+}