Skip to content

Commit 8101452

Browse files
authored
Merge pull request #3339 from YahiaBadr/threshold-by-perc
Adding threshold by percentile for podStartupLatency
2 parents dfb1374 + e209b99 commit 8101452

File tree

3 files changed

+45
-3
lines changed

3 files changed

+45
-3
lines changed

clusterloader2/pkg/measurement/common/slos/pod_startup_latency.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ type podStartupLatencyMeasurement struct {
7979
podStartupEntries *measurementutil.ObjectTransitionTimes
8080
podMetadata *measurementutil.PodsMetadata
8181
threshold time.Duration
82+
// Thresholds for pod startup latency by percentile. Each defaults to the value of threshold.
83+
perc50Threshold time.Duration
84+
perc90Threshold time.Duration
85+
perc99Threshold time.Duration
8286
}
8387

8488
// Execute supports two actions:
@@ -101,6 +105,18 @@ func (p *podStartupLatencyMeasurement) Execute(config *measurement.Config) ([]me
101105
if err != nil {
102106
return nil, err
103107
}
108+
p.perc50Threshold, err = util.GetDurationOrDefault(config.Params, "perc50Threshold", p.threshold)
109+
if err != nil {
110+
return nil, err
111+
}
112+
p.perc90Threshold, err = util.GetDurationOrDefault(config.Params, "perc90Threshold", p.threshold)
113+
if err != nil {
114+
return nil, err
115+
}
116+
p.perc99Threshold, err = util.GetDurationOrDefault(config.Params, "perc99Threshold", p.threshold)
117+
if err != nil {
118+
return nil, err
119+
}
104120
return nil, p.start(config.ClusterFramework.GetClientSets().GetClient())
105121
case "gather":
106122
schedulerName, err := util.GetStringOrDefault(config.Params, "schedulerName", defaultSchedulerName)
@@ -255,7 +271,7 @@ func (p *podStartupLatencyMeasurement) gather(c clientset.Interface, identifier
255271
transitions := podStartupTransitionsWithThreshold(p.threshold)
256272
podStartupLatency := p.podStartupEntries.CalculateTransitionsLatency(transitions, check.filter)
257273

258-
if slosErr := podStartupLatency["pod_startup"].VerifyThreshold(p.threshold); slosErr != nil {
274+
if slosErr := podStartupLatency["pod_startup"].VerifyThresholdByPercentile(p.perc50Threshold, p.perc90Threshold, p.perc99Threshold); slosErr != nil {
259275
err = errors.NewMetricViolationError("pod startup", slosErr.Error())
260276
klog.Errorf("%s%s: %v", check.namePrefix, p, err)
261277
}

clusterloader2/pkg/measurement/common/slos/slo_measurement_test.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@ func Test_getMeasurementConfig(t *testing.T) {
3232
"threshold": 200,
3333
},
3434
"PodStartupLatency": map[string]interface{}{
35-
"threshold": 5,
36-
"latency": "10s",
35+
"threshold": 5,
36+
"perc50Threshold": 5,
37+
"perc90Threshold": 10,
38+
"perc99Threshold": 15,
39+
"latency": "10s",
3740
},
3841
},
3942
},
@@ -53,6 +56,15 @@ func Test_getMeasurementConfig(t *testing.T) {
5356
if result := params["threshold"]; result != 5 {
5457
t.Errorf("want %v, got %v", 5, result)
5558
}
59+
if result := params["perc50Threshold"]; result != 5 {
60+
t.Errorf("want %v, got %v", 5, result)
61+
}
62+
if result := params["perc90Threshold"]; result != 10 {
63+
t.Errorf("want %v, got %v", 5, result)
64+
}
65+
if result := params["perc99Threshold"]; result != 15 {
66+
t.Errorf("want %v, got %v", 5, result)
67+
}
5668
if result := params["latency"]; result != "10s" {
5769
t.Errorf("want %v, got %v", "10s", result)
5870
}

clusterloader2/pkg/measurement/util/latency_metric.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ func (metric *LatencyMetric) VerifyThreshold(threshold time.Duration) error {
5959
return nil
6060
}
6161

62+
// VerifyThresholdByPercentile verifies latency metric against given percentile thresholds, returning an error for the first of the 50th, 90th and 99th percentiles that exceeds its threshold.
63+
func (metric *LatencyMetric) VerifyThresholdByPercentile(perc50Threshold, perc90Threshold, perc99Threshold time.Duration) error {
64+
if metric.Perc50 > perc50Threshold {
65+
return fmt.Errorf("too high latency 50th percentile: got %v expected: %v", metric.Perc50, perc50Threshold)
66+
}
67+
if metric.Perc90 > perc90Threshold {
68+
return fmt.Errorf("too high latency 90th percentile: got %v expected: %v", metric.Perc90, perc90Threshold)
69+
}
70+
if metric.Perc99 > perc99Threshold {
71+
return fmt.Errorf("too high latency 99th percentile: got %v expected: %v", metric.Perc99, perc99Threshold)
72+
}
73+
return nil
74+
}
75+
6276
// ToPerfData converts latency metric to PerfData.
6377
func (metric *LatencyMetric) ToPerfData(name string) DataItem {
6478
return DataItem{

0 commit comments

Comments
 (0)