Skip to content

Commit e209b99

Browse files
committed
Adding threshold by percentile for podStartupLatency
Introducing thresholds by percentile (50, 90, and 99) for podStartupLatency only. The default value of each new threshold is the "threshold" value, so this change should be a no-op unless the params are used in the test configuration.
1 parent dfb1374 commit e209b99

File tree

3 files changed

+45
-3
lines changed

3 files changed

+45
-3
lines changed

clusterloader2/pkg/measurement/common/slos/pod_startup_latency.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ type podStartupLatencyMeasurement struct {
7979
podStartupEntries *measurementutil.ObjectTransitionTimes
8080
podMetadata *measurementutil.PodsMetadata
8181
threshold time.Duration
82+
// Thresholds for pod startup latency by percentile (50th, 90th, 99th). Each defaults to threshold.
83+
perc50Threshold time.Duration
84+
perc90Threshold time.Duration
85+
perc99Threshold time.Duration
8286
}
8387

8488
// Execute supports two actions:
@@ -101,6 +105,18 @@ func (p *podStartupLatencyMeasurement) Execute(config *measurement.Config) ([]me
101105
if err != nil {
102106
return nil, err
103107
}
108+
p.perc50Threshold, err = util.GetDurationOrDefault(config.Params, "perc50Threshold", p.threshold)
109+
if err != nil {
110+
return nil, err
111+
}
112+
p.perc90Threshold, err = util.GetDurationOrDefault(config.Params, "perc90Threshold", p.threshold)
113+
if err != nil {
114+
return nil, err
115+
}
116+
p.perc99Threshold, err = util.GetDurationOrDefault(config.Params, "perc99Threshold", p.threshold)
117+
if err != nil {
118+
return nil, err
119+
}
104120
return nil, p.start(config.ClusterFramework.GetClientSets().GetClient())
105121
case "gather":
106122
schedulerName, err := util.GetStringOrDefault(config.Params, "schedulerName", defaultSchedulerName)
@@ -255,7 +271,7 @@ func (p *podStartupLatencyMeasurement) gather(c clientset.Interface, identifier
255271
transitions := podStartupTransitionsWithThreshold(p.threshold)
256272
podStartupLatency := p.podStartupEntries.CalculateTransitionsLatency(transitions, check.filter)
257273

258-
if slosErr := podStartupLatency["pod_startup"].VerifyThreshold(p.threshold); slosErr != nil {
274+
if slosErr := podStartupLatency["pod_startup"].VerifyThresholdByPercentile(p.perc50Threshold, p.perc90Threshold, p.perc99Threshold); slosErr != nil {
259275
err = errors.NewMetricViolationError("pod startup", slosErr.Error())
260276
klog.Errorf("%s%s: %v", check.namePrefix, p, err)
261277
}

clusterloader2/pkg/measurement/common/slos/slo_measurement_test.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,11 @@ func Test_getMeasurementConfig(t *testing.T) {
3232
"threshold": 200,
3333
},
3434
"PodStartupLatency": map[string]interface{}{
35-
"threshold": 5,
36-
"latency": "10s",
35+
"threshold": 5,
36+
"perc50Threshold": 5,
37+
"perc90Threshold": 10,
38+
"perc99Threshold": 15,
39+
"latency": "10s",
3740
},
3841
},
3942
},
@@ -53,6 +56,15 @@ func Test_getMeasurementConfig(t *testing.T) {
5356
if result := params["threshold"]; result != 5 {
5457
t.Errorf("want %v, got %v", 5, result)
5558
}
59+
if result := params["perc50Threshold"]; result != 5 {
60+
t.Errorf("want %v, got %v", 5, result)
61+
}
62+
if result := params["perc90Threshold"]; result != 10 {
63+
t.Errorf("want %v, got %v", 5, result)
64+
}
65+
if result := params["perc99Threshold"]; result != 15 {
66+
t.Errorf("want %v, got %v", 5, result)
67+
}
5668
if result := params["latency"]; result != "10s" {
5769
t.Errorf("want %v, got %v", "10s", result)
5870
}

clusterloader2/pkg/measurement/util/latency_metric.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ func (metric *LatencyMetric) VerifyThreshold(threshold time.Duration) error {
5959
return nil
6060
}
6161

62+
// VerifyThresholdByPercentile verifies latency metric against given percentile thresholds, returning an error for the first percentile (50th, 90th, 99th) that exceeds its threshold and nil otherwise.
63+
func (metric *LatencyMetric) VerifyThresholdByPercentile(perc50Threshold, perc90Threshold, perc99Threshold time.Duration) error {
64+
if metric.Perc50 > perc50Threshold {
65+
return fmt.Errorf("too high latency 50th percentile: got %v expected: %v", metric.Perc50, perc50Threshold)
66+
}
67+
if metric.Perc90 > perc90Threshold {
68+
return fmt.Errorf("too high latency 90th percentile: got %v expected: %v", metric.Perc90, perc90Threshold)
69+
}
70+
if metric.Perc99 > perc99Threshold {
71+
return fmt.Errorf("too high latency 99th percentile: got %v expected: %v", metric.Perc99, perc99Threshold)
72+
}
73+
return nil
74+
}
75+
6276
// ToPerfData converts latency metric to PerfData.
6377
func (metric *LatencyMetric) ToPerfData(name string) DataItem {
6478
return DataItem{

0 commit comments

Comments
 (0)