4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
- "net/http "
7
+ "maps "
8
8
"strconv"
9
9
"strings"
10
10
"time"
@@ -92,13 +92,13 @@ func (s *Service) GetWorkloadMetrics(ctx context.Context, workloadID string, par
92
92
return m , err
93
93
}
94
94
95
- if resp .StatusCode () != http . StatusOK {
96
- return m , fmt .Errorf ("failed to get workload metrics, status code : %d" , resp .StatusCode ())
95
+ if resp .StatusCode () >= 400 {
96
+ return m , fmt .Errorf ("failed to get workload metrics after %d retries , status: %d %s " , s . client . RetryCount , resp .StatusCode (), resp . Status ())
97
97
}
98
98
99
- result := resp .Result ().(* MeasurementResponse )
100
- if result == nil {
101
- return m , fmt .Errorf ("failed to get workload metrics, result is nil " )
99
+ result , ok := resp .Result ().(* MeasurementResponse )
100
+ if ! ok || result == nil {
101
+ return m , fmt .Errorf ("failed to get workload metrics due to invalid response " )
102
102
}
103
103
104
104
for _ , measurement := range result .Measurements {
@@ -149,8 +149,8 @@ func (s *Service) GetAllWorkloadWithMetrics(ctx context.Context, params Measurem
149
149
return nil , err
150
150
}
151
151
152
- workloadsWithMetrics := make ([]WorkloadWithMetrics , len (workloads ))
153
- for i , workload := range workloads {
152
+ workloadsWithMetrics := make ([]WorkloadWithMetrics , 0 , len (workloads ))
153
+ for _ , workload := range workloads {
154
154
metrics := Metrics {
155
155
Timestamp : params .EndTime .UTC (),
156
156
Values : make (map [MetricType ]float64 ),
@@ -164,17 +164,21 @@ func (s *Service) GetAllWorkloadWithMetrics(ctx context.Context, params Measurem
164
164
EndTime : params .EndTime .UTC (),
165
165
})
166
166
if err != nil {
167
- return nil , err
167
+ // We don't want to fail the whole operation if one workload fails to get metrics
168
+ s .logger .With ("startTime" , params .StartTime .UTC (), "endTime" , params .EndTime .UTC (), "workloadID" , workload .ID , "metricTypes" , metricTypes ).Errorf ("failed to get workload metrics: %w" , err )
169
+ continue
168
170
}
169
171
170
- for mt , v := range m .Values {
171
- metrics .Values [mt ] = v
172
- }
172
+ // Copy the metrics into the metrics struct
173
+ maps .Copy (metrics .Values , m .Values )
173
174
}
174
175
175
- workloadsWithMetrics [i ] = WorkloadWithMetrics {
176
- Workload : workload ,
177
- Metrics : metrics ,
176
+ // Only add the workload to the list if it has metrics
177
+ if len (metrics .Values ) > 0 {
178
+ workloadsWithMetrics = append (workloadsWithMetrics , WorkloadWithMetrics {
179
+ Workload : workload ,
180
+ Metrics : metrics ,
181
+ })
178
182
}
179
183
}
180
184
@@ -228,13 +232,13 @@ func (s *Service) GetPodMetrics(ctx context.Context, workloadID string, podID st
228
232
return m , err
229
233
}
230
234
231
- if resp .StatusCode () != http . StatusOK {
232
- return m , fmt .Errorf ("failed to get pod metrics, status code : %d" , resp .StatusCode ())
235
+ if resp .StatusCode () >= 400 {
236
+ return m , fmt .Errorf ("failed to get pod metrics after %d retries , status: %d %s " , s . client . RetryCount , resp .StatusCode (), resp . Status ())
233
237
}
234
238
235
- result := resp .Result ().(* MeasurementResponse )
236
- if result == nil {
237
- return m , fmt .Errorf ("failed to get pod metrics, result is nil " )
239
+ result , ok := resp .Result ().(* MeasurementResponse )
240
+ if ! ok || result == nil {
241
+ return m , fmt .Errorf ("failed to get pod metrics due to invalid response " )
238
242
}
239
243
240
244
for _ , measurement := range result .Measurements {
@@ -285,8 +289,8 @@ func (s *Service) GetAllPodWithMetrics(ctx context.Context, params MeasurementPa
285
289
return nil , err
286
290
}
287
291
288
- podsWithMetrics := make ([]PodWithMetrics , len (pods ))
289
- for i , pod := range pods {
292
+ podsWithMetrics := make ([]PodWithMetrics , 0 , len (pods ))
293
+ for _ , pod := range pods {
290
294
metrics := Metrics {
291
295
Timestamp : params .EndTime .UTC (),
292
296
Values : make (map [MetricType ]float64 ),
@@ -300,17 +304,21 @@ func (s *Service) GetAllPodWithMetrics(ctx context.Context, params MeasurementPa
300
304
EndTime : params .EndTime .UTC (),
301
305
})
302
306
if err != nil {
303
- return nil , err
307
+ // We don't want to fail the whole operation if one pod fails to get metrics
308
+ s .logger .With ("startTime" , params .StartTime .UTC (), "endTime" , params .EndTime .UTC (), "workloadID" , pod .WorkloadID , "podID" , pod .ID , "metricTypes" , metricTypes ).Errorf ("failed to get pod metrics: %w" , err )
309
+ continue
304
310
}
305
311
306
- for mt , v := range m .Values {
307
- metrics .Values [mt ] = v
308
- }
312
+ // Copy the metrics into the metrics struct
313
+ maps .Copy (metrics .Values , m .Values )
309
314
}
310
315
311
- podsWithMetrics [i ] = PodWithMetrics {
312
- Pod : pod ,
313
- Metrics : metrics ,
316
+ // Only add the pod to the list if it has metrics
317
+ if len (metrics .Values ) > 0 {
318
+ podsWithMetrics = append (podsWithMetrics , PodWithMetrics {
319
+ Pod : pod ,
320
+ Metrics : metrics ,
321
+ })
314
322
}
315
323
}
316
324
0 commit comments