Skip to content

Commit ea25c20

Browse files
committed
feat(bmc): add redfish support
Signed-off-by: Sunil Thaha <sthaha@redhat.com>
1 parent b319693 commit ea25c20

26 files changed

+6000
-11
lines changed

cmd/kepler/main.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@ import (
1111
"syscall"
1212

1313
"github.com/alecthomas/kingpin/v2"
14+
"k8s.io/utils/ptr"
15+
1416
"github.com/sustainable-computing-io/kepler/config"
1517
"github.com/sustainable-computing-io/kepler/internal/device"
1618
"github.com/sustainable-computing-io/kepler/internal/exporter/prometheus"
1719
"github.com/sustainable-computing-io/kepler/internal/exporter/stdout"
1820
"github.com/sustainable-computing-io/kepler/internal/k8s/pod"
1921
"github.com/sustainable-computing-io/kepler/internal/logger"
2022
"github.com/sustainable-computing-io/kepler/internal/monitor"
23+
"github.com/sustainable-computing-io/kepler/internal/platform/redfish"
2124
"github.com/sustainable-computing-io/kepler/internal/resource"
2225
"github.com/sustainable-computing-io/kepler/internal/server"
2326
"github.com/sustainable-computing-io/kepler/internal/service"
@@ -157,6 +160,20 @@ func createServices(logger *slog.Logger, cfg *config.Config) ([]service.Service,
157160
monitor.WithMinTerminatedEnergyThreshold(monitor.Energy(cfg.Monitor.MinTerminatedEnergyThreshold)*monitor.Joule),
158161
)
159162

163+
// Create Redfish service if enabled
164+
var redfishService *redfish.Service
165+
if ptr.Deref(cfg.Platform.Redfish.Enabled, false) {
166+
var err error
167+
redfishService, err = redfish.NewService(
168+
cfg.Platform.Redfish.ConfigFile,
169+
cfg.Platform.NodeID,
170+
logger,
171+
)
172+
if err != nil {
173+
return nil, fmt.Errorf("failed to create Redfish service: %w", err)
174+
}
175+
}
176+
160177
apiServer := server.NewAPIServer(
161178
server.WithLogger(logger),
162179
server.WithListenAddress(cfg.Web.ListenAddresses),
@@ -170,9 +187,14 @@ func createServices(logger *slog.Logger, cfg *config.Config) ([]service.Service,
170187
pm,
171188
)
172189

190+
// Add Redfish service if enabled
191+
if redfishService != nil {
192+
services = append(services, redfishService)
193+
}
194+
173195
// Add Prometheus exporter if enabled
174196
if *cfg.Exporter.Prometheus.Enabled {
175-
promExporter, err := createPrometheusExporter(logger, cfg, apiServer, pm)
197+
promExporter, err := createPrometheusExporter(logger, cfg, apiServer, pm, redfishService)
176198
if err != nil {
177199
return nil, fmt.Errorf("failed to create Prometheus exporter: %w", err)
178200
}
@@ -194,19 +216,26 @@ func createServices(logger *slog.Logger, cfg *config.Config) ([]service.Service,
194216
return services, nil
195217
}
196218

197-
func createPrometheusExporter(logger *slog.Logger, cfg *config.Config, apiServer *server.APIServer, pm *monitor.PowerMonitor) (*prometheus.Exporter, error) {
219+
func createPrometheusExporter(logger *slog.Logger, cfg *config.Config, apiServer *server.APIServer, pm *monitor.PowerMonitor, redfishService *redfish.Service) (*prometheus.Exporter, error) {
198220
logger.Debug("Creating Prometheus exporter")
199221

200222
// Use metrics level from configuration (already parsed)
201223
metricsLevel := cfg.Exporter.Prometheus.MetricsLevel
202224

203-
collectors, err := prometheus.CreateCollectors(
204-
pm,
225+
var collectorOpts []prometheus.OptionFn
226+
collectorOpts = append(collectorOpts,
205227
prometheus.WithLogger(logger),
206228
prometheus.WithProcFSPath(cfg.Host.ProcFS),
207229
prometheus.WithNodeName(cfg.Kube.Node),
208230
prometheus.WithMetricsLevel(metricsLevel),
209231
)
232+
233+
// Add platform data provider if Redfish service is available
234+
if redfishService != nil {
235+
collectorOpts = append(collectorOpts, prometheus.WithPlatformDataProvider(redfishService))
236+
}
237+
238+
collectors, err := prometheus.CreateCollectors(pm, collectorOpts...)
210239
if err != nil {
211240
return nil, fmt.Errorf("failed to create Prometheus collectors: %w", err)
212241
}

config/config.go

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,18 @@ type (
9393
Node string `yaml:"nodeName"`
9494
}
9595

96+
// Platform contains settings for platform power monitoring
97+
Platform struct {
98+
NodeID string `yaml:"nodeID"` // High-level node identifier
99+
Redfish Redfish `yaml:"redfish"`
100+
}
101+
102+
// Redfish contains settings for Redfish BMC power monitoring
103+
Redfish struct {
104+
Enabled *bool `yaml:"enabled"`
105+
ConfigFile string `yaml:"configFile"`
106+
}
107+
96108
Config struct {
97109
Log Log `yaml:"log"`
98110
Host Host `yaml:"host"`
@@ -103,7 +115,8 @@ type (
103115
Debug Debug `yaml:"debug"`
104116
Dev Dev `yaml:"dev"` // WARN: do not expose dev settings as flags
105117

106-
Kube Kube `yaml:"kube"`
118+
Platform Platform `yaml:"platform"`
119+
Kube Kube `yaml:"kube"`
107120
}
108121
)
109122

@@ -186,6 +199,11 @@ const (
186199
KubeConfigFlag = "kube.config"
187200
KubeNodeNameFlag = "kube.node-name"
188201

202+
// Platform flags
203+
PlatformNodeIDFlag = "platform.node-id"
204+
PlatformRedfishEnabledFlag = "platform.redfish.enabled"
205+
PlatformRedfishConfigFlag = "platform.redfish.config"
206+
189207
// WARN: dev settings shouldn't be exposed as flags, because flags are intended for end users
190208
)
191209

@@ -228,6 +246,13 @@ func DefaultConfig() *Config {
228246
Web: Web{
229247
ListenAddresses: []string{":28282"},
230248
},
249+
Platform: Platform{
250+
NodeID: "",
251+
Redfish: Redfish{
252+
Enabled: ptr.To(false),
253+
ConfigFile: "",
254+
},
255+
},
231256
Kube: Kube{
232257
Enabled: ptr.To(false),
233258
},
@@ -327,6 +352,11 @@ func RegisterFlags(app *kingpin.Application) ConfigUpdaterFn {
327352
kubeconfig := app.Flag(KubeConfigFlag, "Path to a kubeconfig. Only required if out-of-cluster.").ExistingFile()
328353
nodeName := app.Flag(KubeNodeNameFlag, "Name of kubernetes node on which kepler is running.").String()
329354

355+
// platform
356+
platformNodeID := app.Flag(PlatformNodeIDFlag, "Node identifier for platform power monitoring").String()
357+
platformRedfishEnabled := app.Flag(PlatformRedfishEnabledFlag, "Enable Redfish BMC power monitoring").Default("false").Bool()
358+
platformRedfishConfig := app.Flag(PlatformRedfishConfigFlag, "Path to Redfish BMC configuration file").String()
359+
330360
return func(cfg *Config) error {
331361
// Logging settings
332362
if flagsSet[LogLevelFlag] {
@@ -389,6 +419,19 @@ func RegisterFlags(app *kingpin.Application) ConfigUpdaterFn {
389419
cfg.Kube.Node = *nodeName
390420
}
391421

422+
// platform settings
423+
if flagsSet[PlatformNodeIDFlag] {
424+
cfg.Platform.NodeID = *platformNodeID
425+
}
426+
427+
if flagsSet[PlatformRedfishEnabledFlag] {
428+
cfg.Platform.Redfish.Enabled = platformRedfishEnabled
429+
}
430+
431+
if flagsSet[PlatformRedfishConfigFlag] {
432+
cfg.Platform.Redfish.ConfigFile = *platformRedfishConfig
433+
}
434+
392435
cfg.sanitize()
393436
return cfg.Validate()
394437
}
@@ -412,6 +455,8 @@ func (c *Config) sanitize() {
412455
c.Exporter.Prometheus.DebugCollectors[i] = strings.TrimSpace(c.Exporter.Prometheus.DebugCollectors[i])
413456
}
414457
c.Kube.Config = strings.TrimSpace(c.Kube.Config)
458+
c.Platform.NodeID = strings.TrimSpace(c.Platform.NodeID)
459+
c.Platform.Redfish.ConfigFile = strings.TrimSpace(c.Platform.Redfish.ConfigFile)
415460
}
416461

417462
// Validate checks for configuration errors
@@ -500,6 +545,17 @@ func (c *Config) Validate(skips ...SkipValidation) error {
500545
}
501546
}
502547
}
548+
{ // Platform
549+
if ptr.Deref(c.Platform.Redfish.Enabled, false) {
550+
if c.Platform.Redfish.ConfigFile == "" {
551+
errs = append(errs, fmt.Sprintf("%s not supplied but %s set to true", PlatformRedfishConfigFlag, PlatformRedfishEnabledFlag))
552+
} else {
553+
if err := canReadFile(c.Platform.Redfish.ConfigFile); err != nil {
554+
errs = append(errs, fmt.Sprintf("unreadable Redfish config file: %s: %s", c.Platform.Redfish.ConfigFile, err.Error()))
555+
}
556+
}
557+
}
558+
}
503559

504560
if len(errs) > 0 {
505561
return fmt.Errorf("invalid configuration: %s", strings.Join(errs, ", "))

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ require (
1414
github.com/prometheus/client_model v0.6.1
1515
github.com/prometheus/exporter-toolkit v0.14.0
1616
github.com/prometheus/procfs v0.15.1
17+
github.com/stmcginnis/gofish v0.15.0
1718
github.com/stretchr/testify v1.10.0
1819
go.uber.org/zap v1.26.0
1920
golang.org/x/sync v0.12.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,8 @@ github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU
135135
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
136136
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
137137
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
138+
github.com/stmcginnis/gofish v0.15.0 h1:8TG41+lvJk/0Nf8CIIYErxbMlQUy80W0JFRZP3Ld82A=
139+
github.com/stmcginnis/gofish v0.15.0/go.mod h1:BLDSFTp8pDlf/xDbLZa+F7f7eW0E/CHCboggsu8CznI=
138140
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
139141
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
140142
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=

0 commit comments

Comments
 (0)