Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions cmd/kepler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ import (
"syscall"

"github.com/alecthomas/kingpin/v2"

"github.com/sustainable-computing-io/kepler/config"
"github.com/sustainable-computing-io/kepler/internal/device"
"github.com/sustainable-computing-io/kepler/internal/exporter/prometheus"
"github.com/sustainable-computing-io/kepler/internal/exporter/stdout"
"github.com/sustainable-computing-io/kepler/internal/k8s/pod"
"github.com/sustainable-computing-io/kepler/internal/logger"
"github.com/sustainable-computing-io/kepler/internal/monitor"
"github.com/sustainable-computing-io/kepler/internal/platform/redfish"
"github.com/sustainable-computing-io/kepler/internal/resource"
"github.com/sustainable-computing-io/kepler/internal/server"
"github.com/sustainable-computing-io/kepler/internal/service"
Expand Down Expand Up @@ -157,6 +159,8 @@ func createServices(logger *slog.Logger, cfg *config.Config) ([]service.Service,
monitor.WithMinTerminatedEnergyThreshold(monitor.Energy(cfg.Monitor.MinTerminatedEnergyThreshold)*monitor.Joule),
)

// Create Redfish service if enabled (experimental feature)

apiServer := server.NewAPIServer(
server.WithLogger(logger),
server.WithListenAddress(cfg.Web.ListenAddresses),
Expand All @@ -170,43 +174,69 @@ func createServices(logger *slog.Logger, cfg *config.Config) ([]service.Service,
pm,
)

// Add Redfish service if enabled
var redfishService *redfish.Service
if cfg.IsFeatureEnabled(config.ExperimentalRedfishFeature) {
rs, err := createRedfishService(logger, cfg)
if err != nil {
return nil, fmt.Errorf("failed to create Redfish service: %w", err)
}
Comment on lines +181 to +183
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the Redfish service, as configured in configFile: /etc/kepler/redfish.yaml, does not respond or is unavailable at the moment, should Kepler shut down?

kepler-dev-1  | time=2025-09-03T16:27:32.244Z level=ERROR source=cmd/kepler/main.go:57 msg="failed to initialize services" error="failed to initialize service platform.redfish: failed to connect to BMC at http://127.0.0.1:8000 for node kind-control-plane: Get \"http://127.0.0.1:8000/redfish/v1/\": dial tcp 127.0.0.1:8000: connect: connection refused"

services = append(services, rs)
redfishService = rs
}

// Add Prometheus exporter if enabled
if *cfg.Exporter.Prometheus.Enabled {
promExporter, err := createPrometheusExporter(logger, cfg, apiServer, pm)
if cfg.IsFeatureEnabled(config.PrometheusFeature) {
promExporter, err := createPrometheusExporter(logger, cfg, apiServer, pm, redfishService)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use Options to inject redfish

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's how it is currently done in createPrometheusExporter. Are you suggesting a better pattern?

Copy link
Collaborator

@vimalk78 vimalk78 Sep 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if there is no Redfish service? In that case we are passing a nil pointer.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, and createPrometheusExporter (a local function) handles that.

if err != nil {
return nil, fmt.Errorf("failed to create Prometheus exporter: %w", err)
}
services = append(services, promExporter)
}

// Add pprof if enabled
if *cfg.Debug.Pprof.Enabled {
if cfg.IsFeatureEnabled(config.PprofFeature) {
pprof := server.NewPprof(apiServer)
services = append(services, pprof)
}

// Add stdout exporter if enabled
if *cfg.Exporter.Stdout.Enabled {
if cfg.IsFeatureEnabled(config.StdoutFeature) {
stdoutExporter := stdout.NewExporter(pm, stdout.WithLogger(logger))
services = append(services, stdoutExporter)
}

return services, nil
}

func createPrometheusExporter(logger *slog.Logger, cfg *config.Config, apiServer *server.APIServer, pm *monitor.PowerMonitor) (*prometheus.Exporter, error) {
// createRedfishService constructs the Redfish platform service from the
// experimental Redfish section of cfg, handing it the given logger and a
// staleness option taken from cfg.Monitor.Staleness. It returns whatever
// redfish.NewService returns, including its error.
func createRedfishService(logger *slog.Logger, cfg *config.Config) (*redfish.Service, error) {
	redfishCfg := cfg.Experimental.Platform.Redfish
	stalenessOpt := redfish.WithStaleness(cfg.Monitor.Staleness)

	return redfish.NewService(redfishCfg, logger, stalenessOpt)
}

func createPrometheusExporter(
logger *slog.Logger, cfg *config.Config,
apiServer *server.APIServer, pm *monitor.PowerMonitor,
rs *redfish.Service,
) (*prometheus.Exporter, error) {
logger.Debug("Creating Prometheus exporter")

// Use metrics level from configuration (already parsed)
metricsLevel := cfg.Exporter.Prometheus.MetricsLevel

collectors, err := prometheus.CreateCollectors(
pm,
var collectorOpts []prometheus.OptionFn
collectorOpts = append(collectorOpts,
prometheus.WithLogger(logger),
prometheus.WithProcFSPath(cfg.Host.ProcFS),
prometheus.WithNodeName(cfg.Kube.Node),
prometheus.WithMetricsLevel(metricsLevel),
)

// Add platform data provider if Redfish service is available
if rs != nil {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vimalk78 The nil check is here. Should we make it more abstract?

collectorOpts = append(collectorOpts, prometheus.WithPlatformDataProvider(rs))
}

collectors, err := prometheus.CreateCollectors(pm, collectorOpts...)
if err != nil {
return nil, fmt.Errorf("failed to create Prometheus collectors: %w", err)
}
Expand Down
10 changes: 10 additions & 0 deletions compose/default/kepler/etc/kepler/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,13 @@ dev:
fake-cpu-meter:
enabled: false
zones: [] # zones to be enabled, empty enables all default zones

# EXPERIMENTAL FEATURES - These features are experimental and may be unstable
# and are disabled by default
experimental:
platform:
redfish:
enabled: false # Enable experimental Redfish BMC power monitoring
configFile: /etc/kepler/redfish.yaml # Path to Redfish BMC configuration file
nodeName: "" # Node name to use (overrides Kubernetes node name and hostname fallback)
httpTimeout: 5s # HTTP client timeout for BMC requests (default: 5s)
10 changes: 10 additions & 0 deletions compose/dev/kepler-dev/etc/kepler/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,13 @@ dev:
fake-cpu-meter:
enabled: false
zones: [] # zones to be enabled, empty enables all default zones

# EXPERIMENTAL FEATURES - These features are experimental and may be unstable
# and are disabled by default
experimental:
platform:
redfish:
enabled: false # Enable experimental Redfish BMC power monitoring
configFile: /etc/kepler/redfish.yaml # Path to Redfish BMC configuration file
nodeName: "" # Node name to use (overrides Kubernetes node name and hostname fallback)
httpTimeout: 5s # HTTP client timeout for BMC requests (default: 5s)
Loading
Loading