package webui

import (
	"bytes"
	"context"
	"log/slog"
	"os/exec"
	"time"

	"bee/audit/internal/app"
	"bee/audit/internal/collector"
)

const (
	// healthPollInterval is the period of the ticker that drives each poll.
	healthPollInterval = 60 * time.Second
	// psuIPMITimeout bounds a single `ipmitool sdr` invocation.
	psuIPMITimeout = 15 * time.Second
)

// healthPoller periodically checks hardware components that do not emit
// kernel log events (e.g. PSU) and records what it finds in a
// ComponentStatusDB.
type healthPoller struct {
	statusDB *app.ComponentStatusDB
}

// newHealthPoller returns a poller that records its results into statusDB.
// A nil statusDB is accepted; pollPSU then does nothing.
func newHealthPoller(statusDB *app.ComponentStatusDB) *healthPoller {
	poller := new(healthPoller)
	poller.statusDB = statusDB
	return poller
}

// start hands the polling loop to goRecoverLoop under the name
// "health poller" with a 5-second duration argument.
// NOTE(review): goRecoverLoop is defined elsewhere; presumably it runs the
// loop in the background and restarts it after a panic — confirm.
func (p *healthPoller) start() {
	goRecoverLoop("health poller", 5*time.Second, p.run)
}

// run polls the PSU state once per healthPollInterval tick. The first poll
// happens only after the first tick has fired.
func (p *healthPoller) run() {
	tick := time.NewTicker(healthPollInterval)
	defer tick.Stop()

	for {
		<-tick.C
		p.pollPSU()
	}
}

// pollPSU runs `ipmitool sdr`, extracts the PSU slots from its standard
// output via collector.PSUSlotsFromSDR, and records one status entry per
// slot under the key "psu:<slot>". It is a no-op when no status DB is
// configured, when ipmitool fails, or when no PSU slots are found.
func (p *healthPoller) pollPSU() {
	db := p.statusDB
	if db == nil {
		return
	}

	ctx, cancel := context.WithTimeout(context.Background(), psuIPMITimeout)
	defer cancel()

	var stdout bytes.Buffer
	sdr := exec.CommandContext(ctx, "ipmitool", "sdr")
	sdr.Stdout = &stdout
	if runErr := sdr.Run(); runErr != nil {
		// IPMI not available or not a server — skip silently.
		slog.Debug("health poller: ipmitool sdr unavailable", "err", runErr)
		return
	}

	const source = "watchdog:psu"

	// Ranging over an empty result records nothing, so no separate
	// emptiness check is required.
	for slot, reading := range collector.PSUSlotsFromSDR(stdout.String()) {
		// An empty status is recorded as "Unknown".
		state := reading.Status
		if state == "" {
			state = "Unknown"
		}

		// Only the Critical and Warning states carry a detail message.
		var detail string
		if state == "Critical" {
			detail = "PSU sensor reported non-OK state"
		} else if state == "Warning" {
			detail = "PSU sensor in warning state"
		}

		db.Record("psu:"+slot, source, state, detail)
	}
}