Files
bee/audit/internal/collector/psu.go
Michael Chus 3053cb0710 Fix PSU slot regex: match MSI underscore format PSU1_POWER_IN
\b does not fire between a digit and '_' because '_' is \w in RE2.
The pattern \bpsu?\s*([0-9]+)\b never matched PSU1_POWER_IN style
sensors, so parsePSUSDR (and PSUSlotsFromSDR / samplePSUPower) returned
empty results for MSI servers — causing all power graphs to fall back
to DCMI which reports ~half actual draw.

Added an explicit underscore-terminated pattern first in the list and
tests covering the MSI format.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-19 19:03:02 +03:00

444 lines
12 KiB
Go

package collector
import (
"bee/audit/internal/schema"
"log/slog"
"os/exec"
"regexp"
"sort"
"strconv"
"strings"
)
// collectPSUs gathers power-supply inventory by shelling out to ipmitool.
//
// FRU records ("ipmitool fru print") provide the identity of each PSU and
// SDR sensors ("ipmitool sdr") provide live readings. When FRU yields no
// PSUs the list is synthesized from SDR data alone; otherwise SDR readings
// are merged into the FRU-derived entries. Returns nil when neither source
// is usable.
func collectPSUs() []schema.HardwarePowerSupply {
	var supplies []schema.HardwarePowerSupply

	fruOut, fruErr := exec.Command("ipmitool", "fru", "print").Output()
	if fruErr != nil {
		slog.Info("psu: fru unavailable", "err", fruErr)
	} else {
		supplies = parseFRU(string(fruOut))
	}

	sdrOut, sdrErr := exec.Command("ipmitool", "sdr").Output()
	switch {
	case sdrErr == nil:
		readings := parsePSUSDR(string(sdrOut))
		if len(supplies) > 0 {
			mergePSUSDR(supplies, readings)
		} else {
			supplies = synthesizePSUsFromSDR(readings)
		}
	case len(supplies) == 0:
		// Neither FRU nor SDR produced anything: nothing to report.
		slog.Info("psu: ipmitool unavailable, skipping", "err", sdrErr)
		return nil
	}

	slog.Info("psu: collected", "count", len(supplies))
	return supplies
}
// parseFRU extracts PSU records from "ipmitool fru print" output.
//
// The output is cut into one block per "FRU Device Description" header and
// each block is offered to parseFRUBlock; blocks that are not PSUs are
// dropped. The fallback slot index handed to parseFRUBlock is the number of
// PSUs accepted so far.
func parseFRU(output string) []schema.HardwarePowerSupply {
	var found []schema.HardwarePowerSupply
	for _, record := range splitFRUBlocks(output) {
		if psu, ok := parseFRUBlock(record, len(found)); ok {
			found = append(found, psu)
		}
	}
	return found
}
// splitFRUBlocks cuts FRU output into chunks, starting a new chunk at every
// line that begins with "FRU Device Description". Any text before the first
// header forms its own leading chunk. Every line is re-emitted with a
// trailing newline.
func splitFRUBlocks(output string) []string {
	const marker = "FRU Device Description"

	var (
		chunks  []string
		pending strings.Builder
	)
	// flush moves the accumulated text, if any, into chunks.
	flush := func() {
		if pending.Len() == 0 {
			return
		}
		chunks = append(chunks, pending.String())
		pending.Reset()
	}

	for _, ln := range strings.Split(output, "\n") {
		if strings.HasPrefix(ln, marker) {
			flush()
		}
		pending.WriteString(ln)
		pending.WriteByte('\n')
	}
	flush()
	return chunks
}
// parseFRUBlock converts one FRU record into a power-supply entry.
//
// block is the text of a single record (header line plus "key : value"
// lines) and slotIdx is the 0-based slot to use when the header carries no
// PSU number. The second result is false when the header does not describe
// a power supply.
func parseFRUBlock(block string, slotIdx int) (schema.HardwarePowerSupply, bool) {
	var header string
	fields := make(map[string]string)
	for _, line := range strings.Split(block, "\n") {
		if strings.HasPrefix(line, "FRU Device Description") {
			header = line
			continue
		}
		if key, val, found := strings.Cut(line, " : "); found {
			fields[strings.TrimSpace(key)] = strings.TrimSpace(val)
		}
	}

	// Only PSU FRU records are of interest.
	if !isPSUHeader(strings.ToLower(header)) {
		return schema.HardwarePowerSupply{}, false
	}

	present := true
	psu := schema.HardwarePowerSupply{Present: &present}

	// Prefer the number embedded in the header (1-based) over the
	// positional fallback; the stored slot is 0-based either way.
	slotNum := slotIdx
	if n, ok := parsePSUSlot(header); ok && n > 0 {
		slotNum = n - 1
	}
	slotStr := strconv.Itoa(slotNum)
	psu.Slot = &slotStr

	// pick returns the first usable value among keys, or nil if none.
	pick := func(keys ...string) *string {
		if v := firstNonEmptyField(fields, keys...); v != "" {
			return &v
		}
		return nil
	}
	psu.Model = pick("Board Product", "Product Name", "Product Part Number")
	psu.Vendor = pick("Board Mfg", "Product Manufacturer", "Product Manufacturer Name")
	psu.SerialNumber = pick("Board Serial", "Product Serial", "Product Serial Number")
	psu.PartNumber = pick("Board Part Number", "Product Part Number", "Part Number")
	psu.Firmware = pick("Board Extra", "Product Version", "Board Version")

	// Some vendors encode the rating in the product name, e.g. "PSU 800W".
	if psu.Model != nil {
		if w := parseWattage(*psu.Model); w > 0 {
			psu.WattageW = &w
		}
	}

	status := statusOK
	psu.Status = &status
	return psu, true
}
// isPSUHeader reports whether an already lower-cased FRU header line names a
// power supply. Matching is a plain substring test against known spellings.
func isPSUHeader(headerLower string) bool {
	for _, marker := range [...]string{
		"psu",
		"pws",
		"power supply",
		"power_supply",
		"power module",
	} {
		if strings.Contains(headerLower, marker) {
			return true
		}
	}
	return false
}
// firstNonEmptyField looks up keys in order and returns the first value that
// is still non-empty after cleanDMIValue has processed it. It returns ""
// when no key yields a usable value.
func firstNonEmptyField(fields map[string]string, keys ...string) string {
	for i := range keys {
		cleaned := cleanDMIValue(fields[keys[i]])
		if cleaned == "" {
			continue
		}
		return cleaned
	}
	return ""
}
// psuSDR accumulates everything learned about one PSU slot from SDR sensor
// lines. parsePSUSDR fills one value per slot number; pointer fields are nil
// when no usable reading was seen.
type psuSDR struct {
// slot is the PSU number parsed from the sensor name (always > 0).
slot int
// status is statusOK unless some sensor of this slot reported a state
// other than "ok"/"ns", in which case it is statusCritical.
status string
// reason is a human-readable explanation set together with a non-OK status.
reason string
// inputPowerW is the input power reading, bounded to (0, 6000].
inputPowerW *float64
// outputPowerW is the output power reading, bounded to (0, 6000].
outputPowerW *float64
// inputVoltage comes from sensors named "input voltage" or "ac input".
inputVoltage *float64
// temperatureC comes from sensors whose name contains "temp".
temperatureC *float64
// healthPct comes from health / remaining-life sensors and is reported
// downstream as the remaining-life percentage.
healthPct *float64
}
// psuSlotPatterns lists, in priority order, the expressions used to pull a
// PSU number out of a sensor or FRU name. The first capture group of each
// pattern is the slot number. All patterns are case-insensitive.
var psuSlotPatterns = func() []*regexp.Regexp {
	sources := []string{
		// MSI/underscore style: PSU1_POWER_IN, PSU2_POWER_OUT. '_' is a word
		// character, so \b does not fire after the digit; the underscore is
		// matched explicitly instead.
		`(?i)\bpsu([0-9]+)_`,
		// PSU1, PS1, ps 2
		`(?i)\bpsu?\s*([0-9]+)\b`,
		// PS 6, PS6
		`(?i)\bps\s*([0-9]+)\b`,
		// PWS1
		`(?i)\bpws\s*([0-9]+)\b`,
		// Power Supply 1, Power Supply Bay 3
		`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`,
		// Bay 1
		`(?i)\bbay\s*([0-9]+)\b`,
		// Fallback for xFusion-style generic numbered sensors (Power1,
		// Power2, ...). Must stay last: "power supply N" is already handled
		// by an earlier pattern.
		`(?i)\bpower([0-9]+)\b`,
	}
	compiled := make([]*regexp.Regexp, len(sources))
	for i, src := range sources {
		compiled[i] = regexp.MustCompile(src)
	}
	return compiled
}()
// isPSUInputPower reports whether a lower-cased sensor name denotes an
// AC-input power sensor. Vendor spellings covered:
//
//	MSI:     PSU1_POWER_IN, PSU1_PIN
//	MLT:     PSU1_PIN
//	HPE:     PS1 Input Power, PS1 Input Watts
//	xFusion: no explicit keyword; handled by the caller's default fallback
func isPSUInputPower(name string) bool {
	for _, keyword := range [...]string{
		"input power",
		"input watts",
		"_pin",
		" pin",
		"_power_in",
		"power_in",
	} {
		if strings.Contains(name, keyword) {
			return true
		}
	}
	return false
}
// isPSUOutputPower reports whether a lower-cased sensor name denotes a
// DC-output power sensor. Vendor spellings covered:
//
//	MSI:     PSU1_POWER_OUT
//	MLT:     PSU1_POUT
//	xFusion: PS1 POut
//
// Names containing "power supply bay" or "psu bay" are also classified as
// output power.
func isPSUOutputPower(name string) bool {
	for _, keyword := range [...]string{
		"output power",
		"output watts",
		"_pout",
		" pout",
		"_power_out",
		"power_out",
		"power supply bay",
		"psu bay",
	} {
		if strings.Contains(name, keyword) {
			return true
		}
	}
	return false
}
// parseBoundedFloat reads a number out of an SDR value field and accepts it
// only when it lies in (0, max]. Unparseable input, zero, negative and
// over-limit values all yield nil; such readings indicate a missing, off or
// faulty sensor.
func parseBoundedFloat(raw string, max float64) *float64 {
	parsed := parseFloatPtr(raw)
	switch {
	case parsed == nil:
		return nil
	case *parsed <= 0, *parsed > max:
		return nil
	}
	return parsed
}
// parsePSUSDR parses `ipmitool sdr` output ("name | value | state" lines)
// and groups PSU-related sensors by slot number.
//
// A line is considered only if it has at least three non-empty fields and
// its sensor name matches one of psuSlotPatterns. The returned map is keyed
// by the slot number found in the sensor name; it is empty (never nil) when
// no PSU sensor was found.
//
// Per slot:
//   - status starts as statusOK and becomes statusCritical as soon as any
//     sensor of that slot reports a state other than "ok" or "ns";
//   - readings are assigned according to keywords in the sensor name;
//   - a sensor without any known keyword (e.g. xFusion "Power1") is used as
//     input power only when its value is expressed in Watts and no input
//     power has been recorded yet. Without the unit check, unrelated
//     slot-matched sensors (fan RPM, current, voltage) would be stored as
//     wattage.
func parsePSUSDR(raw string) map[int]psuSDR {
	out := map[int]psuSDR{}
	for _, line := range strings.Split(raw, "\n") {
		fields := splitSDRFields(line)
		if len(fields) < 3 {
			continue
		}
		name, value := fields[0], fields[1]
		state := strings.ToLower(fields[2])

		slot, ok := parsePSUSlot(name)
		if !ok {
			continue
		}

		entry := out[slot]
		entry.slot = slot
		if entry.status == "" {
			entry.status = statusOK
		}
		// "ns" (no reading / not specified) is not treated as a fault.
		if state != "" && state != "ok" && state != "ns" {
			entry.status = statusCritical
			entry.reason = "PSU sensor reported non-OK state: " + state
		}

		lowerName := strings.ToLower(name)
		switch {
		case isPSUInputPower(lowerName):
			entry.inputPowerW = parseBoundedFloat(value, 6000)
		case isPSUOutputPower(lowerName):
			entry.outputPowerW = parseBoundedFloat(value, 6000)
		case strings.Contains(lowerName, "input voltage"), strings.Contains(lowerName, "ac input"):
			entry.inputVoltage = parseFloatPtr(value)
		case strings.Contains(lowerName, "temp"):
			entry.temperatureC = parseFloatPtr(value)
		case strings.Contains(lowerName, "health"), strings.Contains(lowerName, "remaining life"), strings.Contains(lowerName, "life remaining"):
			entry.healthPct = parsePercentPtr(value)
		default:
			// Generic PSU power reading: the sensor matched a slot pattern
			// but carries no input/output keyword. Treat it as AC input only
			// if the reading is in Watts and no better data is set yet.
			if entry.inputPowerW == nil && strings.Contains(strings.ToLower(value), "watt") {
				entry.inputPowerW = parseBoundedFloat(value, 6000)
			}
		}
		out[slot] = entry
	}
	return out
}
// PSUSlotPower holds SDR power readings for one PSU slot.
// Slot key used by PSUSlotsFromSDR is the 0-based index string,
// matching HardwarePowerSupply.Slot in the audit schema.
type PSUSlotPower struct {
// InputW is the input power reading; nil (omitted from JSON) when no
// usable reading was found.
InputW *float64 `json:"input_w,omitempty"`
// OutputW is the output power reading; nil (omitted from JSON) when no
// usable reading was found.
OutputW *float64 `json:"output_w,omitempty"`
// Status is the per-slot status string derived from the SDR sensor states.
Status string `json:"status,omitempty"`
}
// PSUSlotsFromSDR turns raw `ipmitool sdr` output into per-slot PSU power
// data, reusing the slot patterns of the hardware audit collector so that
// MSI (PSU1_POWER_IN), xFusion (Power1, PS1 POut) and MLT (PSU1_PIN) naming
// all work.
//
// Keys of the returned map are 0-based slot indices rendered as strings,
// matching HardwarePowerSupply.Slot. The result is nil when the output
// contains no PSU sensors.
func PSUSlotsFromSDR(sdrOutput string) map[string]PSUSlotPower {
	parsed := parsePSUSDR(sdrOutput)
	if len(parsed) == 0 {
		return nil
	}

	result := make(map[string]PSUSlotPower, len(parsed))
	for oneBased, reading := range parsed {
		var power PSUSlotPower
		power.InputW = reading.inputPowerW
		power.OutputW = reading.outputPowerW
		power.Status = reading.status
		// Sensor names count from 1; the audit schema counts from 0.
		result[strconv.Itoa(oneBased-1)] = power
	}
	return result
}
// synthesizePSUsFromSDR builds a PSU list purely from SDR readings, for
// systems where FRU data gave nothing. One entry is produced per slot, in
// ascending slot order, with the placeholder model "PSU" and a 0-based slot
// string. Returns nil for an empty input map.
func synthesizePSUsFromSDR(sdr map[int]psuSDR) []schema.HardwarePowerSupply {
	if len(sdr) == 0 {
		return nil
	}

	// Map iteration order is random; sort for deterministic output.
	ordered := make([]int, 0, len(sdr))
	for n := range sdr {
		ordered = append(ordered, n)
	}
	sort.Ints(ordered)

	result := make([]schema.HardwarePowerSupply, 0, len(ordered))
	for _, n := range ordered {
		reading := sdr[n]

		state := reading.status
		if state == "" {
			state = statusUnknown
		}
		isPresent := true
		label := "PSU"
		zeroBased := strconv.Itoa(n - 1)

		var psu schema.HardwarePowerSupply
		psu.Status = &state
		psu.Slot = &zeroBased
		psu.Present = &isPresent
		psu.Model = &label
		psu.InputPowerW = reading.inputPowerW
		psu.OutputPowerW = reading.outputPowerW
		psu.InputVoltage = reading.inputVoltage
		psu.TemperatureC = reading.temperatureC

		if pct := reading.healthPct; pct != nil {
			used := 100 - *pct
			psu.LifeRemainingPct = pct
			psu.LifeUsedPct = &used
		}
		if reading.reason != "" {
			why := reading.reason
			psu.ErrorDescription = &why
		}
		result = append(result, psu)
	}
	return result
}
// mergePSUSDR overlays SDR readings onto FRU-derived PSU entries, in place.
//
// Each PSU's 0-based Slot string is converted to the 1-based key used in
// sdr; PSUs whose slot is not numeric or has no SDR entry are left as they
// are. Only non-nil readings and non-empty status/reason overwrite existing
// values.
func mergePSUSDR(psus []schema.HardwarePowerSupply, sdr map[int]psuSDR) {
	for i := range psus {
		psu := &psus[i]

		zeroBased, err := strconv.Atoi(derefPSUSlot(psu.Slot))
		if err != nil {
			continue
		}
		entry, found := sdr[zeroBased+1]
		if !found {
			continue
		}

		if entry.inputPowerW != nil {
			psu.InputPowerW = entry.inputPowerW
		}
		if entry.outputPowerW != nil {
			psu.OutputPowerW = entry.outputPowerW
		}
		if entry.inputVoltage != nil {
			psu.InputVoltage = entry.inputVoltage
		}
		if entry.temperatureC != nil {
			psu.TemperatureC = entry.temperatureC
		}
		if pct := entry.healthPct; pct != nil {
			used := 100 - *pct
			psu.LifeRemainingPct = pct
			psu.LifeUsedPct = &used
		}
		if entry.status != "" {
			psu.Status = &entry.status
		}
		if entry.reason != "" {
			psu.ErrorDescription = &entry.reason
		}

		// An "OK" that is backed by no SDR status and no electrical reading
		// is downgraded to unknown.
		noReadings := entry.inputPowerW == nil && entry.outputPowerW == nil && entry.inputVoltage == nil
		if entry.status == "" && noReadings && psu.Status != nil && *psu.Status == statusOK {
			unknown := statusUnknown
			psu.Status = &unknown
		}
	}
}
// splitSDRFields breaks a pipe-separated SDR line into its cells, trimming
// surrounding whitespace and discarding cells that end up empty. The result
// is always non-nil.
func splitSDRFields(line string) []string {
	cells := make([]string, 0, strings.Count(line, "|")+1)
	rest := line
	for {
		cell, tail, more := strings.Cut(rest, "|")
		if cell = strings.TrimSpace(cell); cell != "" {
			cells = append(cells, cell)
		}
		if !more {
			return cells
		}
		rest = tail
	}
}
// parsePSUSlot extracts the PSU number from a sensor or FRU name by trying
// psuSlotPatterns in order. It returns the first captured number that is a
// positive integer; ok is false when no pattern yields one.
func parsePSUSlot(name string) (int, bool) {
	lowered := strings.ToLower(name)
	for _, pattern := range psuSlotPatterns {
		match := pattern.FindStringSubmatch(lowered)
		// match[0] is the whole match; capture groups start at index 1.
		for g := 1; g < len(match); g++ {
			if match[g] == "" {
				continue
			}
			if slot, err := strconv.Atoi(match[g]); err == nil && slot > 0 {
				return slot, true
			}
		}
	}
	return 0, false
}
// parseFloatPtr returns the first finite number found among the
// whitespace-separated tokens of raw (e.g. "120 Watts" -> 120), or nil when
// raw is empty, is "na" (any case), or contains no numeric token.
//
// strconv.ParseFloat also accepts the spellings "nan", "inf" and "infinity".
// Those are skipped here by requiring at least one digit in the token:
// a non-finite reading is not a usable measurement, NaN slips through range
// comparisons, and encoding/json cannot marshal NaN or Inf.
func parseFloatPtr(raw string) *float64 {
	raw = strings.TrimSpace(raw)
	if raw == "" || strings.EqualFold(raw, "na") {
		return nil
	}
	for _, field := range strings.Fields(raw) {
		if !strings.ContainsAny(field, "0123456789") {
			continue // unit word, or a nan/inf spelling
		}
		if n, err := strconv.ParseFloat(field, 64); err == nil {
			return &n
		}
	}
	return nil
}
// derefPSUSlot returns the string slot points to, or "" for a nil pointer.
func derefPSUSlot(slot *string) string {
	var value string
	if slot != nil {
		value = *slot
	}
	return value
}
// parseWattage extracts a power rating in watts from strings like
// "PSU 800W", "1200W PLATINUM" or "PSU 800 W". Matching is case-insensitive
// and tolerates surrounding brackets or punctuation, e.g. "(800W)".
//
// A number counts only when it carries a watt unit, either as a "W" suffix
// or as a following "W"/"WATT"/"WATTS" token. Bare numbers are ignored so
// that names such as "PSU 2" or "Power Supply 1" are not misread as a 2 W or
// 1 W rating. Returns 0 when no rating in the range 1..5000 is found.
func parseWattage(s string) int {
	const punctuation = "()[],;:"

	fields := strings.Fields(strings.ToUpper(s))
	for i, field := range fields {
		digits, hasUnit := strings.CutSuffix(strings.Trim(field, punctuation), "W")
		if !hasUnit {
			// Accept "800 W" / "800 WATTS": the unit is the next token.
			if i+1 >= len(fields) {
				continue
			}
			switch strings.Trim(fields[i+1], punctuation) {
			case "W", "WATT", "WATTS":
			default:
				continue
			}
		}
		if n, err := strconv.Atoi(digits); err == nil && n > 0 && n <= 5000 {
			return n
		}
	}
	return 0
}