Files
bee/audit/internal/collector/collector.go
Mikhail Chusavitin cbb0d1e522 Collect IPMI sensors, SEL and dmesg errors into audit JSON and support bundle
- audit JSON: IPMI sensor readings (ipmitool sensor) merged into hardware.sensors alongside lm-sensors data
- audit JSON: IPMI SEL entries (ipmitool sel list) in hardware.event_logs with source "ipmi-sel"
- audit JSON: dmesg error/warning lines in hardware.event_logs with source "dmesg" (filtered by error/warn/AER/Xid/NVRM/ECC/panic patterns)
- support bundle: added ipmitool-sensor.txt, ipmitool-sel.txt, ipmitool-sel-time.txt to techdump
- saa_dmi.go: fix dmiItemRE to accept SHN with parentheses (e.g. PS(4)LC for PSU fields)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-19 08:41:37 +03:00

72 lines
2.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Package collector runs all hardware collectors and merges results
// into a single HardwareSnapshot. Each sub-collector is independent:
// a failure in one does not abort the others.
package collector
import (
"bee/audit/internal/runtimeenv"
"bee/audit/internal/schema"
"log/slog"
"os"
"time"
)
// Run executes all collectors and returns the combined snapshot.
// Partial failures are logged as warnings; collection always completes.
func Run(_ runtimeenv.Mode) schema.HardwareIngestRequest {
start := time.Now()
collectedAt := time.Now().UTC().Format(time.RFC3339)
slog.Info("audit started")
snap := schema.HardwareSnapshot{}
board, biosFW := collectBoard()
snap.Board = board
snap.Firmware = append(snap.Firmware, biosFW...)
snap.Firmware = append(snap.Firmware, collectBMCFirmware(derefString(snap.Board.Manufacturer))...)
snap.CPUs = collectCPUs()
snap.Memory = collectMemory()
sensorDoc, err := readSensorsJSONDoc()
if err != nil {
slog.Info("sensors: unavailable for enrichment", "err", err)
}
snap.CPUs = enrichCPUsWithTelemetry(snap.CPUs, sensorDoc)
snap.Memory = enrichMemoryWithTelemetry(snap.Memory, sensorDoc)
bestEffortRescanHotplugStorage()
snap.Storage = collectStorage()
snap.PCIeDevices = collectPCIe()
snap.PCIeDevices = enrichPCIeWithAMD(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithPCISerials(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices)
snap.PCIeDevices = enrichNVLinkBridgesWithGPUTopo(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)
snap.PCIeDevices = enrichPCIeWithRAIDTelemetry(snap.PCIeDevices)
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
snap.VROCLicense = collectVROCLicense(snap.PCIeDevices)
snap.PowerSupplies = collectPSUs(derefString(snap.Board.Manufacturer))
snap.PowerSupplies = enrichPSUsWithTelemetry(snap.PowerSupplies, sensorDoc)
snap.Sensors = mergeIPMISensors(buildSensorsFromDoc(sensorDoc), collectIPMISensors())
snap.EventLogs = append(collectIPMISEL(), collectDmesgErrors()...)
finalizeSnapshot(&snap, collectedAt)
// remaining collectors added in steps 1.8 1.10
slog.Info("audit completed", "duration", time.Since(start).Round(time.Millisecond))
sourceType := "manual"
var targetHost *string
if hostname, err := os.Hostname(); err == nil && hostname != "" {
targetHost = &hostname
}
return schema.HardwareIngestRequest{
SourceType: &sourceType,
TargetHost: targetHost,
CollectedAt: collectedAt,
Hardware: snap,
}
}