IPMI hang fix (Lenovo XCC SR650 V3): - Add pluggable ipmi_profile system with per-vendor timeouts and fruEarlyExit flag - Lenovo profile: 90s FRU timeout, streaming early-exit stops after PSU blocks found - collectFRUEarlyExit streams ipmitool fru print and kills process once PSU blocks are followed by a non-PSU header (~6s instead of ~108s on 54-device FRU list) - collectBMCFirmware and collectPSUs accept manufacturer and apply profile timeouts VROC license detection: - Detect VMD/VROC controller in PCIe list, run mdadm --detail-platform - Parse "License:" line; store as snap.VROCLicense in HardwareSnapshot Blackbox service fix: - bee-blackbox.service was missing from systemctl enable list in ISO build hook - Service never started on boot; state file never written; UI button stayed "Enable" Drop qrencode: - Remove from package list, standardTools API check, and runtime-flows doc Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
70 lines
2.4 KiB
Go
70 lines
2.4 KiB
Go
// Package collector runs all hardware collectors and merges results
// into a single HardwareSnapshot. Each sub-collector is independent:
// a failure in one does not abort the others.
|
||
package collector
|
||
|
||
import (
|
||
"bee/audit/internal/runtimeenv"
|
||
"bee/audit/internal/schema"
|
||
"log/slog"
|
||
"os"
|
||
"time"
|
||
)
|
||
|
||
// Run executes all collectors and returns the combined snapshot.
|
||
// Partial failures are logged as warnings; collection always completes.
|
||
func Run(_ runtimeenv.Mode) schema.HardwareIngestRequest {
|
||
start := time.Now()
|
||
collectedAt := time.Now().UTC().Format(time.RFC3339)
|
||
slog.Info("audit started")
|
||
|
||
snap := schema.HardwareSnapshot{}
|
||
|
||
board, biosFW := collectBoard()
|
||
snap.Board = board
|
||
snap.Firmware = append(snap.Firmware, biosFW...)
|
||
snap.Firmware = append(snap.Firmware, collectBMCFirmware(derefString(snap.Board.Manufacturer))...)
|
||
|
||
snap.CPUs = collectCPUs()
|
||
|
||
snap.Memory = collectMemory()
|
||
sensorDoc, err := readSensorsJSONDoc()
|
||
if err != nil {
|
||
slog.Info("sensors: unavailable for enrichment", "err", err)
|
||
}
|
||
snap.CPUs = enrichCPUsWithTelemetry(snap.CPUs, sensorDoc)
|
||
snap.Memory = enrichMemoryWithTelemetry(snap.Memory, sensorDoc)
|
||
bestEffortRescanHotplugStorage()
|
||
snap.Storage = collectStorage()
|
||
snap.PCIeDevices = collectPCIe()
|
||
snap.PCIeDevices = enrichPCIeWithAMD(snap.PCIeDevices)
|
||
snap.PCIeDevices = enrichPCIeWithPCISerials(snap.PCIeDevices)
|
||
snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices)
|
||
snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
|
||
snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)
|
||
snap.PCIeDevices = enrichPCIeWithRAIDTelemetry(snap.PCIeDevices)
|
||
snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
|
||
snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
|
||
snap.VROCLicense = collectVROCLicense(snap.PCIeDevices)
|
||
snap.PowerSupplies = collectPSUs(derefString(snap.Board.Manufacturer))
|
||
snap.PowerSupplies = enrichPSUsWithTelemetry(snap.PowerSupplies, sensorDoc)
|
||
snap.Sensors = buildSensorsFromDoc(sensorDoc)
|
||
finalizeSnapshot(&snap, collectedAt)
|
||
|
||
// remaining collectors added in steps 1.8 – 1.10
|
||
|
||
slog.Info("audit completed", "duration", time.Since(start).Round(time.Millisecond))
|
||
|
||
sourceType := "manual"
|
||
var targetHost *string
|
||
if hostname, err := os.Hostname(); err == nil && hostname != "" {
|
||
targetHost = &hostname
|
||
}
|
||
return schema.HardwareIngestRequest{
|
||
SourceType: &sourceType,
|
||
TargetHost: targetHost,
|
||
CollectedAt: collectedAt,
|
||
Hardware: snap,
|
||
}
|
||
}
|