Sync hardware ingest contract v2.7

This commit is contained in:
Mikhail Chusavitin
2026-03-15 23:03:38 +03:00
parent 72cf482ad3
commit 78c6dfc0ef
12 changed files with 200 additions and 176 deletions

View File

@@ -24,18 +24,17 @@ type nvidiaGPUInfo struct {
}
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
// If the driver/tool is unavailable, NVIDIA devices get Unknown status and
// a stable serial fallback based on board serial + slot.
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice, boardSerial string) []schema.HardwarePCIeDevice {
// If the driver/tool is unavailable, NVIDIA devices get Unknown status.
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
if !hasNVIDIADevices(devs) {
return devs
}
gpuByBDF, err := queryNVIDIAGPUs()
if err != nil {
slog.Info("nvidia: enrichment skipped", "err", err)
return enrichPCIeWithNVIDIAData(devs, nil, boardSerial, false)
return enrichPCIeWithNVIDIAData(devs, nil, false)
}
return enrichPCIeWithNVIDIAData(devs, gpuByBDF, boardSerial, true)
return enrichPCIeWithNVIDIAData(devs, gpuByBDF, true)
}
func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
@@ -47,7 +46,7 @@ func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
return false
}
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, boardSerial string, driverLoaded bool) []schema.HardwarePCIeDevice {
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, driverLoaded bool) []schema.HardwarePCIeDevice {
enriched := 0
for i := range devs {
if !isNVIDIADevice(devs[i]) {
@@ -55,7 +54,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
}
if !driverLoaded {
setPCIeFallback(&devs[i], boardSerial)
setPCIeFallback(&devs[i])
continue
}
@@ -65,14 +64,12 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
}
info, ok := gpuByBDF[bdf]
if !ok {
setPCIeFallback(&devs[i], boardSerial)
setPCIeFallback(&devs[i])
continue
}
if v := strings.TrimSpace(info.Serial); v != "" {
devs[i].SerialNumber = &v
} else {
setPCIeFallbackSerial(&devs[i], boardSerial)
}
if v := strings.TrimSpace(info.VBIOS); v != "" {
devs[i].Firmware = &v
@@ -213,26 +210,11 @@ func isNVIDIADevice(dev schema.HardwarePCIeDevice) bool {
return false
}
func setPCIeFallback(dev *schema.HardwarePCIeDevice, boardSerial string) {
setPCIeFallbackSerial(dev, boardSerial)
func setPCIeFallback(dev *schema.HardwarePCIeDevice) {
status := statusUnknown
dev.Status = &status
}
func setPCIeFallbackSerial(dev *schema.HardwarePCIeDevice, boardSerial string) {
if strings.TrimSpace(boardSerial) == "" || dev.SerialNumber != nil {
return
}
slot := "unknown"
if dev.BDF != nil && strings.TrimSpace(*dev.BDF) != "" {
slot = strings.TrimSpace(*dev.BDF)
} else if dev.Slot != nil && strings.TrimSpace(*dev.Slot) != "" {
slot = strings.TrimSpace(*dev.Slot)
}
fb := fmt.Sprintf("%s-PCIE-%s", boardSerial, slot)
dev.SerialNumber = &fb
}
func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
if info.TemperatureC != nil {
dev.TemperatureC = info.TemperatureC