Sync hardware ingest contract v2.7
This commit is contained in:
@@ -24,18 +24,17 @@ type nvidiaGPUInfo struct {
|
||||
}
|
||||
|
||||
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
|
||||
// If the driver/tool is unavailable, NVIDIA devices get Unknown status and
|
||||
// a stable serial fallback based on board serial + slot.
|
||||
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice, boardSerial string) []schema.HardwarePCIeDevice {
|
||||
// If the driver/tool is unavailable, NVIDIA devices get Unknown status.
|
||||
func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
|
||||
if !hasNVIDIADevices(devs) {
|
||||
return devs
|
||||
}
|
||||
gpuByBDF, err := queryNVIDIAGPUs()
|
||||
if err != nil {
|
||||
slog.Info("nvidia: enrichment skipped", "err", err)
|
||||
return enrichPCIeWithNVIDIAData(devs, nil, boardSerial, false)
|
||||
return enrichPCIeWithNVIDIAData(devs, nil, false)
|
||||
}
|
||||
return enrichPCIeWithNVIDIAData(devs, gpuByBDF, boardSerial, true)
|
||||
return enrichPCIeWithNVIDIAData(devs, gpuByBDF, true)
|
||||
}
|
||||
|
||||
func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
|
||||
@@ -47,7 +46,7 @@ func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, boardSerial string, driverLoaded bool) []schema.HardwarePCIeDevice {
|
||||
func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, driverLoaded bool) []schema.HardwarePCIeDevice {
|
||||
enriched := 0
|
||||
for i := range devs {
|
||||
if !isNVIDIADevice(devs[i]) {
|
||||
@@ -55,7 +54,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
|
||||
}
|
||||
|
||||
if !driverLoaded {
|
||||
setPCIeFallback(&devs[i], boardSerial)
|
||||
setPCIeFallback(&devs[i])
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -65,14 +64,12 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
|
||||
}
|
||||
info, ok := gpuByBDF[bdf]
|
||||
if !ok {
|
||||
setPCIeFallback(&devs[i], boardSerial)
|
||||
setPCIeFallback(&devs[i])
|
||||
continue
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(info.Serial); v != "" {
|
||||
devs[i].SerialNumber = &v
|
||||
} else {
|
||||
setPCIeFallbackSerial(&devs[i], boardSerial)
|
||||
}
|
||||
if v := strings.TrimSpace(info.VBIOS); v != "" {
|
||||
devs[i].Firmware = &v
|
||||
@@ -213,26 +210,11 @@ func isNVIDIADevice(dev schema.HardwarePCIeDevice) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func setPCIeFallback(dev *schema.HardwarePCIeDevice, boardSerial string) {
|
||||
setPCIeFallbackSerial(dev, boardSerial)
|
||||
func setPCIeFallback(dev *schema.HardwarePCIeDevice) {
|
||||
status := statusUnknown
|
||||
dev.Status = &status
|
||||
}
|
||||
|
||||
func setPCIeFallbackSerial(dev *schema.HardwarePCIeDevice, boardSerial string) {
|
||||
if strings.TrimSpace(boardSerial) == "" || dev.SerialNumber != nil {
|
||||
return
|
||||
}
|
||||
slot := "unknown"
|
||||
if dev.BDF != nil && strings.TrimSpace(*dev.BDF) != "" {
|
||||
slot = strings.TrimSpace(*dev.BDF)
|
||||
} else if dev.Slot != nil && strings.TrimSpace(*dev.Slot) != "" {
|
||||
slot = strings.TrimSpace(*dev.Slot)
|
||||
}
|
||||
fb := fmt.Sprintf("%s-PCIE-%s", boardSerial, slot)
|
||||
dev.SerialNumber = &fb
|
||||
}
|
||||
|
||||
func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
|
||||
if info.TemperatureC != nil {
|
||||
dev.TemperatureC = info.TemperatureC
|
||||
|
||||
Reference in New Issue
Block a user