nvidia: improve component mapping, firmware, statuses and check times

This commit is contained in:
2026-02-16 23:17:13 +03:00
parent 514da76ddb
commit b33cca5fcc
19 changed files with 2051 additions and 65 deletions

View File

@@ -13,6 +13,11 @@ var (
// devnameRegex extracts devname mappings from a fieldiag command line.
// It is unanchored, so it matches "devname=..." anywhere in the input.
// Capture group 1 is the run of hex digits, ':' and '.' that follows
// "devname=" (a PCI address in the example below); capture group 2 is the
// word-character token after the comma.
// Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099"
devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`)
// lspciBDFRegex captures the BDF from an echoed lspci command line such as
// "$ lspci -vvvs 0000:05:00.0" or "$ lspci -vvs 0000:05:00.0".
// The pattern is anchored at both ends: the line must begin with "$", contain
// a single option token starting with '-', and end (apart from trailing
// whitespace) with a full domain:bus:device.function address whose function
// digit is 0-7. Capture group 1 is that address.
lspciBDFRegex = regexp.MustCompile(`^\$\s+lspci\s+-[^\s]*\s+([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7])\s*$`)
// deviceSerialRegex captures the value that follows "Device Serial Number".
// It is unanchored; capture group 1 is a run of hex digits, '-' and ':'.
// Example: "Capabilities: [2f0 v1] Device Serial Number 99-d3-61-c8-ac-2d-b0-48"
deviceSerialRegex = regexp.MustCompile(`Device Serial Number\s+([0-9a-fA-F\-:]+)`)
)
// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers
@@ -75,6 +80,64 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
}
}
// Third pass: parse lspci "Device Serial Number" by BDF (useful for NVSwitch serials).
bdfToDeviceSerial := make(map[string]string)
currentBDF := ""
scanner = bufio.NewScanner(strings.NewReader(string(content)))
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
if m := lspciBDFRegex.FindStringSubmatch(line); len(m) == 2 {
currentBDF = strings.ToLower(strings.TrimSpace(m[1]))
continue
}
if currentBDF == "" {
continue
}
if m := deviceSerialRegex.FindStringSubmatch(line); len(m) == 2 {
serial := strings.TrimSpace(m[1])
if serial != "" {
bdfToDeviceSerial[currentBDF] = serial
}
currentBDF = ""
}
}
// Apply to PCIe devices first (includes NVSwitch).
for i := range result.Hardware.PCIeDevices {
dev := &result.Hardware.PCIeDevices[i]
if strings.TrimSpace(dev.SerialNumber) != "" {
continue
}
bdf := strings.ToLower(strings.TrimSpace(dev.BDF))
if bdf == "" {
continue
}
if serial := bdfToDeviceSerial[bdf]; serial != "" {
dev.SerialNumber = serial
}
}
// Apply to GPUs only if GPU serial is still empty (do not overwrite prod serial from devname).
for i := range result.Hardware.GPUs {
gpu := &result.Hardware.GPUs[i]
if strings.TrimSpace(gpu.SerialNumber) != "" {
continue
}
bdf := strings.ToLower(strings.TrimSpace(gpu.BDF))
if bdf == "" {
continue
}
if serial := bdfToDeviceSerial[bdf]; serial != "" {
gpu.SerialNumber = serial
}
}
return scanner.Err()
}