nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
63
internal/parser/vendors/nvidia/inventory_log.go
vendored
63
internal/parser/vendors/nvidia/inventory_log.go
vendored
@@ -13,6 +13,11 @@ var (
|
||||
// Regex to extract devname mappings from fieldiag command line
|
||||
// Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099"
|
||||
devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`)
|
||||
// Regex to capture BDF from commands like:
|
||||
// "$ lspci -vvvs 0000:05:00.0" or "$ lspci -vvs 0000:05:00.0"
|
||||
lspciBDFRegex = regexp.MustCompile(`^\$\s+lspci\s+-[^\s]*\s+([0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7])\s*$`)
|
||||
// Example: "Capabilities: [2f0 v1] Device Serial Number 99-d3-61-c8-ac-2d-b0-48"
|
||||
deviceSerialRegex = regexp.MustCompile(`Device Serial Number\s+([0-9a-fA-F\-:]+)`)
|
||||
)
|
||||
|
||||
// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers
|
||||
@@ -75,6 +80,64 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Third pass: parse lspci "Device Serial Number" by BDF (useful for NVSwitch serials).
|
||||
bdfToDeviceSerial := make(map[string]string)
|
||||
currentBDF := ""
|
||||
scanner = bufio.NewScanner(strings.NewReader(string(content)))
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if m := lspciBDFRegex.FindStringSubmatch(line); len(m) == 2 {
|
||||
currentBDF = strings.ToLower(strings.TrimSpace(m[1]))
|
||||
continue
|
||||
}
|
||||
|
||||
if currentBDF == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if m := deviceSerialRegex.FindStringSubmatch(line); len(m) == 2 {
|
||||
serial := strings.TrimSpace(m[1])
|
||||
if serial != "" {
|
||||
bdfToDeviceSerial[currentBDF] = serial
|
||||
}
|
||||
currentBDF = ""
|
||||
}
|
||||
}
|
||||
|
||||
// Apply to PCIe devices first (includes NVSwitch).
|
||||
for i := range result.Hardware.PCIeDevices {
|
||||
dev := &result.Hardware.PCIeDevices[i]
|
||||
if strings.TrimSpace(dev.SerialNumber) != "" {
|
||||
continue
|
||||
}
|
||||
bdf := strings.ToLower(strings.TrimSpace(dev.BDF))
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
if serial := bdfToDeviceSerial[bdf]; serial != "" {
|
||||
dev.SerialNumber = serial
|
||||
}
|
||||
}
|
||||
|
||||
// Apply to GPUs only if GPU serial is still empty (do not overwrite prod serial from devname).
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
if strings.TrimSpace(gpu.SerialNumber) != "" {
|
||||
continue
|
||||
}
|
||||
bdf := strings.ToLower(strings.TrimSpace(gpu.BDF))
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
if serial := bdfToDeviceSerial[bdf]; serial != "" {
|
||||
gpu.SerialNumber = serial
|
||||
}
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user