Fix NVIDIA GPU serial number format extraction

Extract decimal serial numbers from devname parameters (e.g., "SXM5_SN_1653925027099")
instead of hex PCIe Device Serial Numbers. This provides the correct GPU serial
numbers as they appear in NVIDIA diagnostics tooling.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-10 22:57:50 +03:00
parent bcce975fd6
commit 77e25ddc02
3 changed files with 40 additions and 119 deletions

View File

@@ -10,22 +10,13 @@ import (
)
var (
// gpuSerialRegex captures the value that follows "Device Serial Number" in
// lspci capability output: a run of hex digits and dashes.
// Example: " Capabilities: [2f0 v1] Device Serial Number 14-17-dc-65-77-2d-b0-48"
gpuSerialRegex = regexp.MustCompile(`Device Serial Number\s+([\da-fA-F-]+)`)
// pciBDFRegex captures the PCI address at the start of an lspci device
// header line. It only matches lines that also contain "3D controller"
// followed later by "NVIDIA".
// Example: "2a:00.0 3D controller: NVIDIA Corporation Device 2335 (rev a1)"
// Note: lspci format is bus:device.function (e.g., "2a:00.0"); the first
// field accepts 2-4 hex digits and the function is a single decimal digit.
pciBDFRegex = regexp.MustCompile(`^([0-9a-fA-F]{2,4}:[0-9a-fA-F]{2}\.[0-9])\s+3D controller.*NVIDIA`)
// devnameRegex captures the two comma-separated parts of a "devname="
// parameter on the fieldiag command line: group 1 is the PCI address
// (hex digits, colons and dots), group 2 is the device name (word characters).
// Example: "devname=0000:ba:00.0,SXM5_SN_1653925027099"
devnameRegex = regexp.MustCompile(`devname=([\da-fA-F:\.]+),(\w+)`)
)
// ParseInventoryLog parses inventory/output.log to extract GPU serial numbers
// from lspci output and map them to slots
// from fieldiag devname parameters (e.g., "SXM5_SN_1653925027099")
func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
// No GPUs to update
@@ -34,8 +25,9 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
scanner := bufio.NewScanner(strings.NewReader(string(content)))
// First pass: build mapping of PCI BDF -> Slot name from fieldiag command line
// First pass: build mapping of PCI BDF -> Slot name and serial number from fieldiag command line
pciToSlot := make(map[string]string)
pciToSerial := make(map[string]string)
for scanner.Scan() {
line := scanner.Text()
// Look for fieldiag command with devname parameters
@@ -45,8 +37,7 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
if len(match) == 3 {
pciBDF := match[1]
slotName := match[2]
// Extract slot number from name like "SXM5_SN_1653925027099"
// We want to map to slot like "GPUSXM5"
// Extract slot number and serial from name like "SXM5_SN_1653925027099"
if strings.HasPrefix(slotName, "SXM") {
parts := strings.Split(slotName, "_")
if len(parts) >= 1 {
@@ -54,81 +45,39 @@ func ParseInventoryLog(content []byte, result *models.AnalysisResult) error {
slot := "GPU" + parts[0]
pciToSlot[pciBDF] = slot
}
// Extract serial number from "SXM5_SN_1653925027099"
if len(parts) == 3 && parts[1] == "SN" {
serial := parts[2]
pciToSerial[pciBDF] = serial
}
}
}
}
}
}
// Second pass: extract GPU serial numbers from lspci output
scanner = bufio.NewScanner(strings.NewReader(string(content)))
var currentPCIBDF string
var currentSlot string
for scanner.Scan() {
line := scanner.Text()
// Check if this is a new GPU device header
if match := pciBDFRegex.FindStringSubmatch(line); len(match) > 1 {
currentPCIBDF = match[1]
// Normalize BDF format - lspci uses short format (bus:device.function)
// but fieldiag uses full format (domain:bus:device.function)
// Convert "2a:00.0" to "0000:2a:00.0"
normalizedBDF := currentPCIBDF
if len(strings.Split(currentPCIBDF, ":")) == 2 {
// Short format without domain, add 0000:
normalizedBDF = "0000:" + currentPCIBDF
}
// Map to slot name if we have it
if slot, ok := pciToSlot[normalizedBDF]; ok {
currentSlot = slot
} else if slot, ok := pciToSlot[currentPCIBDF]; ok {
currentSlot = slot
} else {
currentSlot = ""
}
continue
}
// Look for Device Serial Number in capabilities
if match := gpuSerialRegex.FindStringSubmatch(line); len(match) > 1 && currentSlot != "" {
serialNumber := match[1]
// Format: 14-17-dc-65-77-2d-b0-48
// Convert to more readable format: 48:b0:2d:77:65:dc:17:14 (reversed)
serialFormatted := formatGPUSerial(serialNumber)
// Find the GPU in our results and update its serial number
for i := range result.Hardware.GPUs {
if result.Hardware.GPUs[i].Slot == currentSlot {
result.Hardware.GPUs[i].SerialNumber = serialFormatted
// Second pass: assign serial numbers to GPUs based on slot mapping
for i := range result.Hardware.GPUs {
slot := result.Hardware.GPUs[i].Slot
// Find the PCI BDF for this slot
var foundSerial string
for pciBDF, mappedSlot := range pciToSlot {
if mappedSlot == slot {
// Found matching slot, get serial number
if serial, ok := pciToSerial[pciBDF]; ok {
foundSerial = serial
break
}
}
}
if foundSerial != "" {
result.Hardware.GPUs[i].SerialNumber = foundSerial
}
}
return scanner.Err()
}
// formatGPUSerial rewrites a dash-separated PCIe Device Serial Number with its
// byte fields in reverse order, upper-cased and joined by colons.
//
// Input:  "14-17-dc-65-77-2d-b0-48"
// Output: "48:B0:2D:77:65:DC:17:14"
//
// The original author's note says PCIe reports the serial little-endian and
// that the reversed form matches the GPU label. Input that does not split into
// exactly eight dash-separated fields is returned unchanged (not upper-cased).
func formatGPUSerial(serial string) string {
	const wantFields = 8

	fields := strings.Split(serial, "-")
	if len(fields) != wantFields {
		// Unexpected shape: hand the value back untouched.
		return serial
	}

	// Walk the fields back to front so the last byte comes first.
	out := make([]string, 0, wantFields)
	for i := wantFields - 1; i >= 0; i-- {
		out = append(out, strings.ToUpper(fields[i]))
	}
	return strings.Join(out, ":")
}
// findInventoryOutputLog finds the inventory/output.log file
func findInventoryOutputLog(files []parser.ExtractedFile) *parser.ExtractedFile {
for _, f := range files {