Files
logpile/internal/parser/vendors/nvidia/nvflash_verbose.go

371 lines
9.1 KiB
Go

package nvidia
import (
"bufio"
"fmt"
"regexp"
"strconv"
"strings"
"git.mchus.pro/mchus/logpile/internal/models"
"git.mchus.pro/mchus/logpile/internal/parser"
)
var (
// nvflashAdapterRegex matches an "Adapter:" header line of the nvflash log.
// Groups 1-4 are the four comma-separated hex values inside the parentheses
// (parsed by parseNVFlashRecords as vendor, device, subsystem vendor and
// subsystem device IDs). Groups 5-7 are the two-digit hex values following
// "S:", "B:" and "D:", and group 8 is the single hex digit following "F:".
// The pattern is anchored at the start of the line only.
nvflashAdapterRegex = regexp.MustCompile(`^Adapter:\s+.+\(([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+)\)\s+S:([0-9A-Fa-f]{2}),B:([0-9A-Fa-f]{2}),D:([0-9A-Fa-f]{2}),F:([0-9A-Fa-f])`)
// gpuPCIIDRegex matches a whole "GPU_SXM<n>_PCIID: <value>" line; group 1 is
// the decimal index <n> and group 2 the whitespace-free value.
gpuPCIIDRegex = regexp.MustCompile(`^GPU_SXM(\d+)_PCIID:\s*(\S+)$`)
// nvsPCIIDRegex matches a whole "NVSWITCH_NVSWITCH<n>_PCIID: <value>" line;
// group 1 is the decimal index <n> and group 2 the whitespace-free value.
nvsPCIIDRegex = regexp.MustCompile(`^NVSWITCH_NVSWITCH(\d+)_PCIID:\s*(\S+)$`)
)
// nvswitchProjectToPartNumber maps the "Project" value read from an nvflash
// record to a part number. Lookups go through mapNVSwitchPartNumberByProject,
// which trims and lower-cases the project string first, so keys must be stored
// in lower case.
var nvswitchProjectToPartNumber = map[string]string{
"5612-0002": "965-25612-0002-000",
}
// nvflashDeviceRecord collects everything parsed for one adapter section of
// the nvflash verbose log.
type nvflashDeviceRecord struct {
	// BDF is the PCI address the record is keyed by.
	BDF string

	// Numeric PCI identifiers; zero means the value was not parsed.
	VendorID, DeviceID, SSVendorID, SSDeviceID int

	// Values taken from the "key: value" lines that follow the adapter header.
	Version, BoardID, HierarchyID, ChipSKU, Project string
}
// ParseNVFlashVerboseLog parses the contents of inventory/nvflash_verbose.log
// and enriches devices that are already present in result.
//
// Devices are matched to log records by normalized PCI BDF. A match is
// rejected when both sides carry a non-zero vendor or device ID and the two
// values differ. For matching GPUs the record's version becomes the GPU
// firmware. For matching PCIe devices, NVSwitch entries receive a part number
// derived from the record's project, and any device still lacking a part
// number receives the record's version instead. Finally, firmware inventory
// entries are appended via appendNVFlashFirmwareEntries.
//
// A nil result, nil result.Hardware, or a log without usable records is a
// no-op. The returned error is always nil.
func ParseNVFlashVerboseLog(content []byte, result *models.AnalysisResult) error {
	if result == nil || result.Hardware == nil {
		return nil
	}

	records := parseNVFlashRecords(content)
	if len(records) == 0 {
		return nil
	}

	// idsConflict is true only when both IDs are known (non-zero) and differ.
	idsConflict := func(known, logged int) bool {
		return known != 0 && logged != 0 && known != logged
	}

	hw := result.Hardware

	for idx := range hw.GPUs {
		gpu := &hw.GPUs[idx]
		key := normalizePCIBDF(gpu.BDF)
		if key == "" {
			continue
		}
		rec, found := records[key]
		if !found || idsConflict(gpu.DeviceID, rec.DeviceID) || idsConflict(gpu.VendorID, rec.VendorID) {
			continue
		}
		if fw := strings.TrimSpace(rec.Version); fw != "" {
			gpu.Firmware = fw
		}
	}

	for idx := range hw.PCIeDevices {
		dev := &hw.PCIeDevices[idx]
		key := normalizePCIBDF(dev.BDF)
		if key == "" {
			continue
		}
		rec, found := records[key]
		if !found || idsConflict(dev.DeviceID, rec.DeviceID) || idsConflict(dev.VendorID, rec.VendorID) {
			continue
		}

		classIsSwitch := strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch")
		slotIsSwitch := strings.HasPrefix(strings.ToUpper(strings.TrimSpace(dev.Slot)), "NVSWITCH")
		if classIsSwitch || slotIsSwitch {
			if pn := mapNVSwitchPartNumberByProject(rec.Project); pn != "" {
				dev.PartNumber = pn
			}
		}

		// Fallback when the part number is still unknown: reuse the version
		// string reported by nvflash.
		version := strings.TrimSpace(rec.Version)
		if version != "" && strings.TrimSpace(dev.PartNumber) == "" {
			dev.PartNumber = version
		}
	}

	appendNVFlashFirmwareEntries(result, records)
	return nil
}
// ApplyInventoryPCIIDs fills in missing PCI BDFs on GPUs and PCIe devices
// using the slot-to-BDF pairs found in inventory/inventory.log.
//
// GPUs are looked up by their trimmed slot name; PCIe devices by their trimmed
// slot name passed through normalizeNVSwitchSlot. A device that already has a
// non-blank BDF is never modified. A nil result, nil result.Hardware, or a log
// without any PCIID lines is a no-op. The returned error is always nil.
func ApplyInventoryPCIIDs(content []byte, result *models.AnalysisResult) error {
	if result == nil || result.Hardware == nil {
		return nil
	}

	bdfBySlot := parseInventoryPCIIDs(content)
	if len(bdfBySlot) == 0 {
		return nil
	}

	gpus := result.Hardware.GPUs
	for idx := range gpus {
		gpu := &gpus[idx]
		if strings.TrimSpace(gpu.BDF) == "" {
			addr := bdfBySlot[strings.TrimSpace(gpu.Slot)]
			if addr != "" {
				gpu.BDF = addr
			}
		}
	}

	devices := result.Hardware.PCIeDevices
	for idx := range devices {
		dev := &devices[idx]
		if strings.TrimSpace(dev.BDF) == "" {
			addr := bdfBySlot[normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))]
			if addr != "" {
				dev.BDF = addr
			}
		}
	}

	return nil
}
// parseNVFlashRecords splits nvflash verbose log content into per-adapter
// records keyed by PCI BDF ("ssss:bb:dd.f", lower-case hex).
//
// A record starts at each line matching nvflashAdapterRegex and collects the
// "key: value" lines that follow it until the next adapter line or the end of
// input. Recognized keys are Version, Board ID, Vendor ID, Device ID,
// Hierarchy ID, Chip SKU and Project; "Vendor ID"/"Device ID" lines override
// the IDs taken from the adapter header when they parse as hex. Records
// without a version are dropped, and a later record with the same BDF
// replaces an earlier one.
func parseNVFlashRecords(content []byte) map[string]nvflashDeviceRecord {
	records := make(map[string]nvflashDeviceRecord)

	scanner := bufio.NewScanner(strings.NewReader(string(content)))
	// By default bufio.Scanner gives up with ErrTooLong at the first line
	// longer than 64 KiB, which would silently drop every following adapter.
	// Allowing a token as large as the whole input makes that impossible; the
	// in-memory reader cannot fail, so scanner.Err() has nothing to report.
	scanner.Buffer(make([]byte, 0, 4096), len(content)+1)

	var current *nvflashDeviceRecord
	// commit stores the record being built, provided it is complete enough to
	// be useful (has both a BDF and a version).
	commit := func() {
		if current == nil || current.BDF == "" || strings.TrimSpace(current.Version) == "" {
			return
		}
		records[current.BDF] = *current
	}

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		if m := nvflashAdapterRegex.FindStringSubmatch(line); len(m) == 9 {
			commit()
			// The regex admits only hex digits here, so parsing can fail only
			// on overflow; the ID then stays 0, which callers treat as unknown.
			vendorID, _ := parseHexInt(m[1])
			deviceID, _ := parseHexInt(m[2])
			ssVendorID, _ := parseHexInt(m[3])
			ssDeviceID, _ := parseHexInt(m[4])
			current = &nvflashDeviceRecord{
				// S is the two-digit PCI segment (domain). Using it instead of
				// a hard-coded "0000" keeps adapters that share bus/device/
				// function on different segments from overwriting each other.
				BDF: fmt.Sprintf("00%s:%s:%s.%s",
					strings.ToLower(m[5]), strings.ToLower(m[6]), strings.ToLower(m[7]), strings.ToLower(m[8])),
				VendorID:   vendorID,
				DeviceID:   deviceID,
				SSVendorID: ssVendorID,
				SSDeviceID: ssDeviceID,
			}
			continue
		}

		// Key/value lines are only meaningful inside an adapter section.
		if current == nil {
			continue
		}
		sep := strings.IndexByte(line, ':')
		if sep < 0 {
			continue
		}
		key := strings.TrimSpace(line[:sep])
		val := strings.TrimSpace(line[sep+1:])
		if key == "" || val == "" {
			continue
		}

		switch key {
		case "Version":
			current.Version = val
		case "Board ID":
			current.BoardID = strings.ToLower(strings.TrimPrefix(val, "0x"))
		case "Vendor ID":
			if v, err := parseHexInt(val); err == nil {
				current.VendorID = v
			}
		case "Device ID":
			if v, err := parseHexInt(val); err == nil {
				current.DeviceID = v
			}
		case "Hierarchy ID":
			current.HierarchyID = val
		case "Chip SKU":
			current.ChipSKU = val
		case "Project":
			current.Project = val
		}
	}

	commit()
	return records
}
// parseInventoryPCIIDs extracts slot-to-BDF pairs from inventory.log content.
//
// A line "GPU_SXM<n>_PCIID: <value>" is stored under the key "GPUSXM<n>" and a
// line "NVSWITCH_NVSWITCH<n>_PCIID: <value>" under "NVSWITCH<n>"; each value
// is passed through normalizePCIBDF. All other lines are ignored, and a later
// line for the same slot overwrites an earlier one.
func parseInventoryPCIIDs(content []byte) map[string]string {
	slotToBDF := make(map[string]string)

	scanner := bufio.NewScanner(strings.NewReader(string(content)))
	// By default bufio.Scanner gives up with ErrTooLong at the first line
	// longer than 64 KiB, which would silently drop every following entry.
	// Allowing a token as large as the whole input makes that impossible; the
	// in-memory reader cannot fail, so scanner.Err() has nothing to report.
	scanner.Buffer(make([]byte, 0, 4096), len(content)+1)

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}
		if m := gpuPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
			slotToBDF["GPUSXM"+m[1]] = normalizePCIBDF(m[2])
			continue
		}
		if m := nvsPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
			slotToBDF["NVSWITCH"+m[1]] = normalizePCIBDF(m[2])
		}
	}
	return slotToBDF
}
// pciBDFShortRegex matches a lower-case "bb:dd.f" PCI address without a
// domain. It is compiled once because normalizePCIBDF runs inside per-device
// loops.
var pciBDFShortRegex = regexp.MustCompile(`^[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$`)

// normalizePCIBDF returns v trimmed and lower-cased, with the domain "0000:"
// prepended when v is a short "bb:dd.f" address. Every other input, including
// full "dddd:bb:dd.f" addresses and strings that are not PCI addresses at all,
// is returned trimmed and lower-cased but otherwise unchanged. Blank input
// yields "".
func normalizePCIBDF(v string) string {
	s := strings.TrimSpace(strings.ToLower(v))
	if pciBDFShortRegex.MatchString(s) {
		return "0000:" + s
	}
	return s
}
// parseHexInt converts a hexadecimal string to an int. Input is lower-cased
// and trimmed, and one leading "0x" is removed before parsing as a signed
// 32-bit base-16 number. It returns an error when nothing is left to parse or
// when strconv.ParseInt rejects the digits; the int result is 0 on error.
func parseHexInt(v string) (int, error) {
	digits := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(v)), "0x")
	if len(digits) == 0 {
		return 0, fmt.Errorf("empty hex value")
	}

	parsed, err := strconv.ParseInt(digits, 16, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
// findNVFlashVerboseLog returns the first extracted file whose lower-cased
// path contains "inventory/nvflash_verbose.log" (with either a forward slash
// or a backslash as separator), or nil when there is none. The result points
// to a copy of the matching entry, not into files.
func findNVFlashVerboseLog(files []parser.ExtractedFile) *parser.ExtractedFile {
	const (
		slashPath     = "inventory/nvflash_verbose.log"
		backslashPath = "inventory\\nvflash_verbose.log"
	)

	for idx := range files {
		lowered := strings.ToLower(files[idx].Path)
		if !strings.Contains(lowered, slashPath) && !strings.Contains(lowered, backslashPath) {
			continue
		}
		match := files[idx]
		return &match
	}
	return nil
}
// findInventoryInfoLog returns the first extracted file whose lower-cased path
// contains "inventory/inventory.log" (with either a forward slash or a
// backslash as separator), or nil when there is none. The result points to a
// copy of the matching entry, not into files.
func findInventoryInfoLog(files []parser.ExtractedFile) *parser.ExtractedFile {
	const (
		slashPath     = "inventory/inventory.log"
		backslashPath = "inventory\\inventory.log"
	)

	for idx := range files {
		lowered := strings.ToLower(files[idx].Path)
		if !strings.Contains(lowered, slashPath) && !strings.Contains(lowered, backslashPath) {
			continue
		}
		match := files[idx]
		return &match
	}
	return nil
}
// appendNVFlashFirmwareEntries adds firmware inventory entries for GPUs and
// NVSwitch devices to result.Hardware.Firmware.
//
// Every GPU with a non-blank Firmware value yields an entry named
// "GPU <slot> (<model>)", where model is the first non-blank of part number,
// model and slot. Every PCIe device that has an nvflash record with a version
// and is an NVSwitch (by device class or slot prefix) yields an entry named
// "NVSwitch <slot> (<model>)", where model is the part number or, failing
// that, the slot. Entries whose lower-cased name and version are already
// present are skipped. A nil result or nil result.Hardware is a no-op.
func appendNVFlashFirmwareEntries(result *models.AnalysisResult, records map[string]nvflashDeviceRecord) {
	if result == nil || result.Hardware == nil {
		return
	}
	hw := result.Hardware
	if hw.Firmware == nil {
		hw.Firmware = make([]models.FirmwareInfo, 0)
	}

	// Index what is already listed so nothing is added twice.
	seen := make(map[string]struct{})
	for _, existing := range hw.Firmware {
		name := strings.ToLower(strings.TrimSpace(existing.DeviceName))
		seen[name+"|"+strings.TrimSpace(existing.Version)] = struct{}{}
	}

	// add appends one entry unless an equal name/version pair is known.
	add := func(deviceName, version string) {
		key := strings.ToLower(deviceName) + "|" + version
		if _, dup := seen[key]; dup {
			return
		}
		seen[key] = struct{}{}
		hw.Firmware = append(hw.Firmware, models.FirmwareInfo{
			DeviceName: deviceName,
			Version:    version,
		})
	}

	for idx := range hw.GPUs {
		gpu := &hw.GPUs[idx]
		version := strings.TrimSpace(gpu.Firmware)
		if version == "" {
			continue
		}

		// First non-blank of part number, model, slot.
		model := ""
		for _, candidate := range []string{gpu.PartNumber, gpu.Model, gpu.Slot} {
			if model = strings.TrimSpace(candidate); model != "" {
				break
			}
		}
		add(fmt.Sprintf("GPU %s (%s)", strings.TrimSpace(gpu.Slot), model), version)
	}

	for idx := range hw.PCIeDevices {
		dev := &hw.PCIeDevices[idx]
		rec, found := records[normalizePCIBDF(dev.BDF)]
		if !found {
			continue
		}
		version := strings.TrimSpace(rec.Version)
		if version == "" {
			continue
		}

		slot := strings.TrimSpace(dev.Slot)
		byClass := strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch")
		bySlot := strings.HasPrefix(strings.ToUpper(slot), "NVSWITCH")
		if !byClass && !bySlot {
			continue
		}

		model := strings.TrimSpace(dev.PartNumber)
		if model == "" {
			model = slot
		}
		add(fmt.Sprintf("NVSwitch %s (%s)", slot, model), version)
	}
}
// mapNVSwitchPartNumberByProject looks up the part number registered for an
// nvflash "Project" value in nvswitchProjectToPartNumber. The project is
// lower-cased and trimmed before the lookup; a blank or unknown project
// yields "".
func mapNVSwitchPartNumberByProject(project string) string {
	if normalized := strings.TrimSpace(strings.ToLower(project)); normalized != "" {
		return strings.TrimSpace(nvswitchProjectToPartNumber[normalized])
	}
	return ""
}