nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
370
internal/parser/vendors/nvidia/nvflash_verbose.go
vendored
Normal file
370
internal/parser/vendors/nvidia/nvflash_verbose.go
vendored
Normal file
@@ -0,0 +1,370 @@
|
||||
package nvidia
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
|
||||
// nvflashAdapterRegex matches an "Adapter:" header line. Capture groups 1-4
// are the four comma-separated hex values inside the parentheses and groups
// 5-8 are the hex values following "S:", "B:", "D:" and "F:".
var nvflashAdapterRegex = regexp.MustCompile(`^Adapter:\s+.+\(([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+)\)\s+S:([0-9A-Fa-f]{2}),B:([0-9A-Fa-f]{2}),D:([0-9A-Fa-f]{2}),F:([0-9A-Fa-f])`)

// gpuPCIIDRegex matches a "GPU_SXM<n>_PCIID: <value>" line; group 1 is the
// numeric index and group 2 the whitespace-free value.
var gpuPCIIDRegex = regexp.MustCompile(`^GPU_SXM(\d+)_PCIID:\s*(\S+)$`)

// nvsPCIIDRegex matches a "NVSWITCH_NVSWITCH<n>_PCIID: <value>" line; group 1
// is the numeric index and group 2 the whitespace-free value.
var nvsPCIIDRegex = regexp.MustCompile(`^NVSWITCH_NVSWITCH(\d+)_PCIID:\s*(\S+)$`)
|
||||
|
||||
// nvswitchProjectToPartNumber maps a "Project" value read from the nvflash log
// to the part number written onto NVSwitch devices. Keys are looked up after
// lowercasing and trimming, so they must be stored in that form.
var nvswitchProjectToPartNumber = map[string]string{
	"5612-0002": "965-25612-0002-000",
}
|
||||
|
||||
// nvflashDeviceRecord holds what was collected for one adapter section of the
// nvflash verbose log.
type nvflashDeviceRecord struct {
	// BDF is the PCI address built from the adapter header line.
	BDF string

	// Numeric IDs parsed from hex text; zero when a value was absent or could
	// not be parsed.
	VendorID, DeviceID, SSVendorID, SSDeviceID int

	// Values of the "Version", "Board ID", "Hierarchy ID", "Chip SKU" and
	// "Project" lines of the section, in that order.
	Version, BoardID, HierarchyID, ChipSKU, Project string
}
|
||||
|
||||
// ParseNVFlashVerboseLog parses inventory/nvflash_verbose.log and applies firmware versions
|
||||
// to already discovered devices using PCI BDF with optional ID checks.
|
||||
func ParseNVFlashVerboseLog(content []byte, result *models.AnalysisResult) error {
|
||||
if result == nil || result.Hardware == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
records := parseNVFlashRecords(content)
|
||||
if len(records) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
bdf := normalizePCIBDF(gpu.BDF)
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
rec, ok := records[bdf]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if gpu.DeviceID != 0 && rec.DeviceID != 0 && gpu.DeviceID != rec.DeviceID {
|
||||
continue
|
||||
}
|
||||
if gpu.VendorID != 0 && rec.VendorID != 0 && gpu.VendorID != rec.VendorID {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(rec.Version) != "" {
|
||||
gpu.Firmware = strings.TrimSpace(rec.Version)
|
||||
}
|
||||
}
|
||||
|
||||
for i := range result.Hardware.PCIeDevices {
|
||||
dev := &result.Hardware.PCIeDevices[i]
|
||||
bdf := normalizePCIBDF(dev.BDF)
|
||||
if bdf == "" {
|
||||
continue
|
||||
}
|
||||
rec, ok := records[bdf]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if dev.DeviceID != 0 && rec.DeviceID != 0 && dev.DeviceID != rec.DeviceID {
|
||||
continue
|
||||
}
|
||||
if dev.VendorID != 0 && rec.VendorID != 0 && dev.VendorID != rec.VendorID {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch") || strings.HasPrefix(strings.ToUpper(strings.TrimSpace(dev.Slot)), "NVSWITCH") {
|
||||
if mappedPN := mapNVSwitchPartNumberByProject(rec.Project); mappedPN != "" {
|
||||
dev.PartNumber = mappedPN
|
||||
}
|
||||
}
|
||||
|
||||
if strings.TrimSpace(rec.Version) != "" && strings.TrimSpace(dev.PartNumber) == "" {
|
||||
// Fallback for non-NVSwitch devices where part number is unknown.
|
||||
dev.PartNumber = strings.TrimSpace(rec.Version)
|
||||
}
|
||||
}
|
||||
|
||||
appendNVFlashFirmwareEntries(result, records)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyInventoryPCIIDs enriches devices with PCI BDFs from inventory/inventory.log.
|
||||
func ApplyInventoryPCIIDs(content []byte, result *models.AnalysisResult) error {
|
||||
if result == nil || result.Hardware == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
slotToBDF := parseInventoryPCIIDs(content)
|
||||
if len(slotToBDF) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i := range result.Hardware.GPUs {
|
||||
gpu := &result.Hardware.GPUs[i]
|
||||
if strings.TrimSpace(gpu.BDF) != "" {
|
||||
continue
|
||||
}
|
||||
if bdf := slotToBDF[strings.TrimSpace(gpu.Slot)]; bdf != "" {
|
||||
gpu.BDF = bdf
|
||||
}
|
||||
}
|
||||
|
||||
for i := range result.Hardware.PCIeDevices {
|
||||
dev := &result.Hardware.PCIeDevices[i]
|
||||
if strings.TrimSpace(dev.BDF) != "" {
|
||||
continue
|
||||
}
|
||||
if bdf := slotToBDF[normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))]; bdf != "" {
|
||||
dev.BDF = bdf
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseNVFlashRecords(content []byte) map[string]nvflashDeviceRecord {
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||
records := make(map[string]nvflashDeviceRecord)
|
||||
var current *nvflashDeviceRecord
|
||||
|
||||
commit := func() {
|
||||
if current == nil {
|
||||
return
|
||||
}
|
||||
if current.BDF == "" || strings.TrimSpace(current.Version) == "" {
|
||||
return
|
||||
}
|
||||
records[current.BDF] = *current
|
||||
}
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if m := nvflashAdapterRegex.FindStringSubmatch(line); len(m) == 9 {
|
||||
commit()
|
||||
vendorID, _ := parseHexInt(m[1])
|
||||
deviceID, _ := parseHexInt(m[2])
|
||||
ssVendorID, _ := parseHexInt(m[3])
|
||||
ssDeviceID, _ := parseHexInt(m[4])
|
||||
|
||||
current = &nvflashDeviceRecord{
|
||||
BDF: fmt.Sprintf("0000:%s:%s.%s", strings.ToLower(m[6]), strings.ToLower(m[7]), strings.ToLower(m[8])),
|
||||
VendorID: vendorID,
|
||||
DeviceID: deviceID,
|
||||
SSVendorID: ssVendorID,
|
||||
SSDeviceID: ssDeviceID,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if current == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if !strings.Contains(line, ":") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
key := strings.TrimSpace(parts[0])
|
||||
val := strings.TrimSpace(parts[1])
|
||||
if key == "" || val == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
switch key {
|
||||
case "Version":
|
||||
current.Version = val
|
||||
case "Board ID":
|
||||
current.BoardID = strings.ToLower(strings.TrimPrefix(val, "0x"))
|
||||
case "Vendor ID":
|
||||
if v, err := parseHexInt(val); err == nil {
|
||||
current.VendorID = v
|
||||
}
|
||||
case "Device ID":
|
||||
if v, err := parseHexInt(val); err == nil {
|
||||
current.DeviceID = v
|
||||
}
|
||||
case "Hierarchy ID":
|
||||
current.HierarchyID = val
|
||||
case "Chip SKU":
|
||||
current.ChipSKU = val
|
||||
case "Project":
|
||||
current.Project = val
|
||||
}
|
||||
}
|
||||
|
||||
commit()
|
||||
return records
|
||||
}
|
||||
|
||||
func parseInventoryPCIIDs(content []byte) map[string]string {
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
||||
slotToBDF := make(map[string]string)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if m := gpuPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
||||
slotToBDF["GPUSXM"+m[1]] = normalizePCIBDF(m[2])
|
||||
continue
|
||||
}
|
||||
if m := nvsPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
||||
slotToBDF["NVSWITCH"+m[1]] = normalizePCIBDF(m[2])
|
||||
}
|
||||
}
|
||||
|
||||
return slotToBDF
|
||||
}
|
||||
|
||||
// shortPCIBDFRegex matches a lowercase PCI address without a domain, in the
// form "bb:dd.f". It is compiled once at package scope because
// normalizePCIBDF is called for every device and every inventory line.
var shortPCIBDFRegex = regexp.MustCompile(`^[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$`)

// normalizePCIBDF returns v lowercased and trimmed, with the domain "0000:"
// prepended when v is a short "bb:dd.f" address.
//
// Blank input yields "". Anything else, including full "dddd:bb:dd.f"
// addresses and text that is not a PCI address at all, is returned lowercased
// and trimmed but otherwise unchanged.
func normalizePCIBDF(v string) string {
	s := strings.TrimSpace(strings.ToLower(v))
	if s == "" {
		return ""
	}

	// bus:device.func -> 0000:bus:device.func
	if shortPCIBDFRegex.MatchString(s) {
		return "0000:" + s
	}

	return s
}
|
||||
|
||||
// parseHexInt converts hexadecimal text to an int. Surrounding whitespace is
// ignored, letter case does not matter and one leading "0x" is accepted.
//
// It returns an error when nothing remains after trimming, or when the digits
// are rejected by strconv.ParseInt with base 16 and a 32-bit size.
func parseHexInt(v string) (int, error) {
	digits := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(v)), "0x")
	if len(digits) == 0 {
		return 0, fmt.Errorf("empty hex value")
	}

	parsed, err := strconv.ParseInt(digits, 16, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
|
||||
|
||||
func findNVFlashVerboseLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
||||
for _, f := range files {
|
||||
path := strings.ToLower(f.Path)
|
||||
if strings.Contains(path, "inventory/nvflash_verbose.log") ||
|
||||
strings.Contains(path, "inventory\\nvflash_verbose.log") {
|
||||
return &f
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func findInventoryInfoLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
||||
for _, f := range files {
|
||||
path := strings.ToLower(f.Path)
|
||||
if strings.Contains(path, "inventory/inventory.log") ||
|
||||
strings.Contains(path, "inventory\\inventory.log") {
|
||||
return &f
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func appendNVFlashFirmwareEntries(result *models.AnalysisResult, records map[string]nvflashDeviceRecord) {
|
||||
if result == nil || result.Hardware == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if result.Hardware.Firmware == nil {
|
||||
result.Hardware.Firmware = make([]models.FirmwareInfo, 0)
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
for _, fw := range result.Hardware.Firmware {
|
||||
key := strings.ToLower(strings.TrimSpace(fw.DeviceName)) + "|" + strings.TrimSpace(fw.Version)
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
|
||||
for _, gpu := range result.Hardware.GPUs {
|
||||
version := strings.TrimSpace(gpu.Firmware)
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
model := strings.TrimSpace(gpu.PartNumber)
|
||||
if model == "" {
|
||||
model = strings.TrimSpace(gpu.Model)
|
||||
}
|
||||
if model == "" {
|
||||
model = strings.TrimSpace(gpu.Slot)
|
||||
}
|
||||
deviceName := fmt.Sprintf("GPU %s (%s)", strings.TrimSpace(gpu.Slot), model)
|
||||
key := strings.ToLower(deviceName) + "|" + version
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
||||
DeviceName: deviceName,
|
||||
Version: version,
|
||||
})
|
||||
}
|
||||
|
||||
for _, dev := range result.Hardware.PCIeDevices {
|
||||
bdf := normalizePCIBDF(dev.BDF)
|
||||
rec, ok := records[bdf]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
version := strings.TrimSpace(rec.Version)
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
slot := strings.TrimSpace(dev.Slot)
|
||||
deviceClass := strings.TrimSpace(dev.DeviceClass)
|
||||
if strings.EqualFold(deviceClass, "NVSwitch") || strings.HasPrefix(strings.ToUpper(slot), "NVSWITCH") {
|
||||
model := slot
|
||||
if pn := strings.TrimSpace(dev.PartNumber); pn != "" {
|
||||
model = pn
|
||||
}
|
||||
deviceName := fmt.Sprintf("NVSwitch %s (%s)", slot, model)
|
||||
key := strings.ToLower(deviceName) + "|" + version
|
||||
if _, ok := seen[key]; ok {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
||||
DeviceName: deviceName,
|
||||
Version: version,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mapNVSwitchPartNumberByProject(project string) string {
|
||||
key := strings.TrimSpace(strings.ToLower(project))
|
||||
if key == "" {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(nvswitchProjectToPartNumber[key])
|
||||
}
|
||||
Reference in New Issue
Block a user