371 lines
9.1 KiB
Go
371 lines
9.1 KiB
Go
package nvidia
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"git.mchus.pro/mchus/logpile/internal/models"
|
|
"git.mchus.pro/mchus/logpile/internal/parser"
|
|
)
|
|
|
|
// Line patterns for the NVIDIA inventory logs handled in this file.
var (
	// gpuPCIIDRegex matches a whole "GPU_SXM<n>_PCIID: <value>" line.
	// Group 1 is the decimal SXM index, group 2 the whitespace-free value.
	gpuPCIIDRegex = regexp.MustCompile(`^GPU_SXM(\d+)_PCIID:\s*(\S+)$`)

	// nvsPCIIDRegex matches a whole "NVSWITCH_NVSWITCH<n>_PCIID: <value>" line.
	// Group 1 is the decimal switch index, group 2 the whitespace-free value.
	nvsPCIIDRegex = regexp.MustCompile(`^NVSWITCH_NVSWITCH(\d+)_PCIID:\s*(\S+)$`)

	// nvflashAdapterRegex matches the start of an "Adapter:" header line.
	// Groups 1-4 are the four comma-separated hex values inside the
	// parentheses; groups 5-8 are the hex values after "S:", "B:", "D:" and
	// "F:". The pattern is not anchored at the end, and only the first hex
	// digit after "F:" is captured.
	nvflashAdapterRegex = regexp.MustCompile(`^Adapter:\s+.+\(([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+),([\da-fA-F]+)\)\s+S:([0-9A-Fa-f]{2}),B:([0-9A-Fa-f]{2}),D:([0-9A-Fa-f]{2}),F:([0-9A-Fa-f])`)
)
|
|
|
|
// nvswitchProjectToPartNumber maps a "Project" value reported in the nvflash
// log to the part number assigned to the matching NVSwitch device. Lookups go
// through mapNVSwitchPartNumberByProject, which trims and lower-cases the key.
var nvswitchProjectToPartNumber = map[string]string{
	"5612-0002": "965-25612-0002-000",
}
|
|
|
|
// nvflashDeviceRecord holds the values collected for one "Adapter:" section
// of the nvflash verbose log.
type nvflashDeviceRecord struct {
	// BDF is the adapter's PCI address in "0000:bb:dd.f" form (lower-case hex).
	BDF string

	// PCI vendor/device IDs and their subsystem counterparts, parsed from hex.
	VendorID, DeviceID     int
	SSVendorID, SSDeviceID int

	// Values copied from the "Key: value" lines that follow the adapter header.
	Version     string // "Version"
	BoardID     string // "Board ID", lower-cased with a leading "0x" removed
	HierarchyID string // "Hierarchy ID"
	ChipSKU     string // "Chip SKU"
	Project     string // "Project"
}
|
|
|
|
// ParseNVFlashVerboseLog parses inventory/nvflash_verbose.log and applies firmware versions
|
|
// to already discovered devices using PCI BDF with optional ID checks.
|
|
func ParseNVFlashVerboseLog(content []byte, result *models.AnalysisResult) error {
|
|
if result == nil || result.Hardware == nil {
|
|
return nil
|
|
}
|
|
|
|
records := parseNVFlashRecords(content)
|
|
if len(records) == 0 {
|
|
return nil
|
|
}
|
|
|
|
for i := range result.Hardware.GPUs {
|
|
gpu := &result.Hardware.GPUs[i]
|
|
bdf := normalizePCIBDF(gpu.BDF)
|
|
if bdf == "" {
|
|
continue
|
|
}
|
|
rec, ok := records[bdf]
|
|
if !ok {
|
|
continue
|
|
}
|
|
if gpu.DeviceID != 0 && rec.DeviceID != 0 && gpu.DeviceID != rec.DeviceID {
|
|
continue
|
|
}
|
|
if gpu.VendorID != 0 && rec.VendorID != 0 && gpu.VendorID != rec.VendorID {
|
|
continue
|
|
}
|
|
if strings.TrimSpace(rec.Version) != "" {
|
|
gpu.Firmware = strings.TrimSpace(rec.Version)
|
|
}
|
|
}
|
|
|
|
for i := range result.Hardware.PCIeDevices {
|
|
dev := &result.Hardware.PCIeDevices[i]
|
|
bdf := normalizePCIBDF(dev.BDF)
|
|
if bdf == "" {
|
|
continue
|
|
}
|
|
rec, ok := records[bdf]
|
|
if !ok {
|
|
continue
|
|
}
|
|
if dev.DeviceID != 0 && rec.DeviceID != 0 && dev.DeviceID != rec.DeviceID {
|
|
continue
|
|
}
|
|
if dev.VendorID != 0 && rec.VendorID != 0 && dev.VendorID != rec.VendorID {
|
|
continue
|
|
}
|
|
|
|
if strings.EqualFold(strings.TrimSpace(dev.DeviceClass), "NVSwitch") || strings.HasPrefix(strings.ToUpper(strings.TrimSpace(dev.Slot)), "NVSWITCH") {
|
|
if mappedPN := mapNVSwitchPartNumberByProject(rec.Project); mappedPN != "" {
|
|
dev.PartNumber = mappedPN
|
|
}
|
|
}
|
|
|
|
if strings.TrimSpace(rec.Version) != "" && strings.TrimSpace(dev.PartNumber) == "" {
|
|
// Fallback for non-NVSwitch devices where part number is unknown.
|
|
dev.PartNumber = strings.TrimSpace(rec.Version)
|
|
}
|
|
}
|
|
|
|
appendNVFlashFirmwareEntries(result, records)
|
|
|
|
return nil
|
|
}
|
|
|
|
// ApplyInventoryPCIIDs enriches devices with PCI BDFs from inventory/inventory.log.
|
|
func ApplyInventoryPCIIDs(content []byte, result *models.AnalysisResult) error {
|
|
if result == nil || result.Hardware == nil {
|
|
return nil
|
|
}
|
|
|
|
slotToBDF := parseInventoryPCIIDs(content)
|
|
if len(slotToBDF) == 0 {
|
|
return nil
|
|
}
|
|
|
|
for i := range result.Hardware.GPUs {
|
|
gpu := &result.Hardware.GPUs[i]
|
|
if strings.TrimSpace(gpu.BDF) != "" {
|
|
continue
|
|
}
|
|
if bdf := slotToBDF[strings.TrimSpace(gpu.Slot)]; bdf != "" {
|
|
gpu.BDF = bdf
|
|
}
|
|
}
|
|
|
|
for i := range result.Hardware.PCIeDevices {
|
|
dev := &result.Hardware.PCIeDevices[i]
|
|
if strings.TrimSpace(dev.BDF) != "" {
|
|
continue
|
|
}
|
|
if bdf := slotToBDF[normalizeNVSwitchSlot(strings.TrimSpace(dev.Slot))]; bdf != "" {
|
|
dev.BDF = bdf
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func parseNVFlashRecords(content []byte) map[string]nvflashDeviceRecord {
|
|
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
|
records := make(map[string]nvflashDeviceRecord)
|
|
var current *nvflashDeviceRecord
|
|
|
|
commit := func() {
|
|
if current == nil {
|
|
return
|
|
}
|
|
if current.BDF == "" || strings.TrimSpace(current.Version) == "" {
|
|
return
|
|
}
|
|
records[current.BDF] = *current
|
|
}
|
|
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
if m := nvflashAdapterRegex.FindStringSubmatch(line); len(m) == 9 {
|
|
commit()
|
|
vendorID, _ := parseHexInt(m[1])
|
|
deviceID, _ := parseHexInt(m[2])
|
|
ssVendorID, _ := parseHexInt(m[3])
|
|
ssDeviceID, _ := parseHexInt(m[4])
|
|
|
|
current = &nvflashDeviceRecord{
|
|
BDF: fmt.Sprintf("0000:%s:%s.%s", strings.ToLower(m[6]), strings.ToLower(m[7]), strings.ToLower(m[8])),
|
|
VendorID: vendorID,
|
|
DeviceID: deviceID,
|
|
SSVendorID: ssVendorID,
|
|
SSDeviceID: ssDeviceID,
|
|
}
|
|
continue
|
|
}
|
|
|
|
if current == nil {
|
|
continue
|
|
}
|
|
|
|
if !strings.Contains(line, ":") {
|
|
continue
|
|
}
|
|
parts := strings.SplitN(line, ":", 2)
|
|
key := strings.TrimSpace(parts[0])
|
|
val := strings.TrimSpace(parts[1])
|
|
if key == "" || val == "" {
|
|
continue
|
|
}
|
|
|
|
switch key {
|
|
case "Version":
|
|
current.Version = val
|
|
case "Board ID":
|
|
current.BoardID = strings.ToLower(strings.TrimPrefix(val, "0x"))
|
|
case "Vendor ID":
|
|
if v, err := parseHexInt(val); err == nil {
|
|
current.VendorID = v
|
|
}
|
|
case "Device ID":
|
|
if v, err := parseHexInt(val); err == nil {
|
|
current.DeviceID = v
|
|
}
|
|
case "Hierarchy ID":
|
|
current.HierarchyID = val
|
|
case "Chip SKU":
|
|
current.ChipSKU = val
|
|
case "Project":
|
|
current.Project = val
|
|
}
|
|
}
|
|
|
|
commit()
|
|
return records
|
|
}
|
|
|
|
func parseInventoryPCIIDs(content []byte) map[string]string {
|
|
scanner := bufio.NewScanner(strings.NewReader(string(content)))
|
|
slotToBDF := make(map[string]string)
|
|
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
if m := gpuPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
|
slotToBDF["GPUSXM"+m[1]] = normalizePCIBDF(m[2])
|
|
continue
|
|
}
|
|
if m := nvsPCIIDRegex.FindStringSubmatch(line); len(m) == 3 {
|
|
slotToBDF["NVSWITCH"+m[1]] = normalizePCIBDF(m[2])
|
|
}
|
|
}
|
|
|
|
return slotToBDF
|
|
}
|
|
|
|
// pciBDFShortFormRegex matches a lower-case PCI address without a domain
// ("bb:dd.f"). It is compiled once here rather than on every call to
// normalizePCIBDF, which runs for each device.
var pciBDFShortFormRegex = regexp.MustCompile(`^[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$`)

// normalizePCIBDF returns v trimmed and lower-cased, with the domain "0000:"
// prepended when v has the short "bb:dd.f" form.
//
// A full "dddd:bb:dd.f" address is therefore returned as is (lower-cased), a
// blank input yields "", and any other text is passed through trimmed and
// lower-cased without validation.
func normalizePCIBDF(v string) string {
	s := strings.TrimSpace(strings.ToLower(v))
	if s == "" {
		return ""
	}

	// bus:device.func -> 0000:bus:device.func
	if pciBDFShortFormRegex.MatchString(s) {
		return "0000:" + s
	}

	return s
}
|
|
|
|
// parseHexInt converts a hexadecimal string to an int. The input is
// lower-cased and trimmed, and one leading "0x" is dropped before parsing.
// It returns (0, error) when nothing is left to parse, when the text is not
// valid hexadecimal, or when the value does not fit in a signed 32-bit integer.
func parseHexInt(v string) (int, error) {
	digits := strings.TrimPrefix(strings.TrimSpace(strings.ToLower(v)), "0x")
	if len(digits) == 0 {
		return 0, fmt.Errorf("empty hex value")
	}

	parsed, err := strconv.ParseInt(digits, 16, 32)
	if err != nil {
		return 0, err
	}
	return int(parsed), nil
}
|
|
|
|
func findNVFlashVerboseLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
|
for _, f := range files {
|
|
path := strings.ToLower(f.Path)
|
|
if strings.Contains(path, "inventory/nvflash_verbose.log") ||
|
|
strings.Contains(path, "inventory\\nvflash_verbose.log") {
|
|
return &f
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func findInventoryInfoLog(files []parser.ExtractedFile) *parser.ExtractedFile {
|
|
for _, f := range files {
|
|
path := strings.ToLower(f.Path)
|
|
if strings.Contains(path, "inventory/inventory.log") ||
|
|
strings.Contains(path, "inventory\\inventory.log") {
|
|
return &f
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func appendNVFlashFirmwareEntries(result *models.AnalysisResult, records map[string]nvflashDeviceRecord) {
|
|
if result == nil || result.Hardware == nil {
|
|
return
|
|
}
|
|
|
|
if result.Hardware.Firmware == nil {
|
|
result.Hardware.Firmware = make([]models.FirmwareInfo, 0)
|
|
}
|
|
|
|
seen := make(map[string]struct{})
|
|
for _, fw := range result.Hardware.Firmware {
|
|
key := strings.ToLower(strings.TrimSpace(fw.DeviceName)) + "|" + strings.TrimSpace(fw.Version)
|
|
seen[key] = struct{}{}
|
|
}
|
|
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
version := strings.TrimSpace(gpu.Firmware)
|
|
if version == "" {
|
|
continue
|
|
}
|
|
|
|
model := strings.TrimSpace(gpu.PartNumber)
|
|
if model == "" {
|
|
model = strings.TrimSpace(gpu.Model)
|
|
}
|
|
if model == "" {
|
|
model = strings.TrimSpace(gpu.Slot)
|
|
}
|
|
deviceName := fmt.Sprintf("GPU %s (%s)", strings.TrimSpace(gpu.Slot), model)
|
|
key := strings.ToLower(deviceName) + "|" + version
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
|
DeviceName: deviceName,
|
|
Version: version,
|
|
})
|
|
}
|
|
|
|
for _, dev := range result.Hardware.PCIeDevices {
|
|
bdf := normalizePCIBDF(dev.BDF)
|
|
rec, ok := records[bdf]
|
|
if !ok {
|
|
continue
|
|
}
|
|
version := strings.TrimSpace(rec.Version)
|
|
if version == "" {
|
|
continue
|
|
}
|
|
slot := strings.TrimSpace(dev.Slot)
|
|
deviceClass := strings.TrimSpace(dev.DeviceClass)
|
|
if strings.EqualFold(deviceClass, "NVSwitch") || strings.HasPrefix(strings.ToUpper(slot), "NVSWITCH") {
|
|
model := slot
|
|
if pn := strings.TrimSpace(dev.PartNumber); pn != "" {
|
|
model = pn
|
|
}
|
|
deviceName := fmt.Sprintf("NVSwitch %s (%s)", slot, model)
|
|
key := strings.ToLower(deviceName) + "|" + version
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
result.Hardware.Firmware = append(result.Hardware.Firmware, models.FirmwareInfo{
|
|
DeviceName: deviceName,
|
|
Version: version,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
func mapNVSwitchPartNumberByProject(project string) string {
|
|
key := strings.TrimSpace(strings.ToLower(project))
|
|
if key == "" {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(nvswitchProjectToPartNumber[key])
|
|
}
|