nvidia: improve component mapping, firmware, statuses and check times

This commit is contained in:
2026-02-16 23:17:13 +03:00
parent 514da76ddb
commit b33cca5fcc
19 changed files with 2051 additions and 65 deletions

View File

@@ -11,6 +11,8 @@ import (
"git.mchus.pro/mchus/logpile/internal/models"
)
// cpuMicrocodeFirmwareRegex matches firmware entry names of the form
// "CPU<digits> Microcode" (case-insensitive), for example "CPU0 Microcode".
// The pattern is anchored, so the whole name must match.
var cpuMicrocodeFirmwareRegex = regexp.MustCompile(`(?i)^cpu\d+\s+microcode$`)
// ConvertToReanimator converts AnalysisResult to Reanimator export format
func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, error) {
if result == nil {
@@ -77,14 +79,39 @@ func convertFirmware(firmware []models.FirmwareInfo) []ReanimatorFirmware {
result := make([]ReanimatorFirmware, 0, len(firmware))
for _, fw := range firmware {
if isDeviceBoundFirmwareName(fw.DeviceName) {
continue
}
result = append(result, ReanimatorFirmware{
DeviceName: fw.DeviceName,
Version: fw.Version,
})
}
if len(result) == 0 {
return nil
}
return result
}
// isDeviceBoundFirmwareName reports whether a firmware entry name identifies
// firmware of a specific component rather than a machine-level entry.
//
// A name is device-bound when, after trimming and lower-casing, it starts with
// one of the known component prefixes (GPU, NVSwitch, NIC, HDD, SSD, NVMe, PSU)
// or when it matches the CPU microcode pattern. Blank names are never
// device-bound.
func isDeviceBoundFirmwareName(name string) bool {
	lowered := strings.TrimSpace(strings.ToLower(name))
	if lowered == "" {
		return false
	}

	// Note: "psu" intentionally carries no trailing space, unlike the others.
	componentPrefixes := []string{"gpu ", "nvswitch ", "nic ", "hdd ", "ssd ", "nvme ", "psu"}
	for _, prefix := range componentPrefixes {
		if strings.HasPrefix(lowered, prefix) {
			return true
		}
	}

	// The regex is case-insensitive on its own, so it gets the trimmed original.
	return cpuMicrocodeFirmwareRegex.MatchString(strings.TrimSpace(name))
}
// convertCPUs converts CPU information to Reanimator format
func convertCPUs(cpus []models.CPU, collectedAt string) []ReanimatorCPU {
if len(cpus) == 0 {
@@ -229,6 +256,7 @@ func convertStorage(storage []models.Storage, collectedAt string) []ReanimatorSt
func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []ReanimatorPCIe {
result := make([]ReanimatorPCIe, 0)
gpuSlots := make(map[string]struct{}, len(hw.GPUs))
nvswitchFirmwareBySlot := buildNVSwitchFirmwareBySlot(hw.Firmware)
for _, gpu := range hw.GPUs {
slot := strings.ToLower(strings.TrimSpace(gpu.Slot))
if slot != "" {
@@ -254,6 +282,10 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
}
status := normalizeStatus(pcie.Status, false)
firmware := ""
if isNVSwitchPCIeDevice(pcie) {
firmware = nvswitchFirmwareBySlot[normalizeNVSwitchSlotForLookup(pcie.Slot)]
}
meta := buildStatusMeta(
status,
pcie.StatusCheckedAt,
@@ -277,7 +309,7 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
MaxLinkWidth: pcie.MaxLinkWidth,
MaxLinkSpeed: pcie.MaxLinkSpeed,
SerialNumber: serialNumber,
Firmware: "", // PCIeDevice doesn't have firmware in models
Firmware: firmware,
Status: status,
StatusCheckedAt: meta.StatusCheckedAt,
StatusChangedAt: meta.StatusChangedAt,
@@ -373,6 +405,57 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
return result
}
// isNVSwitchPCIeDevice reports whether the PCIe device is an NVSwitch.
//
// It is one when its trimmed device class equals "NVSwitch" (ignoring case),
// or, failing that, when its normalized slot name begins with "NVSWITCH".
func isNVSwitchPCIeDevice(pcie models.PCIeDevice) bool {
	return strings.EqualFold(strings.TrimSpace(pcie.DeviceClass), "NVSwitch") ||
		strings.HasPrefix(normalizeNVSwitchSlotForLookup(pcie.Slot), "NVSWITCH")
}
// buildNVSwitchFirmwareBySlot indexes NVSwitch firmware versions by their
// normalized slot name.
//
// Only entries whose device name starts with "NVSwitch " (case-insensitive)
// are considered. The slot is the text following that prefix, up to an
// optional " (" suffix, e.g. "NVSwitch NVSWITCH1 (965-...)" yields "NVSWITCH1".
// Entries with an empty slot or empty version are ignored, and the first
// non-empty version seen for a slot wins. The returned map is never nil.
func buildNVSwitchFirmwareBySlot(firmware []models.FirmwareInfo) map[string]string {
	const namePrefix = "NVSWITCH "

	versions := map[string]string{}
	for i := range firmware {
		entry := firmware[i]

		deviceName := strings.TrimSpace(entry.DeviceName)
		if !strings.HasPrefix(strings.ToUpper(deviceName), namePrefix) {
			continue
		}

		label := strings.TrimSpace(deviceName[len(namePrefix):])
		if label == "" {
			continue
		}
		// Drop a trailing parenthesised annotation such as a part number.
		if cut := strings.Index(label, " ("); cut > 0 {
			label = strings.TrimSpace(label[:cut])
		}

		key := normalizeNVSwitchSlotForLookup(label)
		if key == "" {
			continue
		}

		version := strings.TrimSpace(entry.Version)
		if _, seen := versions[key]; seen || version == "" {
			continue
		}
		versions[key] = version
	}
	return versions
}
// normalizeNVSwitchSlotForLookup returns the slot name in a canonical form
// suitable for use as a lookup key: surrounding whitespace is removed, the
// name is upper-cased, and a doubled leading "NVSWITCHNVSWITCH" is collapsed
// to a single "NVSWITCH".
func normalizeNVSwitchSlotForLookup(slot string) string {
	const (
		single  = "NVSWITCH"
		doubled = single + single
	)

	upper := strings.ToUpper(strings.TrimSpace(slot))
	if !strings.HasPrefix(upper, doubled) {
		return upper
	}
	// Only one duplicated prefix is removed; anything after it is kept as is.
	return single + upper[len(doubled):]
}
func isDisplayClass(deviceClass string) bool {
class := strings.ToLower(strings.TrimSpace(deviceClass))
return strings.Contains(class, "display") ||

View File

@@ -359,6 +359,12 @@ func TestConvertPCIeDevices(t *testing.T) {
func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
hw := &models.HardwareConfig{
Firmware: []models.FirmwareInfo{
{
DeviceName: "NVSwitch NVSWITCH1 (965-25612-0002-000)",
Version: "96.10.6D.00.01",
},
},
PCIeDevices: []models.PCIeDevice{
{
Slot: "NVSWITCH1",
@@ -378,6 +384,9 @@ func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
if result[0].SerialNumber != "" {
t.Fatalf("expected empty NVSwitch serial, got %q", result[0].SerialNumber)
}
if result[0].Firmware != "96.10.6D.00.01" {
t.Fatalf("expected NVSwitch firmware 96.10.6D.00.01, got %q", result[0].Firmware)
}
}
func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
@@ -646,3 +655,47 @@ func TestConvertToReanimator_DeduplicatesAllSections(t *testing.T) {
t.Fatalf("expected single #GPU0 record, got %d", gpuCount)
}
}
// TestConvertToReanimator_FirmwareExcludesDeviceBoundEntries checks that the
// exported hardware.firmware list keeps only the BIOS and BMC entries and
// leaves out the GPU, NVSwitch, NIC and CPU microcode entries from the input.
func TestConvertToReanimator_FirmwareExcludesDeviceBoundEntries(t *testing.T) {
	const (
		gpuName      = "GPU GPUSXM1 (692-2G520-0280-501)"
		nvswitchName = "NVSwitch NVSWITCH0 (965-25612-0002-000)"
	)

	firmware := []models.FirmwareInfo{
		{DeviceName: "BIOS", Version: "1.0.0"},
		{DeviceName: "BMC", Version: "2.0.0"},
		{DeviceName: gpuName, Version: "96.00.D0.00.03"},
		{DeviceName: nvswitchName, Version: "96.10.6D.00.01"},
		{DeviceName: "NIC #CPU1_PCIE9 (MCX512A-ACAT)", Version: "28.38.1900"},
		{DeviceName: "CPU0 Microcode", Version: "0x2b000643"},
	}
	input := &models.AnalysisResult{
		Filename: "fw-filter-test.json",
		Hardware: &models.HardwareConfig{
			BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
			Firmware:  firmware,
		},
	}

	out, err := ConvertToReanimator(input)
	if err != nil {
		t.Fatalf("ConvertToReanimator() failed: %v", err)
	}

	exported := out.Hardware.Firmware
	if len(exported) != 2 {
		t.Fatalf("expected only machine-level firmware entries, got %d", len(exported))
	}

	versionByName := make(map[string]string, len(exported))
	for _, entry := range exported {
		versionByName[entry.DeviceName] = entry.Version
	}

	// Machine-level entries must survive with their versions intact.
	kept := []struct {
		name, version, failure string
	}{
		{"BIOS", "1.0.0", "expected BIOS firmware to be kept"},
		{"BMC", "2.0.0", "expected BMC firmware to be kept"},
	}
	for _, want := range kept {
		if versionByName[want.name] != want.version {
			t.Fatal(want.failure)
		}
	}

	// Device-bound entries must not appear at all.
	dropped := []struct {
		name, failure string
	}{
		{gpuName, "expected GPU firmware to be excluded from hardware.firmware"},
		{nvswitchName, "expected NVSwitch firmware to be excluded from hardware.firmware"},
	}
	for _, gone := range dropped {
		if _, present := versionByName[gone.name]; present {
			t.Fatal(gone.failure)
		}
	}
}