nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
||||
"git.mchus.pro/mchus/logpile/internal/models"
|
||||
)
|
||||
|
||||
// cpuMicrocodeFirmwareRegex matches, case-insensitively, a complete name of
// the form "cpu<digits> microcode": the literal "cpu", one or more digits,
// one or more whitespace characters, then "microcode", with nothing before
// or after.
var cpuMicrocodeFirmwareRegex = regexp.MustCompile(`(?i)^cpu\d+\s+microcode$`)
|
||||
|
||||
// ConvertToReanimator converts AnalysisResult to Reanimator export format
|
||||
func ConvertToReanimator(result *models.AnalysisResult) (*ReanimatorExport, error) {
|
||||
if result == nil {
|
||||
@@ -77,14 +79,39 @@ func convertFirmware(firmware []models.FirmwareInfo) []ReanimatorFirmware {
|
||||
|
||||
result := make([]ReanimatorFirmware, 0, len(firmware))
|
||||
for _, fw := range firmware {
|
||||
if isDeviceBoundFirmwareName(fw.DeviceName) {
|
||||
continue
|
||||
}
|
||||
result = append(result, ReanimatorFirmware{
|
||||
DeviceName: fw.DeviceName,
|
||||
Version: fw.Version,
|
||||
})
|
||||
}
|
||||
if len(result) == 0 {
|
||||
return nil
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func isDeviceBoundFirmwareName(name string) bool {
|
||||
n := strings.TrimSpace(strings.ToLower(name))
|
||||
if n == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
if strings.HasPrefix(n, "gpu ") ||
|
||||
strings.HasPrefix(n, "nvswitch ") ||
|
||||
strings.HasPrefix(n, "nic ") ||
|
||||
strings.HasPrefix(n, "hdd ") ||
|
||||
strings.HasPrefix(n, "ssd ") ||
|
||||
strings.HasPrefix(n, "nvme ") ||
|
||||
strings.HasPrefix(n, "psu") {
|
||||
return true
|
||||
}
|
||||
|
||||
return cpuMicrocodeFirmwareRegex.MatchString(strings.TrimSpace(name))
|
||||
}
|
||||
|
||||
// convertCPUs converts CPU information to Reanimator format
|
||||
func convertCPUs(cpus []models.CPU, collectedAt string) []ReanimatorCPU {
|
||||
if len(cpus) == 0 {
|
||||
@@ -229,6 +256,7 @@ func convertStorage(storage []models.Storage, collectedAt string) []ReanimatorSt
|
||||
func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []ReanimatorPCIe {
|
||||
result := make([]ReanimatorPCIe, 0)
|
||||
gpuSlots := make(map[string]struct{}, len(hw.GPUs))
|
||||
nvswitchFirmwareBySlot := buildNVSwitchFirmwareBySlot(hw.Firmware)
|
||||
for _, gpu := range hw.GPUs {
|
||||
slot := strings.ToLower(strings.TrimSpace(gpu.Slot))
|
||||
if slot != "" {
|
||||
@@ -254,6 +282,10 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
||||
}
|
||||
|
||||
status := normalizeStatus(pcie.Status, false)
|
||||
firmware := ""
|
||||
if isNVSwitchPCIeDevice(pcie) {
|
||||
firmware = nvswitchFirmwareBySlot[normalizeNVSwitchSlotForLookup(pcie.Slot)]
|
||||
}
|
||||
meta := buildStatusMeta(
|
||||
status,
|
||||
pcie.StatusCheckedAt,
|
||||
@@ -277,7 +309,7 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
||||
MaxLinkWidth: pcie.MaxLinkWidth,
|
||||
MaxLinkSpeed: pcie.MaxLinkSpeed,
|
||||
SerialNumber: serialNumber,
|
||||
Firmware: "", // PCIeDevice doesn't have firmware in models
|
||||
Firmware: firmware,
|
||||
Status: status,
|
||||
StatusCheckedAt: meta.StatusCheckedAt,
|
||||
StatusChangedAt: meta.StatusChangedAt,
|
||||
@@ -373,6 +405,57 @@ func convertPCIeDevices(hw *models.HardwareConfig, collectedAt string) []Reanima
|
||||
return result
|
||||
}
|
||||
|
||||
func isNVSwitchPCIeDevice(pcie models.PCIeDevice) bool {
|
||||
deviceClass := strings.TrimSpace(pcie.DeviceClass)
|
||||
if strings.EqualFold(deviceClass, "NVSwitch") {
|
||||
return true
|
||||
}
|
||||
slot := normalizeNVSwitchSlotForLookup(pcie.Slot)
|
||||
return strings.HasPrefix(slot, "NVSWITCH")
|
||||
}
|
||||
|
||||
func buildNVSwitchFirmwareBySlot(firmware []models.FirmwareInfo) map[string]string {
|
||||
result := make(map[string]string)
|
||||
for _, fw := range firmware {
|
||||
name := strings.TrimSpace(fw.DeviceName)
|
||||
if !strings.HasPrefix(strings.ToUpper(name), "NVSWITCH ") {
|
||||
continue
|
||||
}
|
||||
|
||||
rest := strings.TrimSpace(name[len("NVSwitch "):])
|
||||
if rest == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
slot := rest
|
||||
if idx := strings.Index(rest, " ("); idx > 0 {
|
||||
slot = strings.TrimSpace(rest[:idx])
|
||||
}
|
||||
slot = normalizeNVSwitchSlotForLookup(slot)
|
||||
if slot == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, exists := result[slot]; exists {
|
||||
continue
|
||||
}
|
||||
version := strings.TrimSpace(fw.Version)
|
||||
if version == "" {
|
||||
continue
|
||||
}
|
||||
result[slot] = version
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// normalizeNVSwitchSlotForLookup returns the canonical lookup key for a slot
// name: the input trimmed of surrounding whitespace and upper-cased. If the
// result begins with a doubled "NVSWITCHNVSWITCH" prefix, one leading
// "NVSWITCH" is removed so that only a single one remains at the front.
func normalizeNVSwitchSlotForLookup(slot string) string {
	const tag = "NVSWITCH"

	key := strings.ToUpper(strings.TrimSpace(slot))
	if strings.HasPrefix(key, tag+tag) {
		// Skipping the first tag leaves the second one as the new prefix.
		return key[len(tag):]
	}
	return key
}
|
||||
|
||||
func isDisplayClass(deviceClass string) bool {
|
||||
class := strings.ToLower(strings.TrimSpace(deviceClass))
|
||||
return strings.Contains(class, "display") ||
|
||||
|
||||
@@ -359,6 +359,12 @@ func TestConvertPCIeDevices(t *testing.T) {
|
||||
|
||||
func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
Firmware: []models.FirmwareInfo{
|
||||
{
|
||||
DeviceName: "NVSwitch NVSWITCH1 (965-25612-0002-000)",
|
||||
Version: "96.10.6D.00.01",
|
||||
},
|
||||
},
|
||||
PCIeDevices: []models.PCIeDevice{
|
||||
{
|
||||
Slot: "NVSWITCH1",
|
||||
@@ -378,6 +384,9 @@ func TestConvertPCIeDevices_NVSwitchWithoutSerialRemainsEmpty(t *testing.T) {
|
||||
if result[0].SerialNumber != "" {
|
||||
t.Fatalf("expected empty NVSwitch serial, got %q", result[0].SerialNumber)
|
||||
}
|
||||
if result[0].Firmware != "96.10.6D.00.01" {
|
||||
t.Fatalf("expected NVSwitch firmware 96.10.6D.00.01, got %q", result[0].Firmware)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
|
||||
@@ -646,3 +655,47 @@ func TestConvertToReanimator_DeduplicatesAllSections(t *testing.T) {
|
||||
t.Fatalf("expected single #GPU0 record, got %d", gpuCount)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertToReanimator_FirmwareExcludesDeviceBoundEntries(t *testing.T) {
|
||||
input := &models.AnalysisResult{
|
||||
Filename: "fw-filter-test.json",
|
||||
Hardware: &models.HardwareConfig{
|
||||
BoardInfo: models.BoardInfo{SerialNumber: "BOARD-001"},
|
||||
Firmware: []models.FirmwareInfo{
|
||||
{DeviceName: "BIOS", Version: "1.0.0"},
|
||||
{DeviceName: "BMC", Version: "2.0.0"},
|
||||
{DeviceName: "GPU GPUSXM1 (692-2G520-0280-501)", Version: "96.00.D0.00.03"},
|
||||
{DeviceName: "NVSwitch NVSWITCH0 (965-25612-0002-000)", Version: "96.10.6D.00.01"},
|
||||
{DeviceName: "NIC #CPU1_PCIE9 (MCX512A-ACAT)", Version: "28.38.1900"},
|
||||
{DeviceName: "CPU0 Microcode", Version: "0x2b000643"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
out, err := ConvertToReanimator(input)
|
||||
if err != nil {
|
||||
t.Fatalf("ConvertToReanimator() failed: %v", err)
|
||||
}
|
||||
|
||||
if len(out.Hardware.Firmware) != 2 {
|
||||
t.Fatalf("expected only machine-level firmware entries, got %d", len(out.Hardware.Firmware))
|
||||
}
|
||||
|
||||
got := map[string]string{}
|
||||
for _, fw := range out.Hardware.Firmware {
|
||||
got[fw.DeviceName] = fw.Version
|
||||
}
|
||||
|
||||
if got["BIOS"] != "1.0.0" {
|
||||
t.Fatalf("expected BIOS firmware to be kept")
|
||||
}
|
||||
if got["BMC"] != "2.0.0" {
|
||||
t.Fatalf("expected BMC firmware to be kept")
|
||||
}
|
||||
if _, exists := got["GPU GPUSXM1 (692-2G520-0280-501)"]; exists {
|
||||
t.Fatalf("expected GPU firmware to be excluded from hardware.firmware")
|
||||
}
|
||||
if _, exists := got["NVSwitch NVSWITCH0 (965-25612-0002-000)"]; exists {
|
||||
t.Fatalf("expected NVSwitch firmware to be excluded from hardware.firmware")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user