From 93ce676f043d09617f36906230b60835049b09e6 Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 1 Apr 2026 15:48:47 +0300 Subject: [PATCH] fix(redfish): recover MSI NIC serials from PCIe functions --- internal/collector/redfish.go | 59 +++++++++++++++++++------ internal/collector/redfish_test.go | 69 +++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 15 deletions(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index bda3f87..06bff27 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -2810,13 +2810,6 @@ func shouldCrawlPath(path string) bool { if isAllowedNVSwitchFabricPath(normalized) { return true } - if strings.Contains(normalized, "/Chassis/") && - strings.Contains(normalized, "/PCIeDevices/") && - strings.Contains(normalized, "/PCIeFunctions/") { - // Chassis-level PCIeFunctions links are frequently noisy/slow on some BMCs - // and duplicate data we already collect from PCIe devices/functions elsewhere. - return false - } if strings.Contains(normalized, "/Memory/") { after := strings.SplitN(normalized, "/Memory/", 2) if len(after) == 2 && strings.Count(after[1], "/") >= 1 { @@ -3867,23 +3860,31 @@ func parseNIC(doc map[string]interface{}) models.NetworkAdapter { var linkSpeed string var maxLinkSpeed string if controllers, ok := doc["Controllers"].([]interface{}); ok && len(controllers) > 0 { - if ctrl, ok := controllers[0].(map[string]interface{}); ok { + totalPortCount := 0 + for _, ctrlAny := range controllers { + ctrl, ok := ctrlAny.(map[string]interface{}) + if !ok { + continue + } ctrlLocation := redfishLocationLabel(ctrl["Location"]) location = firstNonEmpty(location, ctrlLocation) if isWeakRedfishNICSlotLabel(slot) { slot = firstNonEmpty(ctrlLocation, slot) } - firmware = asString(ctrl["FirmwarePackageVersion"]) - if caps, ok := ctrl["ControllerCapabilities"].(map[string]interface{}); ok { - portCount = sanitizeNetworkPortCount(asInt(caps["NetworkPortCount"])) + if normalizeRedfishIdentityField(firmware) == "" { + firmware = findFirstNormalizedStringByKeys(ctrl, "FirmwarePackageVersion", "FirmwareVersion") } - if pcieIf, ok := ctrl["PCIeInterface"].(map[string]interface{}); ok { + if caps, ok := ctrl["ControllerCapabilities"].(map[string]interface{}); ok { + totalPortCount += sanitizeNetworkPortCount(asInt(caps["NetworkPortCount"])) + } + if pcieIf, ok := ctrl["PCIeInterface"].(map[string]interface{}); ok && linkWidth == 0 && maxLinkWidth == 0 && linkSpeed == "" && maxLinkSpeed == "" { linkWidth = asInt(pcieIf["LanesInUse"]) maxLinkWidth = asInt(pcieIf["MaxLanes"]) linkSpeed = firstNonEmpty(asString(pcieIf["PCIeType"]), asString(pcieIf["CurrentLinkSpeedGTs"]), asString(pcieIf["CurrentLinkSpeed"])) maxLinkSpeed = firstNonEmpty(asString(pcieIf["MaxPCIeType"]), asString(pcieIf["MaxLinkSpeedGTs"]), asString(pcieIf["MaxLinkSpeed"])) } } + portCount = sanitizeNetworkPortCount(totalPortCount) } return models.NetworkAdapter{ @@ -3913,10 +3914,14 @@ func isWeakRedfishNICSlotLabel(slot string) bool { if slot == "" { return true } + lower := strings.ToLower(slot) if isNumericString(slot) { return true } - if strings.EqualFold(slot, "nic") || strings.HasPrefix(strings.ToLower(slot), "nic") && !strings.Contains(strings.ToLower(slot), "slot") { + if strings.EqualFold(slot, "nic") || strings.HasPrefix(lower, "nic") && !strings.Contains(lower, "slot") { + return true + } + if strings.HasPrefix(lower, "devtype") { return true } return false @@ -3956,6 +3961,16 @@ func enrichNICFromPCIe(nic *models.NetworkAdapter, pcieDoc map[string]interface{ if nic == nil { return } + pcieSlot := redfishLocationLabel(pcieDoc["Slot"]) + if pcieSlot == "" { + pcieSlot = redfishLocationLabel(pcieDoc["Location"]) + } + if isWeakRedfishNICSlotLabel(nic.Slot) && pcieSlot != "" { + nic.Slot = pcieSlot + } + if strings.TrimSpace(nic.Location) == "" && pcieSlot != "" { + nic.Location = pcieSlot + } if strings.TrimSpace(nic.BDF) == "" { nic.BDF = firstNonEmpty(asString(pcieDoc["BDF"]), buildBDFfromOemPublic(pcieDoc)) } @@ -3977,6 +3992,15 @@ func enrichNICFromPCIe(nic *models.NetworkAdapter, pcieDoc map[string]interface{ if strings.TrimSpace(nic.MaxLinkSpeed) == "" { nic.MaxLinkSpeed = firstNonEmpty(asString(pcieDoc["MaxLinkSpeedGTs"]), asString(pcieDoc["MaxLinkSpeed"])) } + if normalizeRedfishIdentityField(nic.SerialNumber) == "" { + nic.SerialNumber = findFirstNormalizedStringByKeys(pcieDoc, "SerialNumber") + } + if normalizeRedfishIdentityField(nic.PartNumber) == "" { + nic.PartNumber = findFirstNormalizedStringByKeys(pcieDoc, "PartNumber", "ProductPartNumber") + } + if normalizeRedfishIdentityField(nic.Firmware) == "" { + nic.Firmware = findFirstNormalizedStringByKeys(pcieDoc, "FirmwareVersion", "FirmwarePackageVersion") + } for _, fn := range functionDocs { if strings.TrimSpace(nic.BDF) == "" { nic.BDF = sanitizeRedfishBDF(asString(fn["FunctionId"])) @@ -3999,6 +4023,15 @@ func enrichNICFromPCIe(nic *models.NetworkAdapter, pcieDoc map[string]interface{ if strings.TrimSpace(nic.MaxLinkSpeed) == "" { nic.MaxLinkSpeed = firstNonEmpty(asString(fn["MaxLinkSpeedGTs"]), asString(fn["MaxLinkSpeed"])) } + if normalizeRedfishIdentityField(nic.SerialNumber) == "" { + nic.SerialNumber = findFirstNormalizedStringByKeys(fn, "SerialNumber") + } + if normalizeRedfishIdentityField(nic.PartNumber) == "" { + nic.PartNumber = findFirstNormalizedStringByKeys(fn, "PartNumber", "ProductPartNumber") + } + if normalizeRedfishIdentityField(nic.Firmware) == "" { + nic.Firmware = findFirstNormalizedStringByKeys(fn, "FirmwareVersion", "FirmwarePackageVersion") + } } if strings.TrimSpace(nic.Vendor) == "" { nic.Vendor = pciids.VendorName(nic.VendorID) diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index cf470e3..8107c73 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -1197,6 +1197,8 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) { "FunctionId": "0000:17:00.0", "VendorId": "0x15b3", "DeviceId": "0x1021", + "SerialNumber": "MT-SN-0001", + "PartNumber": "MCX623106AC-CDAT", "CurrentLinkWidth": 16, "CurrentLinkSpeedGTs": "32 GT/s", "MaxLinkWidth": 16, @@ -1214,6 +1216,12 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) { if nic.BDF != "0000:17:00.0" { t.Fatalf("unexpected NIC BDF: %q", nic.BDF) } + if nic.SerialNumber != "NIC-SN-1" { + t.Fatalf("expected existing NIC serial to be preserved, got %q", nic.SerialNumber) + } + if nic.PartNumber != "MCX623106AC-CDAT" { + t.Fatalf("expected NIC part number from PCIe function, got %q", nic.PartNumber) + } if nic.LinkWidth != 16 || nic.MaxLinkWidth != 16 { t.Fatalf("unexpected NIC link width state: current=%d max=%d", nic.LinkWidth, nic.MaxLinkWidth) } @@ -1222,6 +1230,63 @@ func TestEnrichNICFromPCIeFunctions(t *testing.T) { } } +func TestEnrichNICFromPCIeFunctions_FillsMissingIdentityFromFunctionDoc(t *testing.T) { + nic := parseNIC(map[string]interface{}{ + "Id": "DevType7_NIC1", + "Controllers": []interface{}{ + map[string]interface{}{ + "ControllerCapabilities": map[string]interface{}{ + "NetworkPortCount": 1, + }, + }, + map[string]interface{}{ + "ControllerCapabilities": map[string]interface{}{ + "NetworkPortCount": 1, + }, + }, + }, + }) + + pcieDoc := map[string]interface{}{ + "Slot": map[string]interface{}{ + "Location": map[string]interface{}{ + "PartLocation": map[string]interface{}{ + "ServiceLabel": "RISER4", + }, + }, + }, + } + functionDocs := []map[string]interface{}{ + { + "FunctionId": "0000:0f:00.0", + "VendorId": "0x15b3", + "DeviceId": "0x101f", + "SerialNumber": "MT2412X00001", + "PartNumber": "MCX623432AC-GDA_Ax", + }, + } + + enrichNICFromPCIe(&nic, pcieDoc, functionDocs, nil) + if nic.Slot != "RISER4" { + t.Fatalf("expected slot from PCIe slot label, got %q", nic.Slot) + } + if nic.Location != "RISER4" { + t.Fatalf("expected location from PCIe slot label, got %q", nic.Location) + } + if nic.PortCount != 2 { + t.Fatalf("expected combined port count from controllers, got %d", nic.PortCount) + } + if nic.SerialNumber != "MT2412X00001" { + t.Fatalf("expected serial from PCIe function, got %q", nic.SerialNumber) + } + if nic.PartNumber != "MCX623432AC-GDA_Ax" { + t.Fatalf("expected part number from PCIe function, got %q", nic.PartNumber) + } + if nic.BDF != "0000:0f:00.0" { + t.Fatalf("expected BDF from PCIe function, got %q", nic.BDF) + } +} + func TestParseNIC_PortCountFromControllerCapabilities(t *testing.T) { nic := parseNIC(map[string]interface{}{ "Id": "1", @@ -3462,8 +3527,8 @@ func TestShouldCrawlPath_MemoryAndProcessorMetricsAreAllowed(t *testing.T) { if !shouldCrawlPath("/redfish/v1/Systems/1/Processors/CPU0/ProcessorMetrics") { t.Fatalf("expected CPU metrics subresource to be crawlable") } - if shouldCrawlPath("/redfish/v1/Chassis/1/PCIeDevices/0/PCIeFunctions/1") { - t.Fatalf("expected noisy chassis pciefunctions branch to be skipped") + if !shouldCrawlPath("/redfish/v1/Chassis/1/PCIeDevices/0/PCIeFunctions/1") { + t.Fatalf("expected chassis pciefunctions resource to be crawlable for NIC/GPU identity recovery") } if !shouldCrawlPath("/redfish/v1/Fabrics/HGX_NVLinkFabric_0/Switches/NVSwitch_0") { t.Fatalf("expected NVSwitch fabric resource to be crawlable")