From bb82387d48236070d3571814280f26c6bbdc7794 Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 1 Apr 2026 16:50:51 +0300 Subject: [PATCH] fix(redfish): narrow MSI PCIeFunctions crawl --- internal/collector/redfish.go | 38 ++++++++ internal/collector/redfish_replay.go | 36 ++++++++ .../collector/redfish_replay_inventory.go | 21 +++++ internal/collector/redfish_test.go | 86 ++++++++++++++++++- 4 files changed, 180 insertions(+), 1 deletion(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index 06bff27..7747be3 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -1288,12 +1288,17 @@ func (c *RedfishConnector) collectNICs(ctx context.Context, client *http.Client, } for _, doc := range adapterDocs { nic := parseNIC(doc) + adapterFunctionDocs := c.getNetworkAdapterFunctionDocs(ctx, client, req, baseURL, doc) for _, pciePath := range networkAdapterPCIeDevicePaths(doc) { pcieDoc, err := c.getJSON(ctx, client, req, baseURL, pciePath) if err != nil { continue } functionDocs := c.getLinkedPCIeFunctions(ctx, client, req, baseURL, pcieDoc) + for _, adapterFnDoc := range adapterFunctionDocs { + functionDocs = append(functionDocs, c.getLinkedPCIeFunctions(ctx, client, req, baseURL, adapterFnDoc)...) + } + functionDocs = dedupeJSONDocsByPath(functionDocs) supplementalDocs := c.getLinkedSupplementalDocs(ctx, client, req, baseURL, pcieDoc, "EnvironmentMetrics", "Metrics") for _, fn := range functionDocs { supplementalDocs = append(supplementalDocs, c.getLinkedSupplementalDocs(ctx, client, req, baseURL, fn, "EnvironmentMetrics", "Metrics")...) @@ -2810,6 +2815,14 @@ func shouldCrawlPath(path string) bool { if isAllowedNVSwitchFabricPath(normalized) { return true } + if strings.Contains(normalized, "/Chassis/") && + strings.Contains(normalized, "/PCIeDevices/") && + strings.HasSuffix(normalized, "/PCIeFunctions") { + // Avoid crawling entire chassis PCIeFunctions collections. Concrete member + // docs can still be reached through direct links such as + // NetworkDeviceFunction Links.PCIeFunction. + return false + } if strings.Contains(normalized, "/Memory/") { after := strings.SplitN(normalized, "/Memory/", 2) if len(after) == 2 && strings.Count(after[1], "/") >= 1 { @@ -2982,6 +2995,15 @@ func (c *RedfishConnector) getLinkedPCIeFunctions(ctx context.Context, client *h } return out } + if ref, ok := links["PCIeFunction"].(map[string]interface{}); ok { + memberPath := asString(ref["@odata.id"]) + if memberPath != "" { + memberDoc, err := c.getJSON(ctx, client, req, baseURL, memberPath) + if err == nil { + return []map[string]interface{}{memberDoc} + } + } + } } // Some implementations expose a collection object in PCIeFunctions.@odata.id. @@ -2997,6 +3019,22 @@ func (c *RedfishConnector) getLinkedPCIeFunctions(ctx context.Context, client *h return nil } +func (c *RedfishConnector) getNetworkAdapterFunctionDocs(ctx context.Context, client *http.Client, req Request, baseURL string, adapterDoc map[string]interface{}) []map[string]interface{} { + ndfCol, ok := adapterDoc["NetworkDeviceFunctions"].(map[string]interface{}) + if !ok { + return nil + } + colPath := asString(ndfCol["@odata.id"]) + if colPath == "" { + return nil + } + funcDocs, err := c.getCollectionMembers(ctx, client, req, baseURL, colPath) + if err != nil { + return nil + } + return funcDocs +} + func (c *RedfishConnector) getCollectionMembers(ctx context.Context, client *http.Client, req Request, baseURL, collectionPath string) ([]map[string]interface{}, error) { collection, err := c.getJSON(ctx, client, req, baseURL, collectionPath) if err != nil { diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index 5adbf8b..657a136 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -1244,6 +1244,15 @@ func (r redfishSnapshotReader) getLinkedPCIeFunctions(doc map[string]interface{} } return out } + if ref, ok := links["PCIeFunction"].(map[string]interface{}); ok { + memberPath := asString(ref["@odata.id"]) + if memberPath != "" { + memberDoc, err := r.getJSON(memberPath) + if err == nil { + return []map[string]interface{}{memberDoc} + } + } + } } if pcieFunctions, ok := doc["PCIeFunctions"].(map[string]interface{}); ok { if collectionPath := asString(pcieFunctions["@odata.id"]); collectionPath != "" { @@ -1256,6 +1265,33 @@ func (r redfishSnapshotReader) getLinkedPCIeFunctions(doc map[string]interface{} return nil } +func dedupeJSONDocsByPath(docs []map[string]interface{}) []map[string]interface{} { + if len(docs) == 0 { + return nil + } + seen := make(map[string]struct{}, len(docs)) + out := make([]map[string]interface{}, 0, len(docs)) + for _, doc := range docs { + if len(doc) == 0 { + continue + } + key := normalizeRedfishPath(asString(doc["@odata.id"])) + if key == "" { + payload, err := json.Marshal(doc) + if err != nil { + continue + } + key = string(payload) + } + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + out = append(out, doc) + } + return out +} + func (r redfishSnapshotReader) getLinkedSupplementalDocs(doc map[string]interface{}, keys ...string) []map[string]interface{} { if len(doc) == 0 || len(keys) == 0 { return nil diff --git a/internal/collector/redfish_replay_inventory.go b/internal/collector/redfish_replay_inventory.go index 2253248..bc62085 100644 --- a/internal/collector/redfish_replay_inventory.go +++ b/internal/collector/redfish_replay_inventory.go @@ -76,12 +76,17 @@ func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.Netwo } for _, doc := range adapterDocs { nic := parseNIC(doc) + adapterFunctionDocs := r.getNetworkAdapterFunctionDocs(doc) for _, pciePath := range networkAdapterPCIeDevicePaths(doc) { pcieDoc, err := r.getJSON(pciePath) if err != nil { continue } functionDocs := r.getLinkedPCIeFunctions(pcieDoc) + for _, adapterFnDoc := range adapterFunctionDocs { + functionDocs = append(functionDocs, r.getLinkedPCIeFunctions(adapterFnDoc)...) + } + functionDocs = dedupeJSONDocsByPath(functionDocs) supplementalDocs := r.getLinkedSupplementalDocs(pcieDoc, "EnvironmentMetrics", "Metrics") for _, fn := range functionDocs { supplementalDocs = append(supplementalDocs, r.getLinkedSupplementalDocs(fn, "EnvironmentMetrics", "Metrics")...) @@ -97,6 +102,22 @@ func (r redfishSnapshotReader) collectNICs(chassisPaths []string) []models.Netwo return dedupeNetworkAdapters(nics) } +func (r redfishSnapshotReader) getNetworkAdapterFunctionDocs(adapterDoc map[string]interface{}) []map[string]interface{} { + ndfCol, ok := adapterDoc["NetworkDeviceFunctions"].(map[string]interface{}) + if !ok { + return nil + } + colPath := asString(ndfCol["@odata.id"]) + if colPath == "" { + return nil + } + funcDocs, err := r.getCollectionMembers(colPath) + if err != nil { + return nil + } + return funcDocs +} + func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []string) []models.PCIeDevice { collections := make([]string, 0, len(systemPaths)+len(chassisPaths)) for _, systemPath := range systemPaths { diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index 8107c73..9adccf8 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -1287,6 +1287,87 @@ func TestEnrichNICFromPCIeFunctions_FillsMissingIdentityFromFunctionDoc(t *testi } } +func TestReplayCollectNICs_UsesNetworkDeviceFunctionPCIeFunctionLink(t *testing.T) { + tree := map[string]interface{}{ + "/redfish/v1/Chassis/1/NetworkAdapters": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1/NetworkAdapters/NIC1"}, + }, + }, + "/redfish/v1/Chassis/1/NetworkAdapters/NIC1": map[string]interface{}{ + "Id": "DevType7_NIC1", + "Name": "NetworkAdapter_1", + "Controllers": []interface{}{ + map[string]interface{}{ + "ControllerCapabilities": map[string]interface{}{ + "NetworkPortCount": 2, + }, + "Links": map[string]interface{}{ + "PCIeDevices": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1/PCIeDevices/00_0F_00"}, + }, + }, + }, + }, + "NetworkDeviceFunctions": map[string]interface{}{ + "@odata.id": "/redfish/v1/Chassis/1/NetworkAdapters/NIC1/NetworkDeviceFunctions", + }, + }, + "/redfish/v1/Chassis/1/NetworkAdapters/NIC1/NetworkDeviceFunctions": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1/NetworkAdapters/NIC1/NetworkDeviceFunctions/Function0"}, + }, + }, + "/redfish/v1/Chassis/1/NetworkAdapters/NIC1/NetworkDeviceFunctions/Function0": map[string]interface{}{ + "Id": "Function0", + "Links": map[string]interface{}{ + "PCIeFunction": map[string]interface{}{ + "@odata.id": "/redfish/v1/Chassis/1/PCIeDevices/00_0F_00/PCIeFunctions/Function0", + }, + }, + }, + "/redfish/v1/Chassis/1/PCIeDevices/00_0F_00": map[string]interface{}{ + "Id": "00_0F_00", + "Name": "PCIeDevice_00_0F_00", + "Manufacturer": "Mellanox Technologies", + "FirmwareVersion": "26.43.25.66", + "Slot": map[string]interface{}{ + "Location": map[string]interface{}{ + "PartLocation": map[string]interface{}{ + "ServiceLabel": "RISER4", + }, + }, + }, + }, + "/redfish/v1/Chassis/1/PCIeDevices/00_0F_00/PCIeFunctions/Function0": map[string]interface{}{ + "Id": "Function0", + "FunctionId": "0000:0f:00.0", + "VendorId": "0x15b3", + "DeviceId": "0x101f", + "SerialNumber": "MT2412X00001", + "PartNumber": "MCX623432AC-GDA_Ax", + }, + } + + r := redfishSnapshotReader{tree: tree} + nics := r.collectNICs([]string{"/redfish/v1/Chassis/1"}) + if len(nics) != 1 { + t.Fatalf("expected one NIC, got %d", len(nics)) + } + if nics[0].Slot != "RISER4" { + t.Fatalf("expected slot from PCIe device, got %q", nics[0].Slot) + } + if nics[0].SerialNumber != "MT2412X00001" { + t.Fatalf("expected serial from NetworkDeviceFunction PCIeFunction link, got %q", nics[0].SerialNumber) + } + if nics[0].PartNumber != "MCX623432AC-GDA_Ax" { + t.Fatalf("expected part number from linked PCIeFunction, got %q", nics[0].PartNumber) + } + if nics[0].BDF != "0000:0f:00.0" { + t.Fatalf("expected BDF from linked PCIeFunction, got %q", nics[0].BDF) + } +} + func TestParseNIC_PortCountFromControllerCapabilities(t *testing.T) { nic := parseNIC(map[string]interface{}{ "Id": "1", @@ -3527,8 +3608,11 @@ func TestShouldCrawlPath_MemoryAndProcessorMetricsAreAllowed(t *testing.T) { if !shouldCrawlPath("/redfish/v1/Systems/1/Processors/CPU0/ProcessorMetrics") { t.Fatalf("expected CPU metrics subresource to be crawlable") } + if shouldCrawlPath("/redfish/v1/Chassis/1/PCIeDevices/0/PCIeFunctions") { + t.Fatalf("expected broad chassis PCIeFunctions collection to be skipped") + } if !shouldCrawlPath("/redfish/v1/Chassis/1/PCIeDevices/0/PCIeFunctions/1") { - t.Fatalf("expected chassis pciefunctions resource to be crawlable for NIC/GPU identity recovery") + t.Fatalf("expected direct chassis PCIeFunction member to remain crawlable") } if !shouldCrawlPath("/redfish/v1/Fabrics/HGX_NVLinkFabric_0/Switches/NVSwitch_0") { t.Fatalf("expected NVSwitch fabric resource to be crawlable")