From f35cabac48f57ef02a500a63f8cc3d5da413811f Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Sat, 28 Feb 2026 14:50:02 +0300 Subject: [PATCH] collector/redfish: fix server model fallback and GPU/NVMe regressions --- internal/collector/redfish.go | 46 +++++++++++++++-- internal/collector/redfish_replay.go | 28 +++++++++- internal/collector/redfish_test.go | 77 ++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 5 deletions(-) diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index 2f00490..433f458 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -460,7 +460,7 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client, } } - return out + return dropModelOnlyGPUPlaceholders(out) } func (c *RedfishConnector) collectPCIeDevices(ctx context.Context, client *http.Client, req Request, baseURL string, systemPaths, chassisPaths []string) []models.PCIeDevice { @@ -2070,7 +2070,9 @@ func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) if !strings.EqualFold(strings.TrimSpace(gpu.Model), model) { continue } - if !strings.EqualFold(strings.TrimSpace(gpu.Manufacturer), strings.TrimSpace(candidate.Manufacturer)) { + existingMfr := strings.TrimSpace(gpu.Manufacturer) + candidateMfr := strings.TrimSpace(candidate.Manufacturer) + if existingMfr != "" && candidateMfr != "" && !strings.EqualFold(existingMfr, candidateMfr) { continue } if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" { @@ -2080,6 +2082,41 @@ func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) return false } +func dropModelOnlyGPUPlaceholders(items []models.GPU) []models.GPU { + if len(items) < 2 { + return items + } + + concreteByModel := make(map[string]struct{}, len(items)) + for _, gpu := range items { + modelKey := strings.ToLower(strings.TrimSpace(gpu.Model)) + if modelKey == "" { + continue + } + if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" { + concreteByModel[modelKey] = struct{}{} + } + } + if len(concreteByModel) == 0 { + return items + } + + out := make([]models.GPU, 0, len(items)) + for _, gpu := range items { + modelKey := strings.ToLower(strings.TrimSpace(gpu.Model)) + slot := strings.TrimSpace(gpu.Slot) + if _, hasConcrete := concreteByModel[modelKey]; hasConcrete && + normalizeRedfishIdentityField(gpu.SerialNumber) == "" && + strings.TrimSpace(gpu.BDF) == "" && + (strings.EqualFold(slot, strings.TrimSpace(gpu.Model)) || + strings.HasPrefix(strings.ToUpper(slot), "GPU")) { + continue + } + out = append(out, gpu) + } + return out +} + func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool { deviceType := strings.ToLower(asString(doc["DeviceType"])) if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") { @@ -2183,7 +2220,10 @@ func dedupeStorage(items []models.Storage) []models.Storage { out := make([]models.Storage, 0, len(items)) seen := make(map[string]struct{}, len(items)) for _, item := range items { - key := firstNonEmpty(item.SerialNumber, item.Slot+"|"+item.Model) + key := firstNonEmpty( + normalizeRedfishIdentityField(item.SerialNumber), + strings.TrimSpace(item.Slot)+"|"+strings.TrimSpace(item.Model), + ) if key == "" { continue } diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index 61c783e..3be2ce0 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -448,7 +448,7 @@ func dedupeStrings(items []string) []string { func (r redfishSnapshotReader) collectBoardFallbackDocs(chassisPaths []string) []map[string]interface{} { out := make([]map[string]interface{}, 0) for _, chassisPath := range chassisPaths { - for _, suffix := range []string{"/Boards", "/Backplanes", "/Assembly"} { + for _, suffix := range []string{"/Boards", "/Backplanes"} { path := joinPath(chassisPath, suffix) if docs, err := r.getCollectionMembers(path); err == nil && len(docs) > 0 { out = append(out, docs...) @@ -468,6 +468,9 @@ func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]i } for _, doc := range docs { candidate := parseBoardInfoFromFRUDoc(doc) + if !isLikelyServerProductName(candidate.ProductName) { + continue + } if board.Manufacturer == "" { board.Manufacturer = candidate.Manufacturer } @@ -486,6 +489,27 @@ func applyBoardInfoFallbackFromDocs(board *models.BoardInfo, docs []map[string]i } } +func isLikelyServerProductName(v string) bool { + v = strings.TrimSpace(v) + if v == "" { + return false + } + n := strings.ToUpper(v) + if strings.Contains(n, "NULL") { + return false + } + componentTokens := []string{ + "DIMM", "DDR", "NVME", "SSD", "HDD", "GPU", "NIC", "RAID", + "PSU", "FAN", "BACKPLANE", "FRU", + } + for _, token := range componentTokens { + if strings.Contains(n, strings.ToUpper(token)) { + return false + } + } + return true +} + type redfishSnapshotReader struct { tree map[string]interface{} } @@ -918,7 +942,7 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [ out = append(out, gpu) } } - return out + return dropModelOnlyGPUPlaceholders(out) } func (r redfishSnapshotReader) collectPCIeDevices(systemPaths, chassisPaths []string) []models.PCIeDevice { diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index 17e10a9..178597b 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -6,6 +6,8 @@ import ( "net/http" "net/http/httptest" "testing" + + "git.mchus.pro/mchus/logpile/internal/models" ) func TestRedfishConnectorCollect(t *testing.T) { @@ -691,3 +693,78 @@ func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *tes } } } + +func TestApplyBoardInfoFallbackFromDocs_SkipsComponentProductNames(t *testing.T) { + board := models.BoardInfo{ + SerialNumber: "23E100051", + } + docs := []map[string]interface{}{ + { + "Model": "DDR5 DIMM", + "Manufacturer": "DELTA", + "SerialNumber": "802C1A2507D284B001", + }, + { + "ProductName": "NF5688M7", + "Manufacturer": "Inspur", + "PartNumber": "YZMB-00001", + }, + } + + applyBoardInfoFallbackFromDocs(&board, docs) + if board.ProductName != "NF5688M7" { + t.Fatalf("expected server model from fallback docs, got %q", board.ProductName) + } + if board.Manufacturer != "Inspur" { + t.Fatalf("expected manufacturer from server fallback doc, got %q", board.Manufacturer) + } +} + +func TestDedupeStorage_IgnoresPlaceholderSerial(t *testing.T) { + in := []models.Storage{ + {Slot: "OB01", Model: "N/A", SerialNumber: "N/A"}, + {Slot: "OB02", Model: "N/A", SerialNumber: "N/A"}, + {Slot: "OB03", Model: "N/A", SerialNumber: "N/A"}, + {Slot: "OB04", Model: "N/A", SerialNumber: "N/A"}, + } + + out := dedupeStorage(in) + if len(out) != 4 { + t.Fatalf("expected all placeholder-serial NVMe drives to be kept by slot key, got %d", len(out)) + } +} + +func TestReplayCollectGPUs_DropsModelOnlyPlaceholderWhenConcreteDiscoveredLater(t *testing.T) { + r := redfishSnapshotReader{tree: map[string]interface{}{ + "/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/GraphicsControllers/GPU0"}, + }, + }, + "/redfish/v1/Systems/1/GraphicsControllers/GPU0": map[string]interface{}{ + "Id": "GPU0", + "Name": "H200-SXM5-141G", + "Model": "H200-SXM5-141G", + }, + "/redfish/v1/Chassis/1/PCIeDevices": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Chassis/1/PCIeDevices/4"}, + }, + }, + "/redfish/v1/Chassis/1/PCIeDevices/4": map[string]interface{}{ + "Id": "4", + "Name": "PCIeCard4", + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "BDF": "0000:0f:00.0", + }, + }} + + got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, []string{"/redfish/v1/Chassis/1"}) + if len(got) != 1 { + t.Fatalf("expected generic graphics placeholder to be dropped, got %d GPUs", len(got)) + } + if got[0].Slot != "PCIeCard4" { + t.Fatalf("expected concrete PCIe GPU to remain, got slot=%q", got[0].Slot) + } +}