diff --git a/internal/collector/redfish.go b/internal/collector/redfish.go index f26c0f5..9d02589 100644 --- a/internal/collector/redfish.go +++ b/internal/collector/redfish.go @@ -444,6 +444,9 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client, gpu := parseGPU(doc, functionDocs, idx) idx++ + if shouldSkipGenericGPUDuplicate(out, gpu) { + continue + } key := gpuDedupKey(gpu) if key == "" { @@ -2034,6 +2037,39 @@ func gpuDedupKey(gpu models.GPU) string { return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot)) } +func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool { + if len(existing) == 0 { + return false + } + if normalizeRedfishIdentityField(candidate.SerialNumber) != "" || strings.TrimSpace(candidate.BDF) != "" { + return false + } + slot := strings.TrimSpace(candidate.Slot) + model := strings.TrimSpace(candidate.Model) + if slot == "" || model == "" { + return false + } + + // Typical GraphicsControllers fallback on some BMCs reports only model/name + // as slot and lacks stable identifiers. If we already have concrete GPUs of the + // same model/manufacturer from PCIe inventory, this candidate is a duplicate. + if !strings.EqualFold(slot, model) { + return false + } + for _, gpu := range existing { + if !strings.EqualFold(strings.TrimSpace(gpu.Model), model) { + continue + } + if !strings.EqualFold(strings.TrimSpace(gpu.Manufacturer), strings.TrimSpace(candidate.Manufacturer)) { + continue + } + if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" { + return true + } + } + return false +} + func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool { deviceType := strings.ToLower(asString(doc["DeviceType"])) if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") { diff --git a/internal/collector/redfish_replay.go b/internal/collector/redfish_replay.go index 57792b4..61c783e 100644 --- a/internal/collector/redfish_replay.go +++ b/internal/collector/redfish_replay.go @@ -904,6 +904,9 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [ } gpu := parseGPU(doc, functionDocs, idx) idx++ + if shouldSkipGenericGPUDuplicate(out, gpu) { + continue + } key := gpuDedupKey(gpu) if key == "" { continue diff --git a/internal/collector/redfish_test.go b/internal/collector/redfish_test.go index a535f29..17e10a9 100644 --- a/internal/collector/redfish_test.go +++ b/internal/collector/redfish_test.go @@ -644,3 +644,50 @@ func TestReplayRedfishFromRawPayloads_AddsMissingServerModelWarning(t *testing.T t.Fatalf("expected collection warning event about missing server model") } } + +func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *testing.T) { + r := redfishSnapshotReader{tree: map[string]interface{}{ + "/redfish/v1/Systems/1/PCIeDevices": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/3"}, + map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/9"}, + }, + }, + "/redfish/v1/Systems/1/PCIeDevices/3": map[string]interface{}{ + "Id": "3", + "Name": "PCIeCard3", + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "SerialNumber": "1654225094493", + }, + "/redfish/v1/Systems/1/PCIeDevices/9": map[string]interface{}{ + "Id": "9", + "Name": "PCIeCard9", + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "SerialNumber": "1654425002635", + }, + "/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{ + "Members": []interface{}{ + map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/GraphicsControllers/GPU0"}, + }, + }, + "/redfish/v1/Systems/1/GraphicsControllers/GPU0": map[string]interface{}{ + "Id": "GPU0", + "Name": "H200-SXM5-141G", + "Model": "H200-SXM5-141G", + "Manufacturer": "NVIDIA", + "SerialNumber": "N/A", + }, + }} + + got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, nil) + if len(got) != 2 { + t.Fatalf("expected 2 GPUs without generic duplicate, got %d", len(got)) + } + for _, gpu := range got { + if gpu.Slot == "H200-SXM5-141G" { + t.Fatalf("unexpected model-only duplicate GPU row") + } + } +}