collector/redfish: dedupe model-only GPU rows from graphics controllers

This commit is contained in:
2026-02-28 13:04:34 +03:00
parent 6c19a58b24
commit b918363252
3 changed files with 86 additions and 0 deletions

View File

@@ -444,6 +444,9 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client,
gpu := parseGPU(doc, functionDocs, idx)
idx++
if shouldSkipGenericGPUDuplicate(out, gpu) {
continue
}
key := gpuDedupKey(gpu)
if key == "" {
@@ -2034,6 +2037,39 @@ func gpuDedupKey(gpu models.GPU) string {
return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot))
}
// shouldSkipGenericGPUDuplicate reports whether candidate is a model-only GPU
// row that repeats a device already present in existing.
//
// A candidate is skipped only when all of the following hold:
//   - it has no identity of its own: its serial number normalizes to empty via
//     normalizeRedfishIdentityField and its BDF is blank;
//   - its slot and model are both non-blank and equal ignoring case, i.e. the
//     slot is just the model name repeated;
//   - existing contains a GPU with the same model and manufacturer (compared
//     case-insensitively after trimming) that does carry a serial number or BDF.
//
// In every other case the function returns false and the caller keeps the row.
func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool {
	// hasIdentity tells whether a GPU row carries a serial number or a BDF.
	hasIdentity := func(g models.GPU) bool {
		return normalizeRedfishIdentityField(g.SerialNumber) != "" || strings.TrimSpace(g.BDF) != ""
	}

	// Nothing to compare against, or the candidate is itself a concrete device.
	if len(existing) == 0 || hasIdentity(candidate) {
		return false
	}

	candSlot := strings.TrimSpace(candidate.Slot)
	candModel := strings.TrimSpace(candidate.Model)
	candVendor := strings.TrimSpace(candidate.Manufacturer)

	// The GraphicsControllers fallback on some BMCs reports the model/name as the
	// slot and provides no stable identifiers. Only rows of that shape qualify
	// as generic duplicates; anything with a distinct slot is left alone.
	if candSlot == "" || candModel == "" || !strings.EqualFold(candSlot, candModel) {
		return false
	}

	// Look for a concrete GPU of the same model and manufacturer that was
	// already collected.
	for i := range existing {
		known := existing[i]
		sameModel := strings.EqualFold(strings.TrimSpace(known.Model), candModel)
		sameVendor := strings.EqualFold(strings.TrimSpace(known.Manufacturer), candVendor)
		if sameModel && sameVendor && hasIdentity(known) {
			return true
		}
	}
	return false
}
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
deviceType := strings.ToLower(asString(doc["DeviceType"]))
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {

View File

@@ -904,6 +904,9 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
}
gpu := parseGPU(doc, functionDocs, idx)
idx++
if shouldSkipGenericGPUDuplicate(out, gpu) {
continue
}
key := gpuDedupKey(gpu)
if key == "" {
continue

View File

@@ -644,3 +644,50 @@ func TestReplayRedfishFromRawPayloads_AddsMissingServerModelWarning(t *testing.T
t.Fatalf("expected collection warning event about missing server model")
}
}
// TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers feeds the
// snapshot reader two PCIe devices with real serial numbers plus one
// GraphicsControllers entry whose Name equals its Model and whose SerialNumber
// is the literal "N/A". It expects collectGPUs to return exactly two GPUs and
// none whose Slot is the bare model name.
func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *testing.T) {
	type jsonObj = map[string]interface{}

	// memberRef builds a single collection member pointing at path.
	memberRef := func(path string) interface{} {
		return jsonObj{"@odata.id": path}
	}

	// Concrete cards from PCIe inventory, each with its own serial number.
	card3 := jsonObj{
		"Id":           "3",
		"Name":         "PCIeCard3",
		"Model":        "H200-SXM5-141G",
		"Manufacturer": "NVIDIA",
		"SerialNumber": "1654225094493",
	}
	card9 := jsonObj{
		"Id":           "9",
		"Name":         "PCIeCard9",
		"Model":        "H200-SXM5-141G",
		"Manufacturer": "NVIDIA",
		"SerialNumber": "1654425002635",
	}

	// Generic GraphicsControllers entry: same model and manufacturer, Name
	// repeats the model, and the serial number is "N/A".
	genericGPU := jsonObj{
		"Id":           "GPU0",
		"Name":         "H200-SXM5-141G",
		"Model":        "H200-SXM5-141G",
		"Manufacturer": "NVIDIA",
		"SerialNumber": "N/A",
	}

	reader := redfishSnapshotReader{tree: jsonObj{
		"/redfish/v1/Systems/1/PCIeDevices": jsonObj{
			"Members": []interface{}{
				memberRef("/redfish/v1/Systems/1/PCIeDevices/3"),
				memberRef("/redfish/v1/Systems/1/PCIeDevices/9"),
			},
		},
		"/redfish/v1/Systems/1/PCIeDevices/3": card3,
		"/redfish/v1/Systems/1/PCIeDevices/9": card9,
		"/redfish/v1/Systems/1/GraphicsControllers": jsonObj{
			"Members": []interface{}{
				memberRef("/redfish/v1/Systems/1/GraphicsControllers/GPU0"),
			},
		},
		"/redfish/v1/Systems/1/GraphicsControllers/GPU0": genericGPU,
	}}

	gpus := reader.collectGPUs([]string{"/redfish/v1/Systems/1"}, nil)
	if count := len(gpus); count != 2 {
		t.Fatalf("expected 2 GPUs without generic duplicate, got %d", count)
	}
	for i := range gpus {
		if gpus[i].Slot == "H200-SXM5-141G" {
			t.Fatalf("unexpected model-only duplicate GPU row")
		}
	}
}