collector/redfish: dedupe model-only GPU rows from graphics controllers
This commit is contained in:
@@ -444,6 +444,9 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client,
|
||||
|
||||
gpu := parseGPU(doc, functionDocs, idx)
|
||||
idx++
|
||||
if shouldSkipGenericGPUDuplicate(out, gpu) {
|
||||
continue
|
||||
}
|
||||
|
||||
key := gpuDedupKey(gpu)
|
||||
if key == "" {
|
||||
@@ -2034,6 +2037,39 @@ func gpuDedupKey(gpu models.GPU) string {
|
||||
return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot))
|
||||
}
|
||||
|
||||
func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool {
|
||||
if len(existing) == 0 {
|
||||
return false
|
||||
}
|
||||
if normalizeRedfishIdentityField(candidate.SerialNumber) != "" || strings.TrimSpace(candidate.BDF) != "" {
|
||||
return false
|
||||
}
|
||||
slot := strings.TrimSpace(candidate.Slot)
|
||||
model := strings.TrimSpace(candidate.Model)
|
||||
if slot == "" || model == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
// Typical GraphicsControllers fallback on some BMCs reports only model/name
|
||||
// as slot and lacks stable identifiers. If we already have concrete GPUs of the
|
||||
// same model/manufacturer from PCIe inventory, this candidate is a duplicate.
|
||||
if !strings.EqualFold(slot, model) {
|
||||
return false
|
||||
}
|
||||
for _, gpu := range existing {
|
||||
if !strings.EqualFold(strings.TrimSpace(gpu.Model), model) {
|
||||
continue
|
||||
}
|
||||
if !strings.EqualFold(strings.TrimSpace(gpu.Manufacturer), strings.TrimSpace(candidate.Manufacturer)) {
|
||||
continue
|
||||
}
|
||||
if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
|
||||
deviceType := strings.ToLower(asString(doc["DeviceType"]))
|
||||
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {
|
||||
|
||||
@@ -904,6 +904,9 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
|
||||
}
|
||||
gpu := parseGPU(doc, functionDocs, idx)
|
||||
idx++
|
||||
if shouldSkipGenericGPUDuplicate(out, gpu) {
|
||||
continue
|
||||
}
|
||||
key := gpuDedupKey(gpu)
|
||||
if key == "" {
|
||||
continue
|
||||
|
||||
@@ -644,3 +644,50 @@ func TestReplayRedfishFromRawPayloads_AddsMissingServerModelWarning(t *testing.T
|
||||
t.Fatalf("expected collection warning event about missing server model")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *testing.T) {
|
||||
r := redfishSnapshotReader{tree: map[string]interface{}{
|
||||
"/redfish/v1/Systems/1/PCIeDevices": map[string]interface{}{
|
||||
"Members": []interface{}{
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/3"},
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/9"},
|
||||
},
|
||||
},
|
||||
"/redfish/v1/Systems/1/PCIeDevices/3": map[string]interface{}{
|
||||
"Id": "3",
|
||||
"Name": "PCIeCard3",
|
||||
"Model": "H200-SXM5-141G",
|
||||
"Manufacturer": "NVIDIA",
|
||||
"SerialNumber": "1654225094493",
|
||||
},
|
||||
"/redfish/v1/Systems/1/PCIeDevices/9": map[string]interface{}{
|
||||
"Id": "9",
|
||||
"Name": "PCIeCard9",
|
||||
"Model": "H200-SXM5-141G",
|
||||
"Manufacturer": "NVIDIA",
|
||||
"SerialNumber": "1654425002635",
|
||||
},
|
||||
"/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{
|
||||
"Members": []interface{}{
|
||||
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/GraphicsControllers/GPU0"},
|
||||
},
|
||||
},
|
||||
"/redfish/v1/Systems/1/GraphicsControllers/GPU0": map[string]interface{}{
|
||||
"Id": "GPU0",
|
||||
"Name": "H200-SXM5-141G",
|
||||
"Model": "H200-SXM5-141G",
|
||||
"Manufacturer": "NVIDIA",
|
||||
"SerialNumber": "N/A",
|
||||
},
|
||||
}}
|
||||
|
||||
got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, nil)
|
||||
if len(got) != 2 {
|
||||
t.Fatalf("expected 2 GPUs without generic duplicate, got %d", len(got))
|
||||
}
|
||||
for _, gpu := range got {
|
||||
if gpu.Slot == "H200-SXM5-141G" {
|
||||
t.Fatalf("unexpected model-only duplicate GPU row")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user