collector/redfish: dedupe model-only GPU rows from graphics controllers
This commit is contained in:
@@ -444,6 +444,9 @@ func (c *RedfishConnector) collectGPUs(ctx context.Context, client *http.Client,
|
|||||||
|
|
||||||
gpu := parseGPU(doc, functionDocs, idx)
|
gpu := parseGPU(doc, functionDocs, idx)
|
||||||
idx++
|
idx++
|
||||||
|
if shouldSkipGenericGPUDuplicate(out, gpu) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
key := gpuDedupKey(gpu)
|
key := gpuDedupKey(gpu)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
@@ -2034,6 +2037,39 @@ func gpuDedupKey(gpu models.GPU) string {
|
|||||||
return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot))
|
return firstNonEmpty(strings.TrimSpace(gpu.Slot)+"|"+strings.TrimSpace(gpu.Model), strings.TrimSpace(gpu.Slot))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool {
|
||||||
|
if len(existing) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if normalizeRedfishIdentityField(candidate.SerialNumber) != "" || strings.TrimSpace(candidate.BDF) != "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
slot := strings.TrimSpace(candidate.Slot)
|
||||||
|
model := strings.TrimSpace(candidate.Model)
|
||||||
|
if slot == "" || model == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Typical GraphicsControllers fallback on some BMCs reports only model/name
|
||||||
|
// as slot and lacks stable identifiers. If we already have concrete GPUs of the
|
||||||
|
// same model/manufacturer from PCIe inventory, this candidate is a duplicate.
|
||||||
|
if !strings.EqualFold(slot, model) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for _, gpu := range existing {
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(gpu.Model), model) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !strings.EqualFold(strings.TrimSpace(gpu.Manufacturer), strings.TrimSpace(candidate.Manufacturer)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if normalizeRedfishIdentityField(gpu.SerialNumber) != "" || strings.TrimSpace(gpu.BDF) != "" {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
|
func looksLikeGPU(doc map[string]interface{}, functionDocs []map[string]interface{}) bool {
|
||||||
deviceType := strings.ToLower(asString(doc["DeviceType"]))
|
deviceType := strings.ToLower(asString(doc["DeviceType"]))
|
||||||
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {
|
if strings.Contains(deviceType, "gpu") || strings.Contains(deviceType, "graphics") || strings.Contains(deviceType, "accelerator") {
|
||||||
|
|||||||
@@ -904,6 +904,9 @@ func (r redfishSnapshotReader) collectGPUs(systemPaths, chassisPaths []string) [
|
|||||||
}
|
}
|
||||||
gpu := parseGPU(doc, functionDocs, idx)
|
gpu := parseGPU(doc, functionDocs, idx)
|
||||||
idx++
|
idx++
|
||||||
|
if shouldSkipGenericGPUDuplicate(out, gpu) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
key := gpuDedupKey(gpu)
|
key := gpuDedupKey(gpu)
|
||||||
if key == "" {
|
if key == "" {
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -644,3 +644,50 @@ func TestReplayRedfishFromRawPayloads_AddsMissingServerModelWarning(t *testing.T
|
|||||||
t.Fatalf("expected collection warning event about missing server model")
|
t.Fatalf("expected collection warning event about missing server model")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReplayCollectGPUs_SkipsModelOnlyDuplicateFromGraphicsControllers(t *testing.T) {
|
||||||
|
r := redfishSnapshotReader{tree: map[string]interface{}{
|
||||||
|
"/redfish/v1/Systems/1/PCIeDevices": map[string]interface{}{
|
||||||
|
"Members": []interface{}{
|
||||||
|
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/3"},
|
||||||
|
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/PCIeDevices/9"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"/redfish/v1/Systems/1/PCIeDevices/3": map[string]interface{}{
|
||||||
|
"Id": "3",
|
||||||
|
"Name": "PCIeCard3",
|
||||||
|
"Model": "H200-SXM5-141G",
|
||||||
|
"Manufacturer": "NVIDIA",
|
||||||
|
"SerialNumber": "1654225094493",
|
||||||
|
},
|
||||||
|
"/redfish/v1/Systems/1/PCIeDevices/9": map[string]interface{}{
|
||||||
|
"Id": "9",
|
||||||
|
"Name": "PCIeCard9",
|
||||||
|
"Model": "H200-SXM5-141G",
|
||||||
|
"Manufacturer": "NVIDIA",
|
||||||
|
"SerialNumber": "1654425002635",
|
||||||
|
},
|
||||||
|
"/redfish/v1/Systems/1/GraphicsControllers": map[string]interface{}{
|
||||||
|
"Members": []interface{}{
|
||||||
|
map[string]interface{}{"@odata.id": "/redfish/v1/Systems/1/GraphicsControllers/GPU0"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"/redfish/v1/Systems/1/GraphicsControllers/GPU0": map[string]interface{}{
|
||||||
|
"Id": "GPU0",
|
||||||
|
"Name": "H200-SXM5-141G",
|
||||||
|
"Model": "H200-SXM5-141G",
|
||||||
|
"Manufacturer": "NVIDIA",
|
||||||
|
"SerialNumber": "N/A",
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
|
||||||
|
got := r.collectGPUs([]string{"/redfish/v1/Systems/1"}, nil)
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("expected 2 GPUs without generic duplicate, got %d", len(got))
|
||||||
|
}
|
||||||
|
for _, gpu := range got {
|
||||||
|
if gpu.Slot == "H200-SXM5-141G" {
|
||||||
|
t.Fatalf("unexpected model-only duplicate GPU row")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user