fix: dedup GPUs across multiple chassis PCIeDevice trees in Redfish collector

Supermicro HGX exposes each GPU under both Chassis/1/PCIeDevices and a
dedicated Chassis/HGX_GPU_SXM_N/PCIeDevices. gpuDocDedupKey keyed on the
@odata.id path first, so the same physical GPU (same serial) reported
under two different paths was not deduplicated across sources. Stable
identifiers (serial → BDF → slot+model) now take priority over the path.

Also includes Inspur parser improvements: NVMe model/serial enrichment
from devicefrusdr.log and audit.log, RAID drive slot normalization to
BP notation, PSU slot normalization, BMC/CPLD/VR firmware from RESTful
version info section, and parser version bump to 1.8.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-06 14:44:36 +03:00
parent 62d6ad6f66
commit 9df29b1be9
9 changed files with 362 additions and 8 deletions

View File

@@ -2606,8 +2606,9 @@ func parseDrive(doc map[string]interface{}) models.Storage {
storageType := classifyStorageType(doc)
slot := normalizeRAIDDriveSlot(firstNonEmpty(asString(doc["Id"]), asString(doc["Name"])))
return models.Storage{
Slot: firstNonEmpty(asString(doc["Id"]), asString(doc["Name"])),
Slot: slot,
Type: storageType,
Model: firstNonEmpty(asString(doc["Model"]), asString(doc["Name"])),
SizeGB: sizeGB,
@@ -2619,6 +2620,43 @@ func parseDrive(doc map[string]interface{}) models.Storage {
}
}
// isNumericString reports whether s is made up exclusively of the ASCII
// digits '0' through '9'. The empty string is not considered numeric.
func isNumericString(s string) bool {
	if len(s) == 0 {
		return false
	}
	// Byte-wise scan: every byte of a multi-byte UTF-8 sequence is >= 0x80,
	// which is above '9', so non-ASCII input is rejected as well.
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < '0' || b > '9' {
			return false
		}
	}
	return true
}
// normalizeRAIDDriveSlot rewrites RAID drive IDs of the form
// "{prefix}_RAID_Disk_{enclosure}:{slot}" into backplane notation
// "BP{enclosure-1}:{slot}".
//
// Example: "PCIe8_RAID_Disk_1:0" → "BP0:0".
//
// The input is returned untouched when the "_RAID_Disk_" marker is absent,
// when no ':' follows it, or when the enclosure part is not an integer >= 1.
func normalizeRAIDDriveSlot(slot string) string {
	const raidDiskTag = "_RAID_Disk_"

	tagPos := strings.Index(slot, raidDiskTag)
	if tagPos == -1 {
		return slot
	}

	// The text after the tag is expected to look like "1:0"; split it at the
	// first colon only, so anything after that colon is kept verbatim.
	parts := strings.SplitN(slot[tagPos+len(raidDiskTag):], ":", 2)
	if len(parts) != 2 {
		return slot
	}

	enclosure, convErr := strconv.Atoi(parts[0])
	if convErr != nil || enclosure < 1 {
		return slot
	}

	// Enclosure IDs are 1-based while the BP index produced here is 0-based.
	backplane := enclosure - 1
	return fmt.Sprintf("BP%d:%s", backplane, parts[1])
}
func parseStorageVolume(doc map[string]interface{}, controller string) models.StorageVolume {
sizeGB := 0
capBytes := asInt64(doc["CapacityBytes"])
@@ -2767,6 +2805,11 @@ func parsePSU(doc map[string]interface{}, idx int) models.PSU {
if slot == "" {
slot = fmt.Sprintf("PSU%d", idx)
}
// Normalize numeric-only slots ("0", "1") to "PSU0", "PSU1" for consistency
// with BMC log parsers (Inspur, Dell etc.) that use the PSU prefix.
if isNumericString(slot) {
slot = "PSU" + slot
}
return models.PSU{
Slot: slot,
@@ -3065,10 +3108,17 @@ func gpuDedupKey(gpu models.GPU) string {
}
// gpuDocDedupKey returns the key used to deduplicate a GPU that was parsed
// from the Redfish document doc.
//
// Lookup order:
//  1. The key produced by gpuDedupKey(gpu), when non-empty. Preferring stable
//     GPU identifiers (serial, BDF) over the path means the same physical GPU
//     exposed under multiple Chassis PCIeDevice trees (e.g. Supermicro HGX:
//     Chassis/1/PCIeDevices/GPU1 and
//     Chassis/HGX_GPU_SXM_1/PCIeDevices/GPU_SXM_1) is correctly deduplicated.
//  2. "path:" followed by the normalized @odata.id of doc, when non-empty.
//  3. The empty string when neither source yields a key.
func gpuDocDedupKey(doc map[string]interface{}, gpu models.GPU) string {
	if key := gpuDedupKey(gpu); key != "" {
		return key
	}
	if path := normalizeRedfishPath(asString(doc["@odata.id"])); path != "" {
		return "path:" + path
	}
	// gpuDedupKey(gpu) was already found to be empty above, so there is no
	// further fallback; a second call would only repeat that empty result.
	return ""
}
func shouldSkipGenericGPUDuplicate(existing []models.GPU, candidate models.GPU) bool {