fix(exporter): preserve all HGX GPUs with generic PCIe slot name
Supermicro HGX BMC reports all 8 B200 GPU PCIe devices with Name "PCIe Device" — a generic label shared by every GPU, not a unique hardware position. pcieDedupKey used the slot as its primary key, so all 8 GPUs collapsed into a single entry in the UI (the first one, serial 1654925165720).

Add isGenericPCIeSlotName to detect non-positional slot labels and fall through to serial/BDF for dedup instead, preserving each GPU as a separate entry. Positional slots (#GPU0, SLOT-NIC1, etc.) continue to use slot-first dedup.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1961,7 +1961,10 @@ func pcieDedupKey(item ReanimatorPCIe) string {
|
||||
slot := strings.ToLower(strings.TrimSpace(item.Slot))
|
||||
serial := strings.ToLower(strings.TrimSpace(item.SerialNumber))
|
||||
bdf := strings.ToLower(strings.TrimSpace(item.BDF))
|
||||
if slot != "" {
|
||||
// Generic slot names (e.g. "PCIe Device" from HGX BMC) are not unique
|
||||
// hardware positions — multiple distinct devices share the same name.
|
||||
// Fall through to serial/BDF so they are not incorrectly collapsed.
|
||||
if slot != "" && !isGenericPCIeSlotName(slot) {
|
||||
return "slot:" + slot
|
||||
}
|
||||
if serial != "" {
|
||||
@@ -1970,9 +1973,22 @@ func pcieDedupKey(item ReanimatorPCIe) string {
|
||||
if bdf != "" {
|
||||
return "bdf:" + bdf
|
||||
}
|
||||
if slot != "" {
|
||||
return "slot:" + slot
|
||||
}
|
||||
return strings.ToLower(strings.TrimSpace(item.DeviceClass)) + "|" + strings.ToLower(strings.TrimSpace(item.Model))
|
||||
}
|
||||
|
||||
// isGenericPCIeSlotName reports whether slot is a generic device-type label
// (for example "PCIe Device") rather than a unique hardware position
// identifier (for example "#GPU0"). Several distinct devices may carry the
// same generic label, so such a value must not be used as a dedup key.
//
// The comparison ignores letter case and surrounding whitespace, so both raw
// values and already-normalized (lower-cased, trimmed) values are accepted.
func isGenericPCIeSlotName(slot string) bool {
	// Normalize here instead of relying on every caller to do it first.
	switch strings.ToLower(strings.TrimSpace(slot)) {
	case "pcie device", "pcie slot", "pcie":
		return true
	}
	return false
}
|
||||
|
||||
func pcieQualityScore(item ReanimatorPCIe) int {
|
||||
score := 0
|
||||
if strings.TrimSpace(item.SerialNumber) != "" {
|
||||
|
||||
Reference in New Issue
Block a user