fix(exporter): preserve all HGX GPUs with generic PCIe slot name
Supermicro HGX BMC reports all 8 B200 GPU PCIe devices with Name "PCIe Device" — a generic label shared by every GPU, not a unique hardware position. pcieDedupKey used slot as the primary key, so all 8 GPUs collapsed to one entry in the UI (the first, serial 1654925165720). Add isGenericPCIeSlotName to detect non-positional slot labels and fall through to serial/BDF for dedup instead, preserving each GPU separately. Positional slots (#GPU0, SLOT-NIC1, etc.) continue to use slot-first dedup. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -733,6 +733,42 @@ func TestConvertPCIeDevices_SkipsDisplayControllerDuplicates(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertPCIeDevices_PreservesAllGPUsWithGenericSlot(t *testing.T) {
|
||||
// Supermicro HGX BMC reports all GPU PCIe devices with Name "PCIe Device" —
|
||||
// a generic label that is not a unique hardware position. All 8 GPUs must
|
||||
// be preserved; dedup by generic slot name must not collapse them into one.
|
||||
gpus := make([]models.GPU, 8)
|
||||
serials := []string{
|
||||
"1654925165720", "1654925166160", "1654925165942", "1654925165271",
|
||||
"1654925165719", "1654925165252", "1654925165304", "1654925165587",
|
||||
}
|
||||
for i, sn := range serials {
|
||||
gpus[i] = models.GPU{
|
||||
Slot: "PCIe Device",
|
||||
Model: "B200 180GB HBM3e",
|
||||
Manufacturer: "NVIDIA",
|
||||
SerialNumber: sn,
|
||||
PartNumber: "2901-886-A1",
|
||||
Status: "OK",
|
||||
}
|
||||
}
|
||||
hw := &models.HardwareConfig{GPUs: gpus}
|
||||
result := convertPCIeDevices(hw, "2026-04-13T10:00:00Z")
|
||||
if len(result) != 8 {
|
||||
t.Fatalf("expected 8 GPU entries (one per serial), got %d", len(result))
|
||||
}
|
||||
seen := make(map[string]bool)
|
||||
for _, r := range result {
|
||||
if seen[r.SerialNumber] {
|
||||
t.Fatalf("duplicate serial %q in PCIe result", r.SerialNumber)
|
||||
}
|
||||
seen[r.SerialNumber] = true
|
||||
if r.DeviceClass != "VideoController" {
|
||||
t.Fatalf("expected VideoController device class, got %q", r.DeviceClass)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertPCIeDevices_MapsGPUStatusHistory(t *testing.T) {
|
||||
hw := &models.HardwareConfig{
|
||||
GPUs: []models.GPU{
|
||||
|
||||
Reference in New Issue
Block a user