package nvidia

import (
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"git.mchus.pro/mchus/logpile/internal/parser"
)

// TestNVIDIAParser_RealArchive extracts and parses a real log archive from the
// example directory and checks that board info, GPUs, GPU serial numbers and
// SXM2-related events are present in the result. The test is skipped when the
// archive file does not exist.
func TestNVIDIAParser_RealArchive(t *testing.T) {
	// Test with the real archive that was reported as problematic
	archivePath := filepath.Join("../../../../example", "A514359X5A09844_logs-20260115-151707.tar")

	// Check if file exists
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		t.Skip("Test archive not found, skipping test")
	}

	// Extract files from archive
	files, err := parser.ExtractArchive(archivePath)
	if err != nil {
		t.Fatalf("Failed to extract archive: %v", err)
	}

	// Check if inventory/output.log exists
	hasInventoryLog := false
	for _, f := range files {
		if filepath.Base(f.Path) == "output.log" {
			t.Logf("Found file: %s", f.Path)
		}
		if f.Path == "./inventory/output.log" || f.Path == "inventory/output.log" {
			hasInventoryLog = true
			t.Logf("Found inventory/output.log with %d bytes", len(f.Content))
		}
	}
	if !hasInventoryLog {
		t.Error("inventory/output.log not found in extracted files")
	}

	// Create parser and parse
	p := &Parser{}
	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("Failed to parse archive: %v", err)
	}

	// Guard against a nil hardware section before dereferencing it below, so a
	// broken parse fails the test cleanly instead of panicking.
	if result.Hardware == nil {
		t.Fatalf("expected hardware in parsed result")
	}

	// Verify basic system info
	if result.Hardware.BoardInfo.Manufacturer == "" {
		t.Error("Expected Manufacturer to be set")
	}
	if result.Hardware.BoardInfo.ProductName == "" {
		t.Error("Expected ProductName to be set")
	}
	if result.Hardware.BoardInfo.SerialNumber == "" {
		t.Error("Expected SerialNumber to be set")
	}

	t.Logf("System Info:")
	t.Logf(" Manufacturer: %s", result.Hardware.BoardInfo.Manufacturer)
	t.Logf(" Product: %s", result.Hardware.BoardInfo.ProductName)
	t.Logf(" Serial: %s", result.Hardware.BoardInfo.SerialNumber)

	// Verify GPUs were found
	if len(result.Hardware.GPUs) == 0 {
		t.Error("Expected to find GPUs")
	}

	t.Logf("\nFound %d GPUs:", len(result.Hardware.GPUs))
	gpusWithSerials := 0
	for _, gpu := range result.Hardware.GPUs {
		t.Logf(" %s: %s (Firmware: %s, Serial: %s, BDF: %s)",
			gpu.Slot, gpu.Model, gpu.Firmware, gpu.SerialNumber, gpu.BDF)
		if gpu.SerialNumber != "" {
			gpusWithSerials++
		}
	}

	// Verify that GPU serial numbers were extracted
	if gpusWithSerials == 0 {
		t.Error("Expected at least some GPUs to have serial numbers")
	}
	t.Logf("\nGPUs with serial numbers: %d/%d", gpusWithSerials, len(result.Hardware.GPUs))

	// Check events for SXM2 failures
	t.Logf("\nTotal events: %d", len(result.Events))

	// Look for the specific serial or SXM2
	sxm2Events := 0
	for _, event := range result.Events {
		desc := event.Description + " " + event.RawData + " " + event.EventType
		if contains(desc, "SXM2") || contains(desc, "1653925025827") {
			t.Logf(" SXM2 Event: [%s] %s (Severity: %s)", event.EventType, event.Description, event.Severity)
			sxm2Events++
		}
	}

	if sxm2Events == 0 {
		t.Error("Expected to find events for SXM2 (faulty GPU 1653925025827)")
	}
	t.Logf("\nSXM2 failure events: %d", sxm2Events)
}

// TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900 parses the 07900
// example archive and checks that the GPU with serial 1653925025497 has status
// FAIL while every other GPU with a serial number has status PASS. The test is
// skipped when the archive file does not exist.
func TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900(t *testing.T) {
	archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		t.Skip("Test archive not found, skipping test")
	}

	files, err := parser.ExtractArchive(archivePath)
	if err != nil {
		t.Fatalf("Failed to extract archive: %v", err)
	}

	p := &Parser{}
	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("Failed to parse archive: %v", err)
	}

	if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
		t.Fatalf("expected GPUs in parsed result")
	}

	// Index status by serial number; GPUs without a serial are ignored.
	statusBySerial := make(map[string]string, len(result.Hardware.GPUs))
	for _, gpu := range result.Hardware.GPUs {
		if gpu.SerialNumber != "" {
			statusBySerial[gpu.SerialNumber] = gpu.Status
		}
	}

	if got := statusBySerial["1653925025497"]; got != "FAIL" {
		t.Fatalf("expected GPU serial 1653925025497 status FAIL, got %q", got)
	}

	for serial, st := range statusBySerial {
		if serial == "1653925025497" {
			continue
		}
		if st != "PASS" {
			t.Fatalf("expected non-failing GPU serial %s status PASS, got %q", serial, st)
		}
	}
}

// TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900 parses the
// 07900 example archive and checks that the GPU with serial 1653925025497
// carries the error description "Row remapping failed". The test is skipped
// when the archive file does not exist.
func TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900(t *testing.T) {
	archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		t.Skip("Test archive not found, skipping test")
	}

	files, err := parser.ExtractArchive(archivePath)
	if err != nil {
		t.Fatalf("Failed to extract archive: %v", err)
	}

	p := &Parser{}
	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("Failed to parse archive: %v", err)
	}

	if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
		t.Fatalf("expected GPUs in parsed result")
	}

	// Index error descriptions by serial number; GPUs without a serial are ignored.
	errBySerial := make(map[string]string, len(result.Hardware.GPUs))
	for _, gpu := range result.Hardware.GPUs {
		if gpu.SerialNumber != "" {
			errBySerial[gpu.SerialNumber] = gpu.ErrorDescription
		}
	}

	if got := errBySerial["1653925025497"]; got != "Row remapping failed" {
		t.Fatalf("expected GPU serial 1653925025497 error Row remapping failed, got %q", got)
	}
}

// TestNVIDIAParser_GPUModelFromSKU_RealArchive07900 parses the 07900 example
// archive and checks that at least one GPU has model "692-2G520-0280-501" and
// description "hgx h200 8 gpu 141g aircooled". The test is skipped when the
// archive file does not exist.
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
	archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		t.Skip("Test archive not found, skipping test")
	}

	files, err := parser.ExtractArchive(archivePath)
	if err != nil {
		t.Fatalf("Failed to extract archive: %v", err)
	}

	p := &Parser{}
	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("Failed to parse archive: %v", err)
	}

	if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
		t.Fatalf("expected GPUs in parsed result")
	}

	found := false
	for _, gpu := range result.Hardware.GPUs {
		if gpu.Model == "692-2G520-0280-501" && gpu.Description == "hgx h200 8 gpu 141g aircooled" {
			found = true
			break
		}
	}

	if !found {
		t.Fatalf("expected at least one GPU with model 692-2G520-0280-501 and description hgx h200 8 gpu 141g aircooled")
	}
}

// TestNVIDIAParser_ComponentCheckTimes_RealArchive07900 parses the 07900
// example archive and checks that every GPU and every NVSwitch PCIe device
// carries the expected status-check timestamp, both in StatusCheckedAt and in
// StatusAtCollect.At. The test is skipped when the archive file does not exist.
func TestNVIDIAParser_ComponentCheckTimes_RealArchive07900(t *testing.T) {
	archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
	if _, err := os.Stat(archivePath); os.IsNotExist(err) {
		t.Skip("Test archive not found, skipping test")
	}

	files, err := parser.ExtractArchive(archivePath)
	if err != nil {
		t.Fatalf("Failed to extract archive: %v", err)
	}

	p := &Parser{}
	result, err := p.Parse(files)
	if err != nil {
		t.Fatalf("Failed to parse archive: %v", err)
	}

	if result.Hardware == nil {
		t.Fatalf("expected hardware in parsed result")
	}

	expectedGPU := time.Date(2026, 1, 22, 6, 45, 36, 0, time.UTC)
	expectedNVSwitch := time.Date(2026, 1, 22, 6, 11, 32, 0, time.UTC)

	if len(result.Hardware.GPUs) == 0 {
		t.Fatalf("expected GPUs in parsed result")
	}

	for _, gpu := range result.Hardware.GPUs {
		if !gpu.StatusCheckedAt.Equal(expectedGPU) {
			t.Fatalf("expected GPU %s status_checked_at %s, got %s",
				gpu.Slot, expectedGPU.Format(time.RFC3339), gpu.StatusCheckedAt.Format(time.RFC3339))
		}
		if gpu.StatusAtCollect == nil || !gpu.StatusAtCollect.At.Equal(expectedGPU) {
			t.Fatalf("expected GPU %s status_at_collection.at %s", gpu.Slot, expectedGPU.Format(time.RFC3339))
		}
	}

	nvsCount := 0
	for _, dev := range result.Hardware.PCIeDevices {
		slot := normalizeNVSwitchSlot(dev.Slot)
		if slot == "" {
			continue
		}
		// A device counts as an NVSwitch when its class says so or when its
		// normalized slot name starts with "NVSWITCH".
		if dev.DeviceClass != "NVSwitch" && !strings.HasPrefix(slot, "NVSWITCH") {
			continue
		}

		nvsCount++
		if !dev.StatusCheckedAt.Equal(expectedNVSwitch) {
			t.Fatalf("expected NVSwitch %s status_checked_at %s, got %s",
				dev.Slot, expectedNVSwitch.Format(time.RFC3339), dev.StatusCheckedAt.Format(time.RFC3339))
		}
		if dev.StatusAtCollect == nil || !dev.StatusAtCollect.At.Equal(expectedNVSwitch) {
			t.Fatalf("expected NVSwitch %s status_at_collection.at %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339))
		}
	}

	if nvsCount == 0 {
		t.Fatalf("expected NVSwitch devices in parsed result")
	}
}

// contains reports whether substr occurs anywhere within s. An empty substr
// always matches. It is a thin wrapper over strings.Contains.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}

// findSubstring reports whether substr occurs anywhere within s. It is no
// longer called from this file; the name and signature are kept unchanged in
// case other files in the package use it.
func findSubstring(s, substr string) bool {
	return strings.Contains(s, substr)
}