292 lines
8.4 KiB
Go
292 lines
8.4 KiB
Go
package nvidia
|
|
|
|
import (
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"git.mchus.pro/mchus/logpile/internal/parser"
)
|
|
|
|
func TestNVIDIAParser_RealArchive(t *testing.T) {
|
|
// Test with the real archive that was reported as problematic
|
|
archivePath := filepath.Join("../../../../example", "A514359X5A09844_logs-20260115-151707.tar")
|
|
|
|
// Check if file exists
|
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
|
t.Skip("Test archive not found, skipping test")
|
|
}
|
|
|
|
// Extract files from archive
|
|
files, err := parser.ExtractArchive(archivePath)
|
|
if err != nil {
|
|
t.Fatalf("Failed to extract archive: %v", err)
|
|
}
|
|
|
|
// Check if inventory/output.log exists
|
|
hasInventoryLog := false
|
|
for _, f := range files {
|
|
if filepath.Base(f.Path) == "output.log" {
|
|
t.Logf("Found file: %s", f.Path)
|
|
}
|
|
if f.Path == "./inventory/output.log" || f.Path == "inventory/output.log" {
|
|
hasInventoryLog = true
|
|
t.Logf("Found inventory/output.log with %d bytes", len(f.Content))
|
|
}
|
|
}
|
|
if !hasInventoryLog {
|
|
t.Error("inventory/output.log not found in extracted files")
|
|
}
|
|
|
|
// Create parser and parse
|
|
p := &Parser{}
|
|
result, err := p.Parse(files)
|
|
if err != nil {
|
|
t.Fatalf("Failed to parse archive: %v", err)
|
|
}
|
|
|
|
// Verify basic system info
|
|
if result.Hardware.BoardInfo.Manufacturer == "" {
|
|
t.Error("Expected Manufacturer to be set")
|
|
}
|
|
if result.Hardware.BoardInfo.ProductName == "" {
|
|
t.Error("Expected ProductName to be set")
|
|
}
|
|
if result.Hardware.BoardInfo.SerialNumber == "" {
|
|
t.Error("Expected SerialNumber to be set")
|
|
}
|
|
|
|
t.Logf("System Info:")
|
|
t.Logf(" Manufacturer: %s", result.Hardware.BoardInfo.Manufacturer)
|
|
t.Logf(" Product: %s", result.Hardware.BoardInfo.ProductName)
|
|
t.Logf(" Serial: %s", result.Hardware.BoardInfo.SerialNumber)
|
|
|
|
// Verify GPUs were found
|
|
if len(result.Hardware.GPUs) == 0 {
|
|
t.Error("Expected to find GPUs")
|
|
}
|
|
|
|
t.Logf("\nFound %d GPUs:", len(result.Hardware.GPUs))
|
|
|
|
gpusWithSerials := 0
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
t.Logf(" %s: %s (Firmware: %s, Serial: %s, BDF: %s)",
|
|
gpu.Slot, gpu.Model, gpu.Firmware, gpu.SerialNumber, gpu.BDF)
|
|
|
|
if gpu.SerialNumber != "" {
|
|
gpusWithSerials++
|
|
}
|
|
}
|
|
|
|
// Verify that GPU serial numbers were extracted
|
|
if gpusWithSerials == 0 {
|
|
t.Error("Expected at least some GPUs to have serial numbers")
|
|
}
|
|
|
|
t.Logf("\nGPUs with serial numbers: %d/%d", gpusWithSerials, len(result.Hardware.GPUs))
|
|
|
|
// Check events for SXM2 failures
|
|
t.Logf("\nTotal events: %d", len(result.Events))
|
|
|
|
// Look for the specific serial or SXM2
|
|
sxm2Events := 0
|
|
for _, event := range result.Events {
|
|
desc := event.Description + " " + event.RawData + " " + event.EventType
|
|
if contains(desc, "SXM2") || contains(desc, "1653925025827") {
|
|
t.Logf(" SXM2 Event: [%s] %s (Severity: %s)", event.EventType, event.Description, event.Severity)
|
|
sxm2Events++
|
|
}
|
|
}
|
|
|
|
if sxm2Events == 0 {
|
|
t.Error("Expected to find events for SXM2 (faulty GPU 1653925025827)")
|
|
}
|
|
t.Logf("\nSXM2 failure events: %d", sxm2Events)
|
|
}
|
|
|
|
func TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900(t *testing.T) {
|
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
|
t.Skip("Test archive not found, skipping test")
|
|
}
|
|
|
|
files, err := parser.ExtractArchive(archivePath)
|
|
if err != nil {
|
|
t.Fatalf("Failed to extract archive: %v", err)
|
|
}
|
|
|
|
p := &Parser{}
|
|
result, err := p.Parse(files)
|
|
if err != nil {
|
|
t.Fatalf("Failed to parse archive: %v", err)
|
|
}
|
|
|
|
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
|
t.Fatalf("expected GPUs in parsed result")
|
|
}
|
|
|
|
statusBySerial := make(map[string]string, len(result.Hardware.GPUs))
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
if gpu.SerialNumber != "" {
|
|
statusBySerial[gpu.SerialNumber] = gpu.Status
|
|
}
|
|
}
|
|
|
|
if got := statusBySerial["1653925025497"]; got != "FAIL" {
|
|
t.Fatalf("expected GPU serial 1653925025497 status FAIL, got %q", got)
|
|
}
|
|
|
|
for serial, st := range statusBySerial {
|
|
if serial == "1653925025497" {
|
|
continue
|
|
}
|
|
if st != "PASS" {
|
|
t.Fatalf("expected non-failing GPU serial %s status PASS, got %q", serial, st)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900(t *testing.T) {
|
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
|
t.Skip("Test archive not found, skipping test")
|
|
}
|
|
|
|
files, err := parser.ExtractArchive(archivePath)
|
|
if err != nil {
|
|
t.Fatalf("Failed to extract archive: %v", err)
|
|
}
|
|
|
|
p := &Parser{}
|
|
result, err := p.Parse(files)
|
|
if err != nil {
|
|
t.Fatalf("Failed to parse archive: %v", err)
|
|
}
|
|
|
|
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
|
t.Fatalf("expected GPUs in parsed result")
|
|
}
|
|
|
|
errBySerial := make(map[string]string, len(result.Hardware.GPUs))
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
if gpu.SerialNumber != "" {
|
|
errBySerial[gpu.SerialNumber] = gpu.ErrorDescription
|
|
}
|
|
}
|
|
|
|
if got := errBySerial["1653925025497"]; got != "Row remapping failed" {
|
|
t.Fatalf("expected GPU serial 1653925025497 error Row remapping failed, got %q", got)
|
|
}
|
|
}
|
|
|
|
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
|
t.Skip("Test archive not found, skipping test")
|
|
}
|
|
|
|
files, err := parser.ExtractArchive(archivePath)
|
|
if err != nil {
|
|
t.Fatalf("Failed to extract archive: %v", err)
|
|
}
|
|
|
|
p := &Parser{}
|
|
result, err := p.Parse(files)
|
|
if err != nil {
|
|
t.Fatalf("Failed to parse archive: %v", err)
|
|
}
|
|
|
|
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
|
t.Fatalf("expected GPUs in parsed result")
|
|
}
|
|
|
|
found := false
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
if gpu.Model == "692-2G520-0280-501" && gpu.Description == "hgx h200 8 gpu 141g aircooled" {
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
|
|
if !found {
|
|
t.Fatalf("expected at least one GPU with model 692-2G520-0280-501 and description hgx h200 8 gpu 141g aircooled")
|
|
}
|
|
}
|
|
|
|
func TestNVIDIAParser_ComponentCheckTimes_RealArchive07900(t *testing.T) {
|
|
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
|
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
|
t.Skip("Test archive not found, skipping test")
|
|
}
|
|
|
|
files, err := parser.ExtractArchive(archivePath)
|
|
if err != nil {
|
|
t.Fatalf("Failed to extract archive: %v", err)
|
|
}
|
|
|
|
p := &Parser{}
|
|
result, err := p.Parse(files)
|
|
if err != nil {
|
|
t.Fatalf("Failed to parse archive: %v", err)
|
|
}
|
|
|
|
if result.Hardware == nil {
|
|
t.Fatalf("expected hardware in parsed result")
|
|
}
|
|
|
|
expectedGPU := time.Date(2026, 1, 22, 6, 45, 36, 0, time.UTC)
|
|
expectedNVSwitch := time.Date(2026, 1, 22, 6, 11, 32, 0, time.UTC)
|
|
|
|
if len(result.Hardware.GPUs) == 0 {
|
|
t.Fatalf("expected GPUs in parsed result")
|
|
}
|
|
for _, gpu := range result.Hardware.GPUs {
|
|
if !gpu.StatusCheckedAt.Equal(expectedGPU) {
|
|
t.Fatalf("expected GPU %s status_checked_at %s, got %s", gpu.Slot, expectedGPU.Format(time.RFC3339), gpu.StatusCheckedAt.Format(time.RFC3339))
|
|
}
|
|
if gpu.StatusAtCollect == nil || !gpu.StatusAtCollect.At.Equal(expectedGPU) {
|
|
t.Fatalf("expected GPU %s status_at_collection.at %s", gpu.Slot, expectedGPU.Format(time.RFC3339))
|
|
}
|
|
}
|
|
|
|
nvsCount := 0
|
|
for _, dev := range result.Hardware.PCIeDevices {
|
|
slot := normalizeNVSwitchSlot(dev.Slot)
|
|
if slot == "" {
|
|
continue
|
|
}
|
|
if dev.DeviceClass != "NVSwitch" && len(slot) < len("NVSWITCH") {
|
|
continue
|
|
}
|
|
if dev.DeviceClass != "NVSwitch" && slot[:len("NVSWITCH")] != "NVSWITCH" {
|
|
continue
|
|
}
|
|
nvsCount++
|
|
if !dev.StatusCheckedAt.Equal(expectedNVSwitch) {
|
|
t.Fatalf("expected NVSwitch %s status_checked_at %s, got %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339), dev.StatusCheckedAt.Format(time.RFC3339))
|
|
}
|
|
if dev.StatusAtCollect == nil || !dev.StatusAtCollect.At.Equal(expectedNVSwitch) {
|
|
t.Fatalf("expected NVSwitch %s status_at_collection.at %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339))
|
|
}
|
|
}
|
|
if nvsCount == 0 {
|
|
t.Fatalf("expected NVSwitch devices in parsed result")
|
|
}
|
|
}
|
|
|
|
// contains reports whether substr occurs anywhere within s.
// An empty substr is contained in every string, including the empty string.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
|
|
|
|
// findSubstring reports whether substr occurs in s by comparing substr against
// every window of the same length in s, left to right. It returns false when
// substr is longer than s and true when substr is empty.
func findSubstring(s, substr string) bool {
	width := len(substr)
	lastStart := len(s) - width
	for start := 0; start <= lastStart; start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
|