nvidia: improve component mapping, firmware, statuses and check times
This commit is contained in:
101
internal/parser/vendors/nvidia/parser_test.go
vendored
101
internal/parser/vendors/nvidia/parser_test.go
vendored
@@ -4,6 +4,7 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.mchus.pro/mchus/logpile/internal/parser"
|
||||
)
|
||||
@@ -146,6 +147,39 @@ func TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900(t *testing.T) {
|
||||
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||
t.Skip("Test archive not found, skipping test")
|
||||
}
|
||||
|
||||
files, err := parser.ExtractArchive(archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to extract archive: %v", err)
|
||||
}
|
||||
|
||||
p := &Parser{}
|
||||
result, err := p.Parse(files)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse archive: %v", err)
|
||||
}
|
||||
|
||||
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
|
||||
t.Fatalf("expected GPUs in parsed result")
|
||||
}
|
||||
|
||||
errBySerial := make(map[string]string, len(result.Hardware.GPUs))
|
||||
for _, gpu := range result.Hardware.GPUs {
|
||||
if gpu.SerialNumber != "" {
|
||||
errBySerial[gpu.SerialNumber] = gpu.ErrorDescription
|
||||
}
|
||||
}
|
||||
|
||||
if got := errBySerial["1653925025497"]; got != "Row remapping failed" {
|
||||
t.Fatalf("expected GPU serial 1653925025497 error Row remapping failed, got %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
||||
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||
@@ -169,21 +203,82 @@ func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
|
||||
|
||||
found := false
|
||||
for _, gpu := range result.Hardware.GPUs {
|
||||
if gpu.Model == "NVIDIA H200 SXM" {
|
||||
if gpu.Model == "692-2G520-0280-501" && gpu.Description == "hgx h200 8 gpu 141g aircooled" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Fatalf("expected at least one GPU model NVIDIA H200 SXM")
|
||||
t.Fatalf("expected at least one GPU with model 692-2G520-0280-501 and description hgx h200 8 gpu 141g aircooled")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNVIDIAParser_ComponentCheckTimes_RealArchive07900(t *testing.T) {
|
||||
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
|
||||
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
|
||||
t.Skip("Test archive not found, skipping test")
|
||||
}
|
||||
|
||||
files, err := parser.ExtractArchive(archivePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to extract archive: %v", err)
|
||||
}
|
||||
|
||||
p := &Parser{}
|
||||
result, err := p.Parse(files)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse archive: %v", err)
|
||||
}
|
||||
|
||||
if result.Hardware == nil {
|
||||
t.Fatalf("expected hardware in parsed result")
|
||||
}
|
||||
|
||||
expectedGPU := time.Date(2026, 1, 22, 9, 45, 36, 0, time.UTC)
|
||||
expectedNVSwitch := time.Date(2026, 1, 22, 9, 11, 32, 0, time.UTC)
|
||||
|
||||
if len(result.Hardware.GPUs) == 0 {
|
||||
t.Fatalf("expected GPUs in parsed result")
|
||||
}
|
||||
for _, gpu := range result.Hardware.GPUs {
|
||||
if !gpu.StatusCheckedAt.Equal(expectedGPU) {
|
||||
t.Fatalf("expected GPU %s status_checked_at %s, got %s", gpu.Slot, expectedGPU.Format(time.RFC3339), gpu.StatusCheckedAt.Format(time.RFC3339))
|
||||
}
|
||||
if gpu.StatusAtCollect == nil || !gpu.StatusAtCollect.At.Equal(expectedGPU) {
|
||||
t.Fatalf("expected GPU %s status_at_collection.at %s", gpu.Slot, expectedGPU.Format(time.RFC3339))
|
||||
}
|
||||
}
|
||||
|
||||
nvsCount := 0
|
||||
for _, dev := range result.Hardware.PCIeDevices {
|
||||
slot := normalizeNVSwitchSlot(dev.Slot)
|
||||
if slot == "" {
|
||||
continue
|
||||
}
|
||||
if dev.DeviceClass != "NVSwitch" && len(slot) < len("NVSWITCH") {
|
||||
continue
|
||||
}
|
||||
if dev.DeviceClass != "NVSwitch" && slot[:len("NVSWITCH")] != "NVSWITCH" {
|
||||
continue
|
||||
}
|
||||
nvsCount++
|
||||
if !dev.StatusCheckedAt.Equal(expectedNVSwitch) {
|
||||
t.Fatalf("expected NVSwitch %s status_checked_at %s, got %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339), dev.StatusCheckedAt.Format(time.RFC3339))
|
||||
}
|
||||
if dev.StatusAtCollect == nil || !dev.StatusAtCollect.At.Equal(expectedNVSwitch) {
|
||||
t.Fatalf("expected NVSwitch %s status_at_collection.at %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339))
|
||||
}
|
||||
}
|
||||
if nvsCount == 0 {
|
||||
t.Fatalf("expected NVSwitch devices in parsed result")
|
||||
}
|
||||
}
|
||||
|
||||
func contains(s, substr string) bool {
|
||||
return len(s) >= len(substr) && (s == substr || len(s) > len(substr) &&
|
||||
(s[:len(substr)] == substr || s[len(s)-len(substr):] == substr ||
|
||||
findSubstring(s, substr)))
|
||||
findSubstring(s, substr)))
|
||||
}
|
||||
|
||||
func findSubstring(s, substr string) bool {
|
||||
|
||||
Reference in New Issue
Block a user