nvidia: improve component mapping, firmware, statuses and check times

This commit is contained in:
2026-02-16 23:17:13 +03:00
parent 514da76ddb
commit b33cca5fcc
19 changed files with 2051 additions and 65 deletions

View File

@@ -4,6 +4,7 @@ import (
"os"
"path/filepath"
"testing"
"time"
"git.mchus.pro/mchus/logpile/internal/parser"
)
@@ -146,6 +147,39 @@ func TestNVIDIAParser_GPUStatusFromSummary_RealArchive07900(t *testing.T) {
}
}
func TestNVIDIAParser_GPUErrorDetailsFromSummary_RealArchive07900(t *testing.T) {
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
t.Skip("Test archive not found, skipping test")
}
files, err := parser.ExtractArchive(archivePath)
if err != nil {
t.Fatalf("Failed to extract archive: %v", err)
}
p := &Parser{}
result, err := p.Parse(files)
if err != nil {
t.Fatalf("Failed to parse archive: %v", err)
}
if result.Hardware == nil || len(result.Hardware.GPUs) == 0 {
t.Fatalf("expected GPUs in parsed result")
}
errBySerial := make(map[string]string, len(result.Hardware.GPUs))
for _, gpu := range result.Hardware.GPUs {
if gpu.SerialNumber != "" {
errBySerial[gpu.SerialNumber] = gpu.ErrorDescription
}
}
if got := errBySerial["1653925025497"]; got != "Row remapping failed" {
t.Fatalf("expected GPU serial 1653925025497 error Row remapping failed, got %q", got)
}
}
func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
@@ -169,21 +203,82 @@ func TestNVIDIAParser_GPUModelFromSKU_RealArchive07900(t *testing.T) {
found := false
for _, gpu := range result.Hardware.GPUs {
if gpu.Model == "NVIDIA H200 SXM" {
if gpu.Model == "692-2G520-0280-501" && gpu.Description == "hgx h200 8 gpu 141g aircooled" {
found = true
break
}
}
if !found {
t.Fatalf("expected at least one GPU model NVIDIA H200 SXM")
t.Fatalf("expected at least one GPU with model 692-2G520-0280-501 and description hgx h200 8 gpu 141g aircooled")
}
}
func TestNVIDIAParser_ComponentCheckTimes_RealArchive07900(t *testing.T) {
archivePath := filepath.Join("../../../../example", "A514359X5A07900_logs-20260122-074208.tar")
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
t.Skip("Test archive not found, skipping test")
}
files, err := parser.ExtractArchive(archivePath)
if err != nil {
t.Fatalf("Failed to extract archive: %v", err)
}
p := &Parser{}
result, err := p.Parse(files)
if err != nil {
t.Fatalf("Failed to parse archive: %v", err)
}
if result.Hardware == nil {
t.Fatalf("expected hardware in parsed result")
}
expectedGPU := time.Date(2026, 1, 22, 9, 45, 36, 0, time.UTC)
expectedNVSwitch := time.Date(2026, 1, 22, 9, 11, 32, 0, time.UTC)
if len(result.Hardware.GPUs) == 0 {
t.Fatalf("expected GPUs in parsed result")
}
for _, gpu := range result.Hardware.GPUs {
if !gpu.StatusCheckedAt.Equal(expectedGPU) {
t.Fatalf("expected GPU %s status_checked_at %s, got %s", gpu.Slot, expectedGPU.Format(time.RFC3339), gpu.StatusCheckedAt.Format(time.RFC3339))
}
if gpu.StatusAtCollect == nil || !gpu.StatusAtCollect.At.Equal(expectedGPU) {
t.Fatalf("expected GPU %s status_at_collection.at %s", gpu.Slot, expectedGPU.Format(time.RFC3339))
}
}
nvsCount := 0
for _, dev := range result.Hardware.PCIeDevices {
slot := normalizeNVSwitchSlot(dev.Slot)
if slot == "" {
continue
}
if dev.DeviceClass != "NVSwitch" && len(slot) < len("NVSWITCH") {
continue
}
if dev.DeviceClass != "NVSwitch" && slot[:len("NVSWITCH")] != "NVSWITCH" {
continue
}
nvsCount++
if !dev.StatusCheckedAt.Equal(expectedNVSwitch) {
t.Fatalf("expected NVSwitch %s status_checked_at %s, got %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339), dev.StatusCheckedAt.Format(time.RFC3339))
}
if dev.StatusAtCollect == nil || !dev.StatusAtCollect.At.Equal(expectedNVSwitch) {
t.Fatalf("expected NVSwitch %s status_at_collection.at %s", dev.Slot, expectedNVSwitch.Format(time.RFC3339))
}
}
if nvsCount == 0 {
t.Fatalf("expected NVSwitch devices in parsed result")
}
}
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || len(s) > len(substr) &&
(s[:len(substr)] == substr || s[len(s)-len(substr):] == substr ||
findSubstring(s, substr)))
findSubstring(s, substr)))
}
func findSubstring(s, substr string) bool {