110 lines
3.5 KiB
Go
110 lines
3.5 KiB
Go
package app
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"bee/audit/internal/schema"
|
|
)
|
|
|
|
func TestApplyLatestSATStatusesMarksStorageByDevice(t *testing.T) {
|
|
baseDir := t.TempDir()
|
|
runDir := filepath.Join(baseDir, "storage-20260325-161151")
|
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
raw := "run_at_utc=2026-03-25T16:11:51Z\nnvme0n1_nvme_smart_log_status=OK\nsda_smartctl_health_status=FAILED\noverall_status=FAILED\n"
|
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(raw), 0644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
nvme := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/nvme0n1"}}
|
|
usb := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/sda"}}
|
|
snap := schema.HardwareSnapshot{Storage: []schema.HardwareStorage{nvme, usb}}
|
|
|
|
applyLatestSATStatuses(&snap, baseDir, nil)
|
|
|
|
if snap.Storage[0].Status == nil || *snap.Storage[0].Status != "OK" {
|
|
t.Fatalf("nvme status=%v want OK", snap.Storage[0].Status)
|
|
}
|
|
if snap.Storage[1].Status == nil || *snap.Storage[1].Status != "Critical" {
|
|
t.Fatalf("sda status=%v want Critical", snap.Storage[1].Status)
|
|
}
|
|
}
|
|
|
|
func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
|
|
baseDir := t.TempDir()
|
|
runDir := filepath.Join(baseDir, "gpu-amd-20260325-161436")
|
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
raw := "run_at_utc=2026-03-25T16:14:36Z\noverall_status=FAILED\n"
|
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(raw), 0644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
class := "DisplayController"
|
|
manufacturer := "Advanced Micro Devices, Inc. [AMD/ATI]"
|
|
snap := schema.HardwareSnapshot{
|
|
PCIeDevices: []schema.HardwarePCIeDevice{{
|
|
DeviceClass: &class,
|
|
Manufacturer: &manufacturer,
|
|
}},
|
|
}
|
|
|
|
applyLatestSATStatuses(&snap, baseDir, nil)
|
|
|
|
if snap.PCIeDevices[0].Status == nil || *snap.PCIeDevices[0].Status != "Critical" {
|
|
t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
|
|
}
|
|
}
|
|
|
|
func TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile(t *testing.T) {
|
|
baseDir := t.TempDir()
|
|
runDir := filepath.Join(baseDir, "gpu-nvidia-20260407-162123")
|
|
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte("run_at_utc=2026-04-07T16:21:23Z\noverall_status=FAILED\n"), 0644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(runDir, "gpu-1-status.txt"), []byte("gpu_index=1\ngpu_name=NVIDIA H100 PCIe\nrun_status=FAILED\nreason=GPU requires reset\n"), 0644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
class := "VideoController"
|
|
manufacturer := "NVIDIA Corporation"
|
|
bdf0 := "0000:4b:00.0"
|
|
bdf1 := "0000:4f:00.0"
|
|
snap := schema.HardwareSnapshot{
|
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
|
{
|
|
DeviceClass: &class,
|
|
Manufacturer: &manufacturer,
|
|
BDF: &bdf0,
|
|
Telemetry: map[string]any{"nvidia_gpu_index": 0},
|
|
},
|
|
{
|
|
DeviceClass: &class,
|
|
Manufacturer: &manufacturer,
|
|
BDF: &bdf1,
|
|
Telemetry: map[string]any{"nvidia_gpu_index": 1},
|
|
},
|
|
},
|
|
}
|
|
|
|
applyLatestSATStatuses(&snap, baseDir, nil)
|
|
|
|
if snap.PCIeDevices[1].Status == nil || *snap.PCIeDevices[1].Status != "Critical" {
|
|
t.Fatalf("gpu1 status=%v want Critical", snap.PCIeDevices[1].Status)
|
|
}
|
|
if snap.PCIeDevices[1].ErrorDescription == nil || *snap.PCIeDevices[1].ErrorDescription != "GPU requires reset failed" {
|
|
got := "<nil>"
|
|
if snap.PCIeDevices[1].ErrorDescription != nil {
|
|
got = *snap.PCIeDevices[1].ErrorDescription
|
|
}
|
|
t.Fatalf("gpu1 error=%q want per-gpu reason", got)
|
|
}
|
|
}
|