diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index 0abaa4a..b9cdae1 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -9,6 +9,9 @@ import ( "path/filepath" "sort" "strings" + + "bee/audit/internal/app" + "bee/audit/internal/schema" ) // ── Layout ──────────────────────────────────────────────────────────────────── @@ -42,6 +45,8 @@ a{color:var(--accent);text-decoration:none} /* Cards */ .card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);margin-bottom:16px;overflow:hidden} .card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px;display:flex;align-items:center;gap:8px} +.card-head-actions{justify-content:space-between} +.card-head-buttons{display:flex;align-items:center;gap:8px;margin-left:auto;flex-wrap:wrap} .card-body{padding:16px} /* Buttons */ .btn{display:inline-flex;align-items:center;gap:6px;padding:8px 16px;border-radius:4px;font-size:13px;font-weight:700;cursor:pointer;border:none;transition:background .1s;font-family:inherit} @@ -72,7 +77,7 @@ tbody tr:hover td{background:rgba(0,0,0,.03)} /* Grid */ .grid2{display:grid;grid-template-columns:1fr 1fr;gap:16px} .grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px} -@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}} +@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}.card-head-actions{align-items:flex-start;flex-direction:column}.card-head-buttons{margin-left:0}} /* iframe viewer */ .viewer-frame{width:100%;height:calc(100vh - 160px);border:0;border-radius:4px;background:var(--surface-2)} /* Alerts */ @@ -136,7 +141,7 @@ func renderPage(page string, opts HandlerOptions) string { case "validate": pageID = "validate" title = "Validate" - body = renderValidate() + body = renderValidate(opts) case "burn": pageID = "burn" title = "Burn" @@ -161,7 +166,7 @@ func renderPage(page string, opts HandlerOptions) string { case "tests": pageID = "validate" title = "Acceptance Tests" - body = renderValidate() + body = renderValidate(opts) case "burn-in": pageID = "burn" title = "Burn-in Tests" @@ -295,7 +300,7 @@ func renderAudit() string { func renderHardwareSummaryCard(opts HandlerOptions) string { data, err := loadSnapshot(opts.AuditPath) if err != nil { - return `
Hardware Summary
` + return `
Hardware Summary
` } // Parse just enough fields for the summary banner var snap struct { @@ -434,11 +439,14 @@ func renderHealthCard(opts HandlerOptions) string { if err != nil { return `
Runtime Health
No data
` } - var health map[string]any + var health schema.RuntimeHealth if err := json.Unmarshal(data, &health); err != nil { return `
Runtime Health
Parse error
` } - status := fmt.Sprintf("%v", health["status"]) + status := strings.TrimSpace(health.Status) + if status == "" { + status = "UNKNOWN" + } badge := "badge-ok" if status == "PARTIAL" { badge = "badge-warn" @@ -448,19 +456,312 @@ func renderHealthCard(opts HandlerOptions) string { var b strings.Builder b.WriteString(`
Runtime Health
`) b.WriteString(fmt.Sprintf(`
%s
`, badge, html.EscapeString(status))) - if issues, ok := health["issues"].([]any); ok && len(issues) > 0 { - b.WriteString(`
Issues:
`) - for _, issue := range issues { - if m, ok := issue.(map[string]any); ok { - b.WriteString(html.EscapeString(fmt.Sprintf("%v: %v", m["code"], m["message"])) + "
") - } - } - b.WriteString(`
`) + if checkedAt := strings.TrimSpace(health.CheckedAt); checkedAt != "" { + b.WriteString(`
Checked at: ` + html.EscapeString(checkedAt) + `
`) } + rows := []runtimeHealthRow{ + buildRuntimeExportRow(health), + buildRuntimeNetworkRow(health), + buildRuntimeDriverRow(health), + buildRuntimeAccelerationRow(health), + buildRuntimeToolsRow(health), + buildRuntimeServicesRow(health), + } + rows = append(rows, buildHardwareComponentRows(opts.ExportDir)...) + b.WriteString(``) + for _, row := range rows { + b.WriteString(``) + } + b.WriteString(`
CheckStatusSourceIssue
` + html.EscapeString(row.Title) + `` + runtimeStatusBadge(row.Status) + `` + html.EscapeString(row.Source) + `` + rowIssueHTML(row.Issue) + `
`) b.WriteString(`
`) return b.String() } +type runtimeHealthRow struct { + Title string + Status string + Source string + Issue string +} + +func buildRuntimeExportRow(health schema.RuntimeHealth) runtimeHealthRow { + issue := runtimeIssueDescriptions(health.Issues, "export_dir_unavailable") + status := "UNKNOWN" + switch { + case issue != "": + status = "FAILED" + case strings.TrimSpace(health.ExportDir) != "": + status = "OK" + } + source := "os.MkdirAll" + if dir := strings.TrimSpace(health.ExportDir); dir != "" { + source += " " + dir + } + return runtimeHealthRow{Title: "Export Directory", Status: status, Source: source, Issue: issue} +} + +func buildRuntimeNetworkRow(health schema.RuntimeHealth) runtimeHealthRow { + status := strings.TrimSpace(health.NetworkStatus) + if status == "" { + status = "UNKNOWN" + } + issue := runtimeIssueDescriptions(health.Issues, "dhcp_partial", "dhcp_failed") + return runtimeHealthRow{Title: "Network", Status: status, Source: "ListInterfaces / DHCP", Issue: issue} +} + +func buildRuntimeDriverRow(health schema.RuntimeHealth) runtimeHealthRow { + issue := runtimeIssueDescriptions(health.Issues, "nvidia_kernel_module_missing", "nvidia_modeset_failed", "amdgpu_kernel_module_missing") + status := "UNKNOWN" + switch { + case health.DriverReady && issue == "": + status = "OK" + case health.DriverReady: + status = "PARTIAL" + case issue != "": + status = "FAILED" + } + return runtimeHealthRow{Title: "NVIDIA/AMD Driver", Status: status, Source: "lsmod / vendor probe", Issue: issue} +} + +func buildRuntimeAccelerationRow(health schema.RuntimeHealth) runtimeHealthRow { + issue := runtimeIssueDescriptions(health.Issues, "cuda_runtime_not_ready", "rocm_smi_unavailable") + status := "UNKNOWN" + switch { + case health.CUDAReady && issue == "": + status = "OK" + case health.CUDAReady: + status = "PARTIAL" + case issue != "": + status = "FAILED" + } + return runtimeHealthRow{Title: "CUDA / ROCm", Status: status, Source: "bee-gpu-burn / rocm-smi", Issue: issue} +} + +func buildRuntimeToolsRow(health schema.RuntimeHealth) runtimeHealthRow { + if len(health.Tools) == 0 { + return runtimeHealthRow{Title: "Required Utilities", Status: "UNKNOWN", Source: "CheckTools", Issue: "No tool status data."} + } + missing := make([]string, 0) + for _, tool := range health.Tools { + if !tool.OK { + missing = append(missing, tool.Name) + } + } + status := "OK" + issue := "" + if len(missing) > 0 { + status = "PARTIAL" + issue = "Missing: " + strings.Join(missing, ", ") + } + return runtimeHealthRow{Title: "Required Utilities", Status: status, Source: "CheckTools", Issue: issue} +} + +func buildRuntimeServicesRow(health schema.RuntimeHealth) runtimeHealthRow { + if len(health.Services) == 0 { + return runtimeHealthRow{Title: "Bee Services", Status: "UNKNOWN", Source: "systemctl is-active", Issue: "No service status data."} + } + nonActive := make([]string, 0) + for _, svc := range health.Services { + state := strings.TrimSpace(strings.ToLower(svc.Status)) + if state != "active" { + nonActive = append(nonActive, svc.Name+"="+svc.Status) + } + } + status := "OK" + issue := "" + if len(nonActive) > 0 { + status = "PARTIAL" + issue = strings.Join(nonActive, ", ") + } + return runtimeHealthRow{Title: "Bee Services", Status: status, Source: "ServiceState", Issue: issue} +} + +func buildHardwareComponentRows(exportDir string) []runtimeHealthRow { + path := filepath.Join(exportDir, "component-status.json") + db, err := app.OpenComponentStatusDB(path) + if err != nil { + return []runtimeHealthRow{ + {Title: "CPU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, + {Title: "Memory Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, + {Title: "Storage Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, + {Title: "GPU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "Component status DB not available."}, + {Title: "PSU Component Health", Status: "UNKNOWN", Source: "component-status.json", Issue: "No PSU component checks recorded."}, + } + } + records := db.All() + return []runtimeHealthRow{ + aggregateComponentStatus("CPU", records, []string{"cpu:all"}, nil), + aggregateComponentStatus("Memory", records, []string{"memory:all"}, []string{"memory:"}), + aggregateComponentStatus("Storage", records, []string{"storage:all"}, []string{"storage:"}), + aggregateComponentStatus("GPU", records, nil, []string{"pcie:gpu:"}), + aggregateComponentStatus("PSU", records, nil, []string{"psu:"}), + } +} + +func aggregateComponentStatus(title string, records []app.ComponentStatusRecord, exact []string, prefixes []string) runtimeHealthRow { + matched := make([]app.ComponentStatusRecord, 0) + for _, rec := range records { + key := strings.TrimSpace(rec.ComponentKey) + if key == "" { + continue + } + if containsExactKey(key, exact) || hasAnyPrefix(key, prefixes) { + matched = append(matched, rec) + } + } + if len(matched) == 0 { + return runtimeHealthRow{Title: title, Status: "UNKNOWN", Source: "component-status.json", Issue: "No component status data."} + } + + maxSev := -1 + for _, rec := range matched { + if sev := runtimeComponentSeverity(rec.Status); sev > maxSev { + maxSev = sev + } + } + status := "UNKNOWN" + switch maxSev { + case 3: + status = "CRITICAL" + case 2: + status = "WARNING" + case 1: + status = "OK" + } + + sources := make([]string, 0) + sourceSeen := map[string]struct{}{} + issues := make([]string, 0) + issueSeen := map[string]struct{}{} + for _, rec := range matched { + if runtimeComponentSeverity(rec.Status) != maxSev { + continue + } + source := latestComponentSource(rec) + if source == "" { + source = "component-status.json" + } + if _, ok := sourceSeen[source]; !ok { + sourceSeen[source] = struct{}{} + sources = append(sources, source) + } + issue := strings.TrimSpace(rec.ErrorSummary) + if issue == "" { + issue = latestComponentDetail(rec) + } + if issue == "" { + continue + } + if _, ok := issueSeen[issue]; ok { + continue + } + issueSeen[issue] = struct{}{} + issues = append(issues, issue) + } + if len(sources) == 0 { + sources = append(sources, "component-status.json") + } + issue := strings.Join(issues, "; ") + if issue == "" { + issue = "—" + } + return runtimeHealthRow{ + Title: title, + Status: status, + Source: strings.Join(sources, ", "), + Issue: issue, + } +} + +func containsExactKey(key string, exact []string) bool { + for _, candidate := range exact { + if key == candidate { + return true + } + } + return false +} + +func hasAnyPrefix(key string, prefixes []string) bool { + for _, prefix := range prefixes { + if strings.HasPrefix(key, prefix) { + return true + } + } + return false +} + +func runtimeComponentSeverity(status string) int { + switch strings.TrimSpace(strings.ToLower(status)) { + case "critical": + return 3 + case "warning": + return 2 + case "ok": + return 1 + default: + return 0 + } +} + +func latestComponentSource(rec app.ComponentStatusRecord) string { + if len(rec.History) == 0 { + return "" + } + return strings.TrimSpace(rec.History[len(rec.History)-1].Source) +} + +func latestComponentDetail(rec app.ComponentStatusRecord) string { + if len(rec.History) == 0 { + return "" + } + return strings.TrimSpace(rec.History[len(rec.History)-1].Detail) +} + +func runtimeIssueDescriptions(issues []schema.RuntimeIssue, codes ...string) string { + if len(issues) == 0 || len(codes) == 0 { + return "" + } + allowed := make(map[string]struct{}, len(codes)) + for _, code := range codes { + allowed[code] = struct{}{} + } + messages := make([]string, 0) + for _, issue := range issues { + if _, ok := allowed[issue.Code]; !ok { + continue + } + desc := strings.TrimSpace(issue.Description) + if desc == "" { + desc = issue.Code + } + messages = append(messages, desc) + } + return strings.Join(messages, "; ") +} + +func runtimeStatusBadge(status string) string { + status = strings.ToUpper(strings.TrimSpace(status)) + badge := "badge-unknown" + switch status { + case "OK": + badge = "badge-ok" + case "PARTIAL", "WARNING", "WARN": + badge = "badge-warn" + case "FAIL", "FAILED", "CRITICAL": + badge = "badge-err" + } + return `` + html.EscapeString(status) + `` +} + +func rowIssueHTML(issue string) string { + issue = strings.TrimSpace(issue) + if issue == "" { + return `` + } + return html.EscapeString(issue) +} + // ── Metrics ─────────────────────────────────────────────────────────────────── func renderMetrics() string { @@ -675,50 +976,137 @@ setInterval(loadMetricsLayout, 5000); // ── Validate (Acceptance Tests) ─────────────────────────────────────────────── -func renderValidate() string { +type validateInventory struct { + CPU string + Memory string + Storage string + NVIDIA string + AMD string +} + +func renderValidate(opts HandlerOptions) string { + inv := loadValidateInventory(opts) return `
Non-destructive: Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.

Tasks continue in the background — view progress in Tasks.

-
Run All Tests
-
-
- +
Validate Profile
+
+
+
+
+
+
+

Runs validate modules sequentially with the selected cycle count. NVIDIA dcgmi diag uses the selected diag level from this profile.

+ +
+
+
+
-` + renderSATCard("nvidia", "NVIDIA GPU", `

Official DCGM `+"targeted_stress"+` stays in Validate as a controlled diagnostic load, not a max-burn recipe.

`) + - renderSATCard("memory", "Memory", "") + - renderSATCard("storage", "Storage", "") + - renderSATCard("cpu", "CPU", `
`) + - renderSATCard("amd", "AMD GPU", `
- - -
-

Additional AMD memory diagnostics: RVS MEM for integrity and BABEL + rocm-bandwidth-test for memory/interconnect bandwidth.

`) + +` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody( + inv.CPU, + `Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`, + `lscpu, sensors, stress-ng`, + `Duration is taken from Validate Profile diag level: Level 1 = 60s, Level 2 = 5m, Level 3 = 1h, Level 4 = 1h.`, + )) + + renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody( + inv.Memory, + `Runs a short RAM validation pass and records memory state around the test.`, + `free, memtester`, + `No extra settings.`, + )) + + renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody( + inv.Storage, + `Scans all storage devices and runs the matching health or self-test path for each device type.`, + `lsblk; NVMe: nvme; SATA/SAS: smartctl`, + `No extra settings.`, + )) + + `
+
+
+` + renderSATCard("nvidia", "NVIDIA GPU", "runSAT('nvidia')", "", renderValidateCardBody( + inv.NVIDIA, + `Runs NVIDIA diagnostics and board inventory checks.`, + `nvidia-smi, dmidecode, dcgmi diag`, + `Diag level is taken from Validate Profile.`, + )) + + renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runSAT('nvidia-targeted-stress')", "", renderValidateCardBody( + inv.NVIDIA, + `Runs a controlled NVIDIA DCGM load in Validate to check stability under moderate stress.`, + `dcgmi diag targeted_stress`, + `Uses the fixed DCGM targeted stress recipe.`, + )) + + `
+
+` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody( + inv.AMD, + `Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`, + `GPU Validate: rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`, + `
`, + )) + `
+ ` } -func renderSATCard(id, label, extra string) string { - return fmt.Sprintf(`
%s
%s
`, - label, extra, id, id) +func loadValidateInventory(opts HandlerOptions) validateInventory { + unknown := "Audit snapshot not loaded." + out := validateInventory{ + CPU: unknown, + Memory: unknown, + Storage: unknown, + NVIDIA: unknown, + AMD: unknown, + } + data, err := loadSnapshot(opts.AuditPath) + if err != nil { + return out + } + var snap schema.HardwareIngestRequest + if err := json.Unmarshal(data, &snap); err != nil { + return out + } + + cpuCounts := map[string]int{} + cpuTotal := 0 + for _, cpu := range snap.Hardware.CPUs { + if cpu.Present != nil && !*cpu.Present { + continue + } + cpuTotal++ + addValidateModel(cpuCounts, validateFirstNonEmpty(validateTrimPtr(cpu.Model), validateTrimPtr(cpu.Manufacturer), "unknown")) + } + + memCounts := map[string]int{} + memTotal := 0 + for _, dimm := range snap.Hardware.Memory { + if dimm.Present != nil && !*dimm.Present { + continue + } + memTotal++ + addValidateModel(memCounts, validateFirstNonEmpty(validateTrimPtr(dimm.PartNumber), validateTrimPtr(dimm.Type), validateTrimPtr(dimm.Manufacturer), "unknown")) + } + + storageCounts := map[string]int{} + storageTotal := 0 + for _, dev := range snap.Hardware.Storage { + if dev.Present != nil && !*dev.Present { + continue + } + storageTotal++ + addValidateModel(storageCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) + } + + nvidiaCounts := map[string]int{} + nvidiaTotal := 0 + amdCounts := map[string]int{} + amdTotal := 0 + for _, dev := range snap.Hardware.PCIeDevices { + if dev.Present != nil && !*dev.Present { + continue + } + if validateIsVendorGPU(dev, "nvidia") { + nvidiaTotal++ + addValidateModel(nvidiaCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) + } + if validateIsVendorGPU(dev, "amd") { + amdTotal++ + addValidateModel(amdCounts, validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")) + } + } + + out.CPU = formatValidateDeviceSummary(cpuTotal, cpuCounts, "CPU") + out.Memory = formatValidateDeviceSummary(memTotal, memCounts, "module") + out.Storage = formatValidateDeviceSummary(storageTotal, storageCounts, "device") + out.NVIDIA = formatValidateDeviceSummary(nvidiaTotal, nvidiaCounts, "GPU") + out.AMD = formatValidateDeviceSummary(amdTotal, amdCounts, "GPU") + return out +} + +func renderValidateCardBody(devices, description, commands, settings string) string { + return `
` + devices + `
` + + `
` + description + `
` + + `
` + commands + `
` + + `
` + settings + `
` +} + +func formatValidateDeviceSummary(total int, models map[string]int, unit string) string { + if total == 0 { + return "0 " + unit + "s detected." + } + keys := make([]string, 0, len(models)) + for key := range models { + keys = append(keys, key) + } + sort.Strings(keys) + parts := make([]string, 0, len(keys)) + for _, key := range keys { + parts = append(parts, fmt.Sprintf("%d x %s", models[key], html.EscapeString(key))) + } + label := unit + if total != 1 { + label += "s" + } + return fmt.Sprintf("%d %s: %s", total, label, strings.Join(parts, ", ")) +} + +func addValidateModel(counts map[string]int, name string) { + name = strings.TrimSpace(name) + if name == "" { + name = "unknown" + } + counts[name]++ +} + +func validateTrimPtr(value *string) string { + if value == nil { + return "" + } + return strings.TrimSpace(*value) +} + +func validateFirstNonEmpty(values ...string) string { + for _, value := range values { + value = strings.TrimSpace(value) + if value != "" { + return value + } + } + return "" +} + +func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool { + model := strings.ToLower(validateTrimPtr(dev.Model)) + manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer)) + class := strings.ToLower(validateTrimPtr(dev.DeviceClass)) + if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") { + return false + } + switch vendor { + case "nvidia": + return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia") + case "amd": + isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller" + isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") || strings.Contains(manufacturer, "amd") || strings.Contains(manufacturer, "ati") + isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd") + return isGPUClass && (isAMDVendor || isAMDModel) + default: + return false + } +} + +func renderSATCard(id, label, runAction, headerActions, body string) string { + actions := `` + if strings.TrimSpace(headerActions) != "" { + actions += headerActions + } + return fmt.Sprintf(`
%s
%s
%s
`, + label, actions, body) } // ── Benchmark ───────────────────────────────────────────────────────────────── diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go index 95eecb9..b534b4c 100644 --- a/audit/internal/webui/server_test.go +++ b/audit/internal/webui/server_test.go @@ -543,7 +543,7 @@ func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) { t.Fatalf("status=%d", rec.Code) } body := rec.Body.String() - if !strings.Contains(body, `Run Audit`) { + if !strings.Contains(body, `onclick="auditModalRun()">Run audit`) { t.Fatalf("dashboard missing run audit button: %s", body) } if strings.Contains(body, `No audit data`) { @@ -650,7 +650,7 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) { } } -func TestValidatePageRendersNvidiaTargetedStressAction(t *testing.T) { +func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) { handler := NewHandler(HandlerOptions{}) rec := httptest.NewRecorder() handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil)) @@ -659,9 +659,10 @@ func TestValidatePageRendersNvidiaTargetedStressAction(t *testing.T) { } body := rec.Body.String() for _, needle := range []string{ - `Targeted Stress`, + `NVIDIA GPU Targeted Stress`, `nvidia-targeted-stress`, - `Official DCGM`, + `controlled NVIDIA DCGM load`, + `dcgmi diag targeted_stress`, } { if !strings.Contains(body, needle) { t.Fatalf("validate page missing %q: %s", needle, body) @@ -845,3 +846,98 @@ func TestRuntimeHealthEndpointReturnsJSON(t *testing.T) { t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body) } } + +func TestDashboardRendersRuntimeHealthTable(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "audit.json") + exportDir := filepath.Join(dir, "export") + if err := os.MkdirAll(exportDir, 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z","hardware":{"board":{"serial_number":"SERIAL-1"}}}`), 0644); err != nil { + t.Fatal(err) + } + health := `{ + "status":"PARTIAL", + "checked_at":"2026-03-16T10:00:00Z", + "export_dir":"/tmp/export", + "driver_ready":true, + "cuda_ready":false, + "network_status":"PARTIAL", + "issues":[ + {"code":"dhcp_partial","description":"At least one interface did not obtain IPv4 connectivity."}, + {"code":"cuda_runtime_not_ready","description":"CUDA runtime is not ready for GPU SAT."} + ], + "tools":[ + {"name":"dmidecode","ok":true}, + {"name":"nvidia-smi","ok":false} + ], + "services":[ + {"name":"bee-web","status":"active"}, + {"name":"bee-nvidia","status":"inactive"} + ] +}` + if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil { + t.Fatal(err) + } + componentStatus := `[ + { + "component_key":"cpu:all", + "status":"Warning", + "error_summary":"cpu SAT: FAILED", + "history":[{"at":"2026-03-16T10:00:00Z","status":"Warning","source":"sat:cpu","detail":"cpu SAT: FAILED"}] + }, + { + "component_key":"memory:all", + "status":"OK", + "history":[{"at":"2026-03-16T10:01:00Z","status":"OK","source":"sat:memory","detail":"memory SAT: OK"}] + }, + { + "component_key":"storage:nvme0n1", + "status":"Critical", + "error_summary":"storage SAT: FAILED", + "history":[{"at":"2026-03-16T10:02:00Z","status":"Critical","source":"sat:storage","detail":"storage SAT: FAILED"}] + }, + { + "component_key":"pcie:gpu:nvidia", + "status":"Warning", + "error_summary":"nvidia SAT: FAILED", + "history":[{"at":"2026-03-16T10:03:00Z","status":"Warning","source":"sat:nvidia","detail":"nvidia SAT: FAILED"}] + } +]` + if err := os.WriteFile(filepath.Join(exportDir, "component-status.json"), []byte(componentStatus), 0644); err != nil { + t.Fatal(err) + } + + handler := NewHandler(HandlerOptions{AuditPath: path, ExportDir: exportDir}) + rec := httptest.NewRecorder() + handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil)) + if rec.Code != http.StatusOK { + t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String()) + } + body := rec.Body.String() + for _, needle := range []string{ + `Runtime Health`, + `CheckStatusSourceIssue`, + `Export Directory`, + `Network`, + `NVIDIA/AMD Driver`, + `CUDA / ROCm`, + `Required Utilities`, + `Bee Services`, + `CPU`, + `Memory`, + `Storage`, + `GPU`, + `CUDA runtime is not ready for GPU SAT.`, + `Missing: nvidia-smi`, + `bee-nvidia=inactive`, + `cpu SAT: FAILED`, + `storage SAT: FAILED`, + `sat:nvidia`, + } { + if !strings.Contains(body, needle) { + t.Fatalf("dashboard missing %q: %s", needle, body) + } + } +}