diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index 0abaa4a..b9cdae1 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -9,6 +9,9 @@ import ( "path/filepath" "sort" "strings" + + "bee/audit/internal/app" + "bee/audit/internal/schema" ) // ── Layout ──────────────────────────────────────────────────────────────────── @@ -42,6 +45,8 @@ a{color:var(--accent);text-decoration:none} /* Cards */ .card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);margin-bottom:16px;overflow:hidden} .card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px;display:flex;align-items:center;gap:8px} +.card-head-actions{justify-content:space-between} +.card-head-buttons{display:flex;align-items:center;gap:8px;margin-left:auto;flex-wrap:wrap} .card-body{padding:16px} /* Buttons */ .btn{display:inline-flex;align-items:center;gap:6px;padding:8px 16px;border-radius:4px;font-size:13px;font-weight:700;cursor:pointer;border:none;transition:background .1s;font-family:inherit} @@ -72,7 +77,7 @@ tbody tr:hover td{background:rgba(0,0,0,.03)} /* Grid */ .grid2{display:grid;grid-template-columns:1fr 1fr;gap:16px} .grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px} -@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}} +@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}.card-head-actions{align-items:flex-start;flex-direction:column}.card-head-buttons{margin-left:0}} /* iframe viewer */ .viewer-frame{width:100%;height:calc(100vh - 160px);border:0;border-radius:4px;background:var(--surface-2)} /* Alerts */ @@ -136,7 +141,7 @@ func renderPage(page string, opts HandlerOptions) string { case "validate": pageID = "validate" title = "Validate" - body = renderValidate() + body = renderValidate(opts) case "burn": pageID = "burn" title = "Burn" @@ -161,7 +166,7 @@ func renderPage(page string, opts HandlerOptions) string { case "tests": pageID = "validate" title = "Acceptance Tests" - body = renderValidate() + body = renderValidate(opts) case "burn-in": pageID = "burn" title = "Burn-in Tests" @@ -295,7 +300,7 @@ func renderAudit() string { func renderHardwareSummaryCard(opts HandlerOptions) string { data, err := loadSnapshot(opts.AuditPath) if err != nil { - return `
| Check | Status | Source | Issue |
|---|---|---|---|
| ` + html.EscapeString(row.Title) + ` | ` + runtimeStatusBadge(row.Status) + ` | ` + html.EscapeString(row.Source) + ` | ` + rowIssueHTML(row.Issue) + ` |
Tasks continue in the background — view progress in Tasks.
Runs validate modules sequentially with the selected cycle count. NVIDIA dcgmi diag uses the selected diag level from this profile.
Official DCGM `+"targeted_stress"+` stays in Validate as a controlled diagnostic load, not a max-burn recipe.
`) + - renderSATCard("memory", "Memory", "") + - renderSATCard("storage", "Storage", "") + - renderSATCard("cpu", "CPU", ``) + - renderSATCard("amd", "AMD GPU", `Additional AMD memory diagnostics: RVS MEM for integrity and BABEL + rocm-bandwidth-test for memory/interconnect bandwidth.
`) + +` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody( + inv.CPU, + `Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`, + `lscpu, sensors, stress-ng`,
+ `Duration is taken from Validate Profile diag level: Level 1 = 60s, Level 2 = 5m, Level 3 = 1h, Level 4 = 1h.`,
+ )) +
+ renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
+ inv.Memory,
+ `Runs a short RAM validation pass and records memory state around the test.`,
+ `free, memtester`,
+ `No extra settings.`,
+ )) +
+ renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
+ inv.Storage,
+ `Scans all storage devices and runs the matching health or self-test path for each device type.`,
+ `lsblk; NVMe: nvme; SATA/SAS: smartctl`,
+ `No extra settings.`,
+ )) +
+ `nvidia-smi, dmidecode, dcgmi diag`,
+ `Diag level is taken from Validate Profile.`,
+ )) +
+ renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runSAT('nvidia-targeted-stress')", "", renderValidateCardBody(
+ inv.NVIDIA,
+ `Runs a controlled NVIDIA DCGM load in Validate to check stability under moderate stress.`,
+ `dcgmi diag targeted_stress`,
+ `Uses the fixed DCGM targeted stress recipe.`,
+ )) +
+ `rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`,
+ ``,
+ )) +
`dcgmi diag targeted_stress`,
} {
if !strings.Contains(body, needle) {
t.Fatalf("validate page missing %q: %s", needle, body)
@@ -845,3 +846,98 @@ func TestRuntimeHealthEndpointReturnsJSON(t *testing.T) {
t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body)
}
}
+
+func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
+ dir := t.TempDir()
+ path := filepath.Join(dir, "audit.json")
+ exportDir := filepath.Join(dir, "export")
+ if err := os.MkdirAll(exportDir, 0755); err != nil {
+ t.Fatal(err)
+ }
+ if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z","hardware":{"board":{"serial_number":"SERIAL-1"}}}`), 0644); err != nil {
+ t.Fatal(err)
+ }
+ health := `{
+ "status":"PARTIAL",
+ "checked_at":"2026-03-16T10:00:00Z",
+ "export_dir":"/tmp/export",
+ "driver_ready":true,
+ "cuda_ready":false,
+ "network_status":"PARTIAL",
+ "issues":[
+ {"code":"dhcp_partial","description":"At least one interface did not obtain IPv4 connectivity."},
+ {"code":"cuda_runtime_not_ready","description":"CUDA runtime is not ready for GPU SAT."}
+ ],
+ "tools":[
+ {"name":"dmidecode","ok":true},
+ {"name":"nvidia-smi","ok":false}
+ ],
+ "services":[
+ {"name":"bee-web","status":"active"},
+ {"name":"bee-nvidia","status":"inactive"}
+ ]
+}`
+ if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
+ t.Fatal(err)
+ }
+ componentStatus := `[
+ {
+ "component_key":"cpu:all",
+ "status":"Warning",
+ "error_summary":"cpu SAT: FAILED",
+ "history":[{"at":"2026-03-16T10:00:00Z","status":"Warning","source":"sat:cpu","detail":"cpu SAT: FAILED"}]
+ },
+ {
+ "component_key":"memory:all",
+ "status":"OK",
+ "history":[{"at":"2026-03-16T10:01:00Z","status":"OK","source":"sat:memory","detail":"memory SAT: OK"}]
+ },
+ {
+ "component_key":"storage:nvme0n1",
+ "status":"Critical",
+ "error_summary":"storage SAT: FAILED",
+ "history":[{"at":"2026-03-16T10:02:00Z","status":"Critical","source":"sat:storage","detail":"storage SAT: FAILED"}]
+ },
+ {
+ "component_key":"pcie:gpu:nvidia",
+ "status":"Warning",
+ "error_summary":"nvidia SAT: FAILED",
+ "history":[{"at":"2026-03-16T10:03:00Z","status":"Warning","source":"sat:nvidia","detail":"nvidia SAT: FAILED"}]
+ }
+]`
+ if err := os.WriteFile(filepath.Join(exportDir, "component-status.json"), []byte(componentStatus), 0644); err != nil {
+ t.Fatal(err)
+ }
+
+ handler := NewHandler(HandlerOptions{AuditPath: path, ExportDir: exportDir})
+ rec := httptest.NewRecorder()
+ handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
+ if rec.Code != http.StatusOK {
+ t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+ }
+ body := rec.Body.String()
+ for _, needle := range []string{
+ `Runtime Health`,
+ `