diff --git a/audit/internal/webui/layout.go b/audit/internal/webui/layout.go index 2aaae80..31e09f7 100644 --- a/audit/internal/webui/layout.go +++ b/audit/internal/webui/layout.go @@ -68,6 +68,11 @@ tbody tr:hover td{background:rgba(0,0,0,.03)} .chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b} .chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)} .chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)} +/* Tasks nav badge */ +.tasks-nav-btn{display:flex;justify-content:space-between;align-items:center;padding:10px 16px;color:rgba(255,255,255,.55);font-size:12px;text-decoration:none;border-top:1px solid rgba(255,255,255,.12);margin-top:auto;transition:color .15s} +.tasks-nav-btn:hover{color:#fff} +.tasks-nav-count{background:var(--accent);color:#fff;border-radius:10px;padding:1px 7px;font-size:11px;font-weight:700;display:none} +.tasks-nav-count.active{display:inline} /* Output terminal */ .terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text} .terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1} @@ -93,14 +98,15 @@ tbody tr:hover td{background:rgba(0,0,0,.03)} } func layoutNav(active string, buildLabel string) string { - items := []struct{ id, label, href, onclick string }{ - {"dashboard", "Dashboard", "/", ""}, - {"audit", "Audit", "/audit", ""}, - {"validate", "Validate", "/validate", ""}, - {"burn", "Burn", "/burn", ""}, - {"benchmark", "Benchmark", "/benchmark", ""}, - {"tasks", "Tasks", "/tasks", ""}, - {"tools", "Tools", "/tools", ""}, + items := []struct{ id, label, href string }{ + {"dashboard", "Dashboard", "/"}, + {"audit", "1. Audit", "/audit"}, + {"check", "2. Check", "/check"}, + {"load", "3. Load", "/load"}, + {"speed", "4. Speed", "/speed"}, + {"endurance", "5. Endurance", "/endurance"}, + {"tools", "6. Tools", "/tools"}, + {"settings", "7. Settings", "/settings"}, } var b strings.Builder b.WriteString(``) return b.String() } diff --git a/audit/internal/webui/page_benchmark.go b/audit/internal/webui/page_benchmark.go index f8b876d..0a7dc71 100644 --- a/audit/internal/webui/page_benchmark.go +++ b/audit/internal/webui/page_benchmark.go @@ -611,3 +611,20 @@ func renderPowerBenchmarkResultsCard(exportDir string) string { b.WriteString(``) return b.String() } + +// renderSpeed renders the Speed page (step 4): performance benchmarks. +// Uses the same benchmark infrastructure; defaults to Standard profile (throughput/bandwidth). +// For long-duration stability/overnight runs, see Endurance (step 5). +func renderSpeed(opts HandlerOptions) string { + base := renderBenchmark(opts) + return `
targeted_stress, targeted_power, pulse_test) and NCCL/NVBandwidth are on the 2. Check page. For overnight endurance runs, see 5. Endurance.Tasks continue in the background — view progress in Tasks.
Continuous hardware monitoring that writes a rolling log of sensor readings to the export directory. Useful for capturing thermal or power anomalies during long runs.
+Reset NVIDIA GPU driver state. Use when nvidia-smi reports errors or GPUs appear stuck after a failed test.
| Version | ` + html.EscapeString(version) + ` |
| Title | ` + html.EscapeString(opts.Title) + ` |
lscpu, sensors, stress-ng`,
+ validateFmtDur(platform.SATEstimatedCPUValidateSec)+` (stress-ng 60 s).`,
+ )) +
+ renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
+ inv.Memory,
+ `Runs a RAM validation pass and records memory state around the test.`,
+ `free, memtester`,
+ validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` (256 MB × 1 pass).`,
+ )) +
+ renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
+ inv.Storage,
+ `Scans all storage devices and runs the matching health or self-test path for each.`,
+ `lsblk; NVMe: nvme; SATA/SAS: smartctl`,
+ `Seconds (NVMe: instant device query; SATA/SAS: short self-test).`,
+ )) +
+ `` + inv.NVIDIA + `
+Loading NVIDIA GPUs...
+Select at least one NVIDIA GPU to enable NVIDIA check tasks.
+nvidia-smi, dmidecode, dcgmi diag`,
+ validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec)+` (Level 2, all GPUs simultaneously).`,
+ )) +
+ renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
+ inv.NVIDIA,
+ `Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs.`,
+ `all_reduce_perf (NCCL tests)`,
+ validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
+ )) +
+ renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
+ inv.NVIDIA,
+ `Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
+ `nvbandwidth`,
+ validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously).`,
+ )) +
+ `rocm-smi, dmidecode; MEM Integrity: rvs mem; MEM Bandwidth: rocm-bandwidth-test, rvs babel`,
+ ``,
+ )) +
+ `dcgmi diag targeted_stress`,
`NVIDIA GPU Selection`,
- `All NVIDIA validate tasks use only the GPUs selected here.`,
- `Select All`,
`id="sat-gpu-list"`,
+ `Select All`,
+ `id="sat-btn-nvidia"`,
+ `NVIDIA Interconnect (NCCL)`,
+ `NVIDIA Bandwidth (NVBandwidth)`,
+ `Non-destructive`,
} {
if !strings.Contains(body, needle) {
- t.Fatalf("validate page missing %q: %s", needle, body)
+ t.Fatalf("check page missing %q: %s", needle, body)
}
}
}
-func TestValidatePageRendersNvidiaFabricCardsInValidateMode(t *testing.T) {
+func TestCheckPageRendersNvidiaFabricCards(t *testing.T) {
handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder()
- handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
+ handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/check", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
for _, needle := range []string{
`NVIDIA Interconnect (NCCL)`,
- `Validate and Stress:`,
`NVIDIA Bandwidth (NVBandwidth)`,
- `nvbandwidth runs all built-in tests without a time limit`,
+ `nvbandwidth`,
+ `all_reduce_perf`,
} {
if !strings.Contains(body, needle) {
- t.Fatalf("validate page missing %q: %s", needle, body)
+ t.Fatalf("check page missing %q: %s", needle, body)
}
}
}
-func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
+func TestLoadPageRendersGoalBasedNVIDIACards(t *testing.T) {
handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder()
- handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
+ handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/load", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
@@ -847,7 +846,6 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
`NVIDIA Max Compute Load`,
`dcgmproftester`,
`NCCL`,
- `Validate → Stress mode`,
`id="burn-gpu-list"`,
} {
if !strings.Contains(body, needle) {