diff --git a/audit/internal/webui/layout.go b/audit/internal/webui/layout.go index 246ef12..f9c6d68 100644 --- a/audit/internal/webui/layout.go +++ b/audit/internal/webui/layout.go @@ -106,8 +106,8 @@ func layoutNav(active string, buildLabel string) string { {id: "audit", label: "1. Audit", href: "/audit"}, {id: "check", label: "2. Check", href: "/check"}, {id: "load", label: "3. Load", href: "/load"}, - {id: "speed", label: "4. Speed", href: "/speed"}, - {id: "endurance", label: "5. Endurance", href: "/endurance"}, + {id: "burn", label: "4. Burn", href: "/burn"}, + {id: "benchmark", label: "5. Benchmark", href: "/benchmark"}, {sep: true}, {id: "tasks", label: "Tasks", href: "/tasks"}, {id: "tools", label: "Tools", href: "/tools"}, diff --git a/audit/internal/webui/page_benchmark.go b/audit/internal/webui/page_benchmark.go index 0a7dc71..aac0457 100644 --- a/audit/internal/webui/page_benchmark.go +++ b/audit/internal/webui/page_benchmark.go @@ -612,19 +612,6 @@ func renderPowerBenchmarkResultsCard(exportDir string) string { return b.String() } -// renderSpeed renders the Speed page (step 4): performance benchmarks. -// Uses the same benchmark infrastructure; defaults to Standard profile (throughput/bandwidth). -// For long-duration stability/overnight runs, see Endurance (step 5). -func renderSpeed(opts HandlerOptions) string { - base := renderBenchmark(opts) - return `
Speed: Measures GPU compute throughput and memory bandwidth. For overnight stability testing, go to 5. Endurance.
` + base -} - -// renderEndurance renders the Endurance page (step 5): long-duration reliability tests. -// Focuses on Stability and Overnight profiles for multi-hour burn validation. -// For short load tests, see Load (step 3). For throughput measurement, see Speed (step 4). -func renderEndurance(opts HandlerOptions) string { - base := renderBenchmark(opts) - return `
Endurance: Long-duration reliability tests — Stability (several hours) and Overnight (8+ h) profiles. These profiles run hardware at sustained load; results show whether the server holds its performance envelope over time.
-
Use the Stability or Overnight profile in the setup card below. The Standard profile is available too but is better suited for the 4. Speed page.
` + base -} +// renderSpeed and renderEndurance are legacy wrappers; canonical page is 5. Benchmark at /benchmark. +func renderSpeed(opts HandlerOptions) string { return renderBenchmark(opts) } +func renderEndurance(opts HandlerOptions) string { return renderBenchmark(opts) } diff --git a/audit/internal/webui/page_burn.go b/audit/internal/webui/page_burn.go index f6de5cc..fac65d4 100644 --- a/audit/internal/webui/page_burn.go +++ b/audit/internal/webui/page_burn.go @@ -1,13 +1,8 @@ package webui -// renderLoad renders the Load page (step 3): sustained stress tests. -// For non-destructive status checks, see Check (step 2). -// For DCGM targeted diagnostics (targeted_stress, targeted_power, pulse), see Check → Validate mode. -func renderLoad() string { return renderBurn() } - func renderBurn() string { return `
⚠ Warning: Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.
-
Scope: Load runs sustained GPU compute and CPU/memory stress recipes. DCGM diagnostics (targeted_stress, targeted_power, pulse_test) and NCCL/NVBandwidth are on the 2. Check page. For overnight endurance runs, see 5. Endurance.
+
Scope: Burn runs sustained GPU compute and CPU/memory stress recipes. DCGM targeted diagnostics (targeted_stress, targeted_power, pulse_test) and NCCL/NVBandwidth are on the 3. Load page. For performance benchmarks, see 5. Benchmark.

Tasks continue in the background — view progress in Tasks.

diff --git a/audit/internal/webui/page_validate.go b/audit/internal/webui/page_validate.go index 9c0e1db..a6919c7 100644 --- a/audit/internal/webui/page_validate.go +++ b/audit/internal/webui/page_validate.go @@ -68,6 +68,14 @@ func validateTotalStressSec(n int) int { } func renderValidate(opts HandlerOptions) string { + return renderValidateMode(opts, false) +} + +func renderValidateStress(opts HandlerOptions) string { + return renderValidateMode(opts, true) +} + +func renderValidateMode(opts HandlerOptions, stressDefault bool) string { inv := loadValidateInventory(opts) n := inv.NvidiaGPUCount validateTotalStr := validateFmtDur(validateTotalValidateSec(n)) @@ -76,7 +84,19 @@ func renderValidate(opts HandlerOptions) string { if n > 0 { gpuNote = fmt.Sprintf(" (%d GPU)", n) } - return `
Non-destructive: Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.
+ validateChecked, stressChecked := "checked", "" + if stressDefault { + validateChecked, stressChecked = "", "checked" + } + alert := `
Non-destructive: Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.
` + if stressDefault { + alert = `
⚠ Stress mode: Runs extended load tests — CPU stress-ng, memory passes, DCGM targeted diagnostics. Higher wear than Validate.
` + } + onloadJS := "" + if stressDefault { + onloadJS = `` + } + return alert + `

Tasks continue in the background — view progress in Tasks.

@@ -84,8 +104,8 @@ func renderValidate(opts HandlerOptions) string {
- - + +

Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.

@@ -95,7 +115,7 @@ func renderValidate(opts HandlerOptions) string {
-
+ ` + onloadJS
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody( @@ -667,7 +687,7 @@ func renderCheck(opts HandlerOptions) string { if n > 0 { gpuNote = fmt.Sprintf(" (%d GPU)", n) } - return `
Non-destructive: Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to 3. Load.
+ return `
Non-destructive: Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to 4. Burn.
diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go index a3f2b80..4aa4b5b 100644 --- a/audit/internal/webui/pages.go +++ b/audit/internal/webui/pages.go @@ -33,15 +33,15 @@ func renderPage(page string, opts HandlerOptions) string { case "load": pageID = "load" title = "3. Load" - body = renderLoad() - case "speed": - pageID = "speed" - title = "4. Speed" - body = renderSpeed(opts) - case "endurance": - pageID = "endurance" - title = "5. Endurance" - body = renderEndurance(opts) + body = renderValidateStress(opts) + case "burn": + pageID = "burn" + title = "4. Burn" + body = renderBurn() + case "benchmark": + pageID = "benchmark" + title = "5. Benchmark" + body = renderBenchmark(opts) case "tools": pageID = "tools" title = "Tools" @@ -52,17 +52,17 @@ func renderPage(page string, opts HandlerOptions) string { body = renderSettings(opts) // Legacy routes (redirected at HTTP level in handlePage; these are fallbacks) case "validate", "tests": - pageID = "check" - title = "2. Check" - body = renderCheck(opts) - case "burn", "burn-in": pageID = "load" title = "3. Load" - body = renderLoad() - case "benchmark": - pageID = "speed" - title = "4. Speed" - body = renderSpeed(opts) + body = renderValidate(opts) + case "burn-in": + pageID = "burn" + title = "4. Burn" + body = renderBurn() + case "speed", "endurance": + pageID = "benchmark" + title = "5. Benchmark" + body = renderBenchmark(opts) case "tasks": pageID = "tasks" title = "Tasks" diff --git a/audit/internal/webui/server.go b/audit/internal/webui/server.go index 6b26d78..58c1666 100644 --- a/audit/internal/webui/server.go +++ b/audit/internal/webui/server.go @@ -1424,13 +1424,13 @@ func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) { // Redirect legacy routes to new named pages switch page { case "validate", "tests": - http.Redirect(w, r, "/check", http.StatusMovedPermanently) - return - case "burn", "burn-in": http.Redirect(w, r, "/load", http.StatusMovedPermanently) return - case "benchmark": - http.Redirect(w, r, "/speed", http.StatusMovedPermanently) + case "burn-in": + http.Redirect(w, r, "/burn", http.StatusMovedPermanently) + return + case "speed", "endurance": + http.Redirect(w, r, "/benchmark", http.StatusMovedPermanently) return } body := renderPage(page, h.opts) diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go index ab6f746..7f21502 100644 --- a/audit/internal/webui/server_test.go +++ b/audit/internal/webui/server_test.go @@ -717,13 +717,13 @@ func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) { func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) { handler := NewHandler(HandlerOptions{}) rec := httptest.NewRecorder() - handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil)) + handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil)) if rec.Code != http.StatusOK { t.Fatalf("status=%d", rec.Code) } body := rec.Body.String() for _, needle := range []string{ - `href="/speed"`, + `href="/benchmark"`, `id="benchmark-gpu-list"`, `/api/gpu/nvidia`, `/api/bee-bench/nvidia/perf/run`, @@ -779,7 +779,7 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) { handler := NewHandler(HandlerOptions{ExportDir: exportDir}) rec := httptest.NewRecorder() - handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil)) + handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil)) if rec.Code != http.StatusOK { t.Fatalf("status=%d", rec.Code) } @@ -844,10 +844,10 @@ func TestCheckPageRendersNvidiaFabricCards(t *testing.T) { } } -func TestLoadPageRendersGoalBasedNVIDIACards(t *testing.T) { +func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) { handler := NewHandler(HandlerOptions{}) rec := httptest.NewRecorder() - handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/load", nil)) + handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil)) if rec.Code != http.StatusOK { t.Fatalf("status=%d", rec.Code) } diff --git a/iso/builder/build.sh b/iso/builder/build.sh index 21ef4b8..d4f4abe 100755 --- a/iso/builder/build.sh +++ b/iso/builder/build.sh @@ -1483,6 +1483,16 @@ for tool in storcli64 sas2ircu sas3ircu arcconf ssacli saa; do fi done +# saa requires acpica_bin/acpidump relative to its own location +if [ -f "${VENDOR_DIR}/acpica_bin/acpidump" ]; then + mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin/acpica_bin" + cp "${VENDOR_DIR}/acpica_bin/acpidump" "${OVERLAY_STAGE_DIR}/usr/local/bin/acpica_bin/acpidump" + chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/acpica_bin/acpidump" || true + echo "vendor tool: acpica_bin/acpidump (included)" +else + echo "vendor tool: acpica_bin/acpidump (not found, skipped)" +fi + # --- NVIDIA kernel modules and userspace libs --- if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \ diff --git a/iso/vendor/acpica_bin/acpidump b/iso/vendor/acpica_bin/acpidump new file mode 100755 index 0000000..ae492c4 Binary files /dev/null and b/iso/vendor/acpica_bin/acpidump differ