Restructure nav to Load/Burn/Benchmark; fix SAA acpidump dependency
- Nav steps 3-5: Load (validate), Burn (burn-in), Benchmark (speed+endurance merged)
- /load now renders validate mode; /burn renders burn-in; /benchmark replaces /speed+/endurance
- Legacy redirects updated: /validate→/load, /burn-in→/burn, /speed+/endurance→/benchmark
- Add acpica_bin/acpidump from SAA 1.5.0 package; required by saa GetDmiInfo (ExitCode 8)
- build.sh copies acpica_bin/acpidump to /usr/local/bin/acpica_bin/ alongside saa

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -106,8 +106,8 @@ func layoutNav(active string, buildLabel string) string {
|
||||
{id: "audit", label: "1. Audit", href: "/audit"},
|
||||
{id: "check", label: "2. Check", href: "/check"},
|
||||
{id: "load", label: "3. Load", href: "/load"},
|
||||
{id: "speed", label: "4. Speed", href: "/speed"},
|
||||
{id: "endurance", label: "5. Endurance", href: "/endurance"},
|
||||
{id: "burn", label: "4. Burn", href: "/burn"},
|
||||
{id: "benchmark", label: "5. Benchmark", href: "/benchmark"},
|
||||
{sep: true},
|
||||
{id: "tasks", label: "Tasks", href: "/tasks"},
|
||||
{id: "tools", label: "Tools", href: "/tools"},
|
||||
|
||||
@@ -612,19 +612,6 @@ func renderPowerBenchmarkResultsCard(exportDir string) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// renderSpeed renders the Speed page (step 4): performance benchmarks.
|
||||
// Uses the same benchmark infrastructure; defaults to Standard profile (throughput/bandwidth).
|
||||
// For long-duration stability/overnight runs, see Endurance (step 5).
|
||||
func renderSpeed(opts HandlerOptions) string {
|
||||
base := renderBenchmark(opts)
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Speed:</strong> Measures GPU compute throughput and memory bandwidth. For overnight stability testing, go to <a href="/endurance">5. Endurance</a>.</div>` + base
|
||||
}
|
||||
|
||||
// renderEndurance renders the Endurance page (step 5): long-duration reliability tests.
|
||||
// Focuses on Stability and Overnight profiles for multi-hour burn validation.
|
||||
// For short load tests, see Load (step 3). For throughput measurement, see Speed (step 4).
|
||||
func renderEndurance(opts HandlerOptions) string {
|
||||
base := renderBenchmark(opts)
|
||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>Endurance:</strong> Long-duration reliability tests — Stability (several hours) and Overnight (8+ h) profiles. These profiles run hardware at sustained load; results show whether the server holds its performance envelope over time.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px">Use the <strong>Stability</strong> or <strong>Overnight</strong> profile in the setup card below. The Standard profile is available too but is better suited for the <a href="/speed">4. Speed</a> page.</div>` + base
|
||||
}
|
||||
// renderSpeed and renderEndurance are legacy wrappers; canonical page is 5. Benchmark at /benchmark.
|
||||
func renderSpeed(opts HandlerOptions) string { return renderBenchmark(opts) }
|
||||
func renderEndurance(opts HandlerOptions) string { return renderBenchmark(opts) }
|
||||
|
||||
@@ -1,13 +1,8 @@
|
||||
package webui
|
||||
|
||||
// renderLoad renders the Load page (step 3): sustained stress tests.
|
||||
// For non-destructive status checks, see Check (step 2).
|
||||
// For DCGM targeted diagnostics (targeted_stress, targeted_power, pulse), see Check → Validate mode.
|
||||
func renderLoad() string { return renderBurn() }
|
||||
|
||||
func renderBurn() string {
|
||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Load runs sustained GPU compute and CPU/memory stress recipes. DCGM diagnostics (<code>targeted_stress</code>, <code>targeted_power</code>, <code>pulse_test</code>) and NCCL/NVBandwidth are on the <a href="/check">2. Check</a> page. For overnight endurance runs, see <a href="/endurance">5. Endurance</a>.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn runs sustained GPU compute and CPU/memory stress recipes. DCGM targeted diagnostics (<code>targeted_stress</code>, <code>targeted_power</code>, <code>pulse_test</code>) and NCCL/NVBandwidth are on the <a href="/load">3. Load</a> page. For performance benchmarks, see <a href="/benchmark">5. Benchmark</a>.</div>
|
||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
|
||||
@@ -68,6 +68,14 @@ func validateTotalStressSec(n int) int {
|
||||
}
|
||||
|
||||
func renderValidate(opts HandlerOptions) string {
|
||||
return renderValidateMode(opts, false)
|
||||
}
|
||||
|
||||
func renderValidateStress(opts HandlerOptions) string {
|
||||
return renderValidateMode(opts, true)
|
||||
}
|
||||
|
||||
func renderValidateMode(opts HandlerOptions, stressDefault bool) string {
|
||||
inv := loadValidateInventory(opts)
|
||||
n := inv.NvidiaGPUCount
|
||||
validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
|
||||
@@ -76,7 +84,19 @@ func renderValidate(opts HandlerOptions) string {
|
||||
if n > 0 {
|
||||
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
||||
}
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>
|
||||
validateChecked, stressChecked := "checked", ""
|
||||
if stressDefault {
|
||||
validateChecked, stressChecked = "", "checked"
|
||||
}
|
||||
alert := `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>`
|
||||
if stressDefault {
|
||||
alert = `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Stress mode:</strong> Runs extended load tests — CPU stress-ng, memory passes, DCGM targeted diagnostics. Higher wear than Validate.</div>`
|
||||
}
|
||||
onloadJS := ""
|
||||
if stressDefault {
|
||||
onloadJS = `<script>satModeChanged();</script>`
|
||||
}
|
||||
return alert + `
|
||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
@@ -84,8 +104,8 @@ func renderValidate(opts HandlerOptions) string {
|
||||
<div class="card-body validate-profile-body">
|
||||
<div class="validate-profile-col">
|
||||
<div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
|
||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
|
||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
|
||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" ` + validateChecked + ` onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
|
||||
<label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" ` + stressChecked + ` onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
|
||||
</div>
|
||||
<div class="validate-profile-col validate-profile-action">
|
||||
<p style="color:var(--muted);font-size:12px;margin:0 0 10px">Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.</p>
|
||||
@@ -95,7 +115,7 @@ func renderValidate(opts HandlerOptions) string {
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>` + onloadJS
|
||||
|
||||
<div class="grid3">
|
||||
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
|
||||
@@ -667,7 +687,7 @@ func renderCheck(opts HandlerOptions) string {
|
||||
if n > 0 {
|
||||
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
||||
}
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to <a href="/load">3. Load</a>.</div>
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to <a href="/burn">4. Burn</a>.</div>
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px">
|
||||
<button type="button" class="btn btn-primary" onclick="runAllCheckSAT()">Run All Checks</button>
|
||||
<span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
|
||||
|
||||
@@ -33,15 +33,15 @@ func renderPage(page string, opts HandlerOptions) string {
|
||||
case "load":
|
||||
pageID = "load"
|
||||
title = "3. Load"
|
||||
body = renderLoad()
|
||||
case "speed":
|
||||
pageID = "speed"
|
||||
title = "4. Speed"
|
||||
body = renderSpeed(opts)
|
||||
case "endurance":
|
||||
pageID = "endurance"
|
||||
title = "5. Endurance"
|
||||
body = renderEndurance(opts)
|
||||
body = renderValidateStress(opts)
|
||||
case "burn":
|
||||
pageID = "burn"
|
||||
title = "4. Burn"
|
||||
body = renderBurn()
|
||||
case "benchmark":
|
||||
pageID = "benchmark"
|
||||
title = "5. Benchmark"
|
||||
body = renderBenchmark(opts)
|
||||
case "tools":
|
||||
pageID = "tools"
|
||||
title = "Tools"
|
||||
@@ -52,17 +52,17 @@ func renderPage(page string, opts HandlerOptions) string {
|
||||
body = renderSettings(opts)
|
||||
// Legacy routes (redirected at HTTP level in handlePage; these are fallbacks)
|
||||
case "validate", "tests":
|
||||
pageID = "check"
|
||||
title = "2. Check"
|
||||
body = renderCheck(opts)
|
||||
case "burn", "burn-in":
|
||||
pageID = "load"
|
||||
title = "3. Load"
|
||||
body = renderLoad()
|
||||
case "benchmark":
|
||||
pageID = "speed"
|
||||
title = "4. Speed"
|
||||
body = renderSpeed(opts)
|
||||
body = renderValidate(opts)
|
||||
case "burn-in":
|
||||
pageID = "burn"
|
||||
title = "4. Burn"
|
||||
body = renderBurn()
|
||||
case "speed", "endurance":
|
||||
pageID = "benchmark"
|
||||
title = "5. Benchmark"
|
||||
body = renderBenchmark(opts)
|
||||
case "tasks":
|
||||
pageID = "tasks"
|
||||
title = "Tasks"
|
||||
|
||||
@@ -1424,13 +1424,13 @@ func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
||||
// Redirect legacy routes to new named pages
|
||||
switch page {
|
||||
case "validate", "tests":
|
||||
http.Redirect(w, r, "/check", http.StatusMovedPermanently)
|
||||
return
|
||||
case "burn", "burn-in":
|
||||
http.Redirect(w, r, "/load", http.StatusMovedPermanently)
|
||||
return
|
||||
case "benchmark":
|
||||
http.Redirect(w, r, "/speed", http.StatusMovedPermanently)
|
||||
case "burn-in":
|
||||
http.Redirect(w, r, "/burn", http.StatusMovedPermanently)
|
||||
return
|
||||
case "speed", "endurance":
|
||||
http.Redirect(w, r, "/benchmark", http.StatusMovedPermanently)
|
||||
return
|
||||
}
|
||||
body := renderPage(page, h.opts)
|
||||
|
||||
@@ -717,13 +717,13 @@ func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) {
|
||||
func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
for _, needle := range []string{
|
||||
`href="/speed"`,
|
||||
`href="/benchmark"`,
|
||||
`id="benchmark-gpu-list"`,
|
||||
`/api/gpu/nvidia`,
|
||||
`/api/bee-bench/nvidia/perf/run`,
|
||||
@@ -779,7 +779,7 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
|
||||
|
||||
handler := NewHandler(HandlerOptions{ExportDir: exportDir})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
@@ -844,10 +844,10 @@ func TestCheckPageRendersNvidiaFabricCards(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/load", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user