Restructure web UI navigation into 7 numbered workflow stages
Replace the flat menu (Dashboard, Audit, Validate, Burn, Benchmark, Tasks, Tools) with a numbered progression that guides engineers through a logical acceptance workflow:

Dashboard (landing) → 1. Audit → 2. Check → 3. Load → 4. Speed → 5. Endurance → 6. Tools → 7. Settings

Key changes:
- layout.go: numbered nav labels, new hrefs, Tasks removed from nav and replaced with a persistent sidebar badge (polls /api/tasks every 5 s, highlights amber when tasks are active)
- server.go: 301 redirects from /validate→/check, /burn→/load, /benchmark→/speed for backward compatibility
- pages.go: dispatch cases for all new routes; old routes kept as fallbacks
- page_validate.go: add renderCheck() — non-destructive check page with validate-mode tests only (no stress toggle, no targeted-stress/targeted-power/pulse cards)
- page_burn.go: add renderLoad() wrapper; update scope alert to reference /check instead of /validate
- page_benchmark.go: add renderSpeed() (performance focus) and renderEndurance() (stability/overnight focus) wrappers
- page_settings.go: new Settings page with blackbox logging toggle, NVIDIA driver reset, and build info
- server_test.go: update five tests to use new route names and content expectations

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -68,6 +68,11 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
||||
.chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
|
||||
.chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
|
||||
.chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
|
||||
/* Tasks nav badge */
|
||||
.tasks-nav-btn{display:flex;justify-content:space-between;align-items:center;padding:10px 16px;color:rgba(255,255,255,.55);font-size:12px;text-decoration:none;border-top:1px solid rgba(255,255,255,.12);margin-top:auto;transition:color .15s}
|
||||
.tasks-nav-btn:hover{color:#fff}
|
||||
.tasks-nav-count{background:var(--accent);color:#fff;border-radius:10px;padding:1px 7px;font-size:11px;font-weight:700;display:none}
|
||||
.tasks-nav-count.active{display:inline}
|
||||
/* Output terminal */
|
||||
.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
|
||||
.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
|
||||
@@ -93,14 +98,15 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
||||
}
|
||||
|
||||
func layoutNav(active string, buildLabel string) string {
|
||||
items := []struct{ id, label, href, onclick string }{
|
||||
{"dashboard", "Dashboard", "/", ""},
|
||||
{"audit", "Audit", "/audit", ""},
|
||||
{"validate", "Validate", "/validate", ""},
|
||||
{"burn", "Burn", "/burn", ""},
|
||||
{"benchmark", "Benchmark", "/benchmark", ""},
|
||||
{"tasks", "Tasks", "/tasks", ""},
|
||||
{"tools", "Tools", "/tools", ""},
|
||||
items := []struct{ id, label, href string }{
|
||||
{"dashboard", "Dashboard", "/"},
|
||||
{"audit", "1. Audit", "/audit"},
|
||||
{"check", "2. Check", "/check"},
|
||||
{"load", "3. Load", "/load"},
|
||||
{"speed", "4. Speed", "/speed"},
|
||||
{"endurance", "5. Endurance", "/endurance"},
|
||||
{"tools", "6. Tools", "/tools"},
|
||||
{"settings", "7. Settings", "/settings"},
|
||||
}
|
||||
var b strings.Builder
|
||||
b.WriteString(`<aside class="sidebar">`)
|
||||
@@ -124,15 +130,16 @@ func layoutNav(active string, buildLabel string) string {
|
||||
if item.id == active {
|
||||
cls += " active"
|
||||
}
|
||||
if item.onclick != "" {
|
||||
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s" onclick="%s">%s</a>`,
|
||||
cls, item.href, item.onclick, item.label))
|
||||
} else {
|
||||
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s">%s</a>`,
|
||||
cls, item.href, item.label))
|
||||
}
|
||||
b.WriteString(fmt.Sprintf(`<a class="%s" href="%s">%s</a>`, cls, item.href, item.label))
|
||||
}
|
||||
b.WriteString(`</nav>`)
|
||||
b.WriteString(`<a href="/tasks" class="tasks-nav-btn" id="tasks-nav-btn">`)
|
||||
b.WriteString(`<span>Tasks</span>`)
|
||||
b.WriteString(`<span class="tasks-nav-count" id="tasks-nav-count"></span>`)
|
||||
b.WriteString(`</a>`)
|
||||
b.WriteString(`<script>`)
|
||||
b.WriteString(`(function(){function u(){fetch('/api/tasks',{cache:'no-store'}).then(function(r){return r.json();}).then(function(d){var n=Array.isArray(d)?d.filter(function(t){return t.status==='pending'||t.status==='running';}).length:0;var c=document.getElementById('tasks-nav-count');var b=document.getElementById('tasks-nav-btn');if(c){c.textContent=n>0?String(n):'';c.className='tasks-nav-count'+(n>0?' active':'');}if(b){b.style.color=n>0?'#f6c90e':'';}}).catch(function(){});}u();setInterval(u,5000);})();`)
|
||||
b.WriteString(`</script>`)
|
||||
b.WriteString(`</aside>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
@@ -611,3 +611,20 @@ func renderPowerBenchmarkResultsCard(exportDir string) string {
|
||||
b.WriteString(`</div></div>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// renderSpeed renders the Speed page (step 4): performance benchmarks.
|
||||
// Uses the same benchmark infrastructure; defaults to Standard profile (throughput/bandwidth).
|
||||
// For long-duration stability/overnight runs, see Endurance (step 5).
|
||||
func renderSpeed(opts HandlerOptions) string {
|
||||
base := renderBenchmark(opts)
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Speed:</strong> Measures GPU compute throughput and memory bandwidth. For overnight stability testing, go to <a href="/endurance">5. Endurance</a>.</div>` + base
|
||||
}
|
||||
|
||||
// renderEndurance renders the Endurance page (step 5): long-duration reliability tests.
|
||||
// Focuses on Stability and Overnight profiles for multi-hour burn validation.
|
||||
// For short load tests, see Load (step 3). For throughput measurement, see Speed (step 4).
|
||||
func renderEndurance(opts HandlerOptions) string {
|
||||
base := renderBenchmark(opts)
|
||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>Endurance:</strong> Long-duration reliability tests — Stability (several hours) and Overnight (8+ h) profiles. These profiles run hardware at sustained load; results show whether the server holds its performance envelope over time.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px">Use the <strong>Stability</strong> or <strong>Overnight</strong> profile in the setup card below. The Standard profile is available too but is better suited for the <a href="/speed">4. Speed</a> page.</div>` + base
|
||||
}
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
package webui
|
||||
|
||||
// renderLoad renders the Load page (step 3): sustained stress tests.
|
||||
// For non-destructive status checks, see Check (step 2).
|
||||
// For DCGM targeted diagnostics (targeted_stress, targeted_power, pulse), see Check → Validate mode.
|
||||
func renderLoad() string { return renderBurn() }
|
||||
|
||||
func renderBurn() string {
|
||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
|
||||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Load runs sustained GPU compute and CPU/memory stress recipes. DCGM diagnostics (<code>targeted_stress</code>, <code>targeted_power</code>, <code>pulse_test</code>) and NCCL/NVBandwidth are on the <a href="/check">2. Check</a> page. For overnight endurance runs, see <a href="/endurance">5. Endurance</a>.</div>
|
||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
|
||||
77
audit/internal/webui/page_settings.go
Normal file
77
audit/internal/webui/page_settings.go
Normal file
@@ -0,0 +1,77 @@
|
||||
package webui
|
||||
|
||||
import "html"
|
||||
|
||||
func renderSettings(opts HandlerOptions) string {
|
||||
version := opts.BuildLabel
|
||||
if version == "" {
|
||||
version = "dev"
|
||||
}
|
||||
return `<div class="grid2">
|
||||
|
||||
<div class="card">
|
||||
<div class="card-head">Blackbox Logging</div>
|
||||
<div class="card-body">
|
||||
<p style="font-size:13px;color:var(--muted);margin-bottom:14px">Continuous hardware monitoring that writes a rolling log of sensor readings to the export directory. Useful for capturing thermal or power anomalies during long runs.</p>
|
||||
<div style="display:flex;gap:8px;align-items:center">
|
||||
<button class="btn btn-primary btn-sm" onclick="blackboxToggle('enable')">Enable</button>
|
||||
<button class="btn btn-secondary btn-sm" onclick="blackboxToggle('disable')">Disable</button>
|
||||
<span id="blackbox-status" style="font-size:12px;color:var(--muted)">Loading...</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<div class="card-head">NVIDIA Recovery</div>
|
||||
<div class="card-body">
|
||||
<p style="font-size:13px;color:var(--muted);margin-bottom:14px">Reset NVIDIA GPU driver state. Use when <code>nvidia-smi</code> reports errors or GPUs appear stuck after a failed test.</p>
|
||||
<div style="display:flex;gap:8px;align-items:center">
|
||||
<button class="btn btn-danger btn-sm" onclick="nvidiaReset()">Reset NVIDIA Driver</button>
|
||||
<span id="nvidia-reset-status" style="font-size:12px;color:var(--muted)"></span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="card" style="margin-top:0">
|
||||
<div class="card-head">Build Info</div>
|
||||
<div class="card-body">
|
||||
<table style="width:auto">
|
||||
<tbody>
|
||||
<tr><td style="color:var(--muted);padding-right:24px">Version</td><td>` + html.EscapeString(version) + `</td></tr>
|
||||
<tr><td style="color:var(--muted);padding-right:24px">Title</td><td>` + html.EscapeString(opts.Title) + `</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
(function() {
|
||||
fetch('/api/blackbox/status', {cache:'no-store'}).then(r => r.json()).then(d => {
|
||||
var el = document.getElementById('blackbox-status');
|
||||
if (el) el.textContent = d.enabled ? 'Enabled' : 'Disabled';
|
||||
}).catch(() => {
|
||||
var el = document.getElementById('blackbox-status');
|
||||
if (el) el.textContent = 'Status unavailable';
|
||||
});
|
||||
})();
|
||||
function blackboxToggle(action) {
|
||||
var el = document.getElementById('blackbox-status');
|
||||
if (el) el.textContent = 'Updating...';
|
||||
fetch('/api/blackbox/' + action, {method:'POST', cache:'no-store'})
|
||||
.then(r => r.json())
|
||||
.then(d => { if (el) el.textContent = d.enabled ? 'Enabled' : 'Disabled'; })
|
||||
.catch(err => { if (el) el.textContent = 'Error: ' + err.message; });
|
||||
}
|
||||
function nvidiaReset() {
|
||||
var el = document.getElementById('nvidia-reset-status');
|
||||
if (!confirm('Reset NVIDIA driver? This will interrupt any running GPU tasks.')) return;
|
||||
if (el) el.textContent = 'Resetting...';
|
||||
fetch('/api/gpu/nvidia-reset', {method:'POST', cache:'no-store'})
|
||||
.then(r => r.json())
|
||||
.then(d => { if (el) el.textContent = d.error ? ('Error: ' + d.error) : 'Done — driver reset.'; })
|
||||
.catch(err => { if (el) el.textContent = 'Error: ' + err.message; });
|
||||
}
|
||||
</script>`
|
||||
}
|
||||
@@ -656,6 +656,292 @@ func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// renderCheck renders the non-destructive Check page (step 2).
|
||||
// Shows validate-mode tests only: CPU, Memory, Storage, NVIDIA L2, NCCL, NVBandwidth, AMD.
|
||||
// Stress-mode tests (targeted-stress, targeted-power, pulse) are on the Load page.
|
||||
func renderCheck(opts HandlerOptions) string {
|
||||
inv := loadValidateInventory(opts)
|
||||
n := inv.NvidiaGPUCount
|
||||
validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
|
||||
gpuNote := ""
|
||||
if n > 0 {
|
||||
gpuNote = fmt.Sprintf(" (%d GPU)", n)
|
||||
}
|
||||
return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Check tests collect diagnostics only — no writes to disks, no sustained load, no hardware wear counters incremented. For stress testing, go to <a href="/load">3. Load</a>.</div>
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px">
|
||||
<button type="button" class="btn btn-primary" onclick="runAllCheckSAT()">Run All Checks</button>
|
||||
<span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
|
||||
<span style="font-size:12px;color:var(--muted)">est. ` + validateTotalStr + gpuNote + `</span>
|
||||
</div>
|
||||
|
||||
<div class="grid3">
|
||||
` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
|
||||
inv.CPU,
|
||||
`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
|
||||
`<code>lscpu</code>, <code>sensors</code>, <code>stress-ng</code>`,
|
||||
validateFmtDur(platform.SATEstimatedCPUValidateSec)+` (stress-ng 60 s).`,
|
||||
)) +
|
||||
renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
|
||||
inv.Memory,
|
||||
`Runs a RAM validation pass and records memory state around the test.`,
|
||||
`<code>free</code>, <code>memtester</code>`,
|
||||
validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` (256 MB × 1 pass).`,
|
||||
)) +
|
||||
renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
|
||||
inv.Storage,
|
||||
`Scans all storage devices and runs the matching health or self-test path for each.`,
|
||||
`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
|
||||
`Seconds (NVMe: instant device query; SATA/SAS: short self-test).`,
|
||||
)) +
|
||||
`</div>
|
||||
<div style="height:1px;background:var(--border);margin:16px 0"></div>
|
||||
<div class="card" style="margin-bottom:16px">
|
||||
<div class="card-head">NVIDIA GPU Selection</div>
|
||||
<div class="card-body">
|
||||
<p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
|
||||
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
|
||||
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
|
||||
<button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
|
||||
</div>
|
||||
<div id="sat-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
|
||||
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
|
||||
</div>
|
||||
<p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA check tasks.</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid3">
|
||||
` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
|
||||
inv.NVIDIA,
|
||||
`Runs NVIDIA diagnostics and board inventory checks (DCGM Level 2).`,
|
||||
`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
|
||||
validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec)+` (Level 2, all GPUs simultaneously).`,
|
||||
)) +
|
||||
renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
|
||||
inv.NVIDIA,
|
||||
`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs.`,
|
||||
`<code>all_reduce_perf</code> (NCCL tests)`,
|
||||
validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
|
||||
)) +
|
||||
renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
|
||||
inv.NVIDIA,
|
||||
`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
|
||||
`<code>nvbandwidth</code>`,
|
||||
validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously).`,
|
||||
)) +
|
||||
`</div>
|
||||
<div class="grid3" style="margin-top:16px">
|
||||
` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
|
||||
inv.AMD,
|
||||
`Runs AMD GPU inventory, MEM integrity, and MEM bandwidth checks.`,
|
||||
`GPU Validate: <code>rocm-smi</code>, <code>dmidecode</code>; MEM Integrity: <code>rvs mem</code>; MEM Bandwidth: <code>rocm-bandwidth-test</code>, <code>rvs babel</code>`,
|
||||
`<div style="display:flex;flex-direction:column;gap:4px"><label class="cb-row"><input type="checkbox" id="sat-amd-target" checked><span>GPU Validate</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-mem-target" checked><span>MEM Integrity</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-bandwidth-target" checked><span>MEM Bandwidth</span></label></div>`,
|
||||
)) +
|
||||
`</div>
|
||||
<div id="sat-output" style="display:none;margin-top:16px" class="card">
|
||||
<div class="card-head">Test Output <span id="sat-title"></span></div>
|
||||
<div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
|
||||
</div>
|
||||
<style>
|
||||
.validate-card-body { padding:0; }
|
||||
.validate-card-section { padding:12px 16px 0; }
|
||||
.validate-card-section:last-child { padding-bottom:16px; }
|
||||
.sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
|
||||
.sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||||
.cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
|
||||
.cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||||
</style>
|
||||
<script>
|
||||
let satES = null;
|
||||
function satLabels() {
|
||||
return {nvidia:'Check GPU (DCGM L2)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Check Memory', storage:'Check Storage', cpu:'Check CPU', amd:'Check AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
|
||||
}
|
||||
let satNvidiaGPUsPromise = null;
|
||||
function loadSatNvidiaGPUs() {
|
||||
if (!satNvidiaGPUsPromise) {
|
||||
satNvidiaGPUsPromise = fetch('/api/gpu/nvidia').then(r => {
|
||||
if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
|
||||
return r.json();
|
||||
}).then(list => Array.isArray(list) ? list : []);
|
||||
}
|
||||
return satNvidiaGPUsPromise;
|
||||
}
|
||||
function satSelectedGPUIndices() {
|
||||
return Array.from(document.querySelectorAll('.sat-nvidia-checkbox'))
|
||||
.filter(el => el.checked && !el.disabled)
|
||||
.map(el => parseInt(el.value, 10))
|
||||
.filter(v => !Number.isNaN(v))
|
||||
.sort((a, b) => a - b);
|
||||
}
|
||||
function satUpdateGPUSelectionNote() {
|
||||
const note = document.getElementById('sat-gpu-selection-note');
|
||||
if (!note) return;
|
||||
const sel = satSelectedGPUIndices();
|
||||
note.textContent = sel.length
|
||||
? 'Selected GPUs: ' + sel.join(', ') + '. Multi-GPU tests will use all selected GPUs.'
|
||||
: 'Select at least one NVIDIA GPU to enable NVIDIA check tasks.';
|
||||
}
|
||||
function satRenderGPUList(gpus) {
|
||||
const root = document.getElementById('sat-gpu-list');
|
||||
if (!root) return;
|
||||
if (!gpus || !gpus.length) {
|
||||
root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
|
||||
satUpdateGPUSelectionNote(); return;
|
||||
}
|
||||
root.innerHTML = gpus.map(gpu => {
|
||||
const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
|
||||
return '<label class="sat-gpu-row"><input class="sat-nvidia-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="satUpdateGPUSelectionNote()"><span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span></label>';
|
||||
}).join('');
|
||||
satUpdateGPUSelectionNote();
|
||||
}
|
||||
function satSelectAllGPUs() { document.querySelectorAll('.sat-nvidia-checkbox').forEach(el => { el.checked = true; }); satUpdateGPUSelectionNote(); }
|
||||
function satSelectNoGPUs() { document.querySelectorAll('.sat-nvidia-checkbox').forEach(el => { el.checked = false; }); satUpdateGPUSelectionNote(); }
|
||||
function satGPULoadInit() {
|
||||
loadSatNvidiaGPUs().then(satRenderGPUList).catch(err => {
|
||||
const root = document.getElementById('sat-gpu-list');
|
||||
if (root) root.innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
|
||||
satUpdateGPUSelectionNote();
|
||||
});
|
||||
}
|
||||
function satRequestBody(target, overrides) {
|
||||
const body = {};
|
||||
const labels = satLabels();
|
||||
body.display_name = labels[target] || ('Check ' + target);
|
||||
body.stress_mode = false;
|
||||
if (target === 'cpu') body.duration = 60;
|
||||
if (overrides) Object.keys(overrides).forEach(k => { body[k] = overrides[k]; });
|
||||
return body;
|
||||
}
|
||||
function enqueueSATTarget(target, overrides) {
|
||||
return fetch('/api/sat/' + target + '/run', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(satRequestBody(target, overrides))}).then(r => r.json());
|
||||
}
|
||||
function streamSATTask(taskId, title, resetTerminal) {
|
||||
if (satES) { satES.close(); satES = null; }
|
||||
document.getElementById('sat-output').style.display = 'block';
|
||||
document.getElementById('sat-title').textContent = '— ' + title;
|
||||
const term = document.getElementById('sat-terminal');
|
||||
if (resetTerminal) term.textContent = '';
|
||||
term.textContent += 'Task ' + taskId + ' queued. Streaming log...\n';
|
||||
return new Promise(resolve => {
|
||||
satES = new EventSource('/api/tasks/' + taskId + '/stream');
|
||||
satES.onmessage = e => { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
|
||||
satES.addEventListener('done', e => {
|
||||
satES.close(); satES = null;
|
||||
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
|
||||
term.scrollTop = term.scrollHeight;
|
||||
resolve({ok: !e.data, error: e.data || ''});
|
||||
});
|
||||
satES.onerror = () => {
|
||||
if (satES) { satES.close(); satES = null; }
|
||||
term.textContent += '\nERROR: stream disconnected.\n';
|
||||
term.scrollTop = term.scrollHeight;
|
||||
resolve({ok: false, error: 'stream disconnected'});
|
||||
};
|
||||
});
|
||||
}
|
||||
function selectedAMDValidateTargets() {
|
||||
const targets = [];
|
||||
const gpu = document.getElementById('sat-amd-target');
|
||||
const mem = document.getElementById('sat-amd-mem-target');
|
||||
const bw = document.getElementById('sat-amd-bandwidth-target');
|
||||
if (gpu && gpu.checked && !gpu.disabled) targets.push('amd');
|
||||
if (mem && mem.checked && !mem.disabled) targets.push('amd-mem');
|
||||
if (bw && bw.checked && !bw.disabled) targets.push('amd-bandwidth');
|
||||
return targets;
|
||||
}
|
||||
function runSAT(target) { return runSATWithOverrides(target, null); }
|
||||
function runSATWithOverrides(target, overrides) {
|
||||
const title = (overrides && overrides.display_name) || target;
|
||||
document.getElementById('sat-output').style.display = 'block';
|
||||
document.getElementById('sat-title').textContent = '— ' + title;
|
||||
const term = document.getElementById('sat-terminal');
|
||||
term.textContent = 'Enqueuing ' + title + ' test...\n';
|
||||
return enqueueSATTarget(target, overrides).then(d => streamSATTask(d.task_id, title, false));
|
||||
}
|
||||
function runNvidiaFabricValidate(target) {
|
||||
const indices = satSelectedGPUIndices();
|
||||
if (!indices.length) { alert('No NVIDIA GPUs available.'); return; }
|
||||
runSATWithOverrides(target, {gpu_indices: indices, display_name: satLabels()[target] || target});
|
||||
}
|
||||
function runNvidiaValidateSet(target) {
|
||||
const sel = satSelectedGPUIndices();
|
||||
if (!sel.length) { alert('Select at least one NVIDIA GPU.'); return; }
|
||||
return runSATWithOverrides(target, {gpu_indices: sel, display_name: satLabels()[target] || target});
|
||||
}
|
||||
function runAMDValidateSet() {
|
||||
const targets = selectedAMDValidateTargets();
|
||||
if (!targets.length) return;
|
||||
if (targets.length === 1) return runSAT(targets[0]);
|
||||
const term = document.getElementById('sat-terminal');
|
||||
document.getElementById('sat-output').style.display = 'block';
|
||||
document.getElementById('sat-title').textContent = '— amd';
|
||||
term.textContent = 'Running AMD check set...\n';
|
||||
const labels = satLabels();
|
||||
const runNext = idx => {
|
||||
if (idx >= targets.length) return Promise.resolve();
|
||||
const t = targets[idx];
|
||||
term.textContent += '\n[' + (idx + 1) + '/' + targets.length + '] ' + labels[t] + '\n';
|
||||
return enqueueSATTarget(t).then(d => streamSATTask(d.task_id, labels[t], false)).then(() => runNext(idx + 1));
|
||||
};
|
||||
return runNext(0);
|
||||
}
|
||||
function runAllCheckSAT() {
|
||||
const status = document.getElementById('sat-all-status');
|
||||
status.textContent = 'Enqueuing...';
|
||||
const nvidiaIndices = satSelectedGPUIndices();
|
||||
const nvidiaAllTargets = ['nvidia', 'nvidia-interconnect', 'nvidia-bandwidth'];
|
||||
const baseTargets = ['cpu', 'memory', 'storage'];
|
||||
const amdTargets = selectedAMDValidateTargets();
|
||||
const expanded = [];
|
||||
baseTargets.forEach(t => expanded.push({target: t}));
|
||||
if (nvidiaIndices.length) {
|
||||
nvidiaAllTargets.forEach(t => {
|
||||
const btn = document.getElementById('sat-btn-' + t);
|
||||
if (!(btn && btn.disabled)) expanded.push({target: t, overrides: {gpu_indices: nvidiaIndices, display_name: satLabels()[t] || t}});
|
||||
});
|
||||
}
|
||||
amdTargets.forEach(t => expanded.push({target: t}));
|
||||
if (!expanded.length) { status.textContent = 'No tasks selected.'; return; }
|
||||
const total = expanded.length;
|
||||
const runNext = idx => {
|
||||
if (idx >= expanded.length) { status.textContent = 'Completed ' + total + ' task(s).'; return Promise.resolve(); }
|
||||
const item = expanded[idx];
|
||||
status.textContent = 'Running ' + (idx + 1) + '/' + total + '...';
|
||||
return enqueueSATTarget(item.target, item.overrides).then(() => runNext(idx + 1));
|
||||
};
|
||||
runNext(0).catch(err => { status.textContent = 'Error: ' + err.message; });
|
||||
}
|
||||
function disableSATCard(id, reason) {
|
||||
const btn = document.getElementById('sat-btn-' + id);
|
||||
if (!btn) return;
|
||||
btn.disabled = true; btn.title = reason; btn.style.opacity = '0.4';
|
||||
const card = btn.closest('.card');
|
||||
if (card) {
|
||||
let note = card.querySelector('.sat-unavail');
|
||||
if (!note) {
|
||||
note = document.createElement('p');
|
||||
note.className = 'sat-unavail';
|
||||
note.style.cssText = 'color:var(--muted);font-size:12px;margin:0 0 8px';
|
||||
const body = card.querySelector('.card-body');
|
||||
if (body) body.insertBefore(note, body.firstChild);
|
||||
}
|
||||
note.textContent = reason;
|
||||
}
|
||||
}
|
||||
fetch('/api/gpu/presence').then(r => r.json()).then(gp => {
|
||||
if (!gp.nvidia) ['nvidia','nvidia-interconnect','nvidia-bandwidth'].forEach(t => disableSATCard(t, 'No NVIDIA GPU detected'));
|
||||
if (!gp.amd) {
|
||||
disableSATCard('amd', 'No AMD GPU detected');
|
||||
['sat-amd-target','sat-amd-mem-target','sat-amd-bandwidth-target'].forEach(id => {
|
||||
const cb = document.getElementById(id);
|
||||
if (cb) { cb.disabled = true; cb.checked = false; }
|
||||
});
|
||||
}
|
||||
});
|
||||
satGPULoadInit();
|
||||
</script>`
|
||||
}
|
||||
|
||||
func renderSATCard(id, label, runAction, headerActions, body string) string {
|
||||
actions := `<button id="sat-btn-` + id + `" class="btn btn-primary btn-sm" onclick="` + runAction + `">Run</button>`
|
||||
if strings.TrimSpace(headerActions) != "" {
|
||||
|
||||
@@ -24,41 +24,54 @@ func renderPage(page string, opts HandlerOptions) string {
|
||||
body = renderDashboard(opts)
|
||||
case "audit":
|
||||
pageID = "audit"
|
||||
title = "Audit"
|
||||
title = "1. Audit"
|
||||
body = renderAudit()
|
||||
case "validate":
|
||||
pageID = "validate"
|
||||
title = "Validate"
|
||||
body = renderValidate(opts)
|
||||
case "burn":
|
||||
pageID = "burn"
|
||||
title = "Burn"
|
||||
body = renderBurn()
|
||||
case "check":
|
||||
pageID = "check"
|
||||
title = "2. Check"
|
||||
body = renderCheck(opts)
|
||||
case "load":
|
||||
pageID = "load"
|
||||
title = "3. Load"
|
||||
body = renderLoad()
|
||||
case "speed":
|
||||
pageID = "speed"
|
||||
title = "4. Speed"
|
||||
body = renderSpeed(opts)
|
||||
case "endurance":
|
||||
pageID = "endurance"
|
||||
title = "5. Endurance"
|
||||
body = renderEndurance(opts)
|
||||
case "tools":
|
||||
pageID = "tools"
|
||||
title = "6. Tools"
|
||||
body = renderTools()
|
||||
case "settings":
|
||||
pageID = "settings"
|
||||
title = "7. Settings"
|
||||
body = renderSettings(opts)
|
||||
// Legacy routes (redirected at HTTP level in handlePage; these are fallbacks)
|
||||
case "validate", "tests":
|
||||
pageID = "check"
|
||||
title = "2. Check"
|
||||
body = renderCheck(opts)
|
||||
case "burn", "burn-in":
|
||||
pageID = "load"
|
||||
title = "3. Load"
|
||||
body = renderLoad()
|
||||
case "benchmark":
|
||||
pageID = "benchmark"
|
||||
title = "Benchmark"
|
||||
body = renderBenchmark(opts)
|
||||
pageID = "speed"
|
||||
title = "4. Speed"
|
||||
body = renderSpeed(opts)
|
||||
case "tasks":
|
||||
pageID = "tasks"
|
||||
title = "Tasks"
|
||||
body = renderTasks()
|
||||
case "tools":
|
||||
pageID = "tools"
|
||||
title = "Tools"
|
||||
body = renderTools()
|
||||
// Legacy routes kept accessible but not in nav
|
||||
// Hidden pages (not in nav, accessible by direct URL)
|
||||
case "metrics":
|
||||
pageID = "metrics"
|
||||
title = "Live Metrics"
|
||||
body = renderMetrics()
|
||||
case "tests":
|
||||
pageID = "validate"
|
||||
title = "Acceptance Tests"
|
||||
body = renderValidate(opts)
|
||||
case "burn-in":
|
||||
pageID = "burn"
|
||||
title = "Burn-in Tests"
|
||||
body = renderBurn()
|
||||
case "network":
|
||||
pageID = "network"
|
||||
title = "Network"
|
||||
|
||||
@@ -1419,13 +1419,16 @@ func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
||||
if page == "" {
|
||||
page = "dashboard"
|
||||
}
|
||||
// Redirect old routes to new names
|
||||
// Redirect legacy routes to new named pages
|
||||
switch page {
|
||||
case "tests":
|
||||
http.Redirect(w, r, "/validate", http.StatusMovedPermanently)
|
||||
case "validate", "tests":
|
||||
http.Redirect(w, r, "/check", http.StatusMovedPermanently)
|
||||
return
|
||||
case "burn-in":
|
||||
http.Redirect(w, r, "/burn", http.StatusMovedPermanently)
|
||||
case "burn", "burn-in":
|
||||
http.Redirect(w, r, "/load", http.StatusMovedPermanently)
|
||||
return
|
||||
case "benchmark":
|
||||
http.Redirect(w, r, "/speed", http.StatusMovedPermanently)
|
||||
return
|
||||
}
|
||||
body := renderPage(page, h.opts)
|
||||
|
||||
@@ -707,13 +707,13 @@ func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) {
|
||||
func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
for _, needle := range []string{
|
||||
`href="/benchmark"`,
|
||||
`href="/speed"`,
|
||||
`id="benchmark-gpu-list"`,
|
||||
`/api/gpu/nvidia`,
|
||||
`/api/bee-bench/nvidia/perf/run`,
|
||||
@@ -769,7 +769,7 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
|
||||
|
||||
handler := NewHandler(HandlerOptions{ExportDir: exportDir})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/speed", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
@@ -791,54 +791,53 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
|
||||
func TestCheckPageRendersGPUSelectionAndNvidiaCards(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/check", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
for _, needle := range []string{
|
||||
`NVIDIA GPU Targeted Stress`,
|
||||
`nvidia-targeted-stress`,
|
||||
`controlled NVIDIA DCGM load`,
|
||||
`<code>dcgmi diag targeted_stress</code>`,
|
||||
`NVIDIA GPU Selection`,
|
||||
`All NVIDIA validate tasks use only the GPUs selected here.`,
|
||||
`Select All`,
|
||||
`id="sat-gpu-list"`,
|
||||
`Select All`,
|
||||
`id="sat-btn-nvidia"`,
|
||||
`NVIDIA Interconnect (NCCL)`,
|
||||
`NVIDIA Bandwidth (NVBandwidth)`,
|
||||
`Non-destructive`,
|
||||
} {
|
||||
if !strings.Contains(body, needle) {
|
||||
t.Fatalf("validate page missing %q: %s", needle, body)
|
||||
t.Fatalf("check page missing %q: %s", needle, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidatePageRendersNvidiaFabricCardsInValidateMode(t *testing.T) {
|
||||
func TestCheckPageRendersNvidiaFabricCards(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/check", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
body := rec.Body.String()
|
||||
for _, needle := range []string{
|
||||
`NVIDIA Interconnect (NCCL)`,
|
||||
`Validate and Stress:`,
|
||||
`NVIDIA Bandwidth (NVBandwidth)`,
|
||||
`nvbandwidth runs all built-in tests without a time limit`,
|
||||
`nvbandwidth`,
|
||||
`all_reduce_perf`,
|
||||
} {
|
||||
if !strings.Contains(body, needle) {
|
||||
t.Fatalf("validate page missing %q: %s", needle, body)
|
||||
t.Fatalf("check page missing %q: %s", needle, body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
func TestLoadPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
handler := NewHandler(HandlerOptions{})
|
||||
rec := httptest.NewRecorder()
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
|
||||
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/load", nil))
|
||||
if rec.Code != http.StatusOK {
|
||||
t.Fatalf("status=%d", rec.Code)
|
||||
}
|
||||
@@ -847,7 +846,6 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||
`NVIDIA Max Compute Load`,
|
||||
`dcgmproftester`,
|
||||
`NCCL`,
|
||||
`Validate → Stress mode`,
|
||||
`id="burn-gpu-list"`,
|
||||
} {
|
||||
if !strings.Contains(body, needle) {
|
||||
|
||||
Reference in New Issue
Block a user