Files
bee/audit/internal/webui/page_burn.go

384 lines
20 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package webui
// renderBurn returns the HTML fragment for the Burn page: the warning and
// scope banners, the burn profile card, the NVIDIA GPU selection card, the
// "Core Burn Paths" cards, the output terminal, and the page-local CSS and
// JavaScript that drive them.
//
// The fragment is static; all dynamic state is fetched by the embedded script
// from /api/gpu/nvidia and /api/gpu/tools, and tasks are queued through
// POST /api/sat/<target>/run and streamed from /api/tasks/<id>/stream.
//
// Script invariants worth keeping when editing the fragment:
//   - anything interpolated into innerHTML goes through burnEscapeHTML;
//   - every EventSource has an error handler so a dropped stream is reported;
//   - promise chains started from onclick handlers end in a catch that
//     reports the error to the page exactly once.
//
// The raw string must not contain a backtick.
func renderBurn() string {
	return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Burn Profile</div>
<div class="card-body burn-profile-body">
<div class="burn-profile-col">
<div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
<label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
<label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
<label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
</div>
<div class="burn-profile-col burn-profile-action">
<button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
<p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
</div>
<div class="burn-profile-col burn-profile-action">
<button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>
<p>Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.</p>
</div>
</div>
<div class="card-body" style="padding-top:0;display:flex;justify-content:center">
<span id="burn-all-status" style="font-size:12px;color:var(--muted)"></span>
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">NVIDIA GPU Selection</div>
<div class="card-body">
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.</p>
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
<button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectAll()">Select All</button>
<button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectNone()">Clear</button>
</div>
<div id="burn-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
</div>
<p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
<div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
<label class="cb-row">
<input type="radio" name="burn-nvidia-mode" value="sequential" checked>
<span>Sequential — selected GPUs one at a time</span>
</label>
<label class="cb-row" id="burn-parallel-label">
<input type="radio" name="burn-nvidia-mode" value="parallel">
<span>Parallel — all selected GPUs simultaneously</span>
</label>
<label class="cb-row" id="burn-ramp-label">
<input type="radio" name="burn-nvidia-mode" value="ramp-up">
<span>Ramp-up — add one GPU at a time</span>
</label>
</div>
</div>
</div>
<div class="burn-section">Core Burn Paths</div>
<div class="grid2 burn-grid" style="margin-bottom:16px">
<div class="card burn-card">
<div class="card-head card-head-actions"><span>GPU Max Load</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'}])">Run</button></div>
<div class="card-body burn-card-body">
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.</p>
<label class="cb-row"><input type="checkbox" id="burn-nvidia-compute" checked disabled><span>NVIDIA Max Compute Load (dcgmproftester) <span class="cb-note" id="note-nvidia-compute"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-bee" checked disabled><span>GPU Burn (bee-gpu-burn) <span class="cb-note" id="note-bee"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-john" disabled><span>John GPU Stress (john/OpenCL) <span class="cb-note" id="note-john"></span></span></label>
<label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" disabled><span>AMD GPU Stress (rvs gst) <span class="cb-note" id="note-rvs"></span></span></label>
</div>
</div>
<div class="card burn-card">
<div class="card-head card-head-actions"><span>Compute Stress</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'}])">Run</button></div>
<div class="card-body burn-card-body">
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Select which subsystems to stress. Each checked item runs as a separate task.</p>
<label class="cb-row"><input type="checkbox" id="burn-cpu" checked><span>CPU stress (stress-ng)</span></label>
<label class="cb-row"><input type="checkbox" id="burn-mem-stress" checked><span>Memory stress (stress-ng --vm)</span></label>
<label class="cb-row"><input type="checkbox" id="burn-sat-stress"><span>stressapptest (CPU + memory bus)</span></label>
</div>
</div>
</div>
<div id="bi-output" style="display:none;margin-top:16px" class="card">
<div class="card-head">Output <span id="bi-title"></span></div>
<div class="card-body"><div id="bi-terminal" class="terminal"></div></div>
</div>
<style>
.cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
.cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
.cb-row input[type=checkbox]:disabled { opacity:0.4; cursor:not-allowed; }
.cb-row input[type=checkbox]:disabled ~ span { opacity:0.45; cursor:not-allowed; }
.cb-note { font-size:11px; color:var(--muted); font-style:italic; }
.burn-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
.burn-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
.burn-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
.burn-profile-col { min-width:0; }
.burn-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:flex-start; gap:8px; }
.burn-profile-action p { font-size:12px; color:var(--muted); margin:0; width:100%; text-align:left; }
.burn-section { font-size:12px; font-weight:700; letter-spacing:.06em; text-transform:uppercase; color:var(--muted); margin:0 0 10px; padding-top:4px; }
.burn-grid { align-items:stretch; }
.burn-card { height:100%; display:flex; flex-direction:column; }
.burn-card-body { flex:1; display:flex; flex-direction:column; }
.card-head-actions { justify-content:space-between; }
.card-head-buttons { display:flex; align-items:center; gap:8px; margin-left:auto; }
@media(max-width:900px){ .card-head-actions { align-items:flex-start; flex-direction:column; } .card-head-buttons { margin-left:0; } .burn-profile-body { grid-template-columns:1fr; } }
</style>
<script>
let biES = null;
// Escapes a value for interpolation into innerHTML, as text or inside a quoted attribute.
function burnEscapeHTML(value) {
return String(value === null || value === undefined ? '' : value)
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}
// Extracts a readable message from whatever a promise chain rejected with.
function burnErrorMessage(err) {
return err && err.message ? err.message : String(err);
}
function burnTaskIDs(payload) {
if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
if (payload && payload.task_id) return [payload.task_id];
return [];
}
function burnProfile() {
const selected = document.querySelector('input[name="burn-profile"]:checked');
return selected ? selected.value : 'smoke';
}
function burnSelectedGPUIndices() {
return Array.from(document.querySelectorAll('.burn-gpu-checkbox'))
.filter(function(el) { return el.checked && !el.disabled; })
.map(function(el) { return parseInt(el.value, 10); })
.filter(function(v) { return !Number.isNaN(v); })
.sort(function(a, b) { return a - b; });
}
function burnNvidiaMode() {
const el = document.querySelector('input[name="burn-nvidia-mode"]:checked');
return el ? el.value : 'sequential';
}
function burnApplyMultiGPUState(gpuCount) {
var multiValues = ['parallel', 'ramp-up'];
var radios = document.querySelectorAll('input[name="burn-nvidia-mode"]');
radios.forEach(function(el) {
var isMulti = multiValues.indexOf(el.value) >= 0;
var label = el.closest('label');
if (gpuCount < 2 && isMulti) {
el.disabled = true;
if (el.checked) {
var seq = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
if (seq) seq.checked = true;
}
if (label) label.style.opacity = '0.4';
} else {
el.disabled = false;
if (label) label.style.opacity = '';
}
});
}
function burnUpdateSelectionNote() {
const note = document.getElementById('burn-selection-note');
const selected = burnSelectedGPUIndices();
if (!selected.length) {
note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA burn recipes.';
return;
}
note.textContent = 'Selected NVIDIA GPUs: ' + selected.join(', ') + '. Official and custom NVIDIA tasks will use only these GPUs.';
}
function burnRenderGPUList(gpus) {
const root = document.getElementById('burn-gpu-list');
if (!gpus || !gpus.length) {
root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
burnUpdateSelectionNote();
return;
}
// Every API-supplied field is escaped: the row markup is assigned through innerHTML.
root.innerHTML = gpus.map(function(gpu) {
const index = burnEscapeHTML(gpu.index);
const mem = gpu.memory_mb > 0 ? ' · ' + burnEscapeHTML(gpu.memory_mb) + ' MiB' : '';
return '<label class="burn-gpu-row">'
+ '<input class="burn-gpu-checkbox" type="checkbox" value="' + index + '" checked onchange="burnUpdateSelectionNote()">'
+ '<span><strong>GPU ' + index + '</strong> — ' + burnEscapeHTML(gpu.name) + mem + '</span>'
+ '</label>';
}).join('');
burnApplyMultiGPUState(gpus.length);
burnUpdateSelectionNote();
}
function burnSelectAll() {
document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = true; });
burnUpdateSelectionNote();
}
function burnSelectNone() {
document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = false; });
burnUpdateSelectionNote();
}
function burnLoadGPUs() {
fetch('/api/gpu/nvidia').then(function(r) {
return r.json().then(function(body) {
if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
return body;
});
}).then(function(gpus) {
burnRenderGPUList(gpus);
}).catch(function(err) {
// err.message may carry the server-provided error text, so it is escaped before reaching innerHTML.
document.getElementById('burn-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + burnEscapeHTML(err.message) + '</p>';
burnUpdateSelectionNote();
});
}
function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
const body = Object.assign({ profile: burnProfile(), display_name: label }, extra || {});
if (useSelectedNvidia) {
const selected = burnSelectedGPUIndices();
if (!selected.length) {
return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
}
body.gpu_indices = selected;
const bMode = burnNvidiaMode();
if (bMode === 'ramp-up' && selected.length > 1) {
body.stagger_gpu_start = true;
} else if (bMode === 'parallel' && selected.length > 1) {
body.parallel_gpus = true;
}
}
return fetch('/api/sat/' + target + '/run', {
method: 'POST',
headers: {'Content-Type':'application/json'},
body: JSON.stringify(body)
}).then(function(r) {
return r.json().then(function(payload) {
if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
return payload;
});
});
}
function streamTask(taskId, label) {
if (biES) { biES.close(); biES = null; }
document.getElementById('bi-output').style.display = 'block';
document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
const term = document.getElementById('bi-terminal');
term.textContent = 'Task ' + taskId + ' queued. Streaming...\n';
// Handlers close over their own stream so a late event never touches a newer one stored in biES.
const es = new EventSource('/api/tasks/' + taskId + '/stream');
biES = es;
es.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
es.addEventListener('done', function(e) {
es.close();
if (biES === es) biES = null;
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
term.scrollTop = term.scrollHeight;
});
// Report a dropped stream instead of leaving the terminal silent, matching streamBurnTaskSet.
es.onerror = function() {
es.close();
if (biES === es) biES = null;
term.textContent += '\nERROR: stream disconnected.\n';
term.scrollTop = term.scrollHeight;
};
}
function streamBurnTask(taskId, label, resetTerminal) {
return streamBurnTaskSet([taskId], label, resetTerminal);
}
function streamBurnTaskSet(taskIds, label, resetTerminal) {
if (biES) { biES.close(); biES = null; }
document.getElementById('bi-output').style.display = 'block';
document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
const term = document.getElementById('bi-terminal');
if (resetTerminal) {
term.textContent = '';
}
if (!Array.isArray(taskIds) || !taskIds.length) {
term.textContent += 'ERROR: no tasks queued.\n';
return Promise.resolve({ok:false, error:'no tasks queued'});
}
const streamNext = function(idx, failures) {
if (idx >= taskIds.length) {
return Promise.resolve({ok: failures === 0, error: failures ? (failures + ' task(s) failed') : ''});
}
const taskId = taskIds[idx];
term.textContent += '[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming...\n';
return new Promise(function(resolve) {
biES = new EventSource('/api/tasks/' + taskId + '/stream');
biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
biES.addEventListener('done', function(e) {
biES.close();
biES = null;
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
term.scrollTop = term.scrollHeight;
resolve(failures + (e.data ? 1 : 0));
});
biES.onerror = function() {
if (biES) {
biES.close();
biES = null;
}
term.textContent += '\nERROR: stream disconnected.\n';
term.scrollTop = term.scrollHeight;
resolve(failures + 1);
};
}).then(function(nextFailures) {
return streamNext(idx + 1, nextFailures);
});
};
return streamNext(0, 0);
}
function runBurnTaskSet(tasks, statusElId) {
const enabled = tasks.filter(function(t) {
const el = document.getElementById(t.id);
return el && el.checked && !el.disabled;
});
const status = statusElId ? document.getElementById(statusElId) : null;
if (status) status.textContent = '';
if (!enabled.length) {
if (status) status.textContent = 'No tasks selected.';
return Promise.resolve();
}
const term = document.getElementById('bi-terminal');
document.getElementById('bi-output').style.display = 'block';
document.getElementById('bi-title').textContent = '— Burn one by one [' + burnProfile() + ']';
term.textContent = '';
// Counts entries whose streamed tasks reported a failure, so the summary does not claim success.
let failedEntries = 0;
const runNext = function(idx) {
if (idx >= enabled.length) {
if (status) {
status.textContent = failedEntries
? 'Completed with errors: ' + failedEntries + ' of ' + enabled.length + ' task(s) failed.'
: 'Completed ' + enabled.length + ' task(s).';
}
return Promise.resolve();
}
const t = enabled[idx];
term.textContent += '\n[' + (idx + 1) + '/' + enabled.length + '] ' + t.label + '\n';
if (status) status.textContent = 'Running ' + (idx + 1) + '/' + enabled.length + '...';
return enqueueBurnTask(t.target, t.label, t.extra, !!t.nvidia)
.then(function(d) {
return streamBurnTaskSet(burnTaskIDs(d), t.label, false);
})
.then(function(result) {
if (!result || !result.ok) failedEntries++;
return runNext(idx + 1);
});
};
// A single catch on the outermost chain: a per-step catch that re-rejects would be hit once per
// enclosing step and print the same error repeatedly. An enqueue error still stops the run.
return runNext(0).catch(function(err) {
const message = burnErrorMessage(err);
if (status) status.textContent = 'Error: ' + message;
document.getElementById('bi-output').style.display = 'block';
term.textContent += 'ERROR: ' + message + '\n';
});
}
function runPlatformStress() {
const status = document.getElementById('burn-all-status');
const comps = [];
const computeIDs = ['burn-cpu', 'burn-mem-stress', 'burn-sat-stress'];
const gpuIDs = ['burn-nvidia-compute', 'burn-gpu-bee', 'burn-gpu-john', 'burn-gpu-rvs'];
const hasChecked = function(ids) {
return ids.some(function(id) {
const el = document.getElementById(id);
return el && el.checked && !el.disabled;
});
};
if (hasChecked(computeIDs)) comps.push('cpu');
if (hasChecked(gpuIDs)) comps.push('gpu');
if (status) status.textContent = '';
if (!comps.length) {
if (status) status.textContent = 'Select at least one test in GPU Max Load or Compute Stress.';
return;
}
enqueueBurnTask('platform-stress', 'Platform Thermal Cycling', {platform_components: comps}, false).then(function(d) {
// Accept both response shapes (task_id and task_ids), like the other run paths.
const ids = burnTaskIDs(d);
if (!ids.length) throw new Error('no tasks queued');
streamTask(ids[0], 'Platform Thermal Cycling');
}).catch(function(err) {
if (status) status.textContent = 'Error: ' + burnErrorMessage(err);
});
}
function runAllBurnTasks() {
const status = document.getElementById('burn-all-status');
const all = [
{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},
{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},
{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},
{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'},
{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},
{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},
{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'},
];
if (status) status.textContent = 'Enqueuing...';
runBurnTaskSet(all, 'burn-all-status');
}
// Maps tool ids reported by /api/gpu/tools to the checkbox they gate and the note shown when unavailable.
const burnToolMap = {
'nvidia-compute': {cb:'burn-nvidia-compute', note:'note-nvidia-compute', reason:'dcgmproftester not available or NVIDIA driver not running'},
'bee-gpu-burn': {cb:'burn-gpu-bee', note:'note-bee', reason:'bee-gpu-burn not available or NVIDIA driver not running'},
'john': {cb:'burn-gpu-john', note:'note-john', reason:'bee-john-gpu-stress not available or NVIDIA driver not running'},
'rvs': {cb:'burn-gpu-rvs', note:'note-rvs', reason:'AMD driver not running'},
};
fetch('/api/gpu/tools').then(function(r) {
if (!r.ok) throw new Error('HTTP ' + r.status);
return r.json();
}).then(function(tools) {
if (!Array.isArray(tools)) throw new Error('unexpected response');
tools.forEach(function(t) {
const spec = burnToolMap[t.id];
if (!spec) return;
const cb = document.getElementById(spec.cb);
const note = document.getElementById(spec.note);
if (!cb) return;
if (t.available) {
cb.disabled = false;
} else if (note) {
note.textContent = '— ' + spec.reason;
}
});
}).catch(function(err) {
// The GPU checkboxes start disabled; if the probe fails, say why instead of leaving them greyed out silently.
Object.keys(burnToolMap).forEach(function(key) {
const note = document.getElementById(burnToolMap[key].note);
if (note && !note.textContent) note.textContent = '— availability check failed: ' + burnErrorMessage(err);
});
});
burnLoadGPUs();
</script>`
}