384 lines
20 KiB
Go
384 lines
20 KiB
Go
package webui
|
||
|
||
func renderBurn() string {
|
||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
|
||
<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
|
||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||
|
||
<div class="card" style="margin-bottom:16px">
|
||
<div class="card-head">Burn Profile</div>
|
||
<div class="card-body burn-profile-body">
|
||
<div class="burn-profile-col">
|
||
<div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
|
||
<label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
|
||
<label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
|
||
<label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
|
||
</div>
|
||
<div class="burn-profile-col burn-profile-action">
|
||
<button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
|
||
<p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
|
||
</div>
|
||
<div class="burn-profile-col burn-profile-action">
|
||
<button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>
|
||
<p>Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.</p>
|
||
</div>
|
||
</div>
|
||
<div class="card-body" style="padding-top:0;display:flex;justify-content:center">
|
||
<span id="burn-all-status" style="font-size:12px;color:var(--muted)"></span>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="card" style="margin-bottom:16px">
|
||
<div class="card-head">NVIDIA GPU Selection</div>
|
||
<div class="card-body">
|
||
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.</p>
|
||
<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
|
||
<button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectAll()">Select All</button>
|
||
<button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectNone()">Clear</button>
|
||
</div>
|
||
<div id="burn-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
|
||
<p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
|
||
</div>
|
||
<p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
|
||
<div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
|
||
<label class="cb-row">
|
||
<input type="radio" name="burn-nvidia-mode" value="sequential" checked>
|
||
<span>Sequential — selected GPUs one at a time</span>
|
||
</label>
|
||
<label class="cb-row" id="burn-parallel-label">
|
||
<input type="radio" name="burn-nvidia-mode" value="parallel">
|
||
<span>Parallel — all selected GPUs simultaneously</span>
|
||
</label>
|
||
<label class="cb-row" id="burn-ramp-label">
|
||
<input type="radio" name="burn-nvidia-mode" value="ramp-up">
|
||
<span>Ramp-up — add one GPU at a time</span>
|
||
</label>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="burn-section">Core Burn Paths</div>
|
||
<div class="grid2 burn-grid" style="margin-bottom:16px">
|
||
<div class="card burn-card">
|
||
<div class="card-head card-head-actions"><span>GPU Max Load</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'}])">Run</button></div>
|
||
<div class="card-body burn-card-body">
|
||
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.</p>
|
||
<label class="cb-row"><input type="checkbox" id="burn-nvidia-compute" checked disabled><span>NVIDIA Max Compute Load (dcgmproftester) <span class="cb-note" id="note-nvidia-compute"></span></span></label>
|
||
<label class="cb-row"><input type="checkbox" id="burn-gpu-bee" checked disabled><span>GPU Burn (bee-gpu-burn) <span class="cb-note" id="note-bee"></span></span></label>
|
||
<label class="cb-row"><input type="checkbox" id="burn-gpu-john" disabled><span>John GPU Stress (john/OpenCL) <span class="cb-note" id="note-john"></span></span></label>
|
||
<label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" disabled><span>AMD GPU Stress (rvs gst) <span class="cb-note" id="note-rvs"></span></span></label>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="card burn-card">
|
||
<div class="card-head card-head-actions"><span>Compute Stress</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'}])">Run</button></div>
|
||
<div class="card-body burn-card-body">
|
||
<p style="font-size:12px;color:var(--muted);margin:0 0 10px">Select which subsystems to stress. Each checked item runs as a separate task.</p>
|
||
<label class="cb-row"><input type="checkbox" id="burn-cpu" checked><span>CPU stress (stress-ng)</span></label>
|
||
<label class="cb-row"><input type="checkbox" id="burn-mem-stress" checked><span>Memory stress (stress-ng --vm)</span></label>
|
||
<label class="cb-row"><input type="checkbox" id="burn-sat-stress"><span>stressapptest (CPU + memory bus)</span></label>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div id="bi-output" style="display:none;margin-top:16px" class="card">
|
||
<div class="card-head">Output <span id="bi-title"></span></div>
|
||
<div class="card-body"><div id="bi-terminal" class="terminal"></div></div>
|
||
</div>
|
||
|
||
<style>
|
||
.cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
|
||
.cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||
.cb-row input[type=checkbox]:disabled { opacity:0.4; cursor:not-allowed; }
|
||
.cb-row input[type=checkbox]:disabled ~ span { opacity:0.45; cursor:not-allowed; }
|
||
.cb-note { font-size:11px; color:var(--muted); font-style:italic; }
|
||
.burn-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
|
||
.burn-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
|
||
.burn-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
|
||
.burn-profile-col { min-width:0; }
|
||
.burn-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:flex-start; gap:8px; }
|
||
.burn-profile-action p { font-size:12px; color:var(--muted); margin:0; width:100%; text-align:left; }
|
||
.burn-section { font-size:12px; font-weight:700; letter-spacing:.06em; text-transform:uppercase; color:var(--muted); margin:0 0 10px; padding-top:4px; }
|
||
.burn-grid { align-items:stretch; }
|
||
.burn-card { height:100%; display:flex; flex-direction:column; }
|
||
.burn-card-body { flex:1; display:flex; flex-direction:column; }
|
||
.card-head-actions { justify-content:space-between; }
|
||
.card-head-buttons { display:flex; align-items:center; gap:8px; margin-left:auto; }
|
||
@media(max-width:900px){ .card-head-actions { align-items:flex-start; flex-direction:column; } .card-head-buttons { margin-left:0; } .burn-profile-body { grid-template-columns:1fr; } }
|
||
</style>
|
||
|
||
<script>
|
||
// EventSource currently streaming task output into the terminal panel, or null
// when nothing is streaming. Shared by streamTask and streamBurnTaskSet, which
// close any previous stream before opening a new one.
let biES = null;
|
||
/**
 * Extract the queued task IDs from an enqueue response.
 *
 * A non-empty task_ids array wins; otherwise a truthy task_id is wrapped in a
 * one-element array; otherwise the result is an empty array.
 *
 * @param {Object|null|undefined} payload - Parsed response body.
 * @returns {Array} Task IDs (the original task_ids array when it is used).
 */
function burnTaskIDs(payload) {
  if (!payload) {
    return [];
  }
  const many = payload.task_ids;
  if (Array.isArray(many) && many.length) {
    return many;
  }
  return payload.task_id ? [payload.task_id] : [];
}
|
||
/**
 * Read the burn preset chosen in the "Burn Profile" radio group.
 *
 * @returns {string} Value of the checked radio, or 'smoke' when none is checked.
 */
function burnProfile() {
  const checked = document.querySelector('input[name="burn-profile"]:checked');
  if (!checked) {
    return 'smoke';
  }
  return checked.value;
}
|
||
/**
 * Collect the indices of the GPU checkboxes that are checked and enabled.
 *
 * Checkbox values that do not parse as a base-10 integer are skipped.
 *
 * @returns {number[]} Selected GPU indices in ascending numeric order.
 */
function burnSelectedGPUIndices() {
  const indices = [];
  for (const box of document.querySelectorAll('.burn-gpu-checkbox')) {
    if (!box.checked || box.disabled) {
      continue;
    }
    const idx = parseInt(box.value, 10);
    if (!Number.isNaN(idx)) {
      indices.push(idx);
    }
  }
  indices.sort((a, b) => a - b);
  return indices;
}
|
||
/**
 * Read the NVIDIA scheduling mode chosen in the GPU selection card.
 *
 * @returns {string} Value of the checked radio, or 'sequential' when none is checked.
 */
function burnNvidiaMode() {
  const checked = document.querySelector('input[name="burn-nvidia-mode"]:checked');
  if (!checked) {
    return 'sequential';
  }
  return checked.value;
}
|
||
/**
 * Enable or disable the scheduling modes that need more than one GPU.
 *
 * With fewer than two GPUs the 'parallel' and 'ramp-up' radios are disabled,
 * their labels dimmed, and a checked one falls back to 'sequential'. In every
 * other case the radio is enabled and its label opacity is reset.
 *
 * @param {number} gpuCount - Number of GPUs the page knows about.
 */
function burnApplyMultiGPUState(gpuCount) {
  const needsSeveralGPUs = new Set(['parallel', 'ramp-up']);
  for (const radio of document.querySelectorAll('input[name="burn-nvidia-mode"]')) {
    const locked = gpuCount < 2 && needsSeveralGPUs.has(radio.value);
    radio.disabled = locked;
    if (locked && radio.checked) {
      // The chosen mode is no longer available: fall back to sequential.
      const fallback = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
      if (fallback) {
        fallback.checked = true;
      }
    }
    const wrapper = radio.closest('label');
    if (wrapper) {
      wrapper.style.opacity = locked ? '0.4' : '';
    }
  }
}
|
||
/**
 * Refresh the hint under the GPU list so it reflects the current selection:
 * either a prompt to select a GPU or the list of selected GPU indices.
 */
function burnUpdateSelectionNote() {
  const noteEl = document.getElementById('burn-selection-note');
  const indices = burnSelectedGPUIndices();
  noteEl.textContent = indices.length
    ? 'Selected NVIDIA GPUs: ' + indices.join(', ') + '. Official and custom NVIDIA tasks will use only these GPUs.'
    : 'Select at least one NVIDIA GPU to enable NVIDIA burn recipes.';
}
|
||
/**
 * Render one pre-checked checkbox row per NVIDIA GPU into #burn-gpu-list.
 *
 * Rows are built as DOM nodes and filled through textContent, so a GPU name or
 * index containing markup is displayed literally instead of being parsed as
 * HTML. Anything that is not a non-empty array is treated as "no GPUs"; in
 * that case the multi-GPU modes are disabled as well, since they need at least
 * two GPUs.
 *
 * @param {Array<{index: number, name: string, memory_mb: number}>} gpus - GPU
 *   descriptors as returned by /api/gpu/nvidia.
 */
function burnRenderGPUList(gpus) {
  const root = document.getElementById('burn-gpu-list');
  const list = Array.isArray(gpus) ? gpus : [];
  if (!list.length) {
    // Static markup only, no untrusted data involved.
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    burnApplyMultiGPUState(0);
    burnUpdateSelectionNote();
    return;
  }
  root.innerHTML = '';
  list.forEach(function(gpu) {
    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';

    const box = document.createElement('input');
    box.type = 'checkbox';
    box.className = 'burn-gpu-checkbox';
    box.value = String(gpu.index);
    box.checked = true;
    box.addEventListener('change', function() { burnUpdateSelectionNote(); });

    const title = document.createElement('strong');
    title.textContent = 'GPU ' + gpu.index;

    const text = document.createElement('span');
    text.appendChild(title);
    text.appendChild(document.createTextNode(' — ' + gpu.name + mem));

    const row = document.createElement('label');
    row.className = 'burn-gpu-row';
    row.appendChild(box);
    row.appendChild(text);
    root.appendChild(row);
  });
  burnApplyMultiGPUState(list.length);
  burnUpdateSelectionNote();
}
|
||
/**
 * Check every GPU checkbox in the list, then refresh the selection hint.
 */
function burnSelectAll() {
  for (const box of document.querySelectorAll('.burn-gpu-checkbox')) {
    box.checked = true;
  }
  burnUpdateSelectionNote();
}
|
||
/**
 * Uncheck every GPU checkbox in the list, then refresh the selection hint.
 */
function burnSelectNone() {
  for (const box of document.querySelectorAll('.burn-gpu-checkbox')) {
    box.checked = false;
  }
  burnUpdateSelectionNote();
}
|
||
/**
 * Fetch the NVIDIA GPU inventory from /api/gpu/nvidia and render it.
 *
 * On failure an error line is shown in #burn-gpu-list. The message is set via
 * textContent so that server-supplied error text is never parsed as HTML. A
 * response body that is not JSON is reported as the HTTP status (or as an
 * invalid response when the status was OK) rather than as a parser error.
 *
 * @returns {Promise<void>} Settles once the list or the error has been rendered.
 */
function burnLoadGPUs() {
  return fetch('/api/gpu/nvidia').then(function(r) {
    return r.json().then(function(body) {
      if (!r.ok) throw new Error((body && body.error) || ('HTTP ' + r.status));
      return body;
    }, function() {
      // The body was not JSON (proxy error page, empty reply, ...).
      throw new Error(r.ok ? 'Invalid JSON in server response.' : 'HTTP ' + r.status);
    });
  }).then(function(gpus) {
    burnRenderGPUList(gpus);
  }).catch(function(err) {
    const root = document.getElementById('burn-gpu-list');
    const msg = document.createElement('p');
    msg.style.cssText = 'color:var(--crit-fg);font-size:13px';
    msg.textContent = 'Error: ' + err.message;
    root.innerHTML = '';
    root.appendChild(msg);
    burnUpdateSelectionNote();
  });
}
|
||
/**
 * Queue one burn task by POSTing to /api/sat/<target>/run.
 *
 * The request body always carries the selected profile and a display name,
 * merged with any extra fields. For NVIDIA tasks it also carries the selected
 * GPU indices and, when more than one GPU is selected, the flag matching the
 * chosen scheduling mode (stagger_gpu_start for ramp-up, parallel_gpus for
 * parallel).
 *
 * @param {string} target - Task target used in the URL path.
 * @param {string} label - Human-readable task name (sent as display_name).
 * @param {Object} [extra] - Additional request fields; may override the defaults.
 * @param {boolean} [useSelectedNvidia] - Restrict the task to the GPUs selected on the page.
 * @returns {Promise<Object>} Parsed response payload. Rejects with an Error when
 *   no GPU is selected, when the server answers with a non-OK status (message is
 *   the server's error field or 'HTTP <status>'), or when the body is not JSON.
 */
function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
  const body = Object.assign({ profile: burnProfile(), display_name: label }, extra || {});
  if (useSelectedNvidia) {
    const selected = burnSelectedGPUIndices();
    if (!selected.length) {
      return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
    }
    body.gpu_indices = selected;
    const bMode = burnNvidiaMode();
    if (bMode === 'ramp-up' && selected.length > 1) {
      body.stagger_gpu_start = true;
    } else if (bMode === 'parallel' && selected.length > 1) {
      body.parallel_gpus = true;
    }
  }
  return fetch('/api/sat/' + target + '/run', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error((payload && payload.error) || ('HTTP ' + r.status));
      return payload;
    }, function() {
      // A non-JSON body (proxy error page, empty reply) must not hide the status.
      throw new Error(r.ok ? 'Invalid JSON in server response.' : 'HTTP ' + r.status);
    });
  });
}
|
||
/**
 * Show the output panel and stream a single task's log into the terminal.
 *
 * Any stream that is already open is closed first. The handlers close over
 * their own EventSource, so they never act on a stream opened later. A
 * connection error closes the stream and is reported in the terminal, which
 * matches streamBurnTaskSet and stops the browser from reconnecting forever.
 *
 * @param {string} taskId - ID of the queued task.
 * @param {string} label - Title shown next to the output header.
 */
function streamTask(taskId, label) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  term.textContent = 'Task ' + taskId + ' queued. Streaming...\n';

  const es = new EventSource('/api/tasks/' + taskId + '/stream');
  biES = es;
  const append = function(text) {
    term.textContent += text;
    term.scrollTop = term.scrollHeight;
  };
  const finish = function(text) {
    es.close();
    // Only clear the shared handle if it still refers to this stream.
    if (biES === es) biES = null;
    append(text);
  };
  es.onmessage = function(e) { append(e.data + '\n'); };
  es.addEventListener('done', function(e) {
    finish((e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n');
  });
  es.onerror = function() {
    finish('\nERROR: stream disconnected.\n');
  };
}
|
||
/**
 * Stream a single task by delegating to streamBurnTaskSet with a one-element list.
 *
 * @param {string} taskId - ID of the queued task.
 * @param {string} label - Title shown next to the output header.
 * @param {boolean} resetTerminal - Clear the terminal before streaming.
 * @returns {Promise<{ok: boolean, error: string}>} Result of streamBurnTaskSet.
 */
function streamBurnTask(taskId, label, resetTerminal) {
  const singleTaskSet = [taskId];
  return streamBurnTaskSet(singleTaskSet, label, resetTerminal);
}
|
||
/**
 * Stream the logs of several tasks into the terminal, one task after another.
 *
 * Any open stream is closed first. Each task gets a "[i/n] Task ... queued"
 * header; the next task starts only after the current one reports 'done' or
 * its connection fails. A task counts as failed when its 'done' event carries
 * data or when the stream disconnects.
 *
 * @param {Array} taskIds - IDs of the queued tasks, in streaming order.
 * @param {string} label - Title shown next to the output header.
 * @param {boolean} resetTerminal - Clear the terminal before streaming.
 * @returns {Promise<{ok: boolean, error: string}>} ok is true when no task failed.
 */
function streamBurnTaskSet(taskIds, label, resetTerminal) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  if (resetTerminal) {
    term.textContent = '';
  }
  if (!Array.isArray(taskIds) || !taskIds.length) {
    term.textContent += 'ERROR: no tasks queued.\n';
    return Promise.resolve({ok:false, error:'no tasks queued'});
  }

  // Append text to the terminal and keep it scrolled to the bottom.
  const append = (text) => {
    term.textContent += text;
    term.scrollTop = term.scrollHeight;
  };

  // Stream one task; settles with 1 when it failed and 0 otherwise.
  const watch = (taskId) => new Promise((settle) => {
    biES = new EventSource('/api/tasks/' + taskId + '/stream');
    biES.onmessage = (evt) => { append(evt.data + '\n'); };
    biES.addEventListener('done', (evt) => {
      biES.close();
      biES = null;
      append((evt.data ? '\nERROR: ' + evt.data : '\nCompleted.') + '\n');
      settle(evt.data ? 1 : 0);
    });
    biES.onerror = () => {
      if (biES) {
        biES.close();
        biES = null;
      }
      append('\nERROR: stream disconnected.\n');
      settle(1);
    };
  });

  // Walk the task list, carrying the running failure count along.
  const step = (position, failed) => {
    if (position >= taskIds.length) {
      return Promise.resolve({ok: failed === 0, error: failed ? (failed + ' task(s) failed') : ''});
    }
    const taskId = taskIds[position];
    term.textContent += '[' + (position + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming...\n';
    return watch(taskId).then((delta) => step(position + 1, failed + delta));
  };

  return step(0, 0);
}
|
||
/**
 * Run the checked and enabled tasks of a task set one after another.
 *
 * Each task is enqueued, then its log is streamed to the terminal before the
 * next task starts. A task whose stream reports a failure is counted and the
 * run continues; an enqueue error aborts the remaining tasks.
 *
 * The error handler is attached once to the whole chain, so a failure is
 * written to the terminal and status line exactly once, and the returned
 * promise never rejects (callers invoke this from click handlers and do not
 * handle rejections).
 *
 * @param {Array<{id: string, target: string, label: string, extra: Object, nvidia: boolean}>} tasks -
 *   Candidate tasks; id is the checkbox element ID that enables the task.
 * @param {string} [statusElId] - ID of an element that receives progress text.
 * @returns {Promise<void>} Settles when the run has finished or was aborted.
 */
function runBurnTaskSet(tasks, statusElId) {
  const enabled = tasks.filter(function(t) {
    const el = document.getElementById(t.id);
    return el && el.checked && !el.disabled;
  });
  const status = statusElId ? document.getElementById(statusElId) : null;
  const setStatus = function(text) {
    if (status) status.textContent = text;
  };
  setStatus('');
  if (!enabled.length) {
    setStatus('No tasks selected.');
    return Promise.resolve();
  }
  const term = document.getElementById('bi-terminal');
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— Burn one by one [' + burnProfile() + ']';
  term.textContent = '';

  let failedTasks = 0;
  const runNext = function(idx) {
    if (idx >= enabled.length) {
      setStatus('Completed ' + enabled.length + ' task(s)' + (failedTasks ? ', ' + failedTasks + ' failed.' : '.'));
      return Promise.resolve();
    }
    const t = enabled[idx];
    term.textContent += '\n[' + (idx + 1) + '/' + enabled.length + '] ' + t.label + '\n';
    setStatus('Running ' + (idx + 1) + '/' + enabled.length + '...');
    return enqueueBurnTask(t.target, t.label, t.extra, !!t.nvidia)
      .then(function(d) {
        return streamBurnTaskSet(burnTaskIDs(d), t.label, false);
      })
      .then(function(result) {
        // streamBurnTaskSet resolves with {ok:false} when a task or stream failed.
        if (result && result.ok === false) failedTasks += 1;
        return runNext(idx + 1);
      });
  };

  return runNext(0).catch(function(err) {
    setStatus('Error: ' + err.message);
    document.getElementById('bi-output').style.display = 'block';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
}
|
||
/**
 * Queue the "Thermal Cycling" platform stress task and stream its output.
 *
 * The platform_components request field gets 'cpu' when any Compute Stress
 * checkbox is checked and enabled, and 'gpu' when any GPU Max Load checkbox is.
 * When neither group has a selection, a hint is shown and nothing is queued.
 * Enqueue errors are shown in the status line instead of being dropped, and
 * the task ID is read through burnTaskIDs so both response shapes are accepted.
 *
 * @returns {Promise<void>|undefined} Promise for the enqueue/stream start, or
 *   undefined when nothing was selected.
 */
function runPlatformStress() {
  const computeIDs = ['burn-cpu', 'burn-mem-stress', 'burn-sat-stress'];
  const gpuIDs = ['burn-nvidia-compute', 'burn-gpu-bee', 'burn-gpu-john', 'burn-gpu-rvs'];
  const status = document.getElementById('burn-all-status');
  const setStatus = function(text) {
    if (status) status.textContent = text;
  };
  const isChecked = function(id) {
    const el = document.getElementById(id);
    return Boolean(el && el.checked && !el.disabled);
  };

  const comps = [];
  if (computeIDs.some(isChecked)) comps.push('cpu');
  if (gpuIDs.some(isChecked)) comps.push('gpu');
  if (!comps.length) {
    setStatus('Select at least one test in GPU Max Load or Compute Stress.');
    return;
  }

  // Drop any message left over from an earlier attempt.
  setStatus('');
  return enqueueBurnTask('platform-stress', 'Platform Thermal Cycling', {platform_components: comps}, false)
    .then(function(d) {
      const ids = burnTaskIDs(d);
      if (!ids.length) throw new Error('no task was queued');
      streamTask(ids[0], 'Platform Thermal Cycling');
    })
    .catch(function(err) {
      setStatus('Error: ' + err.message);
    });
}
|
||
/**
 * Handler for "Burn one by one": hand every known burn task (GPU first, then
 * CPU/memory) to runBurnTaskSet, with progress reported in #burn-all-status.
 */
function runAllBurnTasks() {
  const statusEl = document.getElementById('burn-all-status');
  const gpuTasks = [
    {id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},
    {id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},
    {id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},
    {id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'},
  ];
  const computeTasks = [
    {id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},
    {id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},
    {id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'},
  ];
  statusEl.textContent = 'Enqueuing...';
  runBurnTaskSet(gpuTasks.concat(computeTasks), 'burn-all-status');
}
|
||
// Page start-up, part 1: ask the server which GPU stress tools are usable and
// unlock the matching checkboxes (they are rendered disabled). Tools reported
// as unavailable get an explanatory note. If the probe itself fails, the
// checkboxes stay disabled and every note says so, instead of the failure
// being silently ignored.
(function() {
  const specs = {
    'nvidia-compute': {cb:'burn-nvidia-compute', note:'note-nvidia-compute', reason:'dcgmproftester not available or NVIDIA driver not running'},
    'bee-gpu-burn': {cb:'burn-gpu-bee', note:'note-bee', reason:'bee-gpu-burn not available or NVIDIA driver not running'},
    'john': {cb:'burn-gpu-john', note:'note-john', reason:'bee-john-gpu-stress not available or NVIDIA driver not running'},
    'rvs': {cb:'burn-gpu-rvs', note:'note-rvs', reason:'AMD driver not running'},
  };
  fetch('/api/gpu/tools').then(function(r) {
    if (!r.ok) throw new Error('HTTP ' + r.status);
    return r.json();
  }).then(function(tools) {
    if (!Array.isArray(tools)) throw new Error('unexpected response');
    tools.forEach(function(t) {
      // Own-property check so IDs such as "constructor" never match.
      if (!t || !Object.prototype.hasOwnProperty.call(specs, t.id)) return;
      const spec = specs[t.id];
      const cb = document.getElementById(spec.cb);
      const note = document.getElementById(spec.note);
      if (!cb) return;
      if (t.available) {
        cb.disabled = false;
      } else if (note) {
        note.textContent = '— ' + spec.reason;
      }
    });
  }).catch(function(err) {
    console.warn('GPU tool availability check failed:', err);
    Object.keys(specs).forEach(function(id) {
      const note = document.getElementById(specs[id].note);
      if (note) note.textContent = '— availability check failed';
    });
  });
})();

// Page start-up, part 2: populate the NVIDIA GPU selection list.
burnLoadGPUs();
|
||
</script>`
|
||
}
|