feat(metrics): single chart engine + full-width stacked layout

- One engine: go-analyze/charts (grafana theme) for all live metrics
- Server chart: CPU temp, CPU load%, mem load%, power W, fan RPMs
- GPU charts: temp, load%, mem%, power W — one card per GPU, added dynamically
- Charts 1400x280px SVG, rendered at width:100% in single-column layout
- Add CPU load (from /proc/stat) and mem load (from /proc/meminfo) to LiveMetricSample
- Add GPU mem utilization to GPUMetricRow (nvidia-smi utilization.memory)
- Document charting architecture in bible-local/architecture/charting.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 23:26:13 +03:00
parent e7a7ff54b9
commit ec0b7f7ff9
6 changed files with 336 additions and 78 deletions

View File

@@ -242,28 +242,27 @@ func renderHealthCard(opts HandlerOptions) string {
// ── Metrics ───────────────────────────────────────────────────────────────────
func renderMetrics() string {
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live server metrics, charts updated every 2 seconds.</p>
<div class="grid2">
<div class="card">
<div class="card-head">System</div>
<div class="card-body">
<img id="chart-cpu-temp" src="/api/metrics/chart/cpu-temp.svg" style="width:100%;border-radius:6px" alt="CPU Temp">
<img id="chart-power" src="/api/metrics/chart/power.svg" style="width:100%;border-radius:6px;margin-top:8px" alt="Power">
<div id="sys-table" style="margin-top:8px"></div>
</div>
</div>
<div class="card">
<div class="card-head">GPU</div>
<div class="card-body">
<div id="gpu-table"><p style="color:#64748b;font-size:12px">Waiting for data...</p></div>
</div>
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds. Charts use go-analyze/charts (grafana theme).</p>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Server</div>
<div class="card-body" style="padding:8px">
<img id="chart-server" src="/api/metrics/chart/server.svg" style="width:100%;display:block;border-radius:6px" alt="Server metrics">
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
</div>
</div>
<div id="gpu-charts"></div>
<script>
let knownGPUs = [];
function refreshCharts() {
const t = '?t=' + Date.now();
['chart-cpu-temp','chart-power'].forEach(id => {
const el = document.getElementById(id);
const srv = document.getElementById('chart-server');
if (srv) srv.src = srv.src.split('?')[0] + t;
knownGPUs.forEach(idx => {
const el = document.getElementById('chart-gpu-' + idx);
if (el) el.src = el.src.split('?')[0] + t;
});
}
@@ -272,21 +271,42 @@ setInterval(refreshCharts, 2000);
const es = new EventSource('/api/metrics/stream');
es.addEventListener('metrics', e => {
const d = JSON.parse(e.data);
const gpuRows = (d.gpus||[]).map(g =>
'<tr><td>GPU '+g.index+'</td><td>'+g.temp_c+'°C</td><td>'+g.usage_pct+'%</td><td>'+g.power_w+'W</td><td>'+g.clock_mhz+'MHz</td></tr>'
).join('');
document.getElementById('gpu-table').innerHTML = gpuRows ?
'<table><tr><th>GPU</th><th>Temp</th><th>Usage</th><th>Power</th><th>Clock</th></tr>'+gpuRows+'</table>' :
'<p style="color:#64748b;font-size:12px">No NVIDIA GPU detected</p>';
// Add GPU chart cards as GPUs appear
(d.gpus||[]).forEach(g => {
if (knownGPUs.includes(g.index)) return;
knownGPUs.push(g.index);
const div = document.createElement('div');
div.className = 'card';
div.style.marginBottom = '16px';
div.innerHTML = '<div class="card-head">GPU ' + g.index + '</div>' +
'<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + g.index + '" src="/api/metrics/chart/gpu/' + g.index + '.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + '">' +
'<div id="gpu-table-' + g.index + '" style="margin-top:8px;font-size:12px"></div>' +
'</div>';
document.getElementById('gpu-charts').appendChild(div);
});
// Update numeric tables
let sysHTML = '';
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
if (d.power_w) sysHTML += '<tr><td>System Power</td><td>'+d.power_w.toFixed(0)+'W</td></tr>';
document.getElementById('sys-table').innerHTML = sysHTML ?
'<table>'+sysHTML+'</table>' :
'<p style="color:#64748b;font-size:12px">No sensor data (ipmitool/sensors required)</p>';
if (d.power_w) sysHTML += '<tr><td>Power</td><td>'+d.power_w.toFixed(0)+' W</td></tr>';
const st = document.getElementById('sys-table');
if (st) st.innerHTML = sysHTML ? '<table>'+sysHTML+'</table>' : '<p style="color:#64748b">No sensor data (ipmitool/sensors required)</p>';
(d.gpus||[]).forEach(g => {
const t = document.getElementById('gpu-table-' + g.index);
if (!t) return;
t.innerHTML = '<table>' +
'<tr><td>Temp</td><td>'+g.temp_c+'°C</td>' +
'<td>Load</td><td>'+g.usage_pct+'%</td>' +
'<td>Mem</td><td>'+g.mem_usage_pct+'%</td>' +
'<td>Power</td><td>'+g.power_w+' W</td></tr></table>';
});
});
es.onerror = () => {};
</script>`