feat(metrics): single chart engine + full-width stacked layout

- One engine: go-analyze/charts (grafana theme) for all live metrics
- Server chart: CPU temp, CPU load%, mem load%, power W, fan RPMs
- GPU charts: temp, load%, mem%, power W — one card per GPU, added dynamically
- Charts 1400x280px SVG, rendered at width:100% in single-column layout
- Add CPU load (from /proc/stat) and mem load (from /proc/meminfo) to LiveMetricSample
- Add GPU mem utilization to GPUMetricRow (nvidia-smi utilization.memory)
- Document charting architecture in bible-local/architecture/charting.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 23:26:13 +03:00
parent e7a7ff54b9
commit ec0b7f7ff9
6 changed files with 336 additions and 78 deletions

View File

@@ -424,7 +424,7 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
case <-ticker.C:
sample := platform.SampleLiveMetrics()
// Feed ring buffers for server-side SVG charts
// Feed server ring buffers
for _, t := range sample.Temps {
if t.Name == "CPU" {
h.ringCPUTemp.push(t.Celsius)
@@ -432,6 +432,35 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
}
}
h.ringPower.push(sample.PowerW)
h.ringCPULoad.push(sample.CPULoadPct)
h.ringMemLoad.push(sample.MemLoadPct)
// Feed fan ring buffers (grow on first sight)
h.ringsMu.Lock()
for i, fan := range sample.Fans {
for len(h.ringFans) <= i {
h.ringFans = append(h.ringFans, newMetricsRing(120))
h.fanNames = append(h.fanNames, fan.Name)
}
h.ringFans[i].push(float64(fan.RPM))
}
// Feed per-GPU ring buffers (grow on first sight)
for _, gpu := range sample.GPUs {
idx := gpu.GPUIndex
for len(h.gpuRings) <= idx {
h.gpuRings = append(h.gpuRings, &gpuRings{
Temp: newMetricsRing(120),
Util: newMetricsRing(120),
MemUtil: newMetricsRing(120),
Power: newMetricsRing(120),
})
}
h.gpuRings[idx].Temp.push(gpu.TempC)
h.gpuRings[idx].Util.push(gpu.UsagePct)
h.gpuRings[idx].MemUtil.push(gpu.MemUsagePct)
h.gpuRings[idx].Power.push(gpu.PowerW)
}
h.ringsMu.Unlock()
b, err := json.Marshal(sample)
if err != nil {

View File

@@ -242,28 +242,27 @@ func renderHealthCard(opts HandlerOptions) string {
// ── Metrics ───────────────────────────────────────────────────────────────────
func renderMetrics() string {
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live server metrics, charts updated every 2 seconds.</p>
<div class="grid2">
<div class="card">
<div class="card-head">System</div>
<div class="card-body">
<img id="chart-cpu-temp" src="/api/metrics/chart/cpu-temp.svg" style="width:100%;border-radius:6px" alt="CPU Temp">
<img id="chart-power" src="/api/metrics/chart/power.svg" style="width:100%;border-radius:6px;margin-top:8px" alt="Power">
<div id="sys-table" style="margin-top:8px"></div>
</div>
</div>
<div class="card">
<div class="card-head">GPU</div>
<div class="card-body">
<div id="gpu-table"><p style="color:#64748b;font-size:12px">Waiting for data...</p></div>
</div>
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds. Charts use go-analyze/charts (grafana theme).</p>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Server</div>
<div class="card-body" style="padding:8px">
<img id="chart-server" src="/api/metrics/chart/server.svg" style="width:100%;display:block;border-radius:6px" alt="Server metrics">
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
</div>
</div>
<div id="gpu-charts"></div>
<script>
let knownGPUs = [];
function refreshCharts() {
const t = '?t=' + Date.now();
['chart-cpu-temp','chart-power'].forEach(id => {
const el = document.getElementById(id);
const srv = document.getElementById('chart-server');
if (srv) srv.src = srv.src.split('?')[0] + t;
knownGPUs.forEach(idx => {
const el = document.getElementById('chart-gpu-' + idx);
if (el) el.src = el.src.split('?')[0] + t;
});
}
@@ -272,21 +271,42 @@ setInterval(refreshCharts, 2000);
const es = new EventSource('/api/metrics/stream');
es.addEventListener('metrics', e => {
const d = JSON.parse(e.data);
const gpuRows = (d.gpus||[]).map(g =>
'<tr><td>GPU '+g.index+'</td><td>'+g.temp_c+'°C</td><td>'+g.usage_pct+'%</td><td>'+g.power_w+'W</td><td>'+g.clock_mhz+'MHz</td></tr>'
).join('');
document.getElementById('gpu-table').innerHTML = gpuRows ?
'<table><tr><th>GPU</th><th>Temp</th><th>Usage</th><th>Power</th><th>Clock</th></tr>'+gpuRows+'</table>' :
'<p style="color:#64748b;font-size:12px">No NVIDIA GPU detected</p>';
// Add GPU chart cards as GPUs appear
(d.gpus||[]).forEach(g => {
if (knownGPUs.includes(g.index)) return;
knownGPUs.push(g.index);
const div = document.createElement('div');
div.className = 'card';
div.style.marginBottom = '16px';
div.innerHTML = '<div class="card-head">GPU ' + g.index + '</div>' +
'<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + g.index + '" src="/api/metrics/chart/gpu/' + g.index + '.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + '">' +
'<div id="gpu-table-' + g.index + '" style="margin-top:8px;font-size:12px"></div>' +
'</div>';
document.getElementById('gpu-charts').appendChild(div);
});
// Update numeric tables
let sysHTML = '';
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
if (d.power_w) sysHTML += '<tr><td>System Power</td><td>'+d.power_w.toFixed(0)+'W</td></tr>';
document.getElementById('sys-table').innerHTML = sysHTML ?
'<table>'+sysHTML+'</table>' :
'<p style="color:#64748b;font-size:12px">No sensor data (ipmitool/sensors required)</p>';
if (d.power_w) sysHTML += '<tr><td>Power</td><td>'+d.power_w.toFixed(0)+' W</td></tr>';
const st = document.getElementById('sys-table');
if (st) st.innerHTML = sysHTML ? '<table>'+sysHTML+'</table>' : '<p style="color:#64748b">No sensor data (ipmitool/sensors required)</p>';
(d.gpus||[]).forEach(g => {
const t = document.getElementById('gpu-table-' + g.index);
if (!t) return;
t.innerHTML = '<table>' +
'<tr><td>Temp</td><td>'+g.temp_c+'°C</td>' +
'<td>Load</td><td>'+g.usage_pct+'%</td>' +
'<td>Mem</td><td>'+g.mem_usage_pct+'%</td>' +
'<td>Power</td><td>'+g.power_w+' W</td></tr></table>';
});
});
es.onerror = () => {};
</script>`

View File

@@ -62,15 +62,27 @@ func (r *metricsRing) snapshot() ([]float64, []string) {
return v, l
}
// gpuRings bundles the metric ring buffers kept for a single GPU.
// Each field is an independent ring: Temp for temperature, Util for
// utilization, MemUtil for memory utilization, and Power for power draw.
type gpuRings struct {
	Temp, Util, MemUtil, Power *metricsRing
}
// handler is the HTTP handler for the web UI.
type handler struct {
opts HandlerOptions
mux *http.ServeMux
opts HandlerOptions
mux *http.ServeMux
// server rings
ringCPUTemp *metricsRing
ringCPULoad *metricsRing
ringMemLoad *metricsRing
ringPower *metricsRing
ringFans []*metricsRing
ringGPUTemp []*metricsRing
ringGPUUtil []*metricsRing
fanNames []string
// per-GPU rings (index = GPU index)
gpuRings []*gpuRings
ringsMu sync.Mutex
}
@@ -89,6 +101,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
h := &handler{
opts: opts,
ringCPUTemp: newMetricsRing(120),
ringCPULoad: newMetricsRing(120),
ringMemLoad: newMetricsRing(120),
ringPower: newMetricsRing(120),
}
mux := http.NewServeMux()
@@ -244,48 +258,88 @@ func (h *handler) handleViewer(w http.ResponseWriter, r *http.Request) {
}
func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request) {
name := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
name = strings.TrimSuffix(name, ".svg")
path := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
path = strings.TrimSuffix(path, ".svg")
var datasets [][]float64
var names []string
var labels []string
var title string
switch {
case path == "server":
title = "Server"
vCPUTemp, l := h.ringCPUTemp.snapshot()
vCPULoad, _ := h.ringCPULoad.snapshot()
vMemLoad, _ := h.ringMemLoad.snapshot()
vPower, _ := h.ringPower.snapshot()
labels = l
datasets = [][]float64{vCPUTemp, vCPULoad, vMemLoad, vPower}
names = []string{"CPU Temp °C", "CPU Load %", "Mem Load %", "Power W"}
h.ringsMu.Lock()
for i, fr := range h.ringFans {
fv, _ := fr.snapshot()
datasets = append(datasets, fv)
name := "Fan"
if i < len(h.fanNames) {
name = h.fanNames[i]
}
names = append(names, name+" RPM")
}
h.ringsMu.Unlock()
case strings.HasPrefix(path, "gpu/"):
idxStr := strings.TrimPrefix(path, "gpu/")
idx := 0
fmt.Sscanf(idxStr, "%d", &idx)
h.ringsMu.Lock()
var gr *gpuRings
if idx < len(h.gpuRings) {
gr = h.gpuRings[idx]
}
h.ringsMu.Unlock()
if gr == nil {
http.NotFound(w, r)
return
}
vTemp, l := gr.Temp.snapshot()
vUtil, _ := gr.Util.snapshot()
vMemUtil, _ := gr.MemUtil.snapshot()
vPower, _ := gr.Power.snapshot()
labels = l
title = fmt.Sprintf("GPU %d", idx)
datasets = [][]float64{vTemp, vUtil, vMemUtil, vPower}
names = []string{"Temp °C", "Load %", "Mem %", "Power W"}
var ring *metricsRing
var title, unit string
switch name {
case "cpu-temp":
ring, title, unit = h.ringCPUTemp, "CPU Temperature", "°C"
case "power":
ring, title, unit = h.ringPower, "System Power", "W"
default:
http.NotFound(w, r)
return
}
vals, labels := ring.snapshot()
if len(vals) == 0 {
vals = []float64{0}
// Ensure all datasets same length as labels
n := len(labels)
if n == 0 {
n = 1
labels = []string{""}
}
// Sparse x-axis labels
sparse := make([]string, len(labels))
step := len(labels) / 6
if step < 1 {
step = 1
}
for i := range labels {
if i%step == 0 {
sparse[i] = labels[i]
for i := range datasets {
if len(datasets[i]) == 0 {
datasets[i] = make([]float64, n)
}
}
opt := gocharts.NewLineChartOptionWithData([][]float64{vals})
opt.Title = gocharts.TitleOption{Text: title + " (" + unit + ")"}
sparse := sparseLabels(labels, 6)
opt := gocharts.NewLineChartOptionWithData(datasets)
opt.Title = gocharts.TitleOption{Text: title}
opt.XAxis.Labels = sparse
opt.Legend = gocharts.LegendOption{Show: gocharts.Ptr(false)}
opt.Legend = gocharts.LegendOption{SeriesNames: names}
p := gocharts.NewPainter(gocharts.PainterOptions{
OutputFormat: gocharts.ChartOutputSVG,
Width: 600,
Height: 180,
Width: 1400,
Height: 280,
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
if err := p.LineChart(opt); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -301,6 +355,27 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
_, _ = w.Write(buf)
}
// safeIdx returns s[i] when i is a valid index into s and 0 otherwise.
//
// Both bounds are checked: an index at or past len(s), a negative index,
// and a nil or empty slice all yield 0 rather than a run-time panic.
func safeIdx(s []float64, i int) float64 {
	if i < 0 || i >= len(s) {
		return 0
	}
	return s[i]
}
// sparseLabels returns a thinned copy of labels for use on a dense axis.
//
// The result has the same length as labels. Entries whose index is a
// multiple of step keep their text; every other entry is the empty string.
// step is len(labels)/n (integer division), clamped to a minimum of 1, so
// short inputs keep all of their labels.
//
// A non-positive n is treated as "keep every label" (step 1) instead of
// dividing by zero. The input slice is never modified.
func sparseLabels(labels []string, n int) []string {
	out := make([]string, len(labels))

	step := 1
	if n > 0 {
		step = len(labels) / n
		if step < 1 {
			step = 1
		}
	}

	for i, l := range labels {
		if i%step == 0 {
			out[i] = l
		}
	}
	return out
}
// ── Page handler ─────────────────────────────────────────────────────────────
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {