feat(metrics): single chart engine + full-width stacked layout
- One engine: go-analyze/charts (grafana theme) for all live metrics
- Server chart: CPU temp, CPU load%, mem load%, power W, fan RPMs
- GPU charts: temp, load%, mem%, power W — one card per GPU, added dynamically
- Charts 1400x280px SVG, rendered at width:100% in single-column layout
- Add CPU load (from /proc/stat) and mem load (from /proc/meminfo) to LiveMetricSample
- Add GPU mem utilization to GPUMetricRow (nvidia-smi utilization.memory)
- Document charting architecture in bible-local/architecture/charting.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,18 +13,19 @@ import (
|
|||||||
|
|
||||||
// GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
|
// GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
|
||||||
type GPUMetricRow struct {
|
type GPUMetricRow struct {
|
||||||
ElapsedSec float64
|
ElapsedSec float64 `json:"elapsed_sec"`
|
||||||
GPUIndex int
|
GPUIndex int `json:"index"`
|
||||||
TempC float64
|
TempC float64 `json:"temp_c"`
|
||||||
UsagePct float64
|
UsagePct float64 `json:"usage_pct"`
|
||||||
PowerW float64
|
MemUsagePct float64 `json:"mem_usage_pct"`
|
||||||
ClockMHz float64
|
PowerW float64 `json:"power_w"`
|
||||||
|
ClockMHz float64 `json:"clock_mhz"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
|
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
|
||||||
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
||||||
args := []string{
|
args := []string{
|
||||||
"--query-gpu=index,temperature.gpu,utilization.gpu,power.draw,clocks.current.graphics",
|
"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics",
|
||||||
"--format=csv,noheader,nounits",
|
"--format=csv,noheader,nounits",
|
||||||
}
|
}
|
||||||
if len(gpuIndices) > 0 {
|
if len(gpuIndices) > 0 {
|
||||||
@@ -45,7 +46,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
parts := strings.Split(line, ", ")
|
parts := strings.Split(line, ", ")
|
||||||
if len(parts) < 5 {
|
if len(parts) < 6 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||||
@@ -53,8 +54,9 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
|||||||
GPUIndex: idx,
|
GPUIndex: idx,
|
||||||
TempC: parseGPUFloat(parts[1]),
|
TempC: parseGPUFloat(parts[1]),
|
||||||
UsagePct: parseGPUFloat(parts[2]),
|
UsagePct: parseGPUFloat(parts[2]),
|
||||||
PowerW: parseGPUFloat(parts[3]),
|
MemUsagePct: parseGPUFloat(parts[3]),
|
||||||
ClockMHz: parseGPUFloat(parts[4]),
|
PowerW: parseGPUFloat(parts[4]),
|
||||||
|
ClockMHz: parseGPUFloat(parts[5]),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return rows, nil
|
return rows, nil
|
||||||
|
|||||||
@@ -1,6 +1,12 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import "time"
|
import (
|
||||||
|
"bufio"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
// LiveMetricSample is a single point-in-time snapshot of server metrics
|
// LiveMetricSample is a single point-in-time snapshot of server metrics
|
||||||
// collected for the web UI metrics page.
|
// collected for the web UI metrics page.
|
||||||
@@ -9,6 +15,8 @@ type LiveMetricSample struct {
|
|||||||
Fans []FanReading `json:"fans"`
|
Fans []FanReading `json:"fans"`
|
||||||
Temps []TempReading `json:"temps"`
|
Temps []TempReading `json:"temps"`
|
||||||
PowerW float64 `json:"power_w"`
|
PowerW float64 `json:"power_w"`
|
||||||
|
CPULoadPct float64 `json:"cpu_load_pct"`
|
||||||
|
MemLoadPct float64 `json:"mem_load_pct"`
|
||||||
GPUs []GPUMetricRow `json:"gpus"`
|
GPUs []GPUMetricRow `json:"gpus"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,5 +49,91 @@ func SampleLiveMetrics() LiveMetricSample {
|
|||||||
// System power — returns 0 if unavailable
|
// System power — returns 0 if unavailable
|
||||||
s.PowerW = sampleSystemPower()
|
s.PowerW = sampleSystemPower()
|
||||||
|
|
||||||
|
// CPU load — from /proc/stat
|
||||||
|
s.CPULoadPct = sampleCPULoadPct()
|
||||||
|
|
||||||
|
// Memory load — from /proc/meminfo
|
||||||
|
s.MemLoadPct = sampleMemLoadPct()
|
||||||
|
|
||||||
return s
|
return s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sampleCPULoadPct reads two /proc/stat snapshots 200ms apart and returns
|
||||||
|
// the overall CPU utilisation percentage.
|
||||||
|
var cpuStatPrev [2]uint64 // [total, idle]
|
||||||
|
|
||||||
|
func sampleCPULoadPct() float64 {
|
||||||
|
total, idle := readCPUStat()
|
||||||
|
if total == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
prevTotal, prevIdle := cpuStatPrev[0], cpuStatPrev[1]
|
||||||
|
cpuStatPrev = [2]uint64{total, idle}
|
||||||
|
if prevTotal == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
dt := float64(total - prevTotal)
|
||||||
|
di := float64(idle - prevIdle)
|
||||||
|
if dt <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
pct := (1 - di/dt) * 100
|
||||||
|
if pct < 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
if pct > 100 {
|
||||||
|
return 100
|
||||||
|
}
|
||||||
|
return pct
|
||||||
|
}
|
||||||
|
|
||||||
|
// readCPUStat reads the aggregate "cpu" line of /proc/stat and returns the
// cumulative total and idle tick counters.
//
// idle is the sum of the idle and iowait columns. total is the sum of the
// first eight columns (user, nice, system, idle, iowait, irq, softirq, steal).
// The trailing guest and guest_nice columns are deliberately left out because
// the kernel already accounts that time in user and nice; adding them again
// would inflate total and understate load on hosts running virtual machines.
//
// It returns (0, 0) when /proc/stat cannot be opened, when no aggregate "cpu"
// line is found, or when a counted column is not a valid unsigned integer.
func readCPUStat() (total, idle uint64) {
	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0
	}
	defer f.Close()

	const (
		countedCols = 8 // user .. steal
		idleCol     = 3
		iowaitCol   = 4
	)

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		fields := strings.Fields(sc.Text())
		// Only the aggregate line is wanted; per-core lines are "cpu0", "cpu1", ...
		if len(fields) == 0 || fields[0] != "cpu" {
			continue
		}
		for i, field := range fields[1:] {
			if i >= countedCols {
				break
			}
			v, err := strconv.ParseUint(field, 10, 64)
			if err != nil {
				// A malformed counter makes the whole sample untrustworthy.
				return 0, 0
			}
			if i == idleCol || i == iowaitCol {
				idle += v
			}
			total += v
		}
		return total, idle
	}
	return 0, 0
}
|
||||||
|
|
||||||
|
// sampleMemLoadPct returns the percentage of physical memory in use, computed
// from /proc/meminfo as (MemTotal - available) / MemTotal * 100.
//
// "available" is MemAvailable when the kernel reports it; when that line is
// absent it is approximated as MemFree + Buffers + Cached so that such systems
// do not read as 100% loaded. The function returns 0 when /proc/meminfo cannot
// be opened, when MemTotal is missing or zero, or when the available figure is
// not smaller than MemTotal.
func sampleMemLoadPct() float64 {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return 0
	}
	defer f.Close()

	vals := map[string]uint64{}
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		// Lines look like "MemTotal:       16315536 kB".
		fields := strings.Fields(sc.Text())
		if len(fields) < 2 {
			continue
		}
		v, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			continue // skip malformed entries instead of recording them as 0
		}
		vals[strings.TrimSuffix(fields[0], ":")] = v
	}

	total := vals["MemTotal"]
	if total == 0 {
		return 0
	}

	avail, ok := vals["MemAvailable"]
	if !ok {
		avail = vals["MemFree"] + vals["Buffers"] + vals["Cached"]
	}
	// Both values are unsigned: guard the subtraction so an inconsistent
	// reading cannot wrap around to an enormous "used" figure.
	if avail >= total {
		return 0
	}
	return float64(total-avail) / float64(total) * 100
}
|
||||||
|
|||||||
@@ -424,7 +424,7 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
sample := platform.SampleLiveMetrics()
|
sample := platform.SampleLiveMetrics()
|
||||||
|
|
||||||
// Feed ring buffers for server-side SVG charts
|
// Feed server ring buffers
|
||||||
for _, t := range sample.Temps {
|
for _, t := range sample.Temps {
|
||||||
if t.Name == "CPU" {
|
if t.Name == "CPU" {
|
||||||
h.ringCPUTemp.push(t.Celsius)
|
h.ringCPUTemp.push(t.Celsius)
|
||||||
@@ -432,6 +432,35 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
h.ringPower.push(sample.PowerW)
|
h.ringPower.push(sample.PowerW)
|
||||||
|
h.ringCPULoad.push(sample.CPULoadPct)
|
||||||
|
h.ringMemLoad.push(sample.MemLoadPct)
|
||||||
|
|
||||||
|
// Feed fan ring buffers (grow on first sight)
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for i, fan := range sample.Fans {
|
||||||
|
for len(h.ringFans) <= i {
|
||||||
|
h.ringFans = append(h.ringFans, newMetricsRing(120))
|
||||||
|
h.fanNames = append(h.fanNames, fan.Name)
|
||||||
|
}
|
||||||
|
h.ringFans[i].push(float64(fan.RPM))
|
||||||
|
}
|
||||||
|
// Feed per-GPU ring buffers (grow on first sight)
|
||||||
|
for _, gpu := range sample.GPUs {
|
||||||
|
idx := gpu.GPUIndex
|
||||||
|
for len(h.gpuRings) <= idx {
|
||||||
|
h.gpuRings = append(h.gpuRings, &gpuRings{
|
||||||
|
Temp: newMetricsRing(120),
|
||||||
|
Util: newMetricsRing(120),
|
||||||
|
MemUtil: newMetricsRing(120),
|
||||||
|
Power: newMetricsRing(120),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
h.gpuRings[idx].Temp.push(gpu.TempC)
|
||||||
|
h.gpuRings[idx].Util.push(gpu.UsagePct)
|
||||||
|
h.gpuRings[idx].MemUtil.push(gpu.MemUsagePct)
|
||||||
|
h.gpuRings[idx].Power.push(gpu.PowerW)
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
|
||||||
b, err := json.Marshal(sample)
|
b, err := json.Marshal(sample)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -242,28 +242,27 @@ func renderHealthCard(opts HandlerOptions) string {
|
|||||||
// ── Metrics ───────────────────────────────────────────────────────────────────
|
// ── Metrics ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func renderMetrics() string {
|
func renderMetrics() string {
|
||||||
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live server metrics, charts updated every 2 seconds.</p>
|
return `<p style="color:#64748b;font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds. Charts use go-analyze/charts (grafana theme).</p>
|
||||||
<div class="grid2">
|
|
||||||
<div class="card">
|
<div class="card" style="margin-bottom:16px">
|
||||||
<div class="card-head">System</div>
|
<div class="card-head">Server</div>
|
||||||
<div class="card-body">
|
<div class="card-body" style="padding:8px">
|
||||||
<img id="chart-cpu-temp" src="/api/metrics/chart/cpu-temp.svg" style="width:100%;border-radius:6px" alt="CPU Temp">
|
<img id="chart-server" src="/api/metrics/chart/server.svg" style="width:100%;display:block;border-radius:6px" alt="Server metrics">
|
||||||
<img id="chart-power" src="/api/metrics/chart/power.svg" style="width:100%;border-radius:6px;margin-top:8px" alt="Power">
|
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
|
||||||
<div id="sys-table" style="margin-top:8px"></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="card">
|
|
||||||
<div class="card-head">GPU</div>
|
|
||||||
<div class="card-body">
|
|
||||||
<div id="gpu-table"><p style="color:#64748b;font-size:12px">Waiting for data...</p></div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div id="gpu-charts"></div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
let knownGPUs = [];
|
||||||
|
|
||||||
function refreshCharts() {
|
function refreshCharts() {
|
||||||
const t = '?t=' + Date.now();
|
const t = '?t=' + Date.now();
|
||||||
['chart-cpu-temp','chart-power'].forEach(id => {
|
const srv = document.getElementById('chart-server');
|
||||||
const el = document.getElementById(id);
|
if (srv) srv.src = srv.src.split('?')[0] + t;
|
||||||
|
knownGPUs.forEach(idx => {
|
||||||
|
const el = document.getElementById('chart-gpu-' + idx);
|
||||||
if (el) el.src = el.src.split('?')[0] + t;
|
if (el) el.src = el.src.split('?')[0] + t;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -272,21 +271,42 @@ setInterval(refreshCharts, 2000);
|
|||||||
const es = new EventSource('/api/metrics/stream');
|
const es = new EventSource('/api/metrics/stream');
|
||||||
es.addEventListener('metrics', e => {
|
es.addEventListener('metrics', e => {
|
||||||
const d = JSON.parse(e.data);
|
const d = JSON.parse(e.data);
|
||||||
const gpuRows = (d.gpus||[]).map(g =>
|
|
||||||
'<tr><td>GPU '+g.index+'</td><td>'+g.temp_c+'°C</td><td>'+g.usage_pct+'%</td><td>'+g.power_w+'W</td><td>'+g.clock_mhz+'MHz</td></tr>'
|
|
||||||
).join('');
|
|
||||||
document.getElementById('gpu-table').innerHTML = gpuRows ?
|
|
||||||
'<table><tr><th>GPU</th><th>Temp</th><th>Usage</th><th>Power</th><th>Clock</th></tr>'+gpuRows+'</table>' :
|
|
||||||
'<p style="color:#64748b;font-size:12px">No NVIDIA GPU detected</p>';
|
|
||||||
|
|
||||||
|
// Add GPU chart cards as GPUs appear
|
||||||
|
(d.gpus||[]).forEach(g => {
|
||||||
|
if (knownGPUs.includes(g.index)) return;
|
||||||
|
knownGPUs.push(g.index);
|
||||||
|
const div = document.createElement('div');
|
||||||
|
div.className = 'card';
|
||||||
|
div.style.marginBottom = '16px';
|
||||||
|
div.innerHTML = '<div class="card-head">GPU ' + g.index + '</div>' +
|
||||||
|
'<div class="card-body" style="padding:8px">' +
|
||||||
|
'<img id="chart-gpu-' + g.index + '" src="/api/metrics/chart/gpu/' + g.index + '.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + '">' +
|
||||||
|
'<div id="gpu-table-' + g.index + '" style="margin-top:8px;font-size:12px"></div>' +
|
||||||
|
'</div>';
|
||||||
|
document.getElementById('gpu-charts').appendChild(div);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Update numeric tables
|
||||||
let sysHTML = '';
|
let sysHTML = '';
|
||||||
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
|
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
|
||||||
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
|
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
|
||||||
|
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
|
||||||
|
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
|
||||||
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
|
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
|
||||||
if (d.power_w) sysHTML += '<tr><td>System Power</td><td>'+d.power_w.toFixed(0)+'W</td></tr>';
|
if (d.power_w) sysHTML += '<tr><td>Power</td><td>'+d.power_w.toFixed(0)+' W</td></tr>';
|
||||||
document.getElementById('sys-table').innerHTML = sysHTML ?
|
const st = document.getElementById('sys-table');
|
||||||
'<table>'+sysHTML+'</table>' :
|
if (st) st.innerHTML = sysHTML ? '<table>'+sysHTML+'</table>' : '<p style="color:#64748b">No sensor data (ipmitool/sensors required)</p>';
|
||||||
'<p style="color:#64748b;font-size:12px">No sensor data (ipmitool/sensors required)</p>';
|
|
||||||
|
(d.gpus||[]).forEach(g => {
|
||||||
|
const t = document.getElementById('gpu-table-' + g.index);
|
||||||
|
if (!t) return;
|
||||||
|
t.innerHTML = '<table>' +
|
||||||
|
'<tr><td>Temp</td><td>'+g.temp_c+'°C</td>' +
|
||||||
|
'<td>Load</td><td>'+g.usage_pct+'%</td>' +
|
||||||
|
'<td>Mem</td><td>'+g.mem_usage_pct+'%</td>' +
|
||||||
|
'<td>Power</td><td>'+g.power_w+' W</td></tr></table>';
|
||||||
|
});
|
||||||
});
|
});
|
||||||
es.onerror = () => {};
|
es.onerror = () => {};
|
||||||
</script>`
|
</script>`
|
||||||
|
|||||||
@@ -62,15 +62,27 @@ func (r *metricsRing) snapshot() ([]float64, []string) {
|
|||||||
return v, l
|
return v, l
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gpuRings holds per-GPU ring buffers.
|
||||||
|
type gpuRings struct {
|
||||||
|
Temp *metricsRing
|
||||||
|
Util *metricsRing
|
||||||
|
MemUtil *metricsRing
|
||||||
|
Power *metricsRing
|
||||||
|
}
|
||||||
|
|
||||||
// handler is the HTTP handler for the web UI.
|
// handler is the HTTP handler for the web UI.
|
||||||
type handler struct {
|
type handler struct {
|
||||||
opts HandlerOptions
|
opts HandlerOptions
|
||||||
mux *http.ServeMux
|
mux *http.ServeMux
|
||||||
|
// server rings
|
||||||
ringCPUTemp *metricsRing
|
ringCPUTemp *metricsRing
|
||||||
|
ringCPULoad *metricsRing
|
||||||
|
ringMemLoad *metricsRing
|
||||||
ringPower *metricsRing
|
ringPower *metricsRing
|
||||||
ringFans []*metricsRing
|
ringFans []*metricsRing
|
||||||
ringGPUTemp []*metricsRing
|
fanNames []string
|
||||||
ringGPUUtil []*metricsRing
|
// per-GPU rings (index = GPU index)
|
||||||
|
gpuRings []*gpuRings
|
||||||
ringsMu sync.Mutex
|
ringsMu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,6 +101,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
h := &handler{
|
h := &handler{
|
||||||
opts: opts,
|
opts: opts,
|
||||||
ringCPUTemp: newMetricsRing(120),
|
ringCPUTemp: newMetricsRing(120),
|
||||||
|
ringCPULoad: newMetricsRing(120),
|
||||||
|
ringMemLoad: newMetricsRing(120),
|
||||||
ringPower: newMetricsRing(120),
|
ringPower: newMetricsRing(120),
|
||||||
}
|
}
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
@@ -244,48 +258,88 @@ func (h *handler) handleViewer(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request) {
|
||||||
name := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
|
path := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
|
||||||
name = strings.TrimSuffix(name, ".svg")
|
path = strings.TrimSuffix(path, ".svg")
|
||||||
|
|
||||||
|
var datasets [][]float64
|
||||||
|
var names []string
|
||||||
|
var labels []string
|
||||||
|
var title string
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case path == "server":
|
||||||
|
title = "Server"
|
||||||
|
vCPUTemp, l := h.ringCPUTemp.snapshot()
|
||||||
|
vCPULoad, _ := h.ringCPULoad.snapshot()
|
||||||
|
vMemLoad, _ := h.ringMemLoad.snapshot()
|
||||||
|
vPower, _ := h.ringPower.snapshot()
|
||||||
|
labels = l
|
||||||
|
datasets = [][]float64{vCPUTemp, vCPULoad, vMemLoad, vPower}
|
||||||
|
names = []string{"CPU Temp °C", "CPU Load %", "Mem Load %", "Power W"}
|
||||||
|
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for i, fr := range h.ringFans {
|
||||||
|
fv, _ := fr.snapshot()
|
||||||
|
datasets = append(datasets, fv)
|
||||||
|
name := "Fan"
|
||||||
|
if i < len(h.fanNames) {
|
||||||
|
name = h.fanNames[i]
|
||||||
|
}
|
||||||
|
names = append(names, name+" RPM")
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
|
||||||
|
case strings.HasPrefix(path, "gpu/"):
|
||||||
|
idxStr := strings.TrimPrefix(path, "gpu/")
|
||||||
|
idx := 0
|
||||||
|
fmt.Sscanf(idxStr, "%d", &idx)
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
var gr *gpuRings
|
||||||
|
if idx < len(h.gpuRings) {
|
||||||
|
gr = h.gpuRings[idx]
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
if gr == nil {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
vTemp, l := gr.Temp.snapshot()
|
||||||
|
vUtil, _ := gr.Util.snapshot()
|
||||||
|
vMemUtil, _ := gr.MemUtil.snapshot()
|
||||||
|
vPower, _ := gr.Power.snapshot()
|
||||||
|
labels = l
|
||||||
|
title = fmt.Sprintf("GPU %d", idx)
|
||||||
|
datasets = [][]float64{vTemp, vUtil, vMemUtil, vPower}
|
||||||
|
names = []string{"Temp °C", "Load %", "Mem %", "Power W"}
|
||||||
|
|
||||||
var ring *metricsRing
|
|
||||||
var title, unit string
|
|
||||||
switch name {
|
|
||||||
case "cpu-temp":
|
|
||||||
ring, title, unit = h.ringCPUTemp, "CPU Temperature", "°C"
|
|
||||||
case "power":
|
|
||||||
ring, title, unit = h.ringPower, "System Power", "W"
|
|
||||||
default:
|
default:
|
||||||
http.NotFound(w, r)
|
http.NotFound(w, r)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
vals, labels := ring.snapshot()
|
// Ensure all datasets same length as labels
|
||||||
if len(vals) == 0 {
|
n := len(labels)
|
||||||
vals = []float64{0}
|
if n == 0 {
|
||||||
|
n = 1
|
||||||
labels = []string{""}
|
labels = []string{""}
|
||||||
}
|
}
|
||||||
|
for i := range datasets {
|
||||||
// Sparse x-axis labels
|
if len(datasets[i]) == 0 {
|
||||||
sparse := make([]string, len(labels))
|
datasets[i] = make([]float64, n)
|
||||||
step := len(labels) / 6
|
|
||||||
if step < 1 {
|
|
||||||
step = 1
|
|
||||||
}
|
|
||||||
for i := range labels {
|
|
||||||
if i%step == 0 {
|
|
||||||
sparse[i] = labels[i]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opt := gocharts.NewLineChartOptionWithData([][]float64{vals})
|
sparse := sparseLabels(labels, 6)
|
||||||
opt.Title = gocharts.TitleOption{Text: title + " (" + unit + ")"}
|
|
||||||
|
opt := gocharts.NewLineChartOptionWithData(datasets)
|
||||||
|
opt.Title = gocharts.TitleOption{Text: title}
|
||||||
opt.XAxis.Labels = sparse
|
opt.XAxis.Labels = sparse
|
||||||
opt.Legend = gocharts.LegendOption{Show: gocharts.Ptr(false)}
|
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
||||||
|
|
||||||
p := gocharts.NewPainter(gocharts.PainterOptions{
|
p := gocharts.NewPainter(gocharts.PainterOptions{
|
||||||
OutputFormat: gocharts.ChartOutputSVG,
|
OutputFormat: gocharts.ChartOutputSVG,
|
||||||
Width: 600,
|
Width: 1400,
|
||||||
Height: 180,
|
Height: 280,
|
||||||
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
|
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
|
||||||
if err := p.LineChart(opt); err != nil {
|
if err := p.LineChart(opt); err != nil {
|
||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
@@ -301,6 +355,27 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
_, _ = w.Write(buf)
|
_, _ = w.Write(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// safeIdx returns s[i], or 0 when i is outside the bounds of s (including
// negative indices and a nil or empty slice).
func safeIdx(s []float64, i int) float64 {
	if i < 0 || i >= len(s) {
		return 0
	}
	return s[i]
}
|
||||||
|
|
||||||
|
// sparseLabels returns a copy of labels, the same length as the input, in
// which only every step-th entry (starting at index 0) keeps its text and all
// others are blanked. step is len(labels)/n, with a minimum of 1.
//
// n is the approximate number of labels to keep. A non-positive n is treated
// as "no thinning" (step 1) rather than dividing by zero.
func sparseLabels(labels []string, n int) []string {
	out := make([]string, len(labels))

	step := 1
	if n > 0 {
		if s := len(labels) / n; s > 1 {
			step = s
		}
	}

	for i := 0; i < len(labels); i += step {
		out[i] = labels[i]
	}
	return out
}
|
||||||
|
|
||||||
// ── Page handler ─────────────────────────────────────────────────────────────
|
// ── Page handler ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|||||||
38
bible-local/architecture/charting.md
Normal file
38
bible-local/architecture/charting.md
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Charting architecture
|
||||||
|
|
||||||
|
## Decision: one chart engine for all live metrics
|
||||||
|
|
||||||
|
**Engine:** `github.com/go-analyze/charts` (pure Go, no CGO, SVG output)
|
||||||
|
**Theme:** `grafana` (dark background, coloured lines)
|
||||||
|
|
||||||
|
All live metrics charts in the web UI are server-side SVG images served by Go
|
||||||
|
and polled by the browser every 2 seconds via `<img src="...?t=now">`.
|
||||||
|
There is no client-side canvas or JS chart library.
|
||||||
|
|
||||||
|
### Why go-analyze/charts
|
||||||
|
|
||||||
|
- Pure Go, no CGO — builds cleanly inside the live-build container
|
||||||
|
- SVG output — crisp at any display resolution, full-width without pixelation
|
||||||
|
- Grafana theme matches the dark web UI colour scheme
|
||||||
|
- Active fork of the archived wcharczuk/go-chart
|
||||||
|
|
||||||
|
### SAT stress-test charts
|
||||||
|
|
||||||
|
The `drawGPUChartSVG` function in `platform/gpu_metrics.go` is a separate
|
||||||
|
self-contained SVG renderer used **only** for completed SAT run reports
|
||||||
|
(HTML export, burn-in summaries). It is not used for live metrics.
|
||||||
|
|
||||||
|
### Live metrics chart endpoints
|
||||||
|
|
||||||
|
| Path | Content |
|
||||||
|
|------|---------|
|
||||||
|
| `GET /api/metrics/chart/server.svg` | CPU temp, CPU load %, mem load %, power W, fan RPMs |
|
||||||
|
| `GET /api/metrics/chart/gpu/{idx}.svg` | GPU temp °C, load %, mem %, power W |
|
||||||
|
|
||||||
|
Charts are 1400 × 280 px SVG. The page renders them at `width: 100%` in a
|
||||||
|
single-column layout so they always fill the viewport width.
|
||||||
|
|
||||||
|
### Ring buffers
|
||||||
|
|
||||||
|
Each metric is stored in a 120-sample ring buffer (2 minutes of history at 1 Hz).
|
||||||
|
Buffers are per-server or per-GPU; fan and GPU buffers are created dynamically the first time a fan or GPU appears in a sample.
|
||||||
Reference in New Issue
Block a user