Add GPU clock charts and grouped GPU metrics view

This commit is contained in:
2026-04-05 09:57:38 +03:00
parent f3c14cd893
commit 9826d437a5
6 changed files with 650 additions and 57 deletions

View File

@@ -20,12 +20,13 @@ type GPUMetricRow struct {
MemUsagePct float64 `json:"mem_usage_pct"` MemUsagePct float64 `json:"mem_usage_pct"`
PowerW float64 `json:"power_w"` PowerW float64 `json:"power_w"`
ClockMHz float64 `json:"clock_mhz"` ClockMHz float64 `json:"clock_mhz"`
MemClockMHz float64 `json:"mem_clock_mhz"`
} }
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU. // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) { func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
args := []string{ args := []string{
"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics", "--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics,clocks.current.memory",
"--format=csv,noheader,nounits", "--format=csv,noheader,nounits",
} }
if len(gpuIndices) > 0 { if len(gpuIndices) > 0 {
@@ -46,7 +47,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
continue continue
} }
parts := strings.Split(line, ", ") parts := strings.Split(line, ", ")
if len(parts) < 6 { if len(parts) < 7 {
continue continue
} }
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
@@ -57,6 +58,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
MemUsagePct: parseGPUFloat(parts[3]), MemUsagePct: parseGPUFloat(parts[3]),
PowerW: parseGPUFloat(parts[4]), PowerW: parseGPUFloat(parts[4]),
ClockMHz: parseGPUFloat(parts[5]), ClockMHz: parseGPUFloat(parts[5]),
MemClockMHz: parseGPUFloat(parts[6]),
}) })
} }
return rows, nil return rows, nil
@@ -139,10 +141,10 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
// WriteGPUMetricsCSV writes collected rows as a CSV file. // WriteGPUMetricsCSV writes collected rows as a CSV file.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error { func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
var b bytes.Buffer var b bytes.Buffer
b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz\n") b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
for _, r := range rows { for _, r := range rows {
fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.0f\n", fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.0f,%.0f\n",
r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.PowerW, r.ClockMHz) r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
} }
return os.WriteFile(path, b.Bytes(), 0644) return os.WriteFile(path, b.Bytes(), 0644)
} }
@@ -197,7 +199,7 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
const PW = plotX2 - plotX1 const PW = plotX2 - plotX1
const PH = plotY2 - plotY1 const PH = plotY2 - plotY1
// Outer axes // Outer axes
const tempAxisX = 60 // temp axis line const tempAxisX = 60 // temp axis line
const clockAxisX = 900 // clock axis line const clockAxisX = 900 // clock axis line
colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"} colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}

View File

@@ -8,6 +8,7 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"strconv" "strconv"
"strings"
"time" "time"
"bee/audit/internal/platform" "bee/audit/internal/platform"
@@ -54,6 +55,8 @@ CREATE TABLE IF NOT EXISTS gpu_metrics (
usage_pct REAL, usage_pct REAL,
mem_usage_pct REAL, mem_usage_pct REAL,
power_w REAL, power_w REAL,
clock_mhz REAL,
mem_clock_mhz REAL,
PRIMARY KEY (ts, gpu_index) PRIMARY KEY (ts, gpu_index)
); );
CREATE TABLE IF NOT EXISTS fan_metrics ( CREATE TABLE IF NOT EXISTS fan_metrics (
@@ -70,6 +73,38 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
PRIMARY KEY (ts, name) PRIMARY KEY (ts, name)
); );
`) `)
if err != nil {
return err
}
if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
return err
}
return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
}
// ensureMetricsColumn adds column to table when the table does not have it yet.
//
// It lists the table's columns with PRAGMA table_info and compares each
// reported name with column case-insensitively. When a match is found nothing
// is changed and nil is returned; otherwise an ALTER TABLE ... ADD COLUMN
// statement is executed with definition appended after the column name.
//
// table, column and definition are concatenated into the SQL text verbatim,
// so they must be trusted identifiers rather than user input.
func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
	infoQuery := "PRAGMA table_info(" + table + ")"
	cols, err := db.Query(infoQuery)
	if err != nil {
		return err
	}
	defer cols.Close()

	// Scan targets for the six values of one table_info row; only the name
	// is inspected, the rest exist to satisfy Scan.
	var (
		colID, notNull, primaryKey int
		colName, colType           string
		defaultValue               sql.NullString
	)
	for cols.Next() {
		if scanErr := cols.Scan(&colID, &colName, &colType, &notNull, &defaultValue, &primaryKey); scanErr != nil {
			return scanErr
		}
		if strings.EqualFold(colName, column) {
			// Column already exists; no migration needed.
			return nil
		}
	}
	if iterErr := cols.Err(); iterErr != nil {
		return iterErr
	}

	alterStmt := "ALTER TABLE " + table + " ADD COLUMN " + column + " " + definition
	_, err = db.Exec(alterStmt)
	return err
}
@@ -91,8 +126,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
} }
for _, g := range s.GPUs { for _, g := range s.GPUs {
_, err = tx.Exec( _, err = tx.Exec(
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`, `INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz) VALUES(?,?,?,?,?,?,?,?)`,
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, g.ClockMHz, g.MemClockMHz,
) )
if err != nil { if err != nil {
return err return err
@@ -163,7 +198,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
} }
gpuData := map[gpuKey]platform.GPUMetricRow{} gpuData := map[gpuKey]platform.GPUMetricRow{}
gRows, err := m.db.Query( gRows, err := m.db.Query(
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`, `SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,IFNULL(clock_mhz,0),IFNULL(mem_clock_mhz,0) FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
minTS, maxTS, minTS, maxTS,
) )
if err == nil { if err == nil {
@@ -171,7 +206,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
for gRows.Next() { for gRows.Next() {
var ts int64 var ts int64
var g platform.GPUMetricRow var g platform.GPUMetricRow
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil { if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW, &g.ClockMHz, &g.MemClockMHz); err == nil {
gpuData[gpuKey{ts, g.GPUIndex}] = g gpuData[gpuKey{ts, g.GPUIndex}] = g
} }
} }
@@ -283,7 +318,8 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
func (m *MetricsDB) ExportCSV(w io.Writer) error { func (m *MetricsDB) ExportCSV(w io.Writer) error {
rows, err := m.db.Query(` rows, err := m.db.Query(`
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w, SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w,
g.clock_mhz, g.mem_clock_mhz
FROM sys_metrics s FROM sys_metrics s
LEFT JOIN gpu_metrics g ON g.ts = s.ts LEFT JOIN gpu_metrics g ON g.ts = s.ts
ORDER BY s.ts, g.gpu_index ORDER BY s.ts, g.gpu_index
@@ -294,13 +330,13 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
defer rows.Close() defer rows.Close()
cw := csv.NewWriter(w) cw := csv.NewWriter(w)
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"}) _ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w", "gpu_clock_mhz", "gpu_mem_clock_mhz"})
for rows.Next() { for rows.Next() {
var ts int64 var ts int64
var cpu, mem, pwr float64 var cpu, mem, pwr float64
var gpuIdx sql.NullInt64 var gpuIdx sql.NullInt64
var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64 var gpuTemp, gpuUse, gpuMem, gpuPow, gpuClock, gpuMemClock sql.NullFloat64
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil { if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow, &gpuClock, &gpuMemClock); err != nil {
continue continue
} }
row := []string{ row := []string{
@@ -316,9 +352,11 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64), strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64), strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64), strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuClock.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMemClock.Float64, 'f', 1, 64),
) )
} else { } else {
row = append(row, "", "", "", "", "") row = append(row, "", "", "", "", "", "", "")
} }
_ = cw.Write(row) _ = cw.Write(row)
} }

View File

@@ -1,11 +1,13 @@
package webui package webui
import ( import (
"database/sql"
"path/filepath" "path/filepath"
"testing" "testing"
"time" "time"
"bee/audit/internal/platform" "bee/audit/internal/platform"
_ "modernc.org/sqlite"
) )
func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) { func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
@@ -67,3 +69,77 @@ func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
} }
} }
} }
// TestMetricsDBMigratesLegacyGPUSchema checks that a database file whose
// gpu_metrics table was created without the clock_mhz and mem_clock_mhz
// columns can be opened with openMetricsDB, and that a GPU sample written
// afterwards comes back from LoadAll with both clock values intact.
func TestMetricsDBMigratesLegacyGPUSchema(t *testing.T) {
path := filepath.Join(t.TempDir(), "metrics.db")
// Build the legacy schema directly through database/sql, bypassing openMetricsDB.
raw, err := sql.Open("sqlite", path)
if err != nil {
t.Fatalf("sql.Open: %v", err)
}
_, err = raw.Exec(`
CREATE TABLE gpu_metrics (
ts INTEGER NOT NULL,
gpu_index INTEGER NOT NULL,
temp_c REAL,
usage_pct REAL,
mem_usage_pct REAL,
power_w REAL,
PRIMARY KEY (ts, gpu_index)
);
CREATE TABLE sys_metrics (
ts INTEGER NOT NULL,
cpu_load_pct REAL,
mem_load_pct REAL,
power_w REAL,
PRIMARY KEY (ts)
);
CREATE TABLE fan_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
rpm REAL,
PRIMARY KEY (ts, name)
);
CREATE TABLE temp_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
grp TEXT NOT NULL,
celsius REAL,
PRIMARY KEY (ts, name)
);
`)
if err != nil {
t.Fatalf("create legacy schema: %v", err)
}
// Close the raw handle before reopening the same file; the close error is
// deliberately ignored because the test only cares about the reopened DB.
_ = raw.Close()
// Reopen through the production entry point under test.
db, err := openMetricsDB(path)
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
now := time.Unix(1_700_000_100, 0).UTC()
// Write one sample carrying only the two clock fields for GPU 0.
err = db.Write(platform.LiveMetricSample{
Timestamp: now,
GPUs: []platform.GPUMetricRow{
{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2600},
},
})
if err != nil {
t.Fatalf("Write: %v", err)
}
samples, err := db.LoadAll()
if err != nil {
t.Fatalf("LoadAll: %v", err)
}
// Exactly one sample with exactly one GPU row is expected back.
if len(samples) != 1 || len(samples[0].GPUs) != 1 {
t.Fatalf("samples=%+v", samples)
}
if got := samples[0].GPUs[0].ClockMHz; got != 1410 {
t.Fatalf("ClockMHz=%v want 1410", got)
}
if got := samples[0].GPUs[0].MemClockMHz; got != 2600 {
t.Fatalf("MemClockMHz=%v want 2600", got)
}
}

View File

@@ -464,14 +464,14 @@ func renderMetrics() string {
<div class="card" style="margin-bottom:16px"> <div class="card" style="margin-bottom:16px">
<div class="card-head">Server — Load</div> <div class="card-head">Server — Load</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-load" src="/api/metrics/chart/server-load.svg" style="width:100%;display:block;border-radius:6px" alt="CPU/Mem load"> <img id="chart-server-load" data-chart-refresh="1" src="/api/metrics/chart/server-load.svg" style="width:100%;display:block;border-radius:6px" alt="CPU/Mem load">
</div> </div>
</div> </div>
<div class="card" style="margin-bottom:16px"> <div class="card" style="margin-bottom:16px">
<div class="card-head">Temperature — CPU</div> <div class="card-head">Temperature — CPU</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-temp-cpu" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature"> <img id="chart-server-temp-cpu" data-chart-refresh="1" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
</div> </div>
</div> </div>
@@ -479,57 +479,84 @@ func renderMetrics() string {
<div class="card" style="margin-bottom:16px"> <div class="card" style="margin-bottom:16px">
<div class="card-head">Temperature — Ambient Sensors</div> <div class="card-head">Temperature — Ambient Sensors</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-temp-ambient" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors"> <img id="chart-server-temp-ambient" data-chart-refresh="1" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
</div> </div>
</div> </div>
<div class="card" style="margin-bottom:16px"> <div class="card" style="margin-bottom:16px">
<div class="card-head">Server — Power</div> <div class="card-head">Server — Power</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power"> <img id="chart-server-power" data-chart-refresh="1" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
</div> </div>
</div> </div>
<div id="card-server-fans" class="card" style="margin-bottom:16px;display:none"> <div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
<div class="card-head">Server — Fan RPM</div> <div class="card-head">Server — Fan RPM</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-fans" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM"> <img id="chart-server-fans" data-chart-refresh="1" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
</div> </div>
</div> </div>
<div class="card" style="margin-bottom:16px"> <section id="gpu-metrics-section" style="display:none;margin-top:24px;padding:16px 16px 4px;border:1px solid #d7e0ea;border-radius:10px;background:linear-gradient(180deg,#f7fafc 0%,#eef4f8 100%)">
<div class="card-head">GPU — Compute Load</div> <div style="display:flex;align-items:center;justify-content:space-between;gap:16px;flex-wrap:wrap;margin-bottom:14px">
<div class="card-body" style="padding:8px"> <div>
<img id="chart-gpu-all-load" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load"> <div style="font-size:12px;font-weight:700;letter-spacing:.08em;text-transform:uppercase;color:#486581">GPU Metrics</div>
<div id="gpu-metrics-summary" style="font-size:13px;color:var(--muted);margin-top:4px">Detected GPUs are rendered in a dedicated section.</div>
</div>
<label style="display:inline-flex;align-items:center;gap:8px;font-size:13px;color:var(--ink);font-weight:700;cursor:pointer">
<input id="gpu-chart-toggle" type="checkbox">
<span>One chart per GPU</span>
</label>
</div> </div>
</div>
<div class="card" style="margin-bottom:16px"> <div id="gpu-metrics-by-metric">
<div class="card-head">GPU — Memory Load</div> <div class="card" style="margin-bottom:16px">
<div class="card-body" style="padding:8px"> <div class="card-head">GPU — Compute Load</div>
<img id="chart-gpu-all-memload" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load"> <div class="card-body" style="padding:8px">
<img id="chart-gpu-all-load" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Memory Load</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-memload" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Core Clock</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-clock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-clock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU core clock">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Memory Clock</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-memclock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memclock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory clock">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Power</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-power" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Temperature</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-temp" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
</div>
</div>
</div> </div>
</div>
<div class="card" style="margin-bottom:16px"> <div id="gpu-metrics-by-gpu" style="display:none"></div>
<div class="card-head">GPU — Power</div> </section>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-power" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">GPU — Temperature</div>
<div class="card-body" style="padding:8px">
<img id="chart-gpu-all-temp" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
</div>
</div>
<script> <script>
const chartIds = [ let gpuChartKey = '';
'chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans',
'chart-gpu-all-load','chart-gpu-all-memload','chart-gpu-all-power','chart-gpu-all-temp'
];
function refreshChartImage(el) { function refreshChartImage(el) {
if (!el || el.dataset.loading === '1') return; if (!el || el.dataset.loading === '1') return;
if (el.offsetParent === null) return;
const baseSrc = el.dataset.baseSrc || el.src.split('?')[0]; const baseSrc = el.dataset.baseSrc || el.src.split('?')[0];
const nextSrc = baseSrc + '?t=' + Date.now(); const nextSrc = baseSrc + '?t=' + Date.now();
const probe = new Image(); const probe = new Image();
@@ -546,14 +573,74 @@ function refreshChartImage(el) {
} }
function refreshCharts() { function refreshCharts() {
chartIds.forEach(id => refreshChartImage(document.getElementById(id))); document.querySelectorAll('img[data-chart-refresh="1"]').forEach(refreshChartImage);
} }
setInterval(refreshCharts, 3000);
fetch('/api/metrics/latest').then(r => r.json()).then(d => { function gpuIndices(rows) {
const seen = {};
const out = [];
(rows || []).forEach(function(row) {
const idx = Number(row.index);
if (!Number.isFinite(idx) || seen[idx]) return;
seen[idx] = true;
out.push(idx);
});
return out.sort(function(a, b) { return a - b; });
}
function renderGPUOverviewCards(indices) {
const host = document.getElementById('gpu-metrics-by-gpu');
if (!host) return;
host.innerHTML = indices.map(function(idx) {
return '<div class="card" style="margin-bottom:16px">' +
'<div class="card-head">GPU ' + idx + ' — Overview</div>' +
'<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + idx + ' overview">' +
'</div></div>';
}).join('');
}
function applyGPUChartMode() {
const perMetric = document.getElementById('gpu-metrics-by-metric');
const perGPU = document.getElementById('gpu-metrics-by-gpu');
const toggle = document.getElementById('gpu-chart-toggle');
const gpuModePerGPU = !!(toggle && toggle.checked);
if (perMetric) perMetric.style.display = gpuModePerGPU ? 'none' : '';
if (perGPU) perGPU.style.display = gpuModePerGPU ? '' : 'none';
}
function syncMetricsLayout(d) {
const fanCard = document.getElementById('card-server-fans'); const fanCard = document.getElementById('card-server-fans');
if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none'; if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
}).catch(() => {}); const section = document.getElementById('gpu-metrics-section');
const summary = document.getElementById('gpu-metrics-summary');
const indices = gpuIndices(d.gpus);
if (section) section.style.display = indices.length > 0 ? '' : 'none';
if (summary) {
summary.textContent = indices.length > 0
? ('Detected GPUs: ' + indices.map(function(idx) { return 'GPU ' + idx; }).join(', '))
: 'No GPUs detected in live metrics.';
}
const nextKey = indices.join(',');
if (nextKey !== gpuChartKey) {
renderGPUOverviewCards(indices);
gpuChartKey = nextKey;
}
applyGPUChartMode();
}
function loadMetricsLayout() {
fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
}
document.getElementById('gpu-chart-toggle').addEventListener('change', function() {
applyGPUChartMode();
refreshCharts();
});
loadMetricsLayout();
setInterval(refreshCharts, 3000);
setInterval(loadMetricsLayout, 5000);
</script>` </script>`
} }

View File

@@ -6,11 +6,13 @@ import (
"fmt" "fmt"
"html" "html"
"log/slog" "log/slog"
"math"
"mime" "mime"
"net/http" "net/http"
"os" "os"
"path/filepath" "path/filepath"
"sort" "sort"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -475,6 +477,26 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
http.Error(w, "metrics database not available", http.StatusServiceUnavailable) http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
return return
} }
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
buf, ok, err := renderGPUOverviewChartSVG(idx, samples)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if !ok {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
w.Header().Set("Content-Type", "image/svg+xml")
w.Header().Set("Cache-Control", "no-store")
_, _ = w.Write(buf)
return
}
datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path) datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path)
if !ok { if !ok {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable) http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
@@ -578,15 +600,21 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = autoMax120(datasets...) yMax = autoMax120(datasets...)
case path == "gpu-all-clock":
title = "GPU Core Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
yMin, yMax = autoBounds120(datasets...)
case path == "gpu-all-memclock":
title = "GPU Memory Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
yMin, yMax = autoBounds120(datasets...)
case strings.HasPrefix(path, "gpu/"): case strings.HasPrefix(path, "gpu/"):
rest := strings.TrimPrefix(path, "gpu/") idx, sub, ok := parseGPUChartPath(path)
sub := "" if !ok {
if i := strings.LastIndex(rest, "-"); i > 0 { return nil, nil, nil, "", nil, nil, false
sub = rest[i+1:]
rest = rest[:i]
} }
idx := 0
fmt.Sscanf(rest, "%d", &idx)
switch sub { switch sub {
case "load": case "load":
title = fmt.Sprintf("GPU %d Load", idx) title = fmt.Sprintf("GPU %d Load", idx)
@@ -609,6 +637,24 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
names = []string{"Temp °C"} names = []string{"Temp °C"}
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = autoMax120(temp) yMax = autoMax120(temp)
case "clock":
title = fmt.Sprintf("GPU %d Core Clock", idx)
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false
}
datasets = [][]float64{clock}
names = []string{"Core Clock MHz"}
yMin, yMax = autoBounds120(clock)
case "memclock":
title = fmt.Sprintf("GPU %d Memory Clock", idx)
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false
}
datasets = [][]float64{clock}
names = []string{"Memory Clock MHz"}
yMin, yMax = autoBounds120(clock)
default: default:
title = fmt.Sprintf("GPU %d Power", idx) title = fmt.Sprintf("GPU %d Power", idx)
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW }) power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
@@ -627,6 +673,26 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
return datasets, names, labels, title, yMin, yMax, len(datasets) > 0 return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
} }
// parseGPUChartPath splits a per-GPU chart path of the form
// "gpu/<index>-<sub>" (for example "gpu/0-overview") into the GPU index and
// the sub-chart name.
//
// The sub-chart name is everything after the last '-'. A path without a '-'
// after the index (for example "gpu/3") yields an empty sub. ok is false when
// the "gpu/" prefix is missing, nothing follows it, or the index part is not
// a plain non-negative decimal integer. The index is parsed strictly, so
// inputs with trailing garbage such as "gpu/1abc-load" are rejected instead
// of silently resolving to GPU 1.
func parseGPUChartPath(path string) (idx int, sub string, ok bool) {
	if !strings.HasPrefix(path, "gpu/") {
		return 0, "", false
	}
	rest := strings.TrimPrefix(path, "gpu/")
	if rest == "" {
		return 0, "", false
	}
	// i > 0 (not >= 0) keeps a leading '-' attached to the index part, so it
	// is judged by the integer parse below rather than treated as a separator.
	if i := strings.LastIndex(rest, "-"); i > 0 {
		sub = rest[i+1:]
		rest = rest[:i]
	}
	n, err := strconv.Atoi(rest)
	if err != nil || n < 0 {
		return 0, "", false
	}
	return n, sub, true
}
func sampleTimeLabels(samples []platform.LiveMetricSample) []string { func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
labels := make([]string, len(samples)) labels := make([]string, len(samples))
if len(samples) == 0 { if len(samples) == 0 {
@@ -852,6 +918,268 @@ func autoBounds120(datasets ...[]float64) (*float64, *float64) {
return floatPtr(low), floatPtr(high) return floatPtr(low), floatPtr(high)
} }
// renderGPUOverviewChartSVG renders the combined overview chart for the GPU
// with index idx: temperature, power, core clock and memory clock drawn as
// four series of one SVG.
//
// The boolean result is false (with a nil error) when gpuDatasetByIndex
// returns nil for all four metrics, i.e. there is nothing to draw for this
// GPU. A non-nil error is whatever drawGPUOverviewChartSVG reported.
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample) ([]byte, bool, error) {
	// pick extracts one metric of the requested GPU across all samples.
	pick := func(field func(platform.GPUMetricRow) float64) []float64 {
		return gpuDatasetByIndex(samples, idx, field)
	}
	tempC := pick(func(row platform.GPUMetricRow) float64 { return row.TempC })
	powerW := pick(func(row platform.GPUMetricRow) float64 { return row.PowerW })
	coreMHz := pick(func(row platform.GPUMetricRow) float64 { return row.ClockMHz })
	memMHz := pick(func(row platform.GPUMetricRow) float64 { return row.MemClockMHz })

	haveData := tempC != nil || powerW != nil || coreMHz != nil || memMHz != nil
	if !haveData {
		return nil, false, nil
	}

	timeLabels := sampleTimeLabels(samples)
	n := len(samples)
	// Each dataset is passed through coalesceDataset with the sample count
	// before drawing; the order below fixes which axis each series is given.
	overview := []gpuOverviewSeries{
		{Name: "Temp C", Values: coalesceDataset(tempC, n), Color: "#f05a5a", AxisTitle: "Temp C"},
		{Name: "Power W", Values: coalesceDataset(powerW, n), Color: "#ffb357", AxisTitle: "Power W"},
		{Name: "Core Clock MHz", Values: coalesceDataset(coreMHz, n), Color: "#73bf69", AxisTitle: "Core MHz"},
		{Name: "Memory Clock MHz", Values: coalesceDataset(memMHz, n), Color: "#5794f2", AxisTitle: "Memory MHz"},
	}

	chartTitle := fmt.Sprintf("GPU %d Overview", idx)
	rendered, err := drawGPUOverviewChartSVG(chartTitle, timeLabels, overview)
	if err != nil {
		return nil, false, err
	}
	return rendered, true, nil
}
// gpuOverviewSeries describes one line of the per-GPU overview chart drawn by
// drawGPUOverviewChartSVG.
type gpuOverviewSeries struct {
// Name is the text shown next to the series' swatch in the legend.
Name string
// AxisTitle is the caption drawn at the top of the series' own Y axis.
AxisTitle string
// Color is used for the series line, its axis, tick marks, tick labels and legend swatch.
Color string
// Values holds the plotted values, one per X position.
Values []float64
}
// drawGPUOverviewChartSVG renders exactly four series into one SVG document,
// each series scaled against its own Y axis, and returns the SVG bytes.
//
// title is drawn centered at the top, labels supply the X-axis captions (one
// per point), and series[i] is bound to the i-th axis: the first two axes sit
// to the left of the plot area, the last two to the right. An error is
// returned only when len(series) is not 4.
//
// Note that series entries with no values are replaced in place by a
// zero-filled slice, which is visible to the caller through the shared slice.
func drawGPUOverviewChartSVG(title string, labels []string, series []gpuOverviewSeries) ([]byte, error) {
if len(series) != 4 {
return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
}
// Canvas size and the rectangle that holds the plotted lines.
const (
width = 1400
height = 420
plotLeft = 180
plotRight = 1220
plotTop = 74
plotBottom = 292
)
// X positions of the four vertical value axes, outside the plot rectangle.
const (
leftOuterAxis = 72
leftInnerAxis = 132
rightInnerAxis = 1268
rightOuterAxis = 1328
)
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
plotWidth := plotRight - plotLeft
plotHeight := plotBottom - plotTop
pointCount := len(labels)
// With no labels, fall back to a single empty label so later indexing is valid.
if pointCount == 0 {
pointCount = 1
labels = []string{""}
}
// Give empty series a zero-filled dataset so every series has something to draw.
for i := range series {
if len(series[i].Values) == 0 {
series[i].Values = make([]float64, pointCount)
}
}
// axisScale is the value range and tick positions of one Y axis.
type axisScale struct {
Min float64
Max float64
Ticks []float64
}
scales := make([]axisScale, len(series))
for i := range series {
min, max := gpuChartSeriesBounds(series[i].Values)
ticks := gpuChartNiceTicks(min, max, 8)
// The axis spans from the first to the last tick.
// NOTE(review): indexes ticks without a length check; assumes
// gpuChartNiceTicks always returns at least one tick — confirm.
scales[i] = axisScale{
Min: ticks[0],
Max: ticks[len(ticks)-1],
Ticks: ticks,
}
}
// xFor maps a point index to an X coordinate: points are spread evenly
// across the plot width, and a lone point is placed at the horizontal center.
xFor := func(index int) float64 {
if pointCount <= 1 {
return float64(plotLeft + plotWidth/2)
}
return float64(plotLeft) + float64(index)*float64(plotWidth)/float64(pointCount-1)
}
// yFor maps a value to a Y coordinate linearly within scale (larger values
// are higher on screen); a degenerate scale maps everything to the vertical center.
yFor := func(value float64, scale axisScale) float64 {
if scale.Max <= scale.Min {
return float64(plotTop + plotHeight/2)
}
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotHeight)
}
var b strings.Builder
// Document root, background and title.
b.WriteString(fmt.Sprintf(`<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`, width, height, width, height))
b.WriteString("\n")
b.WriteString(`<rect width="100%" height="100%" rx="10" ry="10" fill="#111217" stroke="#2f3440"/>` + "\n")
b.WriteString(`<text x="700" y="28" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#f5f7fa">` + sanitizeChartText(title) + `</text>` + "\n")
// Grid: horizontal lines follow the ticks of the first series only;
// vertical lines are drawn at the point indices chosen by gpuChartLabelIndices.
b.WriteString(`<g stroke="#2f3440" stroke-width="1">` + "\n")
for _, tick := range scales[0].Ticks {
y := yFor(tick, scales[0])
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n", plotLeft, y, plotRight, y)
}
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
x := xFor(idx)
fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n", x, plotTop, x, plotBottom)
}
b.WriteString("</g>\n")
// Plot area border.
fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#454c5c" stroke-width="1"/>`+"\n",
plotLeft, plotTop, plotWidth, plotHeight)
// One colored axis per series: the axis line, its title, then tick marks and tick labels.
for i, axisLineX := range axisX {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, plotTop, axisLineX, plotBottom, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
axisLineX, 52, series[i].Color, sanitizeChartText(series[i].AxisTitle))
for _, tick := range scales[i].Ticks {
y := yFor(tick, scales[i])
label := sanitizeChartText(gpuChartFormatTick(tick))
// Left-hand axes (first two): tick mark points right toward the plot,
// label is right-aligned on the left side of the axis.
if i < 2 {
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX+6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX-8, y, series[i].Color, label)
continue
}
// Right-hand axes (last two): mirrored — tick mark points left, label on the right.
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX-6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX+8, y, series[i].Color, label)
}
}
// X-axis captions under the plot, at the same indices as the vertical grid lines.
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#c8d0d8" text-anchor="middle">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, 8) {
x := xFor(idx)
fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotBottom+22, sanitizeChartText(labels[idx]))
}
b.WriteString(`</g>` + "\n")
b.WriteString(`<text x="700" y="338" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#c8d0d8">Time</text>` + "\n")
// Data lines: one polyline per series, each mapped through its own scale.
for i := range series {
var points strings.Builder
for j, value := range series[i].Values {
if j > 0 {
points.WriteByte(' ')
}
points.WriteString(strconv.FormatFloat(xFor(j), 'f', 1, 64))
points.WriteByte(',')
points.WriteString(strconv.FormatFloat(yFor(value, scales[i]), 'f', 1, 64))
}
fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2"/>`+"\n",
points.String(), series[i].Color)
// A single-value series additionally gets a dot — presumably because a
// one-point polyline would not be visible on its own.
if len(series[i].Values) == 1 {
fmt.Fprintf(&b, `<circle cx="%.1f" cy="%.1f" r="3" fill="%s"/>`+"\n",
xFor(0), yFor(series[i].Values[0], scales[i]), series[i].Color)
}
}
// Legend row: a colored swatch followed by the series name, at fixed X offsets.
const legendY = 372
legendX := []int{190, 470, 790, 1090}
for i := range series {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="3"/>`+"\n",
legendX[i], legendY, legendX[i]+28, legendY, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#f5f7fa">%s</text>`+"\n",
legendX[i]+38, legendY+4, sanitizeChartText(series[i].Name))
}
b.WriteString("</svg>\n")
return []byte(b.String()), nil
}
// gpuChartSeriesBounds derives the lower and upper axis bounds for a series.
//
// An empty series, or one whose values are all zero, yields (0, 1). A flat
// non-zero series is widened by 10% of its magnitude on both sides. When the
// resulting lower bound is strictly positive, both bounds are additionally
// padded by 20% of the range and the lower bound is clamped so it never drops
// below zero. Series that touch or cross zero are returned without padding.
func gpuChartSeriesBounds(values []float64) (float64, float64) {
	if len(values) == 0 {
		return 0, 1
	}

	lo, hi := values[0], values[0]
	for i := 1; i < len(values); i++ {
		v := values[i]
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}

	// A flat series has no range of its own, so give it one.
	if lo == hi {
		if hi == 0 {
			return 0, 1
		}
		margin := math.Abs(hi) * 0.1
		if margin == 0 {
			margin = 1
		}
		lo, hi = lo-margin, hi+margin
	}

	// Only strictly positive ranges receive the extra breathing room.
	if !(lo > 0) {
		return lo, hi
	}

	margin := (hi - lo) * 0.2
	if margin == 0 {
		margin = hi * 0.1
	}
	lo -= margin
	if lo < 0 {
		lo = 0
	}
	hi += margin
	return lo, hi
}
// gpuChartNiceTicks returns evenly spaced axis tick values covering [min, max].
//
// The step is 1, 2, 5 or 10 times a power of ten, chosen so that the range is
// split into at most 1.5*target intervals. The first tick is at or below min
// and the last is at or above max. Each tick is rounded to nine decimal places
// to hide floating-point noise.
//
// Degenerate input is handled explicitly: reversed bounds are swapped, equal
// bounds are widened to a span of 1, and a target below 1 is treated as 1. Nil
// is returned when a bound is NaN or infinite, or when no positive finite step
// can be derived from the range.
func gpuChartNiceTicks(min, max float64, target int) []float64 {
	if math.IsNaN(min) || math.IsNaN(max) || math.IsInf(min, 0) || math.IsInf(max, 0) {
		return nil
	}
	if target < 1 {
		target = 1
	}
	if min > max {
		min, max = max, min
	}
	if min == max {
		max = min + 1
	}
	span := max - min
	if !(span > 0) || math.IsInf(span, 0) {
		return nil
	}

	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
	for _, factor := range []float64{1, 2, 5, 10} {
		if span/(factor*step) <= float64(target)*1.5 {
			step *= factor
			break
		}
	}
	if !(step > 0) || math.IsInf(step, 0) {
		return nil
	}

	low := math.Floor(min/step) * step
	high := math.Ceil(max/step) * step
	intervals := math.Round((high - low) / step)
	if math.IsNaN(intervals) || math.IsInf(intervals, 0) || intervals < 0 {
		return nil
	}

	// Generate ticks by index rather than by repeated addition: this avoids
	// accumulated rounding drift and guarantees termination even when step is
	// smaller than the float64 resolution at the magnitude of low.
	count := int(intervals) + 1
	ticks := make([]float64, 0, count)
	for i := 0; i < count; i++ {
		ticks = append(ticks, math.Round((low+float64(i)*step)*1e9)/1e9)
	}
	return ticks
}
// gpuChartFormatTick renders a tick value as an axis label.
//
// Values with no fractional part are printed as plain integers; everything
// else is printed with exactly one decimal place. Integral values are
// formatted directly from the float64 rather than via an int conversion, so
// magnitudes outside the int range (and infinities) produce a well-defined
// label instead of an implementation-dependent number.
func gpuChartFormatTick(value float64) string {
	if value != math.Trunc(value) {
		// Fractional values (and NaN, which never equals itself).
		return strconv.FormatFloat(value, 'f', 1, 64)
	}
	if value == 0 {
		// Covers negative zero, which FormatFloat would print as "-0".
		return "0"
	}
	return strconv.FormatFloat(value, 'f', 0, 64)
}
// gpuChartLabelIndices picks which of total data points should carry an
// X-axis label, aiming for roughly target labels.
//
// Indices are taken every total/target points (at least every point) starting
// at 0, and the final index total-1 is always included. It returns nil when
// total is not positive. A target below 1 is treated as 1, which avoids a
// division by zero and yields just the first and last index.
func gpuChartLabelIndices(total, target int) []int {
	if total <= 0 {
		return nil
	}
	if total == 1 {
		return []int{0}
	}
	if target < 1 {
		target = 1
	}
	step := total / target
	if step < 1 {
		step = 1
	}
	// Room for every stepped index plus the forced trailing one.
	indices := make([]int, 0, total/step+2)
	for i := 0; i < total; i += step {
		indices = append(indices, i)
	}
	if last := total - 1; indices[len(indices)-1] != last {
		indices = append(indices, last)
	}
	return indices
}
// renderChartSVG renders a line chart SVG with a fixed Y-axis range.
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) { func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
n := len(labels) n := len(labels)

View File

@@ -136,6 +136,53 @@ func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
} }
} }
// TestChartDataFromSamplesIncludesGPUClockCharts checks that
// chartDataFromSamples serves the "gpu-all-clock" and "gpu-all-memclock"
// charts: each must report ok, carry the expected title, name its series
// "GPU 0" and "GPU 3", and expose the sampled clock values.
func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
	// Two samples one minute apart, each covering GPU indices 0 and 3.
	older := platform.LiveMetricSample{
		Timestamp: time.Now().Add(-2 * time.Minute),
		GPUs: []platform.GPUMetricRow{
			{GPUIndex: 0, ClockMHz: 1400, MemClockMHz: 2600},
			{GPUIndex: 3, ClockMHz: 1500, MemClockMHz: 2800},
		},
	}
	newer := platform.LiveMetricSample{
		Timestamp: time.Now().Add(-1 * time.Minute),
		GPUs: []platform.GPUMetricRow{
			{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2610},
			{GPUIndex: 3, ClockMHz: 1510, MemClockMHz: 2810},
		},
	}
	samples := []platform.LiveMetricSample{older, newer}

	// Core clock chart: second series, second sample.
	coreData, coreNames, _, coreTitle, _, _, coreOK := chartDataFromSamples("gpu-all-clock", samples)
	switch {
	case !coreOK:
		t.Fatal("gpu-all-clock returned ok=false")
	case coreTitle != "GPU Core Clock":
		t.Fatalf("title=%q", coreTitle)
	case !(len(coreNames) == 2 && coreNames[0] == "GPU 0" && coreNames[1] == "GPU 3"):
		t.Fatalf("names=%v", coreNames)
	}
	if got := coreData[1][1]; got != 1510 {
		t.Fatalf("GPU 3 core clock=%v want 1510", got)
	}

	// Memory clock chart: first series, first sample.
	memData, memNames, _, memTitle, _, _, memOK := chartDataFromSamples("gpu-all-memclock", samples)
	switch {
	case !memOK:
		t.Fatal("gpu-all-memclock returned ok=false")
	case memTitle != "GPU Memory Clock":
		t.Fatalf("title=%q", memTitle)
	case !(len(memNames) == 2 && memNames[0] == "GPU 0" && memNames[1] == "GPU 3"):
		t.Fatalf("names=%v", memNames)
	}
	if got := memData[0][0]; got != 2600 {
		t.Fatalf("GPU 0 memory clock=%v want 2600", got)
	}
}
func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) { func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) {
got := normalizePowerSeries([]float64{0, 480, 0, 0, 510, 0}) got := normalizePowerSeries([]float64{0, 480, 0, 0, 510, 0})
want := []float64{0, 480, 480, 480, 510, 510} want := []float64{0, 480, 480, 480, 510, 510}
@@ -157,6 +204,21 @@ func TestRenderMetricsUsesBufferedChartRefresh(t *testing.T) {
if !strings.Contains(body, "el.dataset.loading === '1'") { if !strings.Contains(body, "el.dataset.loading === '1'") {
t.Fatalf("metrics page should avoid overlapping chart reloads: %s", body) t.Fatalf("metrics page should avoid overlapping chart reloads: %s", body)
} }
if !strings.Contains(body, `id="gpu-metrics-section" style="display:none`) {
t.Fatalf("metrics page should keep gpu charts in a hidden dedicated section until GPUs are detected: %s", body)
}
if !strings.Contains(body, `id="gpu-chart-toggle"`) {
t.Fatalf("metrics page should render GPU chart mode toggle: %s", body)
}
if !strings.Contains(body, `/api/metrics/chart/gpu-all-clock.svg`) {
t.Fatalf("metrics page should include GPU core clock chart: %s", body)
}
if !strings.Contains(body, `/api/metrics/chart/gpu-all-memclock.svg`) {
t.Fatalf("metrics page should include GPU memory clock chart: %s", body)
}
if !strings.Contains(body, `renderGPUOverviewCards(indices)`) {
t.Fatalf("metrics page should build per-GPU chart cards dynamically: %s", body)
}
} }
func TestChartLegendVisible(t *testing.T) { func TestChartLegendVisible(t *testing.T) {