feat(tui): live GPU chart during stress test, full VRAM allocation

- GPU Platform Stress Test now shows a live in-TUI chart instead of nvtop.
  nvidia-smi is polled every second; up to 60 data points per GPU kept.
  All three metrics (Usage %, Temp °C, Power W) drawn on a single plot,
  each normalised to its own range and rendered in a different colour.
- Memory allocation changed from MemoryMB/16 to MemoryMB-512 (full VRAM
  minus 512 MB driver overhead) so bee-gpu-stress actually stresses memory.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-26 17:37:20 +03:00
parent 0a52a4f3ba
commit 8b4bfdf5ad
7 changed files with 223 additions and 27 deletions

View File

@@ -69,6 +69,11 @@ func parseGPUFloat(s string) float64 {
return v
}
// SampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
return sampleGPUMetrics(gpuIndices)
}
// WriteGPUMetricsCSV writes collected rows as a CSV file.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
var b bytes.Buffer
@@ -370,6 +375,162 @@ func RenderGPUTerminalChart(rows []GPUMetricRow) string {
return strings.TrimRight(b.String(), "\n")
}
// RenderGPULiveChart renders all GPU metrics on a single combined chart per GPU.
// Each series is normalised to its own minmax and drawn in a different colour.
// chartWidth controls the width of the plot area (Y-axis label uses 5 extra chars).
func RenderGPULiveChart(rows []GPUMetricRow, chartWidth int) string {
if chartWidth < 20 {
chartWidth = 70
}
const chartHeight = 14
seen := make(map[int]bool)
var order []int
gpuMap := make(map[int][]GPUMetricRow)
for _, r := range rows {
if !seen[r.GPUIndex] {
seen[r.GPUIndex] = true
order = append(order, r.GPUIndex)
}
gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
}
type seriesDef struct {
label string
color string
unit string
fn func(GPUMetricRow) float64
}
defs := []seriesDef{
{"Usage", ansiBlue, "%", func(r GPUMetricRow) float64 { return r.UsagePct }},
{"Temp", ansiRed, "°C", func(r GPUMetricRow) float64 { return r.TempC }},
{"Power", ansiGreen, "W", func(r GPUMetricRow) float64 { return r.PowerW }},
}
var b strings.Builder
for _, gpuIdx := range order {
gr := gpuMap[gpuIdx]
if len(gr) == 0 {
continue
}
elapsed := gr[len(gr)-1].ElapsedSec
// Build value slices for each series.
type seriesData struct {
seriesDef
vals []float64
mn float64
mx float64
}
var series []seriesData
for _, d := range defs {
vals := extractGPUField(gr, d.fn)
mn, mx := gpuMinMax(vals)
if mn == mx {
mx = mn + 1
}
series = append(series, seriesData{d, vals, mn, mx})
}
// Shared character grid: row 0 = top (max), row chartHeight = bottom (min).
type cell struct {
ch rune
color string
}
grid := make([][]cell, chartHeight+1)
for r := range grid {
grid[r] = make([]cell, chartWidth)
for c := range grid[r] {
grid[r][c] = cell{' ', ""}
}
}
// Plot each series onto the shared grid.
for _, s := range series {
w := chartWidth
if len(s.vals) < w {
w = len(s.vals)
}
data := gpuDownsample(s.vals, w)
prevRow := -1
for x, v := range data {
row := chartHeight - int(math.Round((v-s.mn)/(s.mx-s.mn)*float64(chartHeight)))
if row < 0 {
row = 0
}
if row > chartHeight {
row = chartHeight
}
if prevRow < 0 || prevRow == row {
grid[row][x] = cell{'─', s.color}
} else {
lo, hi := prevRow, row
if lo > hi {
lo, hi = hi, lo
}
for y := lo + 1; y < hi; y++ {
grid[y][x] = cell{'│', s.color}
}
if prevRow < row {
grid[prevRow][x] = cell{'╮', s.color}
grid[row][x] = cell{'╰', s.color}
} else {
grid[prevRow][x] = cell{'╯', s.color}
grid[row][x] = cell{'╭', s.color}
}
}
prevRow = row
}
}
// Render: Y axis + data rows.
fmt.Fprintf(&b, "GPU %d (%.0fs) each series normalised to its range\n", gpuIdx, elapsed)
for r := 0; r <= chartHeight; r++ {
// Y axis label: 100% at top, 50% in middle, 0% at bottom.
switch r {
case 0:
fmt.Fprintf(&b, "%4s┤", "100%")
case chartHeight / 2:
fmt.Fprintf(&b, "%4s┤", "50%")
case chartHeight:
fmt.Fprintf(&b, "%4s┤", "0%")
default:
fmt.Fprintf(&b, "%4s│", "")
}
for c := 0; c < chartWidth; c++ {
cl := grid[r][c]
if cl.color != "" {
b.WriteString(cl.color)
b.WriteRune(cl.ch)
b.WriteString(ansiReset)
} else {
b.WriteRune(' ')
}
}
b.WriteRune('\n')
}
// Bottom axis.
b.WriteString(" └")
b.WriteString(strings.Repeat("─", chartWidth))
b.WriteRune('\n')
// Legend with current (last) values.
b.WriteString(" ")
for i, s := range series {
last := s.vals[len(s.vals)-1]
b.WriteString(s.color)
fmt.Fprintf(&b, "▐ %s: %.0f%s", s.label, last, s.unit)
b.WriteString(ansiReset)
if i < len(series)-1 {
b.WriteString(" ")
}
}
b.WriteRune('\n')
}
return strings.TrimRight(b.String(), "\n")
}
// renderLineChart draws a single time-series line chart using box-drawing characters.
// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
func renderLineChart(vals []float64, color, caption string, height, width int) string {