package platform import ( "bytes" "fmt" "math" "os" "os/exec" "strconv" "strings" "time" ) // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test. type GPUMetricRow struct { ElapsedSec float64 GPUIndex int TempC float64 UsagePct float64 PowerW float64 ClockMHz float64 } // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU. func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) { args := []string{ "--query-gpu=index,temperature.gpu,utilization.gpu,power.draw,clocks.current.graphics", "--format=csv,noheader,nounits", } if len(gpuIndices) > 0 { ids := make([]string, len(gpuIndices)) for i, idx := range gpuIndices { ids[i] = strconv.Itoa(idx) } args = append([]string{"--id=" + strings.Join(ids, ",")}, args...) } out, err := exec.Command("nvidia-smi", args...).Output() if err != nil { return nil, err } var rows []GPUMetricRow for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { line = strings.TrimSpace(line) if line == "" { continue } parts := strings.Split(line, ", ") if len(parts) < 5 { continue } idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) rows = append(rows, GPUMetricRow{ GPUIndex: idx, TempC: parseGPUFloat(parts[1]), UsagePct: parseGPUFloat(parts[2]), PowerW: parseGPUFloat(parts[3]), ClockMHz: parseGPUFloat(parts[4]), }) } return rows, nil } func parseGPUFloat(s string) float64 { s = strings.TrimSpace(s) if s == "N/A" || s == "[Not Supported]" || s == "" { return 0 } v, _ := strconv.ParseFloat(s, 64) return v } // WriteGPUMetricsCSV writes collected rows as a CSV file. 
//
// The file starts with the header row
// "elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz" followed by
// one line per element of rows, in the order given. The whole content is built
// in memory and written with a single os.WriteFile call using mode 0644; the
// error from that call is returned unchanged.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
	var b bytes.Buffer
	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz\n")
	for _, r := range rows {
		// One decimal place for every float column except the clock, which is
		// written without decimals.
		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.0f\n", r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.PowerW, r.ClockMHz)
	}
	return os.WriteFile(path, b.Bytes(), 0644)
}

// WriteGPUMetricsHTML writes a standalone HTML file with one SVG chart per GPU.
//
// Rows are grouped by GPUIndex; charts appear in the order in which each index
// is first seen in rows. The page also carries the current UTC time. The file
// is written with mode 0644 and the os.WriteFile error is returned unchanged.
func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
	// Group by GPU index preserving order.
	seen := make(map[int]bool)
	var order []int
	gpuMap := make(map[int][]GPUMetricRow)
	for _, r := range rows {
		if !seen[r.GPUIndex] {
			seen[r.GPUIndex] = true
			order = append(order, r.GPUIndex)
		}
		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
	}
	// Render one chart per GPU, each followed by a newline.
	var svgs strings.Builder
	for _, gpuIdx := range order {
		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx))
		svgs.WriteString("\n")
	}
	ts := time.Now().UTC().Format("2006-01-02 15:04:05 UTC")
	// NOTE(review): the template below contains only plain text (a title, a
	// heading, the timestamp and the charts) and no HTML tags; the markup may
	// have been lost from this copy of the file — confirm against the original.
	html := fmt.Sprintf(` GPU Stress Test Metrics

GPU Stress Test Metrics

Generated %s

%s `, ts, svgs.String())
	return os.WriteFile(path, []byte(html), 0644)
}

// drawGPUChartSVG generates a self-contained SVG chart for one GPU.
//
// rows are the samples to plot (the caller passes the rows of a single GPU) and
// gpuIdx is used only in the title and legend text. Four series are drawn
// against a shared time axis: temperature, usage, power and clock, each with
// its own Y axis and colour.
//
// NOTE(review): many format strings in this function are empty (“) or contain
// no SVG elements although arguments are passed to fmt.Fprintf; the SVG markup
// appears to be missing from this copy of the file — confirm against the
// original before relying on the output.
func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
	// Layout
	const W, H = 960, 520
	const plotX1 = 120 // usage axis / chart left border
	const plotX2 = 840 // power axis / chart right border
	const plotY1 = 70  // top
	const plotY2 = 465 // bottom (PH = 395)
	const PW = plotX2 - plotX1
	const PH = plotY2 - plotY1
	// Outer axes
	const tempAxisX = 60   // temp axis line
	const clockAxisX = 900 // clock axis line

	// Index 0..3 is temperature, usage, power, clock throughout this function.
	colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
	seriesLabel := [4]string{
		fmt.Sprintf("GPU %d Temp (°C)", gpuIdx),
		fmt.Sprintf("GPU %d Usage (%%)", gpuIdx),
		fmt.Sprintf("GPU %d Power (W)", gpuIdx),
		fmt.Sprintf("GPU %d Clock (MHz)", gpuIdx),
	}
	axisLabel := [4]string{"Temperature (°C)", "GPU Usage (%)", "Power (W)", "Clock (MHz)"}

	// Extract series
	t := make([]float64, len(rows))
	vals := [4][]float64{}
	for i := range vals {
		vals[i] = make([]float64, len(rows))
	}
	for i, r := range rows {
		t[i] = r.ElapsedSec
		vals[0][i] = r.TempC
		vals[1][i] = r.UsagePct
		vals[2][i] = r.PowerW
		vals[3][i] = r.ClockMHz
	}
	tMin, tMax := gpuMinMax(t)

	// Each series gets its own scale, spanning the first to the last tick
	// returned by gpuNiceTicks.
	type axisScale struct {
		ticks    []float64
		min, max float64
	}
	var axes [4]axisScale
	for i := 0; i < 4; i++ {
		mn, mx := gpuMinMax(vals[i])
		tks := gpuNiceTicks(mn, mx, 8)
		// Indexing tks[0] assumes gpuNiceTicks returned at least one tick.
		axes[i] = axisScale{ticks: tks, min: tks[0], max: tks[len(tks)-1]}
	}

	// xv maps a time value to an X pixel coordinate inside the plot area; with
	// a zero-width time range everything is placed on the left border.
	xv := func(tv float64) float64 {
		if tMax == tMin {
			return float64(plotX1)
		}
		return float64(plotX1) + (tv-tMin)/(tMax-tMin)*float64(PW)
	}
	// yv maps a value of series ai to a Y pixel coordinate (larger values are
	// nearer the top); a zero-height scale maps to the vertical middle.
	yv := func(v float64, ai int) float64 {
		a := axes[ai]
		if a.max == a.min {
			return float64(plotY1 + PH/2)
		}
		return float64(plotY2) - (v-a.min)/(a.max-a.min)*float64(PH)
	}

	var b strings.Builder
	fmt.Fprintf(&b, ``+"\n", W, H)
	// Title
	fmt.Fprintf(&b, `GPU Stress Test Metrics — GPU %d`+"\n", plotX1+PW/2, gpuIdx)
	// Horizontal grid (align to temp axis ticks)
	b.WriteString(`` + "\n")
	for _, tick := range axes[0].ticks {
		y := yv(tick, 0)
		// Skip ticks that fall outside the plot area.
		if y < float64(plotY1) || y > float64(plotY2) {
			continue
		}
		fmt.Fprintf(&b, ``+"\n", plotX1, y, plotX2, y)
	}
	// Vertical grid
	xTicks := gpuNiceTicks(tMin, tMax, 10)
	for _, tv := range xTicks {
		x := xv(tv)
		if x < float64(plotX1) || x > float64(plotX2) {
			continue
		}
		fmt.Fprintf(&b, ``+"\n", x, plotY1, x, plotY2)
	}
	b.WriteString("\n")
	// Chart border
	fmt.Fprintf(&b, ``+"\n", plotX1, plotY1, PW, PH)
	// X axis ticks and labels
	b.WriteString(`` + "\n")
	for _, tv := range xTicks {
		x := xv(tv)
		if x < float64(plotX1) || x > float64(plotX2) {
			continue
		}
		fmt.Fprintf(&b, `%s`+"\n", x, plotY2+18, gpuFormatTick(tv))
		fmt.Fprintf(&b, ``+"\n", x, plotY2, x, plotY2+4)
	}
	b.WriteString("\n")
	fmt.Fprintf(&b, `Time (seconds)`+"\n", plotX1+PW/2, plotY2+38)

	// Y axes: [tempAxisX, plotX1, plotX2, clockAxisX]
	axisLineX := [4]int{tempAxisX, plotX1, plotX2, clockAxisX}
	// true for the two axes whose ticks and labels point to the right.
	axisRight := [4]bool{false, false, true, true}
	// Label x positions (for rotated vertical text)
	axisLabelX := [4]int{10, 68, 868, 950}
	for i := 0; i < 4; i++ {
		ax := axisLineX[i]
		right := axisRight[i]
		color := colors[i]
		// Axis line
		fmt.Fprintf(&b, ``+"\n", ax, plotY1, ax, plotY2, color)
		// Ticks and tick labels
		fmt.Fprintf(&b, ``+"\n", color)
		for _, tick := range axes[i].ticks {
			y := yv(tick, i)
			if y < float64(plotY1) || y > float64(plotY2) {
				continue
			}
			// Left-hand axes: tick mark and label extend to the left, text is
			// end-anchored; right-hand axes mirror that.
			dx := -5
			textX := ax - 8
			anchor := "end"
			if right {
				dx = 5
				textX = ax + 8
				anchor = "start"
			}
			fmt.Fprintf(&b, ``+"\n", ax, y, ax+dx, y, color)
			fmt.Fprintf(&b, `%s`+"\n", textX, y, anchor, gpuFormatTick(tick))
		}
		b.WriteString("\n")
		// Axis label (rotated)
		lx := axisLabelX[i]
		fmt.Fprintf(&b, `%s`+"\n", lx, plotY1+PH/2, color, axisLabel[i])
	}

	// Data lines
	for i := 0; i < 4; i++ {
		// Build a space-separated list of "x,y" points for series i.
		var pts strings.Builder
		for j := range rows {
			x := xv(t[j])
			y := yv(vals[i][j], i)
			if j == 0 {
				fmt.Fprintf(&pts, "%.1f,%.1f", x, y)
			} else {
				fmt.Fprintf(&pts, " %.1f,%.1f", x, y)
			}
		}
		fmt.Fprintf(&b, ``+"\n", pts.String(), colors[i])
	}

	// Legend
	const legendY = 42
	for i := 0; i < 4; i++ {
		// The four legend entries are spread evenly across the plot width.
		lx := plotX1 + i*(PW/4) + 10
		fmt.Fprintf(&b, ``+"\n", lx, legendY, lx+20, legendY, colors[i])
		fmt.Fprintf(&b, `%s`+"\n", lx+25, legendY+4, seriesLabel[i])
	}
	b.WriteString("\n")
	return b.String()
}

// ANSI escape sequences used to colour the terminal charts.
const (
	ansiRed    = "\033[31m"
	ansiBlue   = "\033[34m"
	ansiGreen  = "\033[32m"
	ansiYellow = "\033[33m"
	ansiReset  = "\033[0m"
)

// Size passed to renderLineChart by RenderGPUTerminalChart: the maximum number
// of plotted columns and the chart height.
const (
	termChartWidth  = 70
	termChartHeight = 12
)

// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
// Suitable for display in the TUI screenOutput.
//
// Rows are grouped by GPUIndex in first-seen order. For every GPU a heading
// with the elapsed time between its first and last sample is written, followed
// by four charts (temperature, usage, power, clock). Trailing newlines are
// trimmed from the result; with no rows the result is the empty string.
func RenderGPUTerminalChart(rows []GPUMetricRow) string {
	seen := make(map[int]bool)
	var order []int
	gpuMap := make(map[int][]GPUMetricRow)
	for _, r := range rows {
		if !seen[r.GPUIndex] {
			seen[r.GPUIndex] = true
			order = append(order, r.GPUIndex)
		}
		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
	}
	// seriesDef describes one chart: its caption, its ANSI colour and the
	// accessor that selects the plotted field.
	type seriesDef struct {
		caption string
		color   string
		fn      func(GPUMetricRow) float64
	}
	defs := []seriesDef{
		{"Temperature (°C)", ansiRed, func(r GPUMetricRow) float64 { return r.TempC }},
		{"GPU Usage (%)", ansiBlue, func(r GPUMetricRow) float64 { return r.UsagePct }},
		{"Power (W)", ansiGreen, func(r GPUMetricRow) float64 { return r.PowerW }},
		{"Clock (MHz)", ansiYellow, func(r GPUMetricRow) float64 { return r.ClockMHz }},
	}
	var b strings.Builder
	for _, gpuIdx := range order {
		gr := gpuMap[gpuIdx]
		if len(gr) == 0 {
			continue
		}
		// Duration covered by this GPU's samples (last minus first ElapsedSec).
		tMax := gr[len(gr)-1].ElapsedSec - gr[0].ElapsedSec
		fmt.Fprintf(&b, "GPU %d — Stress Test Metrics (%.0f seconds)\n\n", gpuIdx, tMax)
		for _, d := range defs {
			b.WriteString(renderLineChart(extractGPUField(gr, d.fn), d.color, d.caption, termChartHeight, termChartWidth))
			b.WriteRune('\n')
		}
	}
	return strings.TrimRight(b.String(), "\n")
}

// renderLineChart draws a single time-series line chart using box-drawing characters.
// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
func renderLineChart(vals []float64, color, caption string, height, width int) string { if len(vals) == 0 { return caption + "\n" } mn, mx := gpuMinMax(vals) if mn == mx { mx = mn + 1 } // Use the smaller of width or len(vals) to avoid stretching sparse data. w := width if len(vals) < w { w = len(vals) } data := gpuDownsample(vals, w) // row[i] = display row index: 0 = top = max value, height = bottom = min value. row := make([]int, w) for i, v := range data { r := int(math.Round((mx - v) / (mx - mn) * float64(height))) if r < 0 { r = 0 } if r > height { r = height } row[i] = r } // Fill the character grid. grid := make([][]rune, height+1) for i := range grid { grid[i] = make([]rune, w) for j := range grid[i] { grid[i][j] = ' ' } } for x := 0; x < w; x++ { r := row[x] if x == 0 { grid[r][0] = '─' continue } p := row[x-1] switch { case r == p: grid[r][x] = '─' case r < p: // value went up (row index decreased toward top) grid[r][x] = '╭' grid[p][x] = '╯' for y := r + 1; y < p; y++ { grid[y][x] = '│' } default: // r > p, value went down grid[p][x] = '╮' grid[r][x] = '╰' for y := p + 1; y < r; y++ { grid[y][x] = '│' } } } // Y axis tick labels. ticks := gpuNiceTicks(mn, mx, height/2) tickAtRow := make(map[int]string) labelWidth := 4 for _, t := range ticks { r := int(math.Round((mx - t) / (mx - mn) * float64(height))) if r < 0 || r > height { continue } s := gpuFormatTick(t) tickAtRow[r] = s if len(s) > labelWidth { labelWidth = len(s) } } var b strings.Builder for r := 0; r <= height; r++ { label := tickAtRow[r] fmt.Fprintf(&b, "%*s", labelWidth, label) switch { case label != "": b.WriteRune('┤') case r == height: b.WriteRune('┼') default: b.WriteRune('│') } b.WriteString(color) b.WriteString(string(grid[r])) b.WriteString(ansiReset) b.WriteRune('\n') } // Bottom axis. b.WriteString(strings.Repeat(" ", labelWidth)) b.WriteRune('└') b.WriteString(strings.Repeat("─", w)) b.WriteRune('\n') // Caption centered under the chart. 
if caption != "" { total := labelWidth + 1 + w if pad := (total - len(caption)) / 2; pad > 0 { b.WriteString(strings.Repeat(" ", pad)) } b.WriteString(caption) b.WriteRune('\n') } return b.String() } func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 { v := make([]float64, len(rows)) for i, r := range rows { v[i] = fn(r) } return v } // gpuDownsample averages vals into w buckets (or nearest-neighbor upsamples if len(vals) < w). func gpuDownsample(vals []float64, w int) []float64 { n := len(vals) if n == 0 { return make([]float64, w) } result := make([]float64, w) if n >= w { counts := make([]int, w) for i, v := range vals { bucket := i * w / n if bucket >= w { bucket = w - 1 } result[bucket] += v counts[bucket]++ } for i := range result { if counts[i] > 0 { result[i] /= float64(counts[i]) } } } else { // Nearest-neighbour upsample. for i := range result { src := i * (n - 1) / (w - 1) if src >= n { src = n - 1 } result[i] = vals[src] } } return result } func gpuMinMax(vals []float64) (float64, float64) { if len(vals) == 0 { return 0, 1 } mn, mx := vals[0], vals[0] for _, v := range vals[1:] { if v < mn { mn = v } if v > mx { mx = v } } return mn, mx } func gpuNiceTicks(mn, mx float64, targetCount int) []float64 { if mn == mx { mn -= 1 mx += 1 } r := mx - mn step := math.Pow(10, math.Floor(math.Log10(r/float64(targetCount)))) for _, f := range []float64{1, 2, 5, 10} { if r/(f*step) <= float64(targetCount)*1.5 { step = f * step break } } lo := math.Floor(mn/step) * step hi := math.Ceil(mx/step) * step var ticks []float64 for v := lo; v <= hi+step*0.001; v += step { ticks = append(ticks, math.Round(v*1e9)/1e9) } return ticks } func gpuFormatTick(v float64) string { if v == math.Trunc(v) { return strconv.Itoa(int(v)) } return strconv.FormatFloat(v, 'f', 1, 64) }