feat(tui): NVIDIA SAT with nvtop, GPU selection, metrics and chart — v1.0.0

- TUI: duration presets (10m/1h/8h/24h), GPU multi-select checkboxes
- nvtop launched concurrently with SAT via tea.ExecProcess; can reopen or abort
- GPU metrics collected per-second during bee-gpu-stress (temp/usage/power/clock)
- Outputs: gpu-metrics.csv, gpu-metrics.html (offline SVG), gpu-metrics-term.txt
- Terminal chart: asciigraph-style line chart with box-drawing chars and ANSI colours
- AUDIT_VERSION bumped 0.1.1 → 1.0.0; nvtop added to ISO package list
- runtime-flows.md updated with full NVIDIA SAT TUI flow documentation

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-18 15:18:57 +03:00
parent b965184e71
commit 76a17937f3
14 changed files with 1162 additions and 24 deletions

View File

@@ -1,6 +1,7 @@
package app package app
import ( import (
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog" "log/slog"
@@ -71,8 +72,10 @@ type toolManager interface {
type satRunner interface { type satRunner interface {
RunNvidiaAcceptancePack(baseDir string) (string, error) RunNvidiaAcceptancePack(baseDir string) (string, error)
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (string, error)
RunMemoryAcceptancePack(baseDir string) (string, error) RunMemoryAcceptancePack(baseDir string) (string, error)
RunStorageAcceptancePack(baseDir string) (string, error) RunStorageAcceptancePack(baseDir string) (string, error)
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
} }
type runtimeChecker interface { type runtimeChecker interface {
@@ -395,6 +398,29 @@ func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error
return ActionResult{Title: "NVIDIA SAT", Body: body}, err return ActionResult{Title: "NVIDIA SAT", Body: body}, err
} }
// ListNvidiaGPUs returns the NVIDIA GPUs reported by the configured SAT runner.
// The runner's result and error are passed through unchanged.
func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
	gpus, err := a.sat.ListNvidiaGPUs()
	return gpus, err
}
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA SAT through the SAT runner
// with an explicit duration, memory size and GPU selection, and wraps the
// outcome in an ActionResult titled "NVIDIA SAT".
//
// A blank baseDir falls back to DefaultSATBaseDir. When the runner returns an
// archive path, the body names it and, if a non-empty gpu-metrics-term.txt
// exists in the run directory, appends that terminal chart. The runner's error
// is returned alongside the result.
func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (ActionResult, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	archive, runErr := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, durationSec, sizeMB, gpuIndices)

	result := ActionResult{Title: "NVIDIA SAT", Body: "Archive written."}
	if archive == "" {
		return result, runErr
	}
	result.Body = "Archive written to " + archive

	// The run directory is the archive path without its ".tar.gz" suffix.
	runDir := strings.TrimSuffix(archive, ".tar.gz")
	chart, readErr := os.ReadFile(filepath.Join(runDir, "gpu-metrics-term.txt"))
	if readErr == nil && len(chart) > 0 {
		result.Body += "\n\n" + string(chart)
	}
	return result, runErr
}
func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) { func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir

View File

@@ -1,6 +1,7 @@
package app package app
import ( import (
"context"
"encoding/json" "encoding/json"
"errors" "errors"
"os" "os"
@@ -105,6 +106,14 @@ func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
return f.runNvidiaFn(baseDir) return f.runNvidiaFn(baseDir)
} }
// RunNvidiaAcceptancePackWithOptions ignores the context and run options and
// delegates to the same runNvidiaFn hook as RunNvidiaAcceptancePack.
func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir string, _ int, _ int, _ []int) (string, error) {
	archive, err := f.runNvidiaFn(baseDir)
	return archive, err
}
// ListNvidiaGPUs reports no GPUs and no error.
func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
	var none []platform.NvidiaGPU
	return none, nil
}
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string) (string, error) { func (f fakeSAT) RunMemoryAcceptancePack(baseDir string) (string, error) {
return f.runMemoryFn(baseDir) return f.runMemoryFn(baseDir)
} }

View File

@@ -0,0 +1,577 @@
package platform
import (
"bytes"
"fmt"
"math"
"os"
"os/exec"
"strconv"
"strings"
"time"
)
// GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
// Each row describes a single GPU at a single sampling instant. Values that
// nvidia-smi reports as unavailable or that fail to parse are stored as 0
// (see parseGPUFloat).
type GPUMetricRow struct {
	// ElapsedSec is the time since the monitored job started, in seconds.
	// sampleGPUMetrics leaves it zero; the collector stamps it afterwards.
	ElapsedSec float64
	// GPUIndex is the nvidia-smi "index" field.
	GPUIndex int
	// TempC is the nvidia-smi "temperature.gpu" field.
	TempC float64
	// UsagePct is the nvidia-smi "utilization.gpu" field.
	UsagePct float64
	// PowerW is the nvidia-smi "power.draw" field.
	PowerW float64
	// ClockMHz is the nvidia-smi "clocks.current.graphics" field.
	ClockMHz float64
}
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
//
// When gpuIndices is non-empty the query is restricted to those GPUs through
// the --id flag; otherwise every GPU visible to nvidia-smi is sampled.
// ElapsedSec is left at zero for the caller to fill in.
//
// Output lines with fewer than five fields, or whose GPU index is not an
// integer, are skipped so a malformed line is never attributed to GPU 0.
// Individual metric values that cannot be parsed are recorded as 0.
// An error is returned only when nvidia-smi itself fails.
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
	args := make([]string, 0, 3)
	if len(gpuIndices) > 0 {
		ids := make([]string, len(gpuIndices))
		for i, idx := range gpuIndices {
			ids[i] = strconv.Itoa(idx)
		}
		args = append(args, "--id="+strings.Join(ids, ","))
	}
	args = append(args,
		"--query-gpu=index,temperature.gpu,utilization.gpu,power.draw,clocks.current.graphics",
		"--format=csv,noheader,nounits",
	)

	out, err := exec.Command("nvidia-smi", args...).Output()
	if err != nil {
		return nil, fmt.Errorf("nvidia-smi: %w", err)
	}

	var rows []GPUMetricRow
	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		// Split on the bare comma: every field is trimmed before parsing,
		// so both "a, b" and "a,b" layouts are accepted.
		parts := strings.Split(line, ",")
		if len(parts) < 5 {
			continue
		}
		idx, convErr := strconv.Atoi(strings.TrimSpace(parts[0]))
		if convErr != nil {
			continue
		}
		rows = append(rows, GPUMetricRow{
			GPUIndex: idx,
			TempC:    parseGPUFloat(parts[1]),
			UsagePct: parseGPUFloat(parts[2]),
			PowerW:   parseGPUFloat(parts[3]),
			ClockMHz: parseGPUFloat(parts[4]),
		})
	}
	return rows, nil
}
// parseGPUFloat converts one nvidia-smi CSV field to a float64.
// Surrounding whitespace is ignored. Empty fields, the "N/A" and
// "[Not Supported]" placeholders, and anything else that is not a valid
// number yield 0.
func parseGPUFloat(s string) float64 {
	switch field := strings.TrimSpace(s); field {
	case "", "N/A", "[Not Supported]":
		return 0
	default:
		// A parse failure is deliberately ignored: the value ParseFloat
		// returns alongside the error is used as-is.
		value, _ := strconv.ParseFloat(field, 64)
		return value
	}
}
// WriteGPUMetricsCSV writes the collected rows to path as CSV: a header line
// followed by one line per row. Elapsed time, temperature, usage and power are
// written with one decimal place, the clock with none. The file is created or
// overwritten with mode 0644.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
	var csv strings.Builder
	csv.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz\n")
	for i := range rows {
		row := &rows[i]
		fmt.Fprintf(&csv, "%.1f,%d,%.1f,%.1f,%.1f,%.0f\n",
			row.ElapsedSec, row.GPUIndex, row.TempC, row.UsagePct, row.PowerW, row.ClockMHz)
	}
	return os.WriteFile(path, []byte(csv.String()), 0644)
}
// WriteGPUMetricsHTML writes a standalone HTML page to path containing one SVG
// chart per GPU. Charts appear in the order in which each GPU index first
// occurs in rows. The page carries a UTC generation timestamp and is written
// with mode 0644.
func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
	// Bucket rows per GPU and remember first-appearance order.
	byGPU := make(map[int][]GPUMetricRow)
	var firstSeen []int
	for _, row := range rows {
		if _, known := byGPU[row.GPUIndex]; !known {
			firstSeen = append(firstSeen, row.GPUIndex)
		}
		byGPU[row.GPUIndex] = append(byGPU[row.GPUIndex], row)
	}

	var charts strings.Builder
	for _, idx := range firstSeen {
		charts.WriteString(drawGPUChartSVG(byGPU[idx], idx))
		charts.WriteString("\n")
	}

	generated := time.Now().UTC().Format("2006-01-02 15:04:05 UTC")
	page := fmt.Sprintf(`<!DOCTYPE html>
<html><head>
<meta charset="utf-8">
<title>GPU Stress Test Metrics</title>
<style>
body { font-family: sans-serif; background: #f0f0f0; margin: 0; padding: 20px; }
h1 { text-align: center; color: #333; margin: 0 0 8px; }
p { text-align: center; color: #888; font-size: 13px; margin: 0 0 24px; }
</style>
</head><body>
<h1>GPU Stress Test Metrics</h1>
<p>Generated %s</p>
%s
</body></html>`, generated, charts.String())
	return os.WriteFile(path, []byte(page), 0644)
}
// drawGPUChartSVG generates a self-contained SVG chart for one GPU.
//
// rows are plotted as-is, so the caller is expected to pass only the samples of
// the GPU identified by gpuIdx (WriteGPUMetricsHTML groups them that way);
// gpuIdx itself is used only for the title and legend text.
//
// The chart overlays four series (temperature, usage, power, clock) against a
// shared time axis. Each series is scaled independently and has its own
// coloured Y axis: temperature and usage on the left, power and clock on the
// right. The function returns the complete <svg> element as a string.
func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
	// Layout (all values are SVG user units)
	const W, H = 960, 520
	const plotX1 = 120 // usage axis / chart left border
	const plotX2 = 840 // power axis / chart right border
	const plotY1 = 70  // top
	const plotY2 = 465 // bottom (PH = 395)
	const PW = plotX2 - plotX1
	const PH = plotY2 - plotY1
	// Outer axes
	const tempAxisX = 60   // temp axis line
	const clockAxisX = 900 // clock axis line
	// Index convention used by every 4-element array below:
	// 0 = temperature, 1 = usage, 2 = power, 3 = clock.
	colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
	seriesLabel := [4]string{
		fmt.Sprintf("GPU %d Temp (°C)", gpuIdx),
		fmt.Sprintf("GPU %d Usage (%%)", gpuIdx),
		fmt.Sprintf("GPU %d Power (W)", gpuIdx),
		fmt.Sprintf("GPU %d Clock (MHz)", gpuIdx),
	}
	axisLabel := [4]string{"Temperature (°C)", "GPU Usage (%)", "Power (W)", "Clock (MHz)"}
	// Extract series: t holds the X values, vals[k] the Y values of series k.
	t := make([]float64, len(rows))
	vals := [4][]float64{}
	for i := range vals {
		vals[i] = make([]float64, len(rows))
	}
	for i, r := range rows {
		t[i] = r.ElapsedSec
		vals[0][i] = r.TempC
		vals[1][i] = r.UsagePct
		vals[2][i] = r.PowerW
		vals[3][i] = r.ClockMHz
	}
	tMin, tMax := gpuMinMax(t)
	// axisScale is the tick list of one Y axis plus its outermost ticks, which
	// define the value range mapped onto the plot height.
	type axisScale struct {
		ticks    []float64
		min, max float64
	}
	var axes [4]axisScale
	for i := 0; i < 4; i++ {
		mn, mx := gpuMinMax(vals[i])
		tks := gpuNiceTicks(mn, mx, 8)
		axes[i] = axisScale{ticks: tks, min: tks[0], max: tks[len(tks)-1]}
	}
	// xv maps an elapsed-time value to an X pixel coordinate. A zero-length
	// time range collapses every point onto the left border.
	xv := func(tv float64) float64 {
		if tMax == tMin {
			return float64(plotX1)
		}
		return float64(plotX1) + (tv-tMin)/(tMax-tMin)*float64(PW)
	}
	// yv maps a value of series ai to a Y pixel coordinate (SVG Y grows
	// downwards, hence the subtraction from plotY2). A degenerate axis range
	// places the point at mid-height.
	yv := func(v float64, ai int) float64 {
		a := axes[ai]
		if a.max == a.min {
			return float64(plotY1 + PH/2)
		}
		return float64(plotY2) - (v-a.min)/(a.max-a.min)*float64(PH)
	}
	var b strings.Builder
	fmt.Fprintf(&b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d"`+
		` style="background:#fff;border-radius:8px;display:block;margin:0 auto 24px;`+
		`box-shadow:0 2px 12px rgba(0,0,0,.12)">`+"\n", W, H)
	// Title
	fmt.Fprintf(&b, `<text x="%d" y="22" text-anchor="middle" font-family="sans-serif"`+
		` font-size="14" font-weight="bold" fill="#333">GPU Stress Test Metrics — GPU %d</text>`+"\n",
		plotX1+PW/2, gpuIdx)
	// Horizontal grid (align to temp axis ticks)
	b.WriteString(`<g stroke="#e0e0e0" stroke-width="0.5">` + "\n")
	for _, tick := range axes[0].ticks {
		y := yv(tick, 0)
		if y < float64(plotY1) || y > float64(plotY2) {
			continue
		}
		fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
			plotX1, y, plotX2, y)
	}
	// Vertical grid. The nice ticks may extend past the data range, so ticks
	// that fall outside the plot area are dropped.
	xTicks := gpuNiceTicks(tMin, tMax, 10)
	for _, tv := range xTicks {
		x := xv(tv)
		if x < float64(plotX1) || x > float64(plotX2) {
			continue
		}
		fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
			x, plotY1, x, plotY2)
	}
	b.WriteString("</g>\n")
	// Chart border
	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
		plotX1, plotY1, PW, PH)
	// X axis ticks and labels
	b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#333" text-anchor="middle">` + "\n")
	for _, tv := range xTicks {
		x := xv(tv)
		if x < float64(plotX1) || x > float64(plotX2) {
			continue
		}
		fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotY2+18, gpuFormatTick(tv))
		fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d" stroke="#333" stroke-width="1"/>`+"\n",
			x, plotY2, x, plotY2+4)
	}
	b.WriteString("</g>\n")
	fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="13"`+
		` fill="#333" text-anchor="middle">Time (seconds)</text>`+"\n",
		plotX1+PW/2, plotY2+38)
	// Y axes: [tempAxisX, plotX1, plotX2, clockAxisX]
	axisLineX := [4]int{tempAxisX, plotX1, plotX2, clockAxisX}
	// axisRight selects which side of the axis line ticks and labels go on.
	axisRight := [4]bool{false, false, true, true}
	// Label x positions (for rotated vertical text)
	axisLabelX := [4]int{10, 68, 868, 950}
	for i := 0; i < 4; i++ {
		ax := axisLineX[i]
		right := axisRight[i]
		color := colors[i]
		// Axis line
		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d"`+
			` stroke="%s" stroke-width="1"/>`+"\n",
			ax, plotY1, ax, plotY2, color)
		// Ticks and tick labels
		fmt.Fprintf(&b, `<g font-family="sans-serif" font-size="10" fill="%s">`+"\n", color)
		for _, tick := range axes[i].ticks {
			y := yv(tick, i)
			if y < float64(plotY1) || y > float64(plotY2) {
				continue
			}
			// Left-hand axes draw the tick mark and label to the left of
			// the line, right-hand axes to the right.
			dx := -5
			textX := ax - 8
			anchor := "end"
			if right {
				dx = 5
				textX = ax + 8
				anchor = "start"
			}
			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"`+
				` stroke="%s" stroke-width="1"/>`+"\n",
				ax, y, ax+dx, y, color)
			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="%s" dy="4">%s</text>`+"\n",
				textX, y, anchor, gpuFormatTick(tick))
		}
		b.WriteString("</g>\n")
		// Axis label (rotated)
		lx := axisLabelX[i]
		fmt.Fprintf(&b, `<text transform="translate(%d,%d) rotate(-90)"`+
			` font-family="sans-serif" font-size="12" fill="%s" text-anchor="middle">%s</text>`+"\n",
			lx, plotY1+PH/2, color, axisLabel[i])
	}
	// Data lines: one polyline per series, points separated by spaces.
	for i := 0; i < 4; i++ {
		var pts strings.Builder
		for j := range rows {
			x := xv(t[j])
			y := yv(vals[i][j], i)
			if j == 0 {
				fmt.Fprintf(&pts, "%.1f,%.1f", x, y)
			} else {
				fmt.Fprintf(&pts, " %.1f,%.1f", x, y)
			}
		}
		fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="1.5"/>`+"\n",
			pts.String(), colors[i])
	}
	// Legend: four entries spread evenly across the plot width, above the plot.
	const legendY = 42
	for i := 0; i < 4; i++ {
		lx := plotX1 + i*(PW/4) + 10
		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d"`+
			` stroke="%s" stroke-width="2"/>`+"\n",
			lx, legendY, lx+20, legendY, colors[i])
		fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#333">%s</text>`+"\n",
			lx+25, legendY+4, seriesLabel[i])
	}
	b.WriteString("</svg>\n")
	return b.String()
}
// ANSI escape sequences used to colour the terminal charts. Each colour code
// is written before a chart row and ansiReset after it.
const (
	ansiRed    = "\033[31m"
	ansiBlue   = "\033[34m"
	ansiGreen  = "\033[32m"
	ansiYellow = "\033[33m"
	ansiReset  = "\033[0m"
)

// Dimensions passed to renderLineChart for every terminal chart.
const (
	// termChartWidth is the maximum number of plotted columns.
	termChartWidth = 70
	// termChartHeight is the chart height; the rendered plot spans
	// termChartHeight+1 rows (row 0 through row termChartHeight).
	termChartHeight = 12
)
// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU,
// suitable for display in the TUI output screen.
//
// GPUs appear in the order in which their index first occurs in rows. Each GPU
// gets a heading with the covered time span followed by four charts:
// temperature, usage, power and clock. Trailing newlines are trimmed; an empty
// input yields an empty string.
func RenderGPUTerminalChart(rows []GPUMetricRow) string {
	// Bucket rows per GPU and remember first-appearance order.
	samples := make(map[int][]GPUMetricRow)
	var gpuOrder []int
	for _, row := range rows {
		if _, known := samples[row.GPUIndex]; !known {
			gpuOrder = append(gpuOrder, row.GPUIndex)
		}
		samples[row.GPUIndex] = append(samples[row.GPUIndex], row)
	}

	series := []struct {
		caption string
		color   string
		pick    func(GPUMetricRow) float64
	}{
		{caption: "Temperature (°C)", color: ansiRed, pick: func(r GPUMetricRow) float64 { return r.TempC }},
		{caption: "GPU Usage (%)", color: ansiBlue, pick: func(r GPUMetricRow) float64 { return r.UsagePct }},
		{caption: "Power (W)", color: ansiGreen, pick: func(r GPUMetricRow) float64 { return r.PowerW }},
		{caption: "Clock (MHz)", color: ansiYellow, pick: func(r GPUMetricRow) float64 { return r.ClockMHz }},
	}

	var out strings.Builder
	for _, idx := range gpuOrder {
		group := samples[idx]
		if len(group) == 0 {
			continue
		}
		span := group[len(group)-1].ElapsedSec - group[0].ElapsedSec
		fmt.Fprintf(&out, "GPU %d — Stress Test Metrics (%.0f seconds)\n\n", idx, span)
		for _, s := range series {
			values := extractGPUField(group, s.pick)
			out.WriteString(renderLineChart(values, s.color, s.caption, termChartHeight, termChartWidth))
			out.WriteRune('\n')
		}
	}
	return strings.TrimRight(out.String(), "\n")
}
// renderLineChart draws a single time-series line chart using box-drawing characters.
// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
//
// vals is the series to plot; color is an ANSI escape sequence written before
// every plot row (and reset after it); caption is centred under the chart.
// The plot occupies height+1 rows and at most width columns: longer series are
// averaged down to width columns, shorter ones are drawn one column per value.
//
// With no values, a non-positive width or a negative height there is nothing
// to plot and only the caption line is returned.
func renderLineChart(vals []float64, color, caption string, height, width int) string {
	if len(vals) == 0 || width <= 0 || height < 0 {
		return caption + "\n"
	}
	mn, mx := gpuMinMax(vals)
	if mn == mx {
		// Flat series: widen the range so the scaling below never divides by zero.
		mx = mn + 1
	}
	// Use the smaller of width or len(vals) to avoid stretching sparse data.
	w := width
	if len(vals) < w {
		w = len(vals)
	}
	data := gpuDownsample(vals, w)

	// toRow maps a value to a display row: 0 = top = max value, height = bottom = min value.
	toRow := func(v float64) int {
		return int(math.Round((mx - v) / (mx - mn) * float64(height)))
	}

	row := make([]int, w)
	for i, v := range data {
		r := toRow(v)
		if r < 0 {
			r = 0
		}
		if r > height {
			r = height
		}
		row[i] = r
	}

	// Fill the character grid.
	grid := make([][]rune, height+1)
	for i := range grid {
		grid[i] = make([]rune, w)
		for j := range grid[i] {
			grid[i][j] = ' '
		}
	}
	for x := 0; x < w; x++ {
		r := row[x]
		if x == 0 {
			grid[r][0] = '─'
			continue
		}
		p := row[x-1]
		switch {
		case r == p:
			grid[r][x] = '─'
		case r < p: // value went up (row index decreased toward top)
			grid[r][x] = '╭'
			grid[p][x] = '╯'
			for y := r + 1; y < p; y++ {
				grid[y][x] = '│'
			}
		default: // r > p, value went down
			grid[p][x] = '╮'
			grid[r][x] = '╰'
			for y := p + 1; y < r; y++ {
				grid[y][x] = '│'
			}
		}
	}

	// Y axis tick labels. Ticks outside the plotted range are dropped; when two
	// ticks land on the same row the later one wins.
	ticks := gpuNiceTicks(mn, mx, height/2)
	tickAtRow := make(map[int]string)
	labelWidth := 4
	for _, t := range ticks {
		r := toRow(t)
		if r < 0 || r > height {
			continue
		}
		s := gpuFormatTick(t)
		tickAtRow[r] = s
		// Measure in runes: the %*s padding below counts code points, not bytes.
		if n := len([]rune(s)); n > labelWidth {
			labelWidth = n
		}
	}

	var b strings.Builder
	for r := 0; r <= height; r++ {
		label := tickAtRow[r]
		fmt.Fprintf(&b, "%*s", labelWidth, label)
		switch {
		case label != "":
			b.WriteRune('┤')
		case r == height:
			b.WriteRune('┼')
		default:
			b.WriteRune('│')
		}
		b.WriteString(color)
		b.WriteString(string(grid[r]))
		b.WriteString(ansiReset)
		b.WriteRune('\n')
	}
	// Bottom axis.
	b.WriteString(strings.Repeat(" ", labelWidth))
	b.WriteRune('└')
	b.WriteString(strings.Repeat("─", w))
	b.WriteRune('\n')
	// Caption centered under the chart. The caption is measured in runes so
	// multi-byte characters such as "°" do not shift it to the left.
	if caption != "" {
		total := labelWidth + 1 + w
		if pad := (total - len([]rune(caption))) / 2; pad > 0 {
			b.WriteString(strings.Repeat(" ", pad))
		}
		b.WriteString(caption)
		b.WriteRune('\n')
	}
	return b.String()
}
// extractGPUField applies fn to every row and returns the results in row
// order. The returned slice always has len(rows) elements.
func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 {
	values := make([]float64, 0, len(rows))
	for idx := range rows {
		values = append(values, fn(rows[idx]))
	}
	return values
}
// gpuDownsample resamples vals to exactly w points.
//
// When len(vals) >= w the values are split into w consecutive buckets and each
// output point is the mean of its bucket. When len(vals) < w the series is
// stretched by nearest-neighbour lookup so the first and last output points
// equal the first and last input values. An empty input yields w zeros, and a
// non-positive w yields an empty slice.
func gpuDownsample(vals []float64, w int) []float64 {
	if w <= 0 {
		return []float64{}
	}
	n := len(vals)
	result := make([]float64, w)
	if n == 0 {
		return result
	}

	if n < w {
		// Nearest-neighbour upsample. 1 <= n < w implies w >= 2, so w-1 is
		// never zero, and i <= w-1 keeps the source index within [0, n-1].
		for i := range result {
			result[i] = vals[i*(n-1)/(w-1)]
		}
		return result
	}

	counts := make([]int, w)
	for i, v := range vals {
		bucket := i * w / n // i < n, so bucket is always below w
		result[bucket] += v
		counts[bucket]++
	}
	for i := range result {
		if counts[i] > 0 {
			result[i] /= float64(counts[i])
		}
	}
	return result
}
// gpuMinMax returns the smallest and largest value in vals, in that order.
// An empty slice yields the placeholder range (0, 1).
func gpuMinMax(vals []float64) (float64, float64) {
	if len(vals) == 0 {
		return 0, 1
	}
	lo, hi := vals[0], vals[0]
	for i := 1; i < len(vals); i++ {
		switch cur := vals[i]; {
		case cur < lo:
			lo = cur
		case cur > hi:
			hi = cur
		}
	}
	return lo, hi
}
// gpuNiceTicks returns evenly spaced axis tick values covering [mn, mx].
//
// The step is 1, 2, 5 or 10 times a power of ten, chosen so that the range
// holds at most about 1.5*targetCount steps. The first tick is the largest
// step multiple not above mn and the last is the smallest step multiple not
// below mx, so the ticks may extend slightly beyond the data range. When
// mn == mx the range is widened by one unit on each side. A targetCount below
// 1 is treated as 1 so finite input never produces an empty tick list.
func gpuNiceTicks(mn, mx float64, targetCount int) []float64 {
	if targetCount < 1 {
		// A zero or negative target would divide by zero below and produce no
		// ticks at all; callers index the first and last tick unconditionally.
		targetCount = 1
	}
	if mn == mx {
		mn -= 1
		mx += 1
	}
	target := float64(targetCount)
	r := mx - mn
	// Start from the power of ten just below the ideal step, then take the
	// first 1/2/5/10 multiple that is coarse enough.
	step := math.Pow(10, math.Floor(math.Log10(r/target)))
	for _, f := range []float64{1, 2, 5, 10} {
		if r/(f*step) <= target*1.5 {
			step = f * step
			break
		}
	}
	lo := math.Floor(mn/step) * step
	hi := math.Ceil(mx/step) * step
	var ticks []float64
	// The small tolerance keeps the last tick despite accumulated rounding
	// error; each value is rounded to nine decimals for clean labels.
	for v := lo; v <= hi+step*0.001; v += step {
		ticks = append(ticks, math.Round(v*1e9)/1e9)
	}
	return ticks
}
// gpuFormatTick formats an axis tick value: whole numbers are printed without
// a decimal point, everything else with exactly one decimal place.
func gpuFormatTick(v float64) string {
	if whole := math.Trunc(v); whole != v {
		return strconv.FormatFloat(v, 'f', 1, 64)
	}
	return strconv.Itoa(int(v))
}

View File

@@ -3,6 +3,7 @@ package platform
import ( import (
"archive/tar" "archive/tar"
"compress/gzip" "compress/gzip"
"context"
"fmt" "fmt"
"io" "io"
"os" "os"
@@ -14,10 +15,55 @@ import (
"time" "time"
) )
// NvidiaGPU holds basic GPU info from nvidia-smi.
type NvidiaGPU struct {
	// Index is the nvidia-smi "index" field.
	Index int
	// Name is the nvidia-smi "name" field with surrounding whitespace removed.
	Name string
	// MemoryMB is the nvidia-smi "memory.total" field, queried without units;
	// it is 0 when that field cannot be parsed as an integer.
	MemoryMB int
}
// ListNvidiaGPUs returns GPUs visible to nvidia-smi.
//
// It queries index, name and total memory in CSV form. Lines that do not split
// into exactly three fields, or whose index is not an integer, are skipped; an
// unparseable memory value is recorded as 0. A failure to run nvidia-smi is
// returned wrapped with the "nvidia-smi: " prefix.
func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
	query := exec.Command("nvidia-smi",
		"--query-gpu=index,name,memory.total",
		"--format=csv,noheader,nounits")
	raw, err := query.Output()
	if err != nil {
		return nil, fmt.Errorf("nvidia-smi: %w", err)
	}

	var found []NvidiaGPU
	for _, record := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
		if record = strings.TrimSpace(record); record == "" {
			continue
		}
		fields := strings.SplitN(record, ", ", 3)
		if len(fields) != 3 {
			continue
		}
		index, convErr := strconv.Atoi(strings.TrimSpace(fields[0]))
		if convErr != nil {
			continue
		}
		// Best effort: a memory value that is not an integer leaves MemoryMB at 0.
		memory, _ := strconv.Atoi(strings.TrimSpace(fields[2]))
		found = append(found, NvidiaGPU{
			Index:    index,
			Name:     strings.TrimSpace(fields[1]),
			MemoryMB: memory,
		})
	}
	return found, nil
}
func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) { func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs()) return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs())
} }
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA SAT job list built from an
// explicit duration, GPU memory size and GPU index selection, storing results
// under baseDir with the "gpu-nvidia" prefix. ctx is forwarded to the job
// runner, which uses it to start each command and stops scheduling further
// jobs once it is cancelled.
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (string, error) {
	jobs := nvidiaSATJobsWithOptions(durationSec, sizeMB, gpuIndices)
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", jobs)
}
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) { func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128) sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
passes := envInt("BEE_MEMTESTER_PASSES", 1) passes := envInt("BEE_MEMTESTER_PASSES", 1)
@@ -84,8 +130,11 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
} }
type satJob struct { type satJob struct {
name string name string
cmd []string cmd []string
env []string // extra env vars (appended to os.Environ)
collectGPU bool // collect GPU metrics via nvidia-smi while this job runs
gpuIndices []int // GPU indices to collect metrics for (empty = all)
} }
type satStats struct { type satStats struct {
@@ -147,6 +196,109 @@ func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
return archive, nil return archive, nil
} }
// nvidiaSATJobsWithOptions builds the NVIDIA SAT job list: four information
// gathering commands followed by the bee-gpu-stress run.
//
// durationSec and sizeMB are passed to bee-gpu-stress as --seconds and
// --size-mb. When gpuIndices is non-empty the stress job is restricted to those
// GPUs through CUDA_VISIBLE_DEVICES and GPU metrics are collected for the same
// indices; an empty selection leaves the environment untouched and collects
// metrics for every GPU.
func nvidiaSATJobsWithOptions(durationSec, sizeMB int, gpuIndices []int) []satJob {
	var env []string
	if len(gpuIndices) > 0 {
		ids := make([]string, len(gpuIndices))
		for i, idx := range gpuIndices {
			ids[i] = strconv.Itoa(idx)
		}
		env = []string{
			// The indices come from nvidia-smi, which numbers GPUs in PCI bus
			// order, while CUDA enumerates fastest-first by default. Pinning the
			// device order makes CUDA_VISIBLE_DEVICES refer to the same GPUs
			// that were selected and that the metrics collector samples.
			"CUDA_DEVICE_ORDER=PCI_BUS_ID",
			"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ","),
		}
	}
	return []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
		{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
		{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
		{
			name:       "05-bee-gpu-stress.log",
			cmd:        []string{"bee-gpu-stress", "--seconds", strconv.Itoa(durationSec), "--size-mb", strconv.Itoa(sizeMB)},
			env:        env,
			collectGPU: true,
			gpuIndices: gpuIndices,
		},
	}
}
// runAcceptancePackCtx runs jobs in order inside a fresh, timestamped run
// directory under baseDir (default "/var/log/bee-sat") and returns the path of
// the resulting .tar.gz archive.
//
// Every "{{run_dir}}" placeholder in a job's arguments is replaced with the run
// directory. Each job's output is saved under the job name, and its return
// code and status are appended to summary.txt. Once ctx is cancelled no further
// jobs are started, but the summary and the archive are still written.
// Filesystem and archive errors abort the run with an empty path.
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob) (string, error) {
	if baseDir == "" {
		baseDir = "/var/log/bee-sat"
	}
	stamp := time.Now().UTC().Format("20060102-150405")
	runName := prefix + "-" + stamp
	runDir := filepath.Join(baseDir, runName)
	if err := os.MkdirAll(runDir, 0755); err != nil {
		return "", err
	}
	verboseLog := filepath.Join(runDir, "verbose.log")

	var (
		summary strings.Builder
		stats   satStats
	)
	summary.WriteString("run_at_utc=" + time.Now().UTC().Format(time.RFC3339) + "\n")

	for _, job := range jobs {
		if ctx.Err() != nil {
			break
		}
		// Expand the run-directory placeholder in every argument.
		argv := make([]string, len(job.cmd))
		for i, arg := range job.cmd {
			argv[i] = strings.ReplaceAll(arg, "{{run_dir}}", runDir)
		}

		var (
			output []byte
			runErr error
		)
		switch {
		case job.collectGPU:
			output, runErr = runSATCommandWithMetrics(ctx, verboseLog, job.name, argv, job.env, job.gpuIndices, runDir)
		default:
			output, runErr = runSATCommandCtx(ctx, verboseLog, job.name, argv, job.env)
		}
		if err := os.WriteFile(filepath.Join(runDir, job.name), output, 0644); err != nil {
			return "", err
		}

		status, rc := classifySATResult(job.name, output, runErr)
		stats.Add(status)
		key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
		fmt.Fprintf(&summary, "%s_rc=%d\n%s_status=%s\n", key, rc, key, status)
	}

	writeSATStats(&summary, stats)
	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
		return "", err
	}
	archive := filepath.Join(baseDir, runName+".tar.gz")
	if err := createTarGz(archive, runDir); err != nil {
		return "", err
	}
	return archive, nil
}
// runSATCommandCtx runs cmd (program followed by its arguments) under ctx and
// returns its combined stdout and stderr together with the run error.
//
// Non-empty env entries are appended to the current process environment. Start
// and finish records are appended to verboseLog; the finish record carries
// rc 0 on success or rc 1 on any error, plus the duration in milliseconds.
func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string, env []string) ([]byte, error) {
	begin := time.Now().UTC()
	appendSATVerboseLog(verboseLog,
		"["+begin.Format(time.RFC3339)+"] start "+name,
		"cmd: "+strings.Join(cmd, " "),
	)

	proc := exec.CommandContext(ctx, cmd[0], cmd[1:]...)
	if len(env) > 0 {
		proc.Env = append(os.Environ(), env...)
	}
	output, runErr := proc.CombinedOutput()

	rcLine := "rc: 0"
	if runErr != nil {
		rcLine = "rc: 1"
	}
	appendSATVerboseLog(verboseLog,
		"["+time.Now().UTC().Format(time.RFC3339)+"] finish "+name,
		rcLine,
		"duration_ms: "+strconv.FormatInt(time.Since(begin).Milliseconds(), 10),
		"",
	)
	return output, runErr
}
func listStorageDevices() ([]string, error) { func listStorageDevices() ([]string, error) {
out, err := exec.Command("lsblk", "-dn", "-o", "NAME,TYPE").Output() out, err := exec.Command("lsblk", "-dn", "-o", "NAME,TYPE").Output()
if err != nil { if err != nil {
@@ -251,6 +403,51 @@ func runSATCommand(verboseLog, name string, cmd []string) ([]byte, error) {
return out, err return out, err
} }
// runSATCommandWithMetrics runs a command while a background goroutine samples
// GPU metrics once per second. Samples that fail are skipped; each successful
// sample is stamped with the seconds elapsed since this function started.
//
// After the command returns and the sampler has stopped, any collected rows
// are written into runDir as gpu-metrics.csv, gpu-metrics.html and
// gpu-metrics-term.txt. Those writes are best effort: their errors are ignored.
// The command's output and error are returned unchanged.
func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd []string, env []string, gpuIndices []int, runDir string) ([]byte, error) {
	var (
		rows     []GPUMetricRow
		stop     = make(chan struct{})
		finished = make(chan struct{})
		began    = time.Now()
	)

	sampler := func() {
		defer close(finished)
		tick := time.NewTicker(time.Second)
		defer tick.Stop()
		for {
			select {
			case <-stop:
				return
			case <-tick.C:
				batch, sampleErr := sampleGPUMetrics(gpuIndices)
				if sampleErr != nil {
					continue
				}
				at := time.Since(began).Seconds()
				for i := range batch {
					batch[i].ElapsedSec = at
				}
				rows = append(rows, batch...)
			}
		}
	}
	go sampler()

	out, err := runSATCommandCtx(ctx, verboseLog, name, cmd, env)

	// Stop the sampler and wait for it before reading rows.
	close(stop)
	<-finished

	if len(rows) == 0 {
		return out, err
	}
	_ = WriteGPUMetricsCSV(filepath.Join(runDir, "gpu-metrics.csv"), rows)
	_ = WriteGPUMetricsHTML(filepath.Join(runDir, "gpu-metrics.html"), rows)
	termChart := RenderGPUTerminalChart(rows)
	_ = os.WriteFile(filepath.Join(runDir, "gpu-metrics-term.txt"), []byte(termChart), 0644)
	return out, err
}
func appendSATVerboseLog(path string, lines ...string) { func appendSATVerboseLog(path string, lines ...string) {
if path == "" { if path == "" {
return return

View File

@@ -27,3 +27,16 @@ type exportTargetsMsg struct {
type bannerMsg struct { type bannerMsg struct {
text string text string
} }
// nvidiaGPUsMsg carries the result of listing NVIDIA GPUs for the SAT setup
// screen: either the detected GPUs or the error from the lookup.
type nvidiaGPUsMsg struct {
	gpus []platform.NvidiaGPU
	err  error
}

// nvtopClosedMsg is emitted when an nvtop process launched from the TUI exits.
type nvtopClosedMsg struct{}

// nvidiaSATDoneMsg carries the outcome of an NVIDIA SAT run: the title and
// body to display, plus the error returned by the run, if any.
type nvidiaSATDoneMsg struct {
	title string
	body  string
	err   error
}

View File

@@ -10,7 +10,7 @@ func (m model) handleAcceptanceMenu() (tea.Model, tea.Cmd) {
} }
switch m.cursor { switch m.cursor {
case 0: case 0:
m.pendingAction = actionRunNvidiaSAT return m.enterNvidiaSATSetup()
case 1: case 1:
m.pendingAction = actionRunMemorySAT m.pendingAction = actionRunMemorySAT
case 2: case 2:

View File

@@ -0,0 +1,238 @@
package tui
import (
"context"
"fmt"
"os/exec"
"strings"
"bee/audit/internal/platform"
tea "github.com/charmbracelet/bubbletea"
)
// nvidiaDurationOptions lists the stress-test duration presets offered on the
// NVIDIA SAT setup screen, in display order. label is the text shown to the
// user; seconds is the duration handed to the SAT run.
var nvidiaDurationOptions = []struct {
	label   string
	seconds int
}{
	{"10 minutes", 600},
	{"1 hour", 3600},
	{"8 hours", 28800},
	{"24 hours", 86400},
}
// enterNvidiaSATSetup switches to the NVIDIA SAT setup screen with a clean
// state (no GPUs, first duration preset, cursor at the top), marks the model
// busy, and returns a command that lists the GPUs and reports them as a
// nvidiaGPUsMsg.
func (m model) enterNvidiaSATSetup() (tea.Model, tea.Cmd) {
	m.screen = screenNvidiaSATSetup
	m.nvidiaGPUs, m.nvidiaGPUSel = nil, nil
	m.nvidiaDurIdx, m.nvidiaSATCursor = 0, 0
	m.busy, m.busyTitle = true, "NVIDIA SAT"

	loadGPUs := func() tea.Msg {
		gpus, err := m.app.ListNvidiaGPUs()
		return nvidiaGPUsMsg{gpus: gpus, err: err}
	}
	return m, loadGPUs
}
// handleNvidiaGPUsMsg consumes the GPU list response and clears the busy
// state. On error it shows the failure on the output screen, with the
// acceptance menu as the screen to return to. On success it stores the GPUs,
// selects all of them by default and moves the cursor to the first item.
func (m model) handleNvidiaGPUsMsg(msg nvidiaGPUsMsg) (tea.Model, tea.Cmd) {
	m.busy, m.busyTitle = false, ""

	if listErr := msg.err; listErr != nil {
		m.title = "NVIDIA SAT"
		m.body = fmt.Sprintf("Failed to list GPUs: %v", listErr)
		m.prevScreen = screenAcceptance
		m.screen = screenOutput
		return m, nil
	}

	// Every detected GPU starts out selected.
	selected := make([]bool, len(msg.gpus))
	for i := range selected {
		selected[i] = true
	}
	m.nvidiaGPUs = msg.gpus
	m.nvidiaGPUSel = selected
	m.nvidiaSATCursor = 0
	return m, nil
}
// updateNvidiaSATSetup handles keys on the setup screen.
//
// The cursor walks one flat list: the duration presets, then the GPUs, then
// Start and Cancel. Space and enter both pick the duration or toggle the GPU
// under the cursor; only enter activates Start and Cancel. Esc returns to the
// acceptance menu, and ctrl+c or q quits.
func (m model) updateNvidiaSATSetup(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
	durCount := len(nvidiaDurationOptions)
	startIdx := durCount + len(m.nvidiaGPUs)
	cancelIdx := startIdx + 1 // also the last valid cursor position
	cur := m.nvidiaSATCursor

	switch key := msg.String(); key {
	case "ctrl+c", "q":
		return m, tea.Quit
	case "esc":
		m.screen, m.cursor = screenAcceptance, 0
	case "up", "k":
		if cur > 0 {
			m.nvidiaSATCursor--
		}
	case "down", "j":
		if cur < cancelIdx {
			m.nvidiaSATCursor++
		}
	case " ", "enter":
		switch {
		case cur < durCount:
			m.nvidiaDurIdx = cur
		case cur < startIdx:
			gpu := cur - durCount
			m.nvidiaGPUSel[gpu] = !m.nvidiaGPUSel[gpu]
		case key != "enter":
			// Space has no effect on Start and Cancel.
		case cur == startIdx:
			return m.startNvidiaSAT()
		case cur == cancelIdx:
			m.screen, m.cursor = screenAcceptance, 0
		}
	}
	return m, nil
}
// startNvidiaSAT launches the SAT and nvtop.
//
// The run covers the checked GPUs, or all detected GPUs when none is checked.
// The size passed to the SAT is the smallest MemoryMB among them (64 when no
// value is available), and the duration comes from the chosen preset. The run
// gets a cancellable context whose cancel function is kept on the model. The
// model moves to the running screen, and nvtop is started alongside the SAT
// when it is found on PATH.
func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
	chosen := make([]platform.NvidiaGPU, 0, len(m.nvidiaGPUs))
	for i := range m.nvidiaGPUSel {
		if m.nvidiaGPUSel[i] {
			chosen = append(chosen, m.nvidiaGPUs[i])
		}
	}
	if len(chosen) == 0 {
		chosen = m.nvidiaGPUs // fallback: use all if none explicitly selected
	}

	var (
		sizeMB     int
		gpuIndices []int
	)
	for _, gpu := range chosen {
		if sizeMB == 0 || gpu.MemoryMB < sizeMB {
			sizeMB = gpu.MemoryMB
		}
		gpuIndices = append(gpuIndices, gpu.Index)
	}
	if sizeMB == 0 {
		sizeMB = 64
	}
	durationSec := nvidiaDurationOptions[m.nvidiaDurIdx].seconds

	ctx, cancel := context.WithCancel(context.Background())
	m.nvidiaSATCancel = cancel
	m.nvidiaSATAborted = false
	m.screen = screenNvidiaSATRunning
	m.nvidiaSATCursor = 0

	runSAT := func() tea.Msg {
		result, err := m.app.RunNvidiaAcceptancePackWithOptions(ctx, "", durationSec, sizeMB, gpuIndices)
		return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
	}

	nvtopPath, lookErr := exec.LookPath("nvtop")
	if lookErr != nil {
		// nvtop not available: just run the SAT, show running screen
		return m, runSAT
	}
	openNvtop := tea.ExecProcess(exec.Command(nvtopPath), func(error) tea.Msg {
		return nvtopClosedMsg{}
	})
	return m, tea.Batch(runSAT, openNvtop)
}
// updateNvidiaSATRunning handles keys on the running screen: "o" reopens nvtop
// when it is available on PATH, "a" cancels the running SAT and returns to the
// acceptance menu, and ctrl+c quits. Other keys are ignored.
func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
	switch msg.String() {
	case "ctrl+c":
		return m, tea.Quit
	case "o", "O":
		if nvtopPath, err := exec.LookPath("nvtop"); err == nil {
			return m, tea.ExecProcess(exec.Command(nvtopPath), func(error) tea.Msg {
				return nvtopClosedMsg{}
			})
		}
	case "a", "A":
		if cancel := m.nvidiaSATCancel; cancel != nil {
			cancel()
			m.nvidiaSATCancel = nil
		}
		m.nvidiaSATAborted = true
		m.screen, m.cursor = screenAcceptance, 0
	}
	return m, nil
}
// renderNvidiaSATSetup renders the setup screen: a radio list of duration
// presets, a checkbox list of detected GPUs (or a "none detected" note), the
// Start and Cancel entries, and a key hint line. The entry under the cursor is
// marked with "> ".
func renderNvidiaSATSetup(m model) string {
	// marker returns the line prefix for the item at flat list position pos.
	marker := func(pos int) string {
		if m.nvidiaSATCursor == pos {
			return "> "
		}
		return " "
	}
	durCount := len(nvidiaDurationOptions)

	var out strings.Builder
	out.WriteString("NVIDIA SAT\n\nDuration:\n")
	for i, opt := range nvidiaDurationOptions {
		radio := "( )"
		if i == m.nvidiaDurIdx {
			radio = "(*)"
		}
		out.WriteString(marker(i) + radio + " " + opt.label + "\n")
	}
	out.WriteString("\n")

	if len(m.nvidiaGPUs) == 0 {
		out.WriteString("GPUs: (none detected)\n")
	} else {
		out.WriteString("GPUs:\n")
		for i, gpu := range m.nvidiaGPUs {
			box := "[ ]"
			if m.nvidiaGPUSel[i] {
				box = "[x]"
			}
			fmt.Fprintf(&out, "%s%s %d: %s (%d MB)\n", marker(durCount+i), box, gpu.Index, gpu.Name, gpu.MemoryMB)
		}
	}
	out.WriteString("\n")

	startIdx := durCount + len(m.nvidiaGPUs)
	out.WriteString(marker(startIdx) + "Start\n")
	out.WriteString(marker(startIdx+1) + "Cancel\n")
	out.WriteString("\n")
	out.WriteString("[↑/↓] move [space] toggle [enter] select [esc] cancel\n")
	return out.String()
}
// renderNvidiaSATRunning renders the static running screen: a heading, a
// status line and the key hints for reopening nvtop, aborting and quitting.
func renderNvidiaSATRunning() string {
	const (
		heading = "NVIDIA SAT\n\n"
		status  = "Test is running...\n\n"
		hints   = "[o] Open nvtop [a] Abort test [ctrl+c] quit\n"
	)
	return heading + status + hints
}

View File

@@ -255,7 +255,7 @@ func TestOutputScreenReturnsToPreviousScreen(t *testing.T) {
} }
} }
func TestAcceptanceConfirmFlow(t *testing.T) { func TestAcceptanceNvidiaSATOpensSetup(t *testing.T) {
t.Parallel() t.Parallel()
m := newTestModel() m := newTestModel()
@@ -265,17 +265,15 @@ func TestAcceptanceConfirmFlow(t *testing.T) {
next, cmd := m.handleAcceptanceMenu() next, cmd := m.handleAcceptanceMenu()
got := next.(model) got := next.(model)
if cmd != nil { if cmd == nil {
t.Fatal("expected nil cmd") t.Fatal("expected non-nil cmd (GPU list loader)")
} }
if got.screen != screenConfirm { if got.screen != screenNvidiaSATSetup {
t.Fatalf("screen=%q want %q", got.screen, screenConfirm) t.Fatalf("screen=%q want %q", got.screen, screenNvidiaSATSetup)
}
if got.pendingAction != actionRunNvidiaSAT {
t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionRunNvidiaSAT)
} }
next, _ = got.updateConfirm(tea.KeyMsg{Type: tea.KeyEsc}) // esc from setup returns to acceptance
next, _ = got.updateNvidiaSATSetup(tea.KeyMsg{Type: tea.KeyEsc})
got = next.(model) got = next.(model)
if got.screen != screenAcceptance { if got.screen != screenAcceptance {
t.Fatalf("screen after esc=%q want %q", got.screen, screenAcceptance) t.Fatalf("screen after esc=%q want %q", got.screen, screenAcceptance)
@@ -289,7 +287,6 @@ func TestAcceptanceMenuMapsNewTargets(t *testing.T) {
cursor int cursor int
want actionKind want actionKind
}{ }{
{cursor: 0, want: actionRunNvidiaSAT},
{cursor: 1, want: actionRunMemorySAT}, {cursor: 1, want: actionRunMemorySAT},
{cursor: 2, want: actionRunStorageSAT}, {cursor: 2, want: actionRunStorageSAT},
} }

View File

@@ -1,6 +1,8 @@
package tui package tui
import ( import (
"context"
"bee/audit/internal/app" "bee/audit/internal/app"
"bee/audit/internal/platform" "bee/audit/internal/platform"
"bee/audit/internal/runtimeenv" "bee/audit/internal/runtimeenv"
@@ -12,16 +14,18 @@ import (
type screen string type screen string
const ( const (
screenMain screen = "main" screenMain screen = "main"
screenNetwork screen = "network" screenNetwork screen = "network"
screenInterfacePick screen = "interface_pick" screenInterfacePick screen = "interface_pick"
screenServices screen = "services" screenServices screen = "services"
screenServiceAction screen = "service_action" screenServiceAction screen = "service_action"
screenAcceptance screen = "acceptance" screenAcceptance screen = "acceptance"
screenExportTargets screen = "export_targets" screenExportTargets screen = "export_targets"
screenOutput screen = "output" screenOutput screen = "output"
screenStaticForm screen = "static_form" screenStaticForm screen = "static_form"
screenConfirm screen = "confirm" screenConfirm screen = "confirm"
screenNvidiaSATSetup screen = "nvidia_sat_setup"
screenNvidiaSATRunning screen = "nvidia_sat_running"
) )
type actionKind string type actionKind string
@@ -63,6 +67,16 @@ type model struct {
formFields []formField formFields []formField
formIndex int formIndex int
// NVIDIA SAT setup
nvidiaGPUs []platform.NvidiaGPU
nvidiaGPUSel []bool
nvidiaDurIdx int // index into nvidiaDurationOptions
nvidiaSATCursor int
// NVIDIA SAT running
nvidiaSATCancel context.CancelFunc
nvidiaSATAborted bool
} }
type formField struct { type formField struct {

View File

@@ -87,6 +87,33 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
case bannerMsg: case bannerMsg:
m.banner = strings.TrimSpace(msg.text) m.banner = strings.TrimSpace(msg.text)
return m, nil return m, nil
case nvidiaGPUsMsg:
return m.handleNvidiaGPUsMsg(msg)
case nvtopClosedMsg:
// nvtop closed — stay on running screen (or result if SAT is already done)
return m, nil
case nvidiaSATDoneMsg:
if m.nvidiaSATAborted {
return m, nil
}
if m.nvidiaSATCancel != nil {
m.nvidiaSATCancel()
m.nvidiaSATCancel = nil
}
m.prevScreen = screenAcceptance
m.screen = screenOutput
m.title = msg.title
if msg.err != nil {
body := strings.TrimSpace(msg.body)
if body == "" {
m.body = fmt.Sprintf("ERROR: %v", msg.err)
} else {
m.body = fmt.Sprintf("%s\n\nERROR: %v", body, msg.err)
}
} else {
m.body = msg.body
}
return m, nil
} }
return m, nil return m, nil
@@ -104,6 +131,10 @@ func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu) return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu)
case screenAcceptance: case screenAcceptance:
return m.updateMenu(msg, 4, m.handleAcceptanceMenu) return m.updateMenu(msg, 4, m.handleAcceptanceMenu)
case screenNvidiaSATSetup:
return m.updateNvidiaSATSetup(msg)
case screenNvidiaSATRunning:
return m.updateNvidiaSATRunning(msg)
case screenExportTargets: case screenExportTargets:
return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu) return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu)
case screenInterfacePick: case screenInterfacePick:

View File

@@ -43,6 +43,10 @@ func (m model) View() string {
case screenConfirm: case screenConfirm:
title, body := m.confirmBody() title, body := m.confirmBody()
return renderConfirm(title, body, m.cursor) return renderConfirm(title, body, m.cursor)
case screenNvidiaSATSetup:
return renderNvidiaSATSetup(m)
case screenNvidiaSATRunning:
return renderNvidiaSATRunning()
case screenOutput: case screenOutput:
return fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body)) return fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body))
default: default:

View File

@@ -140,3 +140,34 @@ Acceptance flows:
- `BEE_GPU_STRESS_SIZE_MB` - `BEE_GPU_STRESS_SIZE_MB`
- `BEE_MEMTESTER_SIZE_MB` - `BEE_MEMTESTER_SIZE_MB`
- `BEE_MEMTESTER_PASSES` - `BEE_MEMTESTER_PASSES`
## NVIDIA SAT TUI flow (v1.0.0+)
```
TUI: Acceptance tests → NVIDIA command pack
1. screenNvidiaSATSetup
a. enumerate GPUs via `nvidia-smi --query-gpu=index,name,memory.total`
b. user selects duration preset: 10 min / 1 h / 8 h / 24 h
c. user selects GPUs via checkboxes (all selected by default)
d. memory size = max(selected GPU memory) — auto-detected, not exposed to user
2. Start → screenNvidiaSATRunning
a. CUDA_VISIBLE_DEVICES set to selected GPU indices
b. tea.Batch: SAT goroutine + tea.ExecProcess(nvtop) launched concurrently
c. nvtop occupies full terminal; SAT result queues in background
d. [o] reopen nvtop at any time; [a] abort (cancels context → kills bee-gpu-stress)
3. GPU metrics collection (during bee-gpu-stress)
- background goroutine polls `nvidia-smi` every second
- per-second rows: elapsed, GPU index, temp°C, usage%, power W, clock MHz
- outputs: gpu-metrics.csv, gpu-metrics.html (offline SVG chart), gpu-metrics-term.txt
4. After SAT completes
- result shown in screenOutput with terminal line-chart (gpu-metrics-term.txt)
- chart is asciigraph-style: box-drawing chars (╭╮╰╯─│), 4 series per GPU,
Y axis with ticks, ANSI colours (red=temp, blue=usage, green=power, yellow=clock)
```
**Critical invariants:**
- `nvtop` must be in `iso/builder/config/package-lists/bee.list.chroot` (baked into ISO).
- `bee-gpu-stress` is started via `exec.CommandContext`, so cancelling the context kills the process.
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits on `<-doneCh` before reading rows, so no mutex is needed.
- If `nvtop` is not found on PATH, SAT still runs without it (graceful degradation).
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.

View File

@@ -2,4 +2,4 @@ DEBIAN_VERSION=12
DEBIAN_KERNEL_ABI=6.1.0-43 DEBIAN_KERNEL_ABI=6.1.0-43
NVIDIA_DRIVER_VERSION=590.48.01 NVIDIA_DRIVER_VERSION=590.48.01
GO_VERSION=1.24.0 GO_VERSION=1.24.0
AUDIT_VERSION=0.1.1 AUDIT_VERSION=1.0.0

View File

@@ -27,6 +27,7 @@ less
vim-tiny vim-tiny
mc mc
htop htop
nvtop
sudo sudo
zstd zstd
mstflint mstflint