feat(tui): live GPU chart during stress test, full VRAM allocation
- GPU Platform Stress Test now shows a live in-TUI chart instead of nvtop.
  nvidia-smi is polled every second, and up to 60 data points are kept per GPU.
  All three metrics (Usage %, Temp °C, Power W) are drawn on a single plot,
  each normalised to its own range and rendered in a different colour.
- Memory allocation changed from MemoryMB/16 to MemoryMB-512 (full VRAM minus
  512 MB of driver overhead) so bee-gpu-stress actually stresses memory.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,8 +3,10 @@ package tui
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"bee/audit/internal/platform"
|
||||
|
||||
tea "github.com/charmbracelet/bubbletea"
|
||||
)
|
||||
@@ -156,14 +158,16 @@ func (m model) hcRunFanStress() (tea.Model, tea.Cmd) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// startGPUStressTest launches the GPU Platform Stress Test and nvtop concurrently.
|
||||
// nvtop occupies the full terminal as a live chart; the stress test runs in background.
|
||||
// startGPUStressTest launches the GPU Platform Stress Test with a live in-TUI chart.
|
||||
func (m model) startGPUStressTest() (tea.Model, tea.Cmd) {
|
||||
opts := hcFanStressOpts(m.hcMode, m.app)
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
m.gpuStressCancel = cancel
|
||||
m.gpuStressAborted = false
|
||||
m.gpuLiveRows = nil
|
||||
m.gpuLiveIndices = opts.GPUIndices
|
||||
m.gpuLiveStart = time.Now()
|
||||
m.screen = screenGPUStressRunning
|
||||
m.nvidiaSATCursor = 0
|
||||
|
||||
@@ -172,30 +176,21 @@ func (m model) startGPUStressTest() (tea.Model, tea.Cmd) {
|
||||
return gpuStressDoneMsg{title: result.Title, body: result.Body, err: err}
|
||||
}
|
||||
|
||||
nvtopPath, lookErr := exec.LookPath("nvtop")
|
||||
if lookErr != nil {
|
||||
return m, stressCmd
|
||||
}
|
||||
return m, tea.Batch(stressCmd, pollGPULive(opts.GPUIndices))
|
||||
}
|
||||
|
||||
return m, tea.Batch(
|
||||
stressCmd,
|
||||
tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
|
||||
return nvtopClosedMsg{}
|
||||
}),
|
||||
)
|
||||
// pollGPULive samples nvidia-smi once after one second and returns a gpuLiveTickMsg.
|
||||
// The update handler reschedules it to achieve continuous 1s polling.
|
||||
func pollGPULive(indices []int) tea.Cmd {
|
||||
return tea.Tick(time.Second, func(_ time.Time) tea.Msg {
|
||||
rows, _ := platform.SampleGPUMetrics(indices)
|
||||
return gpuLiveTickMsg{rows: rows, indices: indices}
|
||||
})
|
||||
}
|
||||
|
||||
// updateGPUStressRunning handles keys on the GPU stress running screen.
|
||||
func (m model) updateGPUStressRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||
switch msg.String() {
|
||||
case "o", "O":
|
||||
nvtopPath, err := exec.LookPath("nvtop")
|
||||
if err != nil {
|
||||
return m, nil
|
||||
}
|
||||
return m, tea.ExecProcess(exec.Command(nvtopPath), func(_ error) tea.Msg {
|
||||
return nvtopClosedMsg{}
|
||||
})
|
||||
case "a", "A":
|
||||
if m.gpuStressCancel != nil {
|
||||
m.gpuStressCancel()
|
||||
@@ -210,8 +205,22 @@ func (m model) updateGPUStressRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func renderGPUStressRunning() string {
|
||||
return "GPU PLATFORM STRESS TEST\n\nTest is running...\n\n[o] Open nvtop [a] Abort test [ctrl+c] quit\n"
|
||||
func renderGPUStressRunning(m model) string {
|
||||
var b strings.Builder
|
||||
fmt.Fprintln(&b, "GPU PLATFORM STRESS TEST")
|
||||
fmt.Fprintln(&b)
|
||||
if len(m.gpuLiveRows) == 0 {
|
||||
fmt.Fprintln(&b, "Collecting metrics...")
|
||||
} else {
|
||||
chartWidth := m.width - 8
|
||||
if chartWidth < 40 {
|
||||
chartWidth = 70
|
||||
}
|
||||
b.WriteString(platform.RenderGPULiveChart(m.gpuLiveRows, chartWidth))
|
||||
}
|
||||
fmt.Fprintln(&b)
|
||||
b.WriteString("[a] Abort test [ctrl+c] quit")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (m model) hcRunAll() (tea.Model, tea.Cmd) {
|
||||
|
||||
Reference in New Issue
Block a user