- Add 9002-nvidia-dcgm.hook.chroot: installs datacenter-gpu-manager from NVIDIA apt repo during live-build - Enable nvidia-dcgm.service in chroot setup hook - Replace bee-gpu-stress with dcgmi diag (levels 1-4) in NVIDIA SAT - TUI: replace GPU checkbox + duration UI with DCGM level selection - Remove console=tty2 from boot params: KVM/VGA now shows tty1 where bee-tui runs, fixing unresponsive console Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
138 lines
3.5 KiB
Go
138 lines
3.5 KiB
Go
package tui
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
|
|
tea "github.com/charmbracelet/bubbletea"
|
|
)
|
|
|
|
// nvidiaDCGMOptions lists the DCGM diagnostic levels offered on the NVIDIA
// setup screen, in display order. For each entry, label is the menu text,
// level is the numeric value passed to the diagnostic run, and note is the
// short hint rendered in parentheses after the label.
var nvidiaDCGMOptions = []struct {
	label string
	level int
	note  string
}{
	{
		label: "Level 1 — Quick",
		level: 1,
		note:  "~1 min, configuration check",
	},
	{
		label: "Level 2 — Medium",
		level: 2,
		note:  "~2 min, memory test",
	},
	{
		label: "Level 3 — Targeted stress",
		level: 3,
		note:  "~10 min, SM + memory + PCIe [recommended]",
	},
	{
		label: "Level 4 — Extended stress",
		level: 4,
		note:  "~30 min, extended burn-in",
	},
}
|
|
|
|
// enterNvidiaSATSetup resets and shows the DCGM level selection screen.
|
|
func (m model) enterNvidiaSATSetup() (tea.Model, tea.Cmd) {
|
|
m.screen = screenNvidiaSATSetup
|
|
m.nvidiaDurIdx = 2 // default: Level 3
|
|
m.nvidiaSATCursor = 2
|
|
m.busy = false
|
|
return m, nil
|
|
}
|
|
|
|
// updateNvidiaSATSetup handles keys on the DCGM setup screen.
|
|
func (m model) updateNvidiaSATSetup(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|
numOpts := len(nvidiaDCGMOptions)
|
|
totalItems := numOpts + 2 // +2: Start, Cancel
|
|
switch msg.String() {
|
|
case "up", "k":
|
|
if m.nvidiaSATCursor > 0 {
|
|
m.nvidiaSATCursor--
|
|
}
|
|
case "down", "j":
|
|
if m.nvidiaSATCursor < totalItems-1 {
|
|
m.nvidiaSATCursor++
|
|
}
|
|
case " ", "enter":
|
|
startIdx := numOpts
|
|
cancelIdx := startIdx + 1
|
|
switch {
|
|
case m.nvidiaSATCursor < numOpts:
|
|
m.nvidiaDurIdx = m.nvidiaSATCursor
|
|
case m.nvidiaSATCursor == startIdx:
|
|
return m.startNvidiaSAT()
|
|
case m.nvidiaSATCursor == cancelIdx:
|
|
m.screen = screenHealthCheck
|
|
m.cursor = 0
|
|
}
|
|
case "esc":
|
|
m.screen = screenHealthCheck
|
|
m.cursor = 0
|
|
case "ctrl+c", "q":
|
|
return m, tea.Quit
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
// startNvidiaSAT launches the DCGM diagnostic.
|
|
func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
|
|
diagLevel := nvidiaDCGMOptions[m.nvidiaDurIdx].level
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
m.nvidiaSATCancel = cancel
|
|
m.nvidiaSATAborted = false
|
|
m.screen = screenNvidiaSATRunning
|
|
m.nvidiaSATCursor = 0
|
|
|
|
satCmd := func() tea.Msg {
|
|
result, err := m.app.RunNvidiaAcceptancePackWithOptions(ctx, "", diagLevel, nil)
|
|
return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
|
|
}
|
|
|
|
return m, satCmd
|
|
}
|
|
|
|
// updateNvidiaSATRunning handles keys on the running screen.
|
|
func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
|
|
switch msg.String() {
|
|
case "a", "A":
|
|
if m.nvidiaSATCancel != nil {
|
|
m.nvidiaSATCancel()
|
|
m.nvidiaSATCancel = nil
|
|
}
|
|
m.nvidiaSATAborted = true
|
|
m.screen = screenHealthCheck
|
|
m.cursor = 0
|
|
case "ctrl+c":
|
|
return m, tea.Quit
|
|
}
|
|
return m, nil
|
|
}
|
|
|
|
// renderNvidiaSATSetup renders the DCGM level selection screen.
|
|
func renderNvidiaSATSetup(m model) string {
|
|
var b strings.Builder
|
|
fmt.Fprintln(&b, "NVIDIA Diagnostics (DCGM)")
|
|
fmt.Fprintln(&b)
|
|
fmt.Fprintln(&b, "Diagnostic level:")
|
|
for i, opt := range nvidiaDCGMOptions {
|
|
radio := "( )"
|
|
if i == m.nvidiaDurIdx {
|
|
radio = "(*)"
|
|
}
|
|
prefix := " "
|
|
if m.nvidiaSATCursor == i {
|
|
prefix = "> "
|
|
}
|
|
fmt.Fprintf(&b, "%s%s %s (%s)\n", prefix, radio, opt.label, opt.note)
|
|
}
|
|
fmt.Fprintln(&b)
|
|
startIdx := len(nvidiaDCGMOptions)
|
|
startPfx := " "
|
|
cancelPfx := " "
|
|
if m.nvidiaSATCursor == startIdx {
|
|
startPfx = "> "
|
|
}
|
|
if m.nvidiaSATCursor == startIdx+1 {
|
|
cancelPfx = "> "
|
|
}
|
|
fmt.Fprintf(&b, "%sStart\n", startPfx)
|
|
fmt.Fprintf(&b, "%sCancel\n", cancelPfx)
|
|
fmt.Fprintln(&b)
|
|
b.WriteString("[↑/↓] move [space/enter] select [esc] cancel\n")
|
|
return b.String()
|
|
}
|
|
|
|
// renderNvidiaSATRunning returns the static text shown while the DCGM
// diagnostic is in progress: a title, a status line, and the key hints.
func renderNvidiaSATRunning() string {
	const (
		title  = "NVIDIA Diagnostics (DCGM)\n\n"
		status = "Test is running...\n\n"
		keys   = "[a] Abort test [ctrl+c] quit\n"
	)
	return title + status + keys
}
|