1. Verbose live progress during SAT tests (CPU, Memory, Storage, AMD GPU)

- New tui/sat_progress.go: polls {DefaultSATBaseDir}/{prefix}-*/verbose.log every 300ms and parses completed/in-progress steps
  - Busy screen now shows each step as PASS  lscpu (234ms) / FAIL  stress-ng (60.0s) / ...   sensors-after instead of just "Working..."

  2. Test results shown on screen (instead of just "Archive written to /path")
  - RunCPUAcceptancePackResult, RunMemoryAcceptancePackResult, RunStorageAcceptancePackResult, RunAMDAcceptancePackResult now read summary.txt from the run directory and return a formatted per-step result:
  Run: 2025-03-25T10:00:00Z

  PASS  lscpu
  PASS  sensors-before
  FAIL  stress-ng
  PASS  sensors-after

  Overall: FAILED  (ok=3  failed=1)

  3. AMD GPU SAT with auto-detection
  - platform.System.DetectGPUVendor(): checks /dev/nvidia0 first → "nvidia", then /dev/kfd → "amd"; returns "" when neither device node exists
  - platform.System.RunAMDAcceptancePack(): runs rocm-smi, rocm-smi --showallinfo, dmidecode -t baseboard, and dmidecode -t system
  - GPU SAT (G key / GPU row enter) automatically routes to AMD or NVIDIA based on detected vendor
  - "Run All" also auto-detects vendor

  4. Panel detail view
  - GPU detail now shows the most recent GPU SAT result, whether it came from the NVIDIA or the AMD pack
  - All SAT detail views use the same human-readable formatSATDetail format
This commit is contained in:
Mikhail Chusavitin
2026-03-25 17:54:27 +03:00
parent adcc147b32
commit 0c16616cc9
9 changed files with 300 additions and 57 deletions

View File

@@ -77,6 +77,9 @@ type satRunner interface {
RunStorageAcceptancePack(baseDir string) (string, error)
RunCPUAcceptancePack(baseDir string, durationSec int) (string, error)
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
DetectGPUVendor() string
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
RunAMDAcceptancePack(baseDir string) (string, error)
}
type runtimeChecker interface {
@@ -431,11 +434,7 @@ func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunMemoryAcceptancePack(baseDir)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
}
return ActionResult{Title: "Memory SAT", Body: body}, err
return ActionResult{Title: "Memory SAT", Body: satResultBody(path)}, err
}
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
@@ -447,11 +446,7 @@ func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int) (string, err
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
path, err := a.RunCPUAcceptancePack(baseDir, durationSec)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
}
return ActionResult{Title: "CPU SAT", Body: body}, err
return ActionResult{Title: "CPU SAT", Body: satResultBody(path)}, err
}
func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
@@ -463,11 +458,41 @@ func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunStorageAcceptancePack(baseDir)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err
}
// DetectGPUVendor returns the GPU vendor string reported by the underlying
// SAT runner (a.sat). The value is passed through unchanged.
func (a *App) DetectGPUVendor() string {
return a.sat.DetectGPUVendor()
}
// ListAMDGPUs returns the AMD GPU list and error reported by the underlying
// SAT runner (a.sat), without any post-processing.
func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
return a.sat.ListAMDGPUs()
}
func (a *App) RunAMDAcceptancePack(baseDir string) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return ActionResult{Title: "Storage SAT", Body: body}, err
return a.sat.RunAMDAcceptancePack(baseDir)
}
// RunAMDAcceptancePackResult runs the AMD GPU acceptance pack and wraps the
// outcome in an ActionResult titled "AMD GPU SAT". The body is produced by
// satResultBody from the path the pack returned; the pack's error is returned
// alongside the result rather than being folded into the body.
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
	archive, runErr := a.RunAMDAcceptancePack(baseDir)
	result := ActionResult{
		Title: "AMD GPU SAT",
		Body:  satResultBody(archive),
	}
	return result, runErr
}
// satResultBody builds the on-screen result text for a finished SAT run.
//
// archivePath is the path of the run's .tar.gz archive; the run directory is
// taken to be the same path without the ".tar.gz" suffix, and summary.txt is
// read from it.
//
// Returns:
//   - "No output produced." when archivePath is empty;
//   - the formatted per-step summary when summary.txt is readable and yields
//     a non-empty formatSATDetail result;
//   - "Archive written to <archivePath>" otherwise, i.e. when summary.txt
//     cannot be read or contains nothing formatSATDetail can render. This
//     keeps the result screen from ever being blank.
func satResultBody(archivePath string) string {
	if archivePath == "" {
		return "No output produced."
	}
	fallback := "Archive written to " + archivePath

	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
	raw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
	if err != nil {
		// Best effort: the archive still exists even if the summary is gone.
		return fallback
	}
	if detail := formatSATDetail(strings.TrimSpace(string(raw))); detail != "" {
		return detail
	}
	// An empty or unrecognised summary would otherwise render as a blank body.
	return fallback
}
func (a *App) HealthSummaryResult() ActionResult {

View File

@@ -130,6 +130,12 @@ func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int) (string,
return "", nil
}
// AMD-related stubs for the fakeSAT test double: each returns zero values
// (no vendor, no GPUs, no archive path) and a nil error.
func (f fakeSAT) DetectGPUVendor() string { return "" }
func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) { return nil, nil }
func (f fakeSAT) RunAMDAcceptancePack(baseDir string) (string, error) { return "", nil }
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
t.Parallel()
@@ -380,10 +386,10 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
if got, _ := a.RunNvidiaAcceptancePackResult(""); got.Body != "Archive written." {
t.Fatalf("sat body=%q", got.Body)
}
if got, _ := a.RunMemoryAcceptancePackResult(""); got.Body != "Archive written." {
if got, _ := a.RunMemoryAcceptancePackResult(""); got.Body != "No output produced." {
t.Fatalf("memory sat body=%q", got.Body)
}
if got, _ := a.RunStorageAcceptancePackResult(""); got.Body != "Archive written." {
if got, _ := a.RunStorageAcceptancePackResult(""); got.Body != "No output produced." {
t.Fatalf("storage sat body=%q", got.Body)
}
}

View File

@@ -103,6 +103,22 @@ func (a *App) ComponentDetailResult(key string) ActionResult {
case "MEM":
return a.satDetailResult("memory", "memory-", "MEM detail")
case "GPU":
// Prefer whichever GPU SAT was run most recently.
//
// The run directories are named "<prefix><stamp>", so the newest run of each
// vendor is the last entry after sorting. The two vendors must be compared by
// their stamp only: comparing whole paths would always rank "gpu-amd-…" below
// "gpu-nvidia-…" regardless of when the runs happened.
//
// Glob only fails on a malformed pattern, which these constant patterns are
// not, so its error is deliberately ignored.
nv, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-nvidia-*/summary.txt"))
am, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-amd-*/summary.txt"))
sort.Strings(nv)
sort.Strings(am)
// latestStamp returns the run stamp (directory name minus prefix) of the
// last match, or "" when there are no matches.
latestStamp := func(matches []string, prefix string) string {
	if len(matches) == 0 {
		return ""
	}
	runDir := filepath.Base(filepath.Dir(matches[len(matches)-1]))
	return strings.TrimPrefix(runDir, prefix)
}
if latestStamp(am, "gpu-amd-") > latestStamp(nv, "gpu-nvidia-") {
	return a.satDetailResult("gpu", "gpu-amd-", "GPU detail")
}
// Ties and the "no results at all" case fall through to NVIDIA, as before.
return a.satDetailResult("gpu", "gpu-nvidia-", "GPU detail")
case "DISK":
return a.satDetailResult("storage", "storage-", "DISK detail")
@@ -190,7 +206,70 @@ func (a *App) satDetailResult(statusKey, prefix, title string) ActionResult {
if err != nil {
return ActionResult{Title: title, Body: "Could not read test results."}
}
return ActionResult{Title: title, Body: strings.TrimSpace(string(raw))}
return ActionResult{Title: title, Body: formatSATDetail(strings.TrimSpace(string(raw)))}
}
// formatSATDetail renders the key=value text of a summary.txt as a per-step
// report.
//
// Layout of the result:
//   - a "Run: <run_at_utc>" header when that key is present;
//   - one line per "<step>_status=" entry, in the order the steps first appear
//     in raw ("overall" is excluded here), labelled PASS for OK, FAIL for
//     FAILED, SKIP for UNSUPPORTED and "?" for anything else;
//   - an "Overall: ..." footer with the job_ok / job_failed counters when
//     overall_status is present.
//
// The returned string has surrounding whitespace trimmed.
func formatSATDetail(raw string) string {
	summary := parseKeyValueSummary(raw)

	var out strings.Builder
	if runAt, found := summary["run_at_utc"]; found {
		fmt.Fprintf(&out, "Run: %s\n\n", runAt)
	}

	// Walk the raw text rather than the map so steps keep their file order.
	var steps []string
	known := map[string]bool{}
	for _, entry := range strings.Split(raw, "\n") {
		pos := strings.Index(entry, "_status=")
		if pos < 0 {
			continue
		}
		step := entry[:pos]
		if known[step] || step == "overall" {
			continue
		}
		known[step] = true
		steps = append(steps, step)
	}

	for _, step := range steps {
		label := cleanSummaryKey(step)
		switch summary[step+"_status"] {
		case "OK":
			fmt.Fprintf(&out, "PASS %s\n", label)
		case "FAILED":
			fmt.Fprintf(&out, "FAIL %s\n", label)
		case "UNSUPPORTED":
			fmt.Fprintf(&out, "SKIP %s\n", label)
		default:
			fmt.Fprintf(&out, "? %s\n", label)
		}
	}

	if overall, found := summary["overall_status"]; found {
		fmt.Fprintf(&out, "\nOverall: %s (ok=%s failed=%s)", overall, summary["job_ok"], summary["job_failed"])
	}

	return strings.TrimSpace(out.String())
}
// cleanSummaryKey drops a leading "<digits>-" ordering prefix from a SAT step
// key, e.g. "1-lscpu" → "lscpu" and "3-stress-ng" → "stress-ng".
//
// The key is returned unchanged when it has no dash, starts with a dash, or
// has anything other than ASCII digits before the first dash.
func cleanSummaryKey(key string) string {
	dash := strings.IndexByte(key, '-')
	if dash < 1 {
		return key
	}
	// Anything left after stripping digits means the prefix is not purely numeric.
	if strings.TrimLeft(key[:dash], "0123456789") != "" {
		return key
	}
	return key[dash+1:]
}
func (a *App) psuDetailResult() ActionResult {
@@ -247,6 +326,7 @@ func satStatuses() map[string]string {
prefix string
}{
{"gpu", "gpu-nvidia-"},
{"gpu", "gpu-amd-"},
{"memory", "memory-"},
{"storage", "storage-"},
{"cpu", "cpu-"},

View File

@@ -22,6 +22,56 @@ type NvidiaGPU struct {
MemoryMB int
}
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
type AMDGPUInfo struct {
// Index is the zero-based position of the GPU in the list built by ListAMDGPUs.
Index int
// Name is the text following the first comma of the GPU's rocm-smi CSV row.
Name string
}
// DetectGPUVendor identifies the GPU vendor by probing device nodes.
//
// The probes run in a fixed order and the first node that can be stat'ed
// wins: /dev/nvidia0 yields "nvidia", then /dev/kfd yields "amd". When
// neither node is present the result is "".
func (s *System) DetectGPUVendor() string {
	probes := []struct {
		device string
		vendor string
	}{
		{"/dev/nvidia0", "nvidia"},
		{"/dev/kfd", "amd"},
	}
	for _, probe := range probes {
		if _, err := os.Stat(probe.device); err == nil {
			return probe.vendor
		}
	}
	return ""
}
// ListAMDGPUs lists the AMD GPUs reported by "rocm-smi --showproductname --csv".
//
// Blank lines and lines starting with "device" (case-insensitive, i.e. the
// CSV header) are skipped. Every remaining line becomes one entry whose Index
// is its position in the result and whose Name is everything after the first
// comma (empty when the line has no comma).
//
// An error is returned, wrapped with a "rocm-smi: " prefix, when the command
// cannot be run or exits unsuccessfully.
func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
	raw, err := exec.Command("rocm-smi", "--showproductname", "--csv").Output()
	if err != nil {
		return nil, fmt.Errorf("rocm-smi: %w", err)
	}

	var found []AMDGPUInfo
	rows := strings.Split(strings.TrimSpace(string(raw)), "\n")
	for _, row := range rows {
		row = strings.TrimSpace(row)
		if row == "" {
			continue
		}
		if strings.HasPrefix(strings.ToLower(row), "device") {
			continue
		}
		var name string
		if comma := strings.Index(row, ","); comma >= 0 {
			name = strings.TrimSpace(row[comma+1:])
		}
		found = append(found, AMDGPUInfo{Index: len(found), Name: name})
	}
	return found, nil
}
// RunAMDAcceptancePack runs the AMD GPU diagnostic pack under the "gpu-amd"
// prefix in baseDir.
//
// The pack consists of four jobs, each logged to its own file: plain
// rocm-smi, rocm-smi --showallinfo, and dmidecode for the baseboard and
// system tables. The string and error are whatever runAcceptancePack returns
// (callers treat the string as the archive path — confirm against
// runAcceptancePack).
func (s *System) RunAMDAcceptancePack(baseDir string) (string, error) {
	jobs := []satJob{
		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
		{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
		{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
		{name: "04-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
	}
	return runAcceptancePack(baseDir, "gpu-amd", jobs)
}
// ListNvidiaGPUs returns GPUs visible to nvidia-smi.
func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
out, err := exec.Command("nvidia-smi",

View File

@@ -1,6 +1,10 @@
package tui
import tea "github.com/charmbracelet/bubbletea"
import (
"time"
tea "github.com/charmbracelet/bubbletea"
)
func (m model) updateStaticForm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
switch msg.String() {
@@ -82,23 +86,57 @@ func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
return m.executeRunAll()
case actionRunMemorySAT:
m.busyTitle = "Memory test"
return m, func() tea.Msg {
result, err := m.app.RunMemoryAcceptancePackResult("")
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
}
m.progressPrefix = "memory"
m.progressSince = time.Now()
m.progressLines = nil
since := m.progressSince
return m, tea.Batch(
func() tea.Msg {
result, err := m.app.RunMemoryAcceptancePackResult("")
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
},
pollSATProgress("memory", since),
)
case actionRunStorageSAT:
m.busyTitle = "Storage test"
return m, func() tea.Msg {
result, err := m.app.RunStorageAcceptancePackResult("")
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
}
m.progressPrefix = "storage"
m.progressSince = time.Now()
m.progressLines = nil
since := m.progressSince
return m, tea.Batch(
func() tea.Msg {
result, err := m.app.RunStorageAcceptancePackResult("")
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
},
pollSATProgress("storage", since),
)
case actionRunCPUSAT:
m.busyTitle = "CPU test"
m.progressPrefix = "cpu"
m.progressSince = time.Now()
m.progressLines = nil
since := m.progressSince
durationSec := hcCPUDurations[m.hcMode]
return m, func() tea.Msg {
result, err := m.app.RunCPUAcceptancePackResult("", durationSec)
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
}
return m, tea.Batch(
func() tea.Msg {
result, err := m.app.RunCPUAcceptancePackResult("", durationSec)
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
},
pollSATProgress("cpu", since),
)
case actionRunAMDGPUSAT:
m.busyTitle = "AMD GPU test"
m.progressPrefix = "gpu-amd"
m.progressSince = time.Now()
m.progressLines = nil
since := m.progressSince
return m, tea.Batch(
func() tea.Msg {
result, err := m.app.RunAMDAcceptancePackResult("")
return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
},
pollSATProgress("gpu-amd", since),
)
}
case "ctrl+c":
return m, tea.Quit
@@ -110,7 +148,7 @@ func (m model) confirmCancelTarget() screen {
switch m.pendingAction {
case actionExportBundle:
return screenExportTargets
case actionRunAll, actionRunMemorySAT, actionRunStorageSAT, actionRunCPUSAT:
case actionRunAll, actionRunMemorySAT, actionRunStorageSAT, actionRunCPUSAT, actionRunAMDGPUSAT:
return screenHealthCheck
default:
return screenMain

View File

@@ -116,6 +116,12 @@ func (m model) updateHealthCheck(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
func (m model) hcRunSingle(idx int) (tea.Model, tea.Cmd) {
switch idx {
case hcGPU:
if m.app.DetectGPUVendor() == "amd" {
m.pendingAction = actionRunAMDGPUSAT
m.screen = screenConfirm
m.cursor = 0
return m, nil
}
m.nvidiaDurIdx = m.hcMode
return m.enterNvidiaSATSetup()
case hcMemory:
@@ -159,27 +165,37 @@ func (m model) executeRunAll() (tea.Model, tea.Cmd) {
return m, func() tea.Msg {
var parts []string
if sel[hcGPU] {
gpus, err := app.ListNvidiaGPUs()
if err != nil || len(gpus) == 0 {
parts = append(parts, "=== GPU ===\nNo NVIDIA GPUs detected or driver not loaded.")
} else {
var indices []int
sizeMB := 0
for _, g := range gpus {
indices = append(indices, g.Index)
if sizeMB == 0 || g.MemoryMB < sizeMB {
sizeMB = g.MemoryMB
}
}
if sizeMB == 0 {
sizeMB = 64
}
r, err := app.RunNvidiaAcceptancePackWithOptions(context.Background(), "", durationSec, sizeMB, indices)
vendor := app.DetectGPUVendor()
if vendor == "amd" {
r, err := app.RunAMDAcceptancePackResult("")
body := r.Body
if err != nil {
body += "\nERROR: " + err.Error()
}
parts = append(parts, "=== GPU ===\n"+body)
parts = append(parts, "=== GPU (AMD) ===\n"+body)
} else {
gpus, err := app.ListNvidiaGPUs()
if err != nil || len(gpus) == 0 {
parts = append(parts, "=== GPU ===\nNo NVIDIA GPUs detected or driver not loaded.")
} else {
var indices []int
sizeMB := 0
for _, g := range gpus {
indices = append(indices, g.Index)
if sizeMB == 0 || g.MemoryMB < sizeMB {
sizeMB = g.MemoryMB
}
}
if sizeMB == 0 {
sizeMB = 64
}
r, err := app.RunNvidiaAcceptancePackWithOptions(context.Background(), "", durationSec, sizeMB, indices)
body := r.Body
if err != nil {
body += "\nERROR: " + err.Error()
}
parts = append(parts, "=== GPU ===\n"+body)
}
}
}
if sel[hcMemory] {
@@ -225,7 +241,7 @@ func renderHealthCheck(m model) string {
type comp struct{ name, desc, key string }
comps := []comp{
{"GPU", "nvidia-smi + bee-gpu-stress", "G"},
{"GPU", "nvidia/amd auto-detect", "G"},
{"MEMORY", "memtester", "M"},
{"STORAGE", "smartctl + NVMe self-test", "S"},
{"CPU", "audit diagnostics", "C"},

View File

@@ -2,6 +2,7 @@ package tui
import (
"strings"
"time"
"bee/audit/internal/app"
"bee/audit/internal/platform"
@@ -31,14 +32,15 @@ const (
type actionKind string
const (
actionNone actionKind = ""
actionDHCPOne actionKind = "dhcp_one"
actionStaticIPv4 actionKind = "static_ipv4"
actionExportBundle actionKind = "export_bundle"
actionRunAll actionKind = "run_all"
actionRunMemorySAT actionKind = "run_memory_sat"
actionRunStorageSAT actionKind = "run_storage_sat"
actionRunCPUSAT actionKind = "run_cpu_sat"
actionNone actionKind = ""
actionDHCPOne actionKind = "dhcp_one"
actionStaticIPv4 actionKind = "static_ipv4"
actionExportBundle actionKind = "export_bundle"
actionRunAll actionKind = "run_all"
actionRunMemorySAT actionKind = "run_memory_sat"
actionRunStorageSAT actionKind = "run_storage_sat"
actionRunCPUSAT actionKind = "run_cpu_sat"
actionRunAMDGPUSAT actionKind = "run_amd_gpu_sat"
)
type model struct {
@@ -88,6 +90,11 @@ type model struct {
// NVIDIA SAT running
nvidiaSATCancel func()
nvidiaSATAborted bool
// SAT verbose progress (CPU / Memory / Storage / AMD GPU)
progressLines []string
progressPrefix string
progressSince time.Time
}
type formField struct {
@@ -177,6 +184,8 @@ func (m model) confirmBody() (string, string) {
case actionRunCPUSAT:
modes := []string{"Quick (60s)", "Standard (300s)", "Express (900s)"}
return "CPU test", "Run stress-ng? Mode: " + modes[m.hcMode]
case actionRunAMDGPUSAT:
return "AMD GPU test", "Run AMD GPU diagnostic pack (rocm-smi)?"
default:
return "Confirm", "Proceed?"
}

View File

@@ -17,9 +17,19 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
return m, nil
}
return m.updateKey(msg)
case satProgressMsg:
if m.busy && m.progressPrefix != "" {
if len(msg.lines) > 0 {
m.progressLines = msg.lines
}
return m, pollSATProgress(m.progressPrefix, m.progressSince)
}
return m, nil
case resultMsg:
m.busy = false
m.busyTitle = ""
m.progressLines = nil
m.progressPrefix = ""
m.title = msg.title
if msg.err != nil {
body := strings.TrimSpace(msg.body)

View File

@@ -39,6 +39,15 @@ func (m model) View() string {
if m.busyTitle != "" {
title = m.busyTitle
}
if len(m.progressLines) > 0 {
var b strings.Builder
fmt.Fprintf(&b, "%s\n\n", title)
for _, l := range m.progressLines {
fmt.Fprintf(&b, " %s\n", l)
}
b.WriteString("\n[ctrl+c] quit\n")
return b.String()
}
return fmt.Sprintf("%s\n\nWorking...\n\n[ctrl+c] quit\n", title)
}
switch m.screen {