Add TUI hardware banner and polish SAT summaries

This commit is contained in:
Mikhail Chusavitin
2026-03-15 14:27:01 +03:00
parent b483e2ce35
commit b8c235b5ac
12 changed files with 667 additions and 47 deletions

View File

@@ -347,6 +347,8 @@ Planned code shape:
- `bee tui` can export the latest audit JSON to removable media
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
- SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
### 2.6 — Vendor utilities and optional assets

View File

@@ -13,11 +13,13 @@ import (
"bee/audit/internal/collector"
"bee/audit/internal/platform"
"bee/audit/internal/runtimeenv"
"bee/audit/internal/schema"
)
const (
var (
DefaultAuditJSONPath = "/var/log/bee-audit.json"
DefaultAuditLogPath = "/var/log/bee-audit.log"
DefaultSATBaseDir = "/var/log/bee-sat"
)
type App struct {
@@ -354,7 +356,7 @@ func (a *App) HealthSummaryResult() ActionResult {
fmt.Fprintf(&body, "PSU: warn=%d fail=%d\n", summary.PSUWarn, summary.PSUFail)
fmt.Fprintf(&body, "Memory: warn=%d fail=%d\n", summary.MemoryWarn, summary.MemoryFail)
for _, item := range latestSATSummaries() {
fmt.Fprintf(&body, "\n%s", item)
fmt.Fprintf(&body, "\n\n%s", item)
}
if len(summary.Failures) > 0 {
fmt.Fprintf(&body, "\n\nFailures:\n- %s", strings.Join(summary.Failures, "\n- "))
@@ -365,6 +367,40 @@ func (a *App) HealthSummaryResult() ActionResult {
return ActionResult{Title: "Health summary", Body: strings.TrimSpace(body.String())}
}
func (a *App) MainBanner() string {
raw, err := os.ReadFile(DefaultAuditJSONPath)
if err != nil {
return ""
}
var snapshot schema.HardwareIngestRequest
if err := json.Unmarshal(raw, &snapshot); err != nil {
return ""
}
var lines []string
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
lines = append(lines, system)
}
if cpu := formatCPULine(snapshot.Hardware.CPUs); cpu != "" {
lines = append(lines, cpu)
}
if memory := formatMemoryLine(snapshot.Hardware.Memory); memory != "" {
lines = append(lines, memory)
}
if storage := formatStorageLine(snapshot.Hardware.Storage); storage != "" {
lines = append(lines, storage)
}
if gpu := formatGPULine(snapshot.Hardware.PCIeDevices); gpu != "" {
lines = append(lines, gpu)
}
if ip := formatIPLine(a.network.ListInterfaces); ip != "" {
lines = append(lines, ip)
}
return strings.TrimSpace(strings.Join(lines, "\n"))
}
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
var body strings.Builder
for _, tool := range statuses {
@@ -418,7 +454,6 @@ func bodyOr(body, fallback string) string {
}
func latestSATSummaries() []string {
baseDir := "/var/log/bee-sat"
patterns := []struct {
label string
prefix string
@@ -429,7 +464,7 @@ func latestSATSummaries() []string {
}
var out []string
for _, item := range patterns {
matches, err := filepath.Glob(filepath.Join(baseDir, item.prefix+"*/summary.txt"))
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
if err != nil || len(matches) == 0 {
continue
}
@@ -438,7 +473,273 @@ func latestSATSummaries() []string {
if err != nil {
continue
}
out = append(out, item.label+":\n"+strings.TrimSpace(string(raw)))
out = append(out, formatSATSummary(item.label, string(raw)))
}
return out
}
func formatSATSummary(label, raw string) string {
values := parseKeyValueSummary(raw)
var body strings.Builder
fmt.Fprintf(&body, "%s:", label)
if overall := firstNonEmpty(values["overall_status"], "UNKNOWN"); overall != "" {
fmt.Fprintf(&body, " %s", overall)
}
if ok := firstNonEmpty(values["job_ok"], "0"); ok != "" {
fmt.Fprintf(&body, " ok=%s", ok)
}
if failed := firstNonEmpty(values["job_failed"], "0"); failed != "" {
fmt.Fprintf(&body, " failed=%s", failed)
}
if unsupported := firstNonEmpty(values["job_unsupported"], "0"); unsupported != "" && unsupported != "0" {
fmt.Fprintf(&body, " unsupported=%s", unsupported)
}
if devices := strings.TrimSpace(values["devices"]); devices != "" {
fmt.Fprintf(&body, "\nDevices: %s", devices)
}
return body.String()
}
func formatSystemLine(board schema.HardwareBoard) string {
model := strings.TrimSpace(strings.Join([]string{
trimPtr(board.Manufacturer),
trimPtr(board.ProductName),
}, " "))
serial := strings.TrimSpace(board.SerialNumber)
switch {
case model != "" && serial != "":
return fmt.Sprintf("System: %s | S/N %s", model, serial)
case model != "":
return "System: " + model
case serial != "":
return "System S/N: " + serial
default:
return ""
}
}
func formatCPULine(cpus []schema.HardwareCPU) string {
if len(cpus) == 0 {
return ""
}
modelCounts := map[string]int{}
unknown := 0
for _, cpu := range cpus {
model := trimPtr(cpu.Model)
if model == "" {
unknown++
continue
}
modelCounts[model]++
}
if len(modelCounts) == 1 && unknown == 0 {
for model, count := range modelCounts {
return fmt.Sprintf("CPU: %d x %s", count, model)
}
}
parts := make([]string, 0, len(modelCounts)+1)
if len(modelCounts) > 0 {
keys := make([]string, 0, len(modelCounts))
for key := range modelCounts {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%d x %s", modelCounts[key], key))
}
}
if unknown > 0 {
parts = append(parts, fmt.Sprintf("%d x unknown", unknown))
}
return "CPU: " + strings.Join(parts, ", ")
}
func formatMemoryLine(dimms []schema.HardwareMemory) string {
totalMB := 0
present := 0
types := map[string]struct{}{}
for _, dimm := range dimms {
if dimm.Present != nil && !*dimm.Present {
continue
}
if dimm.SizeMB == nil || *dimm.SizeMB <= 0 {
continue
}
present++
totalMB += *dimm.SizeMB
if value := trimPtr(dimm.Type); value != "" {
types[value] = struct{}{}
}
}
if totalMB == 0 {
return ""
}
typeText := joinSortedKeys(types)
line := fmt.Sprintf("Memory: %s", humanizeMB(totalMB))
if typeText != "" {
line += " " + typeText
}
if present > 0 {
line += fmt.Sprintf(" (%d DIMMs)", present)
}
return line
}
func formatStorageLine(disks []schema.HardwareStorage) string {
count := 0
totalGB := 0
for _, disk := range disks {
if disk.Present != nil && !*disk.Present {
continue
}
count++
if disk.SizeGB != nil && *disk.SizeGB > 0 {
totalGB += *disk.SizeGB
}
}
if count == 0 {
return ""
}
line := fmt.Sprintf("Storage: %d drives", count)
if totalGB > 0 {
line += fmt.Sprintf(" / %s", humanizeGB(totalGB))
}
return line
}
func formatGPULine(devices []schema.HardwarePCIeDevice) string {
gpus := map[string]int{}
for _, dev := range devices {
if !isGPUDevice(dev) {
continue
}
name := firstNonEmpty(trimPtr(dev.Model), trimPtr(dev.Manufacturer), "unknown")
gpus[name]++
}
if len(gpus) == 0 {
return ""
}
keys := make([]string, 0, len(gpus))
for key := range gpus {
keys = append(keys, key)
}
sort.Strings(keys)
parts := make([]string, 0, len(keys))
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%d x %s", gpus[key], key))
}
return "GPU: " + strings.Join(parts, ", ")
}
func formatIPLine(list func() ([]platform.InterfaceInfo, error)) string {
if list == nil {
return ""
}
ifaces, err := list()
if err != nil {
return ""
}
seen := map[string]struct{}{}
var ips []string
for _, iface := range ifaces {
for _, ip := range iface.IPv4 {
ip = strings.TrimSpace(ip)
if ip == "" {
continue
}
if _, ok := seen[ip]; ok {
continue
}
seen[ip] = struct{}{}
ips = append(ips, ip)
}
}
if len(ips) == 0 {
return ""
}
sort.Strings(ips)
return "IP: " + strings.Join(ips, ", ")
}
func isGPUDevice(dev schema.HardwarePCIeDevice) bool {
class := strings.ToLower(trimPtr(dev.DeviceClass))
model := strings.ToLower(trimPtr(dev.Model))
vendor := strings.ToLower(trimPtr(dev.Manufacturer))
return strings.Contains(class, "vga") ||
strings.Contains(class, "3d") ||
strings.Contains(class, "display") ||
strings.Contains(model, "nvidia") ||
strings.Contains(vendor, "nvidia") ||
strings.Contains(vendor, "amd")
}
func trimPtr(value *string) string {
if value == nil {
return ""
}
return strings.TrimSpace(*value)
}
func joinSortedKeys(values map[string]struct{}) string {
if len(values) == 0 {
return ""
}
keys := make([]string, 0, len(values))
for key := range values {
keys = append(keys, key)
}
sort.Strings(keys)
return strings.Join(keys, "/")
}
func humanizeMB(totalMB int) string {
if totalMB <= 0 {
return ""
}
gb := float64(totalMB) / 1024.0
if gb >= 1024.0 {
tb := gb / 1024.0
return fmt.Sprintf("%.1f TB", tb)
}
if gb == float64(int64(gb)) {
return fmt.Sprintf("%.0f GB", gb)
}
return fmt.Sprintf("%.1f GB", gb)
}
func humanizeGB(totalGB int) string {
if totalGB <= 0 {
return ""
}
tb := float64(totalGB) / 1024.0
if tb >= 1.0 {
return fmt.Sprintf("%.1f TB", tb)
}
return fmt.Sprintf("%d GB", totalGB)
}
func parseKeyValueSummary(raw string) map[string]string {
out := map[string]string{}
for _, line := range strings.Split(raw, "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
key, value, ok := strings.Cut(line, "=")
if !ok {
continue
}
out[strings.TrimSpace(key)] = strings.TrimSpace(value)
}
return out
}
func firstNonEmpty(values ...string) string {
for _, value := range values {
value = strings.TrimSpace(value)
if value != "" {
return value
}
}
return ""
}

View File

@@ -1,10 +1,14 @@
package app
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"testing"
"bee/audit/internal/platform"
"bee/audit/internal/schema"
)
type fakeNetwork struct {
@@ -356,6 +360,124 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
}
}
func TestFormatSATSummary(t *testing.T) {
t.Parallel()
got := formatSATSummary("Memory SAT", "overall_status=PARTIAL\njob_ok=2\njob_failed=0\njob_unsupported=1\ndevices=3\n")
want := "Memory SAT: PARTIAL ok=2 failed=0 unsupported=1\nDevices: 3"
if got != want {
t.Fatalf("got %q want %q", got, want)
}
}
func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
t.Parallel()
tmp := t.TempDir()
oldAuditPath := DefaultAuditJSONPath
oldSATBaseDir := DefaultSATBaseDir
DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
DefaultSATBaseDir = filepath.Join(tmp, "sat")
t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
satDir := filepath.Join(DefaultSATBaseDir, "memory-testcase")
if err := os.MkdirAll(satDir, 0755); err != nil {
t.Fatalf("mkdir sat dir: %v", err)
}
raw := `{"hardware":{"summary":{"status":"WARNING","storage_warn":1,"storage_fail":0,"pcie_warn":0,"pcie_fail":0,"psu_warn":0,"psu_fail":0,"memory_warn":0,"memory_fail":0}}}`
if err := os.WriteFile(DefaultAuditJSONPath, []byte(raw), 0644); err != nil {
t.Fatalf("write audit json: %v", err)
}
if err := os.WriteFile(filepath.Join(satDir, "summary.txt"), []byte("overall_status=OK\njob_ok=3\njob_failed=0\njob_unsupported=0\n"), 0644); err != nil {
t.Fatalf("write sat summary: %v", err)
}
result := (&App{}).HealthSummaryResult()
if !contains(result.Body, "Memory SAT: OK ok=3 failed=0") {
t.Fatalf("body missing compact sat summary:\n%s", result.Body)
}
}
func TestMainBanner(t *testing.T) {
t.Parallel()
tmp := t.TempDir()
oldAuditPath := DefaultAuditJSONPath
DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
trueValue := true
manufacturer := "Dell"
product := "PowerEdge R760"
cpuModel := "Intel Xeon Gold 6430"
memoryType := "DDR5"
gpuClass := "VGA compatible controller"
gpuModel := "NVIDIA H100"
payload := schema.HardwareIngestRequest{
Hardware: schema.HardwareSnapshot{
Board: schema.HardwareBoard{
Manufacturer: &manufacturer,
ProductName: &product,
SerialNumber: "SRV123",
},
CPUs: []schema.HardwareCPU{
{Model: &cpuModel},
{Model: &cpuModel},
},
Memory: []schema.HardwareMemory{
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
},
Storage: []schema.HardwareStorage{
{Present: &trueValue, SizeGB: intPtr(3840)},
{Present: &trueValue, SizeGB: intPtr(3840)},
},
PCIeDevices: []schema.HardwarePCIeDevice{
{DeviceClass: &gpuClass, Model: &gpuModel},
{DeviceClass: &gpuClass, Model: &gpuModel},
},
},
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
if err := os.WriteFile(DefaultAuditJSONPath, raw, 0644); err != nil {
t.Fatalf("write audit json: %v", err)
}
a := &App{
network: fakeNetwork{
listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
return []platform.InterfaceInfo{
{Name: "eth0", IPv4: []string{"10.0.0.10"}},
{Name: "eth1", IPv4: []string{"192.168.1.10"}},
}, nil
},
},
}
got := a.MainBanner()
for _, want := range []string{
"System: Dell PowerEdge R760 | S/N SRV123",
"CPU: 2 x Intel Xeon Gold 6430",
"Memory: 1.0 TB DDR5 (2 DIMMs)",
"Storage: 2 drives / 7.5 TB",
"GPU: 2 x NVIDIA H100",
"IP: 10.0.0.10, 192.168.1.10",
} {
if !contains(got, want) {
t.Fatalf("banner missing %q:\n%s", want, got)
}
}
}
func intPtr(v int) *int { return &v }
func contains(haystack, needle string) bool {
return len(needle) == 0 || (len(haystack) >= len(needle) && (haystack == needle || containsAt(haystack, needle)))
}

View File

@@ -9,6 +9,7 @@ import (
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
)
@@ -18,9 +19,11 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
}
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
passes := envInt("BEE_MEMTESTER_PASSES", 1)
return runAcceptancePack(baseDir, "memory", []satJob{
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
{name: "02-memtester.log", cmd: []string{"memtester", "128M", "1"}},
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
})
}
@@ -42,9 +45,11 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
sort.Strings(devices)
var summary strings.Builder
stats := satStats{}
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
if len(devices) == 0 {
fmt.Fprintln(&summary, "devices=0")
stats.Unsupported++
} else {
fmt.Fprintf(&summary, "devices=%d\n", len(devices))
}
@@ -58,14 +63,15 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
return "", writeErr
}
rc := 0
if err != nil {
rc = 1
}
fmt.Fprintf(&summary, "%s_%s_rc=%d\n", filepath.Base(devPath), strings.ReplaceAll(job.name, "-", "_"), rc)
status, rc := classifySATResult(job.name, out, err)
stats.Add(status)
key := filepath.Base(devPath) + "_" + strings.ReplaceAll(job.name, "-", "_")
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
}
}
writeSATStats(&summary, stats)
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
return "", err
}
@@ -81,13 +87,21 @@ type satJob struct {
cmd []string
}
type satStats struct {
OK int
Failed int
Unsupported int
}
func nvidiaSATJobs() []satJob {
seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
return []satJob{
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", "{{run_dir}}/nvidia-bug-report.log"}},
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", "5", "--size-mb", "64"}},
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
}
}
@@ -102,6 +116,7 @@ func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
}
var summary strings.Builder
stats := satStats{}
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
for _, job := range jobs {
cmd := make([]string, 0, len(job.cmd))
@@ -112,12 +127,13 @@ func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
return "", writeErr
}
rc := 0
if err != nil {
rc = 1
}
fmt.Fprintf(&summary, "%s_rc=%d\n", strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log"), rc)
status, rc := classifySATResult(job.name, out, err)
stats.Add(status)
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
}
writeSATStats(&summary, stats)
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
return "", err
}
@@ -159,6 +175,69 @@ func storageSATCommands(devPath string) []satJob {
}
}
func (s *satStats) Add(status string) {
switch status {
case "OK":
s.OK++
case "UNSUPPORTED":
s.Unsupported++
default:
s.Failed++
}
}
func (s satStats) Overall() string {
if s.Failed > 0 {
return "FAILED"
}
if s.Unsupported > 0 {
return "PARTIAL"
}
return "OK"
}
func writeSATStats(summary *strings.Builder, stats satStats) {
fmt.Fprintf(summary, "overall_status=%s\n", stats.Overall())
fmt.Fprintf(summary, "job_ok=%d\n", stats.OK)
fmt.Fprintf(summary, "job_failed=%d\n", stats.Failed)
fmt.Fprintf(summary, "job_unsupported=%d\n", stats.Unsupported)
}
func classifySATResult(name string, out []byte, err error) (string, int) {
rc := 0
if err != nil {
rc = 1
}
if err == nil {
return "OK", rc
}
text := strings.ToLower(string(out))
if strings.Contains(text, "unsupported") ||
strings.Contains(text, "not supported") ||
strings.Contains(text, "invalid opcode") ||
strings.Contains(text, "unknown command") ||
strings.Contains(text, "not implemented") ||
strings.Contains(text, "not available") ||
strings.Contains(text, "no such device") ||
(strings.Contains(name, "self-test") && strings.Contains(text, "aborted")) {
return "UNSUPPORTED", rc
}
return "FAILED", rc
}
func envInt(name string, fallback int) int {
raw := strings.TrimSpace(os.Getenv(name))
if raw == "" {
return fallback
}
value, err := strconv.Atoi(raw)
if err != nil || value <= 0 {
return fallback
}
return value
}
func createTarGz(dst, srcDir string) error {
file, err := os.Create(dst)
if err != nil {

View File

@@ -1,6 +1,10 @@
package platform
import "testing"
import (
"errors"
"os"
"testing"
)
func TestStorageSATCommands(t *testing.T) {
t.Parallel()
@@ -28,3 +32,58 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
}
}
func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
jobs := nvidiaSATJobs()
got := jobs[4].cmd
want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
if len(got) != len(want) {
t.Fatalf("cmd len=%d want %d", len(got), len(want))
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("cmd[%d]=%q want %q", i, got[i], want[i])
}
}
}
func TestEnvIntFallback(t *testing.T) {
os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
t.Fatalf("got %d want 123", got)
}
t.Setenv("BEE_MEMTESTER_SIZE_MB", "bad")
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
t.Fatalf("got %d want 123", got)
}
t.Setenv("BEE_MEMTESTER_SIZE_MB", "256")
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 256 {
t.Fatalf("got %d want 256", got)
}
}
func TestClassifySATResult(t *testing.T) {
tests := []struct {
name string
job string
out string
err error
status string
}{
{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, _ := classifySATResult(tt.job, []byte(tt.out), tt.err)
if got != tt.status {
t.Fatalf("status=%q want %q", got, tt.status)
}
})
}
}

View File

@@ -23,3 +23,7 @@ type exportTargetsMsg struct {
targets []platform.RemovableTarget
err error
}
type bannerMsg struct {
text string
}

View File

@@ -179,6 +179,24 @@ func TestMainMenuAsyncActionsSetBusy(t *testing.T) {
}
}
func TestMainViewIncludesBanner(t *testing.T) {
t.Parallel()
m := newTestModel()
m.banner = "System: Test Server | S/N ABC123\nIP: 10.0.0.10"
view := m.View()
if !strings.Contains(view, "System: Test Server | S/N ABC123") {
t.Fatalf("view missing system banner:\n%s", view)
}
if !strings.Contains(view, "IP: 10.0.0.10") {
t.Fatalf("view missing ip banner:\n%s", view)
}
if !strings.Contains(view, "Select action") {
t.Fatalf("view missing menu subtitle:\n%s", view)
}
}
func TestEscapeNavigation(t *testing.T) {
t.Parallel()

View File

@@ -4,6 +4,7 @@ import (
"bee/audit/internal/app"
"bee/audit/internal/platform"
"bee/audit/internal/runtimeenv"
"strings"
tea "github.com/charmbracelet/bubbletea"
)
@@ -46,6 +47,7 @@ type model struct {
busyTitle string
title string
body string
banner string
mainMenu []string
networkMenu []string
serviceMenu []string
@@ -111,5 +113,7 @@ func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
}
func (m model) Init() tea.Cmd {
return nil
return func() tea.Msg {
return bannerMsg{text: strings.TrimSpace(m.app.MainBanner())}
}
}

View File

@@ -84,6 +84,9 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.screen = screenExportTargets
m.cursor = 0
return m, nil
case bannerMsg:
m.banner = strings.TrimSpace(msg.text)
return m, nil
}
return m, nil

View File

@@ -19,7 +19,7 @@ func (m model) View() string {
}
switch m.screen {
case screenMain:
return renderMenu("bee", "Select action", m.mainMenu, m.cursor)
return renderMainMenu("bee", m.banner, "Select action", m.mainMenu, m.cursor)
case screenNetwork:
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
case screenServices:
@@ -109,6 +109,30 @@ func renderMenu(title, subtitle string, items []string, cursor int) string {
return body.String()
}
func renderMainMenu(title, banner, subtitle string, items []string, cursor int) string {
var body strings.Builder
fmt.Fprintf(&body, "%s\n\n", title)
if banner != "" {
body.WriteString(strings.TrimSpace(banner))
body.WriteString("\n\n")
}
body.WriteString(subtitle)
body.WriteString("\n\n")
if len(items) == 0 {
body.WriteString("(no items)\n")
} else {
for i, item := range items {
prefix := " "
if i == cursor {
prefix = "> "
}
fmt.Fprintf(&body, "%s%s\n", prefix, item)
}
}
body.WriteString("\n[↑/↓] move [enter] select [esc] back [ctrl+c] quit\n")
return body.String()
}
func renderForm(title string, fields []formField, idx int) string {
var body strings.Builder
fmt.Fprintf(&body, "%s\n\n", title)

View File

@@ -132,3 +132,9 @@ Acceptance flows:
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-stress`
- `bee sat memory``memtester` archive
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
- Runtime overrides:
- `BEE_GPU_STRESS_SECONDS`
- `BEE_GPU_STRESS_SIZE_MB`
- `BEE_MEMTESTER_SIZE_MB`
- `BEE_MEMTESTER_PASSES`

View File

@@ -1,22 +1,20 @@
# Backlog
## GPU stress test (H100)
## Real hardware validation
**Статус:** отложено. В текущем ISO `gpu_burn` не включается и не запускается.
**Статус:** ожидает доступа к железу.
**Почему задача всё ещё в backlog:**
- `gpu_burn` остаётся тяжёлым и неудобным с точки зрения зависимостей
- хочется штатный lightweight stress tool без `libcublas.so` и без заметного раздувания ISO
- для H100 нужен предсказуемый offline-инструмент, который можно стабильно возить внутри ISO
Что осталось подтвердить на практике:
- `bee sat nvidia` на реальном NVIDIA GPU host
- `bee sat storage` на NVMe/SATA/RAID host
- `ipmitool sdr` parsing на сервере с реальным BMC/IPMI
- vendor RAID tooling (`storcli64`, `sas2ircu`, `sas3ircu`, `arcconf`, `ssacli`) в живом ISO
**Желаемый следующий шаг:** написать минимальный stress tool на CUDA Driver API
- использует только `libcuda.so`, уже присутствующий в ISO
- выполняет простой compute / memory workload через `cuLaunchKernel`
- собирается отдельно на builder VM и кладётся в `iso/vendor/`
- в будущем может вызываться из `bee tui` как предпочтительный встроенный GPU SAT/stress path
## SAT result polish
**Отклонённые / проблемные варианты:**
- `gpu_burn` — нужен libcublas (~500MB)
- `nvbandwidth` — только bandwidth, не жжёт FLOPs; нужен libcudart (~8MB)
- DCGM diag — правильный инструмент для H100 но ~100MB установка
- Download on demand — нужен libcublas, проблема та же
**Статус:** частично закрыто.
Что ещё можно улучшить после полевой проверки:
- точнее классифицировать vendor-specific self-test outputs в `storage SAT`
- подобрать дефолты `memtester` по объёму RAM на целевых машинах
- при необходимости расширить `bee-gpu-stress` по длительности/нагрузке