Add TUI hardware banner and polish SAT summaries
This commit is contained in:
2
PLAN.md
2
PLAN.md
@@ -347,6 +347,8 @@ Planned code shape:
|
|||||||
- `bee tui` can export the latest audit JSON to removable media
|
- `bee tui` can export the latest audit JSON to removable media
|
||||||
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
|
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
|
||||||
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
|
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
|
||||||
|
- SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
|
||||||
|
- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
|
||||||
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
||||||
|
|
||||||
### 2.6 — Vendor utilities and optional assets
|
### 2.6 — Vendor utilities and optional assets
|
||||||
|
|||||||
@@ -13,11 +13,13 @@ import (
|
|||||||
"bee/audit/internal/collector"
|
"bee/audit/internal/collector"
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
"bee/audit/internal/runtimeenv"
|
"bee/audit/internal/runtimeenv"
|
||||||
|
"bee/audit/internal/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
var (
|
||||||
DefaultAuditJSONPath = "/var/log/bee-audit.json"
|
DefaultAuditJSONPath = "/var/log/bee-audit.json"
|
||||||
DefaultAuditLogPath = "/var/log/bee-audit.log"
|
DefaultAuditLogPath = "/var/log/bee-audit.log"
|
||||||
|
DefaultSATBaseDir = "/var/log/bee-sat"
|
||||||
)
|
)
|
||||||
|
|
||||||
type App struct {
|
type App struct {
|
||||||
@@ -354,7 +356,7 @@ func (a *App) HealthSummaryResult() ActionResult {
|
|||||||
fmt.Fprintf(&body, "PSU: warn=%d fail=%d\n", summary.PSUWarn, summary.PSUFail)
|
fmt.Fprintf(&body, "PSU: warn=%d fail=%d\n", summary.PSUWarn, summary.PSUFail)
|
||||||
fmt.Fprintf(&body, "Memory: warn=%d fail=%d\n", summary.MemoryWarn, summary.MemoryFail)
|
fmt.Fprintf(&body, "Memory: warn=%d fail=%d\n", summary.MemoryWarn, summary.MemoryFail)
|
||||||
for _, item := range latestSATSummaries() {
|
for _, item := range latestSATSummaries() {
|
||||||
fmt.Fprintf(&body, "\n%s", item)
|
fmt.Fprintf(&body, "\n\n%s", item)
|
||||||
}
|
}
|
||||||
if len(summary.Failures) > 0 {
|
if len(summary.Failures) > 0 {
|
||||||
fmt.Fprintf(&body, "\n\nFailures:\n- %s", strings.Join(summary.Failures, "\n- "))
|
fmt.Fprintf(&body, "\n\nFailures:\n- %s", strings.Join(summary.Failures, "\n- "))
|
||||||
@@ -365,6 +367,40 @@ func (a *App) HealthSummaryResult() ActionResult {
|
|||||||
return ActionResult{Title: "Health summary", Body: strings.TrimSpace(body.String())}
|
return ActionResult{Title: "Health summary", Body: strings.TrimSpace(body.String())}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) MainBanner() string {
|
||||||
|
raw, err := os.ReadFile(DefaultAuditJSONPath)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var snapshot schema.HardwareIngestRequest
|
||||||
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
var lines []string
|
||||||
|
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
||||||
|
lines = append(lines, system)
|
||||||
|
}
|
||||||
|
if cpu := formatCPULine(snapshot.Hardware.CPUs); cpu != "" {
|
||||||
|
lines = append(lines, cpu)
|
||||||
|
}
|
||||||
|
if memory := formatMemoryLine(snapshot.Hardware.Memory); memory != "" {
|
||||||
|
lines = append(lines, memory)
|
||||||
|
}
|
||||||
|
if storage := formatStorageLine(snapshot.Hardware.Storage); storage != "" {
|
||||||
|
lines = append(lines, storage)
|
||||||
|
}
|
||||||
|
if gpu := formatGPULine(snapshot.Hardware.PCIeDevices); gpu != "" {
|
||||||
|
lines = append(lines, gpu)
|
||||||
|
}
|
||||||
|
if ip := formatIPLine(a.network.ListInterfaces); ip != "" {
|
||||||
|
lines = append(lines, ip)
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimSpace(strings.Join(lines, "\n"))
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
|
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
for _, tool := range statuses {
|
for _, tool := range statuses {
|
||||||
@@ -418,7 +454,6 @@ func bodyOr(body, fallback string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func latestSATSummaries() []string {
|
func latestSATSummaries() []string {
|
||||||
baseDir := "/var/log/bee-sat"
|
|
||||||
patterns := []struct {
|
patterns := []struct {
|
||||||
label string
|
label string
|
||||||
prefix string
|
prefix string
|
||||||
@@ -429,7 +464,7 @@ func latestSATSummaries() []string {
|
|||||||
}
|
}
|
||||||
var out []string
|
var out []string
|
||||||
for _, item := range patterns {
|
for _, item := range patterns {
|
||||||
matches, err := filepath.Glob(filepath.Join(baseDir, item.prefix+"*/summary.txt"))
|
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
|
||||||
if err != nil || len(matches) == 0 {
|
if err != nil || len(matches) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -438,7 +473,273 @@ func latestSATSummaries() []string {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
out = append(out, item.label+":\n"+strings.TrimSpace(string(raw)))
|
out = append(out, formatSATSummary(item.label, string(raw)))
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func formatSATSummary(label, raw string) string {
|
||||||
|
values := parseKeyValueSummary(raw)
|
||||||
|
var body strings.Builder
|
||||||
|
fmt.Fprintf(&body, "%s:", label)
|
||||||
|
if overall := firstNonEmpty(values["overall_status"], "UNKNOWN"); overall != "" {
|
||||||
|
fmt.Fprintf(&body, " %s", overall)
|
||||||
|
}
|
||||||
|
if ok := firstNonEmpty(values["job_ok"], "0"); ok != "" {
|
||||||
|
fmt.Fprintf(&body, " ok=%s", ok)
|
||||||
|
}
|
||||||
|
if failed := firstNonEmpty(values["job_failed"], "0"); failed != "" {
|
||||||
|
fmt.Fprintf(&body, " failed=%s", failed)
|
||||||
|
}
|
||||||
|
if unsupported := firstNonEmpty(values["job_unsupported"], "0"); unsupported != "" && unsupported != "0" {
|
||||||
|
fmt.Fprintf(&body, " unsupported=%s", unsupported)
|
||||||
|
}
|
||||||
|
if devices := strings.TrimSpace(values["devices"]); devices != "" {
|
||||||
|
fmt.Fprintf(&body, "\nDevices: %s", devices)
|
||||||
|
}
|
||||||
|
return body.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatSystemLine(board schema.HardwareBoard) string {
|
||||||
|
model := strings.TrimSpace(strings.Join([]string{
|
||||||
|
trimPtr(board.Manufacturer),
|
||||||
|
trimPtr(board.ProductName),
|
||||||
|
}, " "))
|
||||||
|
serial := strings.TrimSpace(board.SerialNumber)
|
||||||
|
switch {
|
||||||
|
case model != "" && serial != "":
|
||||||
|
return fmt.Sprintf("System: %s | S/N %s", model, serial)
|
||||||
|
case model != "":
|
||||||
|
return "System: " + model
|
||||||
|
case serial != "":
|
||||||
|
return "System S/N: " + serial
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatCPULine(cpus []schema.HardwareCPU) string {
|
||||||
|
if len(cpus) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
modelCounts := map[string]int{}
|
||||||
|
unknown := 0
|
||||||
|
for _, cpu := range cpus {
|
||||||
|
model := trimPtr(cpu.Model)
|
||||||
|
if model == "" {
|
||||||
|
unknown++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
modelCounts[model]++
|
||||||
|
}
|
||||||
|
if len(modelCounts) == 1 && unknown == 0 {
|
||||||
|
for model, count := range modelCounts {
|
||||||
|
return fmt.Sprintf("CPU: %d x %s", count, model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parts := make([]string, 0, len(modelCounts)+1)
|
||||||
|
if len(modelCounts) > 0 {
|
||||||
|
keys := make([]string, 0, len(modelCounts))
|
||||||
|
for key := range modelCounts {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
for _, key := range keys {
|
||||||
|
parts = append(parts, fmt.Sprintf("%d x %s", modelCounts[key], key))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if unknown > 0 {
|
||||||
|
parts = append(parts, fmt.Sprintf("%d x unknown", unknown))
|
||||||
|
}
|
||||||
|
return "CPU: " + strings.Join(parts, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatMemoryLine(dimms []schema.HardwareMemory) string {
|
||||||
|
totalMB := 0
|
||||||
|
present := 0
|
||||||
|
types := map[string]struct{}{}
|
||||||
|
for _, dimm := range dimms {
|
||||||
|
if dimm.Present != nil && !*dimm.Present {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if dimm.SizeMB == nil || *dimm.SizeMB <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
present++
|
||||||
|
totalMB += *dimm.SizeMB
|
||||||
|
if value := trimPtr(dimm.Type); value != "" {
|
||||||
|
types[value] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if totalMB == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
typeText := joinSortedKeys(types)
|
||||||
|
line := fmt.Sprintf("Memory: %s", humanizeMB(totalMB))
|
||||||
|
if typeText != "" {
|
||||||
|
line += " " + typeText
|
||||||
|
}
|
||||||
|
if present > 0 {
|
||||||
|
line += fmt.Sprintf(" (%d DIMMs)", present)
|
||||||
|
}
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatStorageLine(disks []schema.HardwareStorage) string {
|
||||||
|
count := 0
|
||||||
|
totalGB := 0
|
||||||
|
for _, disk := range disks {
|
||||||
|
if disk.Present != nil && !*disk.Present {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
count++
|
||||||
|
if disk.SizeGB != nil && *disk.SizeGB > 0 {
|
||||||
|
totalGB += *disk.SizeGB
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if count == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
line := fmt.Sprintf("Storage: %d drives", count)
|
||||||
|
if totalGB > 0 {
|
||||||
|
line += fmt.Sprintf(" / %s", humanizeGB(totalGB))
|
||||||
|
}
|
||||||
|
return line
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatGPULine(devices []schema.HardwarePCIeDevice) string {
|
||||||
|
gpus := map[string]int{}
|
||||||
|
for _, dev := range devices {
|
||||||
|
if !isGPUDevice(dev) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := firstNonEmpty(trimPtr(dev.Model), trimPtr(dev.Manufacturer), "unknown")
|
||||||
|
gpus[name]++
|
||||||
|
}
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(gpus))
|
||||||
|
for key := range gpus {
|
||||||
|
keys = append(keys, key)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
parts := make([]string, 0, len(keys))
|
||||||
|
for _, key := range keys {
|
||||||
|
parts = append(parts, fmt.Sprintf("%d x %s", gpus[key], key))
|
||||||
|
}
|
||||||
|
return "GPU: " + strings.Join(parts, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatIPLine(list func() ([]platform.InterfaceInfo, error)) string {
|
||||||
|
if list == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
ifaces, err := list()
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
var ips []string
|
||||||
|
for _, iface := range ifaces {
|
||||||
|
for _, ip := range iface.IPv4 {
|
||||||
|
ip = strings.TrimSpace(ip)
|
||||||
|
if ip == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, ok := seen[ip]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[ip] = struct{}{}
|
||||||
|
ips = append(ips, ip)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(ips) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
sort.Strings(ips)
|
||||||
|
return "IP: " + strings.Join(ips, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
func isGPUDevice(dev schema.HardwarePCIeDevice) bool {
|
||||||
|
class := strings.ToLower(trimPtr(dev.DeviceClass))
|
||||||
|
model := strings.ToLower(trimPtr(dev.Model))
|
||||||
|
vendor := strings.ToLower(trimPtr(dev.Manufacturer))
|
||||||
|
return strings.Contains(class, "vga") ||
|
||||||
|
strings.Contains(class, "3d") ||
|
||||||
|
strings.Contains(class, "display") ||
|
||||||
|
strings.Contains(model, "nvidia") ||
|
||||||
|
strings.Contains(vendor, "nvidia") ||
|
||||||
|
strings.Contains(vendor, "amd")
|
||||||
|
}
|
||||||
|
|
||||||
|
// trimPtr dereferences value and strips surrounding whitespace; a nil pointer
// yields "".
func trimPtr(value *string) string {
	var text string
	if value != nil {
		text = *value
	}
	return strings.TrimSpace(text)
}
|
||||||
|
|
||||||
|
// joinSortedKeys returns the set's keys in sorted order joined by "/", or ""
// for an empty (or nil) set.
func joinSortedKeys(values map[string]struct{}) string {
	sorted := make([]string, 0, len(values))
	for name := range values {
		sorted = append(sorted, name)
	}
	sort.Strings(sorted)
	// Joining zero elements yields "", which covers the empty-set case.
	return strings.Join(sorted, "/")
}
|
||||||
|
|
||||||
|
// humanizeMB formats a size given in MB using 1024-based units: "<x.y> TB"
// from 1024 GB upward, otherwise "<n> GB" for whole gigabytes and "<x.y> GB"
// for fractional ones. Non-positive input yields "".
func humanizeMB(totalMB int) string {
	if totalMB <= 0 {
		return ""
	}

	gb := float64(totalMB) / 1024.0
	switch {
	case gb >= 1024.0:
		return fmt.Sprintf("%.1f TB", gb/1024.0)
	case totalMB%1024 == 0:
		// Exact multiple of 1024 MB: print without a fractional part.
		return fmt.Sprintf("%d GB", totalMB/1024)
	default:
		return fmt.Sprintf("%.1f GB", gb)
	}
}
|
||||||
|
|
||||||
|
// humanizeGB formats a size given in GB: "<n> GB" below 1024 GB and
// "<x.y> TB" (1024-based) from there up. Non-positive input yields "".
func humanizeGB(totalGB int) string {
	switch {
	case totalGB <= 0:
		return ""
	case totalGB < 1024:
		return fmt.Sprintf("%d GB", totalGB)
	default:
		return fmt.Sprintf("%.1f TB", float64(totalGB)/1024.0)
	}
}
|
||||||
|
|
||||||
|
// parseKeyValueSummary parses newline-separated "key=value" text into a map.
// Keys and values are whitespace-trimmed, lines without "=" are skipped, and
// a later occurrence of a key overwrites an earlier one.
func parseKeyValueSummary(raw string) map[string]string {
	parsed := make(map[string]string)
	for rest := raw; rest != ""; {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")

		// Blank lines contain no "=" and fall through the same skip.
		key, value, found := strings.Cut(strings.TrimSpace(line), "=")
		if !found {
			continue
		}
		parsed[strings.TrimSpace(key)] = strings.TrimSpace(value)
	}
	return parsed
}
|
||||||
|
|
||||||
|
// firstNonEmpty returns the first argument that is non-blank after trimming
// whitespace (in its trimmed form), or "" when every argument is blank.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
|
||||||
|
|||||||
@@ -1,10 +1,14 @@
|
|||||||
package app
|
package app
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
|
"bee/audit/internal/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
type fakeNetwork struct {
|
type fakeNetwork struct {
|
||||||
@@ -76,8 +80,8 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type fakeSAT struct {
|
type fakeSAT struct {
|
||||||
runNvidiaFn func(string) (string, error)
|
runNvidiaFn func(string) (string, error)
|
||||||
runMemoryFn func(string) (string, error)
|
runMemoryFn func(string) (string, error)
|
||||||
runStorageFn func(string) (string, error)
|
runStorageFn func(string) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -293,8 +297,8 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
|
|||||||
checkToolsFn: func([]string) []platform.ToolStatus { return nil },
|
checkToolsFn: func([]string) []platform.ToolStatus { return nil },
|
||||||
},
|
},
|
||||||
sat: fakeSAT{
|
sat: fakeSAT{
|
||||||
runNvidiaFn: func(string) (string, error) { return "", nil },
|
runNvidiaFn: func(string) (string, error) { return "", nil },
|
||||||
runMemoryFn: func(string) (string, error) { return "", nil },
|
runMemoryFn: func(string) (string, error) { return "", nil },
|
||||||
runStorageFn: func(string) (string, error) { return "", nil },
|
runStorageFn: func(string) (string, error) { return "", nil },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -342,7 +346,7 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return "/tmp/sat/out.tar.gz", nil
|
return "/tmp/sat/out.tar.gz", nil
|
||||||
},
|
},
|
||||||
runMemoryFn: func(string) (string, error) { return "", nil },
|
runMemoryFn: func(string) (string, error) { return "", nil },
|
||||||
runStorageFn: func(string) (string, error) { return "", nil },
|
runStorageFn: func(string) (string, error) { return "", nil },
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -356,6 +360,124 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFormatSATSummary(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
got := formatSATSummary("Memory SAT", "overall_status=PARTIAL\njob_ok=2\njob_failed=0\njob_unsupported=1\ndevices=3\n")
|
||||||
|
want := "Memory SAT: PARTIAL ok=2 failed=0 unsupported=1\nDevices: 3"
|
||||||
|
if got != want {
|
||||||
|
t.Fatalf("got %q want %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tmp := t.TempDir()
|
||||||
|
oldAuditPath := DefaultAuditJSONPath
|
||||||
|
oldSATBaseDir := DefaultSATBaseDir
|
||||||
|
DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
|
||||||
|
DefaultSATBaseDir = filepath.Join(tmp, "sat")
|
||||||
|
t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
|
||||||
|
t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
|
||||||
|
|
||||||
|
satDir := filepath.Join(DefaultSATBaseDir, "memory-testcase")
|
||||||
|
if err := os.MkdirAll(satDir, 0755); err != nil {
|
||||||
|
t.Fatalf("mkdir sat dir: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
raw := `{"hardware":{"summary":{"status":"WARNING","storage_warn":1,"storage_fail":0,"pcie_warn":0,"pcie_fail":0,"psu_warn":0,"psu_fail":0,"memory_warn":0,"memory_fail":0}}}`
|
||||||
|
if err := os.WriteFile(DefaultAuditJSONPath, []byte(raw), 0644); err != nil {
|
||||||
|
t.Fatalf("write audit json: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(satDir, "summary.txt"), []byte("overall_status=OK\njob_ok=3\njob_failed=0\njob_unsupported=0\n"), 0644); err != nil {
|
||||||
|
t.Fatalf("write sat summary: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
result := (&App{}).HealthSummaryResult()
|
||||||
|
if !contains(result.Body, "Memory SAT: OK ok=3 failed=0") {
|
||||||
|
t.Fatalf("body missing compact sat summary:\n%s", result.Body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMainBanner(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tmp := t.TempDir()
|
||||||
|
oldAuditPath := DefaultAuditJSONPath
|
||||||
|
DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
|
||||||
|
t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
|
||||||
|
|
||||||
|
trueValue := true
|
||||||
|
manufacturer := "Dell"
|
||||||
|
product := "PowerEdge R760"
|
||||||
|
cpuModel := "Intel Xeon Gold 6430"
|
||||||
|
memoryType := "DDR5"
|
||||||
|
gpuClass := "VGA compatible controller"
|
||||||
|
gpuModel := "NVIDIA H100"
|
||||||
|
|
||||||
|
payload := schema.HardwareIngestRequest{
|
||||||
|
Hardware: schema.HardwareSnapshot{
|
||||||
|
Board: schema.HardwareBoard{
|
||||||
|
Manufacturer: &manufacturer,
|
||||||
|
ProductName: &product,
|
||||||
|
SerialNumber: "SRV123",
|
||||||
|
},
|
||||||
|
CPUs: []schema.HardwareCPU{
|
||||||
|
{Model: &cpuModel},
|
||||||
|
{Model: &cpuModel},
|
||||||
|
},
|
||||||
|
Memory: []schema.HardwareMemory{
|
||||||
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
||||||
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
||||||
|
},
|
||||||
|
Storage: []schema.HardwareStorage{
|
||||||
|
{Present: &trueValue, SizeGB: intPtr(3840)},
|
||||||
|
{Present: &trueValue, SizeGB: intPtr(3840)},
|
||||||
|
},
|
||||||
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
||||||
|
{DeviceClass: &gpuClass, Model: &gpuModel},
|
||||||
|
{DeviceClass: &gpuClass, Model: &gpuModel},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
raw, err := json.Marshal(payload)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(DefaultAuditJSONPath, raw, 0644); err != nil {
|
||||||
|
t.Fatalf("write audit json: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
a := &App{
|
||||||
|
network: fakeNetwork{
|
||||||
|
listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
|
||||||
|
return []platform.InterfaceInfo{
|
||||||
|
{Name: "eth0", IPv4: []string{"10.0.0.10"}},
|
||||||
|
{Name: "eth1", IPv4: []string{"192.168.1.10"}},
|
||||||
|
}, nil
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := a.MainBanner()
|
||||||
|
for _, want := range []string{
|
||||||
|
"System: Dell PowerEdge R760 | S/N SRV123",
|
||||||
|
"CPU: 2 x Intel Xeon Gold 6430",
|
||||||
|
"Memory: 1.0 TB DDR5 (2 DIMMs)",
|
||||||
|
"Storage: 2 drives / 7.5 TB",
|
||||||
|
"GPU: 2 x NVIDIA H100",
|
||||||
|
"IP: 10.0.0.10, 192.168.1.10",
|
||||||
|
} {
|
||||||
|
if !contains(got, want) {
|
||||||
|
t.Fatalf("banner missing %q:\n%s", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// intPtr returns a pointer to a fresh copy of v, for filling optional *int
// fields in test fixtures.
func intPtr(v int) *int {
	p := new(int)
	*p = v
	return p
}
|
||||||
|
|
||||||
func contains(haystack, needle string) bool {
|
func contains(haystack, needle string) bool {
|
||||||
return len(needle) == 0 || (len(haystack) >= len(needle) && (haystack == needle || containsAt(haystack, needle)))
|
return len(needle) == 0 || (len(haystack) >= len(needle) && (haystack == needle || containsAt(haystack, needle)))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -18,9 +19,11 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
||||||
|
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
||||||
|
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
||||||
return runAcceptancePack(baseDir, "memory", []satJob{
|
return runAcceptancePack(baseDir, "memory", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-memtester.log", cmd: []string{"memtester", "128M", "1"}},
|
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
|
||||||
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -42,9 +45,11 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
|
|||||||
sort.Strings(devices)
|
sort.Strings(devices)
|
||||||
|
|
||||||
var summary strings.Builder
|
var summary strings.Builder
|
||||||
|
stats := satStats{}
|
||||||
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
fmt.Fprintln(&summary, "devices=0")
|
fmt.Fprintln(&summary, "devices=0")
|
||||||
|
stats.Unsupported++
|
||||||
} else {
|
} else {
|
||||||
fmt.Fprintf(&summary, "devices=%d\n", len(devices))
|
fmt.Fprintf(&summary, "devices=%d\n", len(devices))
|
||||||
}
|
}
|
||||||
@@ -58,14 +63,15 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
|
|||||||
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
rc := 0
|
status, rc := classifySATResult(job.name, out, err)
|
||||||
if err != nil {
|
stats.Add(status)
|
||||||
rc = 1
|
key := filepath.Base(devPath) + "_" + strings.ReplaceAll(job.name, "-", "_")
|
||||||
}
|
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
|
||||||
fmt.Fprintf(&summary, "%s_%s_rc=%d\n", filepath.Base(devPath), strings.ReplaceAll(job.name, "-", "_"), rc)
|
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
writeSATStats(&summary, stats)
|
||||||
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -81,13 +87,21 @@ type satJob struct {
|
|||||||
cmd []string
|
cmd []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// satStats tallies how many SAT jobs ended in each outcome during one
// acceptance run; the totals are written to the run's summary.
type satStats struct {
	OK          int // jobs with status "OK"
	Failed      int // jobs with any status other than "OK" or "UNSUPPORTED"
	Unsupported int // jobs with status "UNSUPPORTED"
}
|
||||||
|
|
||||||
func nvidiaSATJobs() []satJob {
|
func nvidiaSATJobs() []satJob {
|
||||||
|
seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
|
||||||
|
sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
|
||||||
return []satJob{
|
return []satJob{
|
||||||
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
||||||
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", "{{run_dir}}/nvidia-bug-report.log"}},
|
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output", "{{run_dir}}/nvidia-bug-report.log"}},
|
||||||
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", "5", "--size-mb", "64"}},
|
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -102,6 +116,7 @@ func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var summary strings.Builder
|
var summary strings.Builder
|
||||||
|
stats := satStats{}
|
||||||
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
||||||
for _, job := range jobs {
|
for _, job := range jobs {
|
||||||
cmd := make([]string, 0, len(job.cmd))
|
cmd := make([]string, 0, len(job.cmd))
|
||||||
@@ -112,12 +127,13 @@ func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
|
|||||||
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
rc := 0
|
status, rc := classifySATResult(job.name, out, err)
|
||||||
if err != nil {
|
stats.Add(status)
|
||||||
rc = 1
|
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
|
||||||
}
|
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
|
||||||
fmt.Fprintf(&summary, "%s_rc=%d\n", strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log"), rc)
|
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
|
||||||
}
|
}
|
||||||
|
writeSATStats(&summary, stats)
|
||||||
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -159,6 +175,69 @@ func storageSATCommands(devPath string) []satJob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *satStats) Add(status string) {
|
||||||
|
switch status {
|
||||||
|
case "OK":
|
||||||
|
s.OK++
|
||||||
|
case "UNSUPPORTED":
|
||||||
|
s.Unsupported++
|
||||||
|
default:
|
||||||
|
s.Failed++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s satStats) Overall() string {
|
||||||
|
if s.Failed > 0 {
|
||||||
|
return "FAILED"
|
||||||
|
}
|
||||||
|
if s.Unsupported > 0 {
|
||||||
|
return "PARTIAL"
|
||||||
|
}
|
||||||
|
return "OK"
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSATStats(summary *strings.Builder, stats satStats) {
|
||||||
|
fmt.Fprintf(summary, "overall_status=%s\n", stats.Overall())
|
||||||
|
fmt.Fprintf(summary, "job_ok=%d\n", stats.OK)
|
||||||
|
fmt.Fprintf(summary, "job_failed=%d\n", stats.Failed)
|
||||||
|
fmt.Fprintf(summary, "job_unsupported=%d\n", stats.Unsupported)
|
||||||
|
}
|
||||||
|
|
||||||
|
// classifySATResult maps one SAT job's outcome to a status string and a
// summary return code.
//
// A nil err yields ("OK", 0). Any error yields return code 1 and either
// "UNSUPPORTED" — when the lower-cased output contains a phrase indicating
// the feature is unavailable, or a job whose name contains "self-test"
// reports "aborted" — or "FAILED" otherwise.
func classifySATResult(name string, out []byte, err error) (string, int) {
	if err == nil {
		return "OK", 0
	}

	// Every error maps to return code 1 in the summary, whatever its cause.
	const rc = 1

	text := strings.ToLower(string(out))
	unsupportedMarkers := []string{
		"unsupported",
		"not supported",
		"invalid opcode",
		"unknown command",
		"not implemented",
		"not available",
		"no such device",
	}
	for _, marker := range unsupportedMarkers {
		if strings.Contains(text, marker) {
			return "UNSUPPORTED", rc
		}
	}
	// An aborted self-test job is classified as unsupported, not failed.
	if strings.Contains(name, "self-test") && strings.Contains(text, "aborted") {
		return "UNSUPPORTED", rc
	}
	return "FAILED", rc
}
|
||||||
|
|
||||||
|
// envInt reads the environment variable name as a positive integer. It
// returns fallback when the variable is unset, blank, not a valid integer,
// or not greater than zero.
func envInt(name string, fallback int) int {
	// Atoi rejects the empty string, so unset and blank values land on the
	// fallback path together with malformed ones.
	parsed, err := strconv.Atoi(strings.TrimSpace(os.Getenv(name)))
	if err == nil && parsed > 0 {
		return parsed
	}
	return fallback
}
|
||||||
|
|
||||||
func createTarGz(dst, srcDir string) error {
|
func createTarGz(dst, srcDir string) error {
|
||||||
file, err := os.Create(dst)
|
file, err := os.Create(dst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
func TestStorageSATCommands(t *testing.T) {
|
func TestStorageSATCommands(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
@@ -28,3 +32,58 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
|
|||||||
t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
|
t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
|
||||||
|
t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
|
||||||
|
t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
|
||||||
|
|
||||||
|
jobs := nvidiaSATJobs()
|
||||||
|
got := jobs[4].cmd
|
||||||
|
want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
|
||||||
|
if len(got) != len(want) {
|
||||||
|
t.Fatalf("cmd len=%d want %d", len(got), len(want))
|
||||||
|
}
|
||||||
|
for i := range want {
|
||||||
|
if got[i] != want[i] {
|
||||||
|
t.Fatalf("cmd[%d]=%q want %q", i, got[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEnvIntFallback(t *testing.T) {
|
||||||
|
os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
|
||||||
|
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
|
||||||
|
t.Fatalf("got %d want 123", got)
|
||||||
|
}
|
||||||
|
t.Setenv("BEE_MEMTESTER_SIZE_MB", "bad")
|
||||||
|
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
|
||||||
|
t.Fatalf("got %d want 123", got)
|
||||||
|
}
|
||||||
|
t.Setenv("BEE_MEMTESTER_SIZE_MB", "256")
|
||||||
|
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 256 {
|
||||||
|
t.Fatalf("got %d want 256", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClassifySATResult(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
job string
|
||||||
|
out string
|
||||||
|
err error
|
||||||
|
status string
|
||||||
|
}{
|
||||||
|
{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
|
||||||
|
{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
|
||||||
|
{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got, _ := classifySATResult(tt.job, []byte(tt.out), tt.err)
|
||||||
|
if got != tt.status {
|
||||||
|
t.Fatalf("status=%q want %q", got, tt.status)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -23,3 +23,7 @@ type exportTargetsMsg struct {
|
|||||||
targets []platform.RemovableTarget
|
targets []platform.RemovableTarget
|
||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bannerMsg is the message that carries the banner text for the main screen.
type bannerMsg struct{ text string }
|
||||||
|
|||||||
@@ -179,6 +179,24 @@ func TestMainMenuAsyncActionsSetBusy(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMainViewIncludesBanner(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
m := newTestModel()
|
||||||
|
m.banner = "System: Test Server | S/N ABC123\nIP: 10.0.0.10"
|
||||||
|
|
||||||
|
view := m.View()
|
||||||
|
if !strings.Contains(view, "System: Test Server | S/N ABC123") {
|
||||||
|
t.Fatalf("view missing system banner:\n%s", view)
|
||||||
|
}
|
||||||
|
if !strings.Contains(view, "IP: 10.0.0.10") {
|
||||||
|
t.Fatalf("view missing ip banner:\n%s", view)
|
||||||
|
}
|
||||||
|
if !strings.Contains(view, "Select action") {
|
||||||
|
t.Fatalf("view missing menu subtitle:\n%s", view)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestEscapeNavigation(t *testing.T) {
|
func TestEscapeNavigation(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
"bee/audit/internal/runtimeenv"
|
"bee/audit/internal/runtimeenv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
tea "github.com/charmbracelet/bubbletea"
|
tea "github.com/charmbracelet/bubbletea"
|
||||||
)
|
)
|
||||||
@@ -26,12 +27,12 @@ const (
|
|||||||
type actionKind string
|
type actionKind string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
actionNone actionKind = ""
|
actionNone actionKind = ""
|
||||||
actionDHCPOne actionKind = "dhcp_one"
|
actionDHCPOne actionKind = "dhcp_one"
|
||||||
actionStaticIPv4 actionKind = "static_ipv4"
|
actionStaticIPv4 actionKind = "static_ipv4"
|
||||||
actionExportAudit actionKind = "export_audit"
|
actionExportAudit actionKind = "export_audit"
|
||||||
actionRunNvidiaSAT actionKind = "run_nvidia_sat"
|
actionRunNvidiaSAT actionKind = "run_nvidia_sat"
|
||||||
actionRunMemorySAT actionKind = "run_memory_sat"
|
actionRunMemorySAT actionKind = "run_memory_sat"
|
||||||
actionRunStorageSAT actionKind = "run_storage_sat"
|
actionRunStorageSAT actionKind = "run_storage_sat"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -46,6 +47,7 @@ type model struct {
|
|||||||
busyTitle string
|
busyTitle string
|
||||||
title string
|
title string
|
||||||
body string
|
body string
|
||||||
|
banner string
|
||||||
mainMenu []string
|
mainMenu []string
|
||||||
networkMenu []string
|
networkMenu []string
|
||||||
serviceMenu []string
|
serviceMenu []string
|
||||||
@@ -111,5 +113,7 @@ func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m model) Init() tea.Cmd {
|
func (m model) Init() tea.Cmd {
|
||||||
return nil
|
return func() tea.Msg {
|
||||||
|
return bannerMsg{text: strings.TrimSpace(m.app.MainBanner())}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -84,6 +84,9 @@ func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
|||||||
m.screen = screenExportTargets
|
m.screen = screenExportTargets
|
||||||
m.cursor = 0
|
m.cursor = 0
|
||||||
return m, nil
|
return m, nil
|
||||||
|
case bannerMsg:
|
||||||
|
m.banner = strings.TrimSpace(msg.text)
|
||||||
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return m, nil
|
return m, nil
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ func (m model) View() string {
|
|||||||
}
|
}
|
||||||
switch m.screen {
|
switch m.screen {
|
||||||
case screenMain:
|
case screenMain:
|
||||||
return renderMenu("bee", "Select action", m.mainMenu, m.cursor)
|
return renderMainMenu("bee", m.banner, "Select action", m.mainMenu, m.cursor)
|
||||||
case screenNetwork:
|
case screenNetwork:
|
||||||
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
|
||||||
case screenServices:
|
case screenServices:
|
||||||
@@ -109,6 +109,30 @@ func renderMenu(title, subtitle string, items []string, cursor int) string {
|
|||||||
return body.String()
|
return body.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renderMainMenu renders the main menu screen as a single string: the title,
// an optional banner, the subtitle, the menu items, and the key-binding help
// line.
//
// The banner is trimmed of surrounding whitespace before it is used, and a
// banner that is empty after trimming is omitted entirely so no stray blank
// lines appear between the title and the subtitle. The item at index cursor is
// marked with "> ". When items is empty a "(no items)" placeholder is written
// instead of the list.
func renderMainMenu(title, banner, subtitle string, items []string, cursor int) string {
	var body strings.Builder
	fmt.Fprintf(&body, "%s\n\n", title)

	// Trim first, then test for emptiness: a whitespace-only banner must not
	// produce an empty banner section.
	if trimmed := strings.TrimSpace(banner); trimmed != "" {
		body.WriteString(trimmed)
		body.WriteString("\n\n")
	}

	body.WriteString(subtitle)
	body.WriteString("\n\n")

	if len(items) == 0 {
		body.WriteString("(no items)\n")
	} else {
		for i, item := range items {
			prefix := " "
			if i == cursor {
				prefix = "> "
			}
			fmt.Fprintf(&body, "%s%s\n", prefix, item)
		}
	}

	body.WriteString("\n[↑/↓] move [enter] select [esc] back [ctrl+c] quit\n")
	return body.String()
}
|
||||||
|
|
||||||
func renderForm(title string, fields []formField, idx int) string {
|
func renderForm(title string, fields []formField, idx int) string {
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
fmt.Fprintf(&body, "%s\n\n", title)
|
fmt.Fprintf(&body, "%s\n\n", title)
|
||||||
|
|||||||
@@ -132,3 +132,9 @@ Acceptance flows:
|
|||||||
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-stress`
|
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-stress`
|
||||||
- `bee sat memory` → `memtester` archive
|
- `bee sat memory` → `memtester` archive
|
||||||
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
||||||
|
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
|
||||||
|
- Runtime overrides:
|
||||||
|
- `BEE_GPU_STRESS_SECONDS`
|
||||||
|
- `BEE_GPU_STRESS_SIZE_MB`
|
||||||
|
- `BEE_MEMTESTER_SIZE_MB`
|
||||||
|
- `BEE_MEMTESTER_PASSES`
|
||||||
|
|||||||
@@ -1,22 +1,20 @@
|
|||||||
# Backlog
|
# Backlog
|
||||||
|
|
||||||
## GPU stress test (H100)
|
## Real hardware validation
|
||||||
|
|
||||||
**Статус:** отложено. В текущем ISO `gpu_burn` не включается и не запускается.
|
**Статус:** ожидает доступа к железу.
|
||||||
|
|
||||||
**Почему задача всё ещё в backlog:**
|
Что осталось подтвердить на практике:
|
||||||
- `gpu_burn` остаётся тяжёлым и неудобным с точки зрения зависимостей
|
- `bee sat nvidia` на реальном NVIDIA GPU host
|
||||||
- хочется штатный lightweight stress tool без `libcublas.so` и без заметного раздувания ISO
|
- `bee sat storage` на NVMe/SATA/RAID host
|
||||||
- для H100 нужен предсказуемый offline-инструмент, который можно стабильно возить внутри ISO
|
- `ipmitool sdr` parsing на сервере с реальным BMC/IPMI
|
||||||
|
- vendor RAID tooling (`storcli64`, `sas2ircu`, `sas3ircu`, `arcconf`, `ssacli`) в живом ISO
|
||||||
|
|
||||||
**Желаемый следующий шаг:** написать минимальный stress tool на CUDA Driver API
|
## SAT result polish
|
||||||
- использует только `libcuda.so`, уже присутствующий в ISO
|
|
||||||
- выполняет простой compute / memory workload через `cuLaunchKernel`
|
|
||||||
- собирается отдельно на builder VM и кладётся в `iso/vendor/`
|
|
||||||
- в будущем может вызываться из `bee tui` как предпочтительный встроенный GPU SAT/stress path
|
|
||||||
|
|
||||||
**Отклонённые / проблемные варианты:**
|
**Статус:** частично закрыто.
|
||||||
- `gpu_burn` — нужен libcublas (~500MB)
|
|
||||||
- `nvbandwidth` — только bandwidth, не жжёт FLOPs; нужен libcudart (~8MB)
|
Что ещё можно улучшить после полевой проверки:
|
||||||
- DCGM diag — правильный инструмент для H100 но ~100MB установка
|
- точнее классифицировать vendor-specific self-test outputs в `storage SAT`
|
||||||
- Download on demand — нужен libcublas, проблема та же
|
- подобрать дефолты `memtester` по объёму RAM на целевых машинах
|
||||||
|
- при необходимости расширить `bee-gpu-stress` по длительности/нагрузке
|
||||||
|
|||||||
Reference in New Issue
Block a user