Copies the live system to a local disk via unsquashfs — no debootstrap, no network required.
Supports UEFI (GPT+EFI) and BIOS (MBR) layouts.

ISO:
- Add squashfs-tools, parted, grub-pc, grub-efi-amd64 to the package list
- New overlay script bee-install: partitions, formats, runs unsquashfs, writes fstab, runs grub-install + update-grub in a chroot

Go TUI:
- Settings → Tools submenu (Install to disk, Check tools)
- Disk picker screen: lists non-USB, non-boot disks via lsblk
- Confirm screen warns about data loss
- Runs with a live progress tail of /tmp/bee-install.log
- platform/install.go: ListInstallDisks, InstallToDisk, findLiveBootDevice

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1021 lines
29 KiB
Go
1021 lines
29 KiB
Go
package app
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"bee/audit/internal/collector"
|
|
"bee/audit/internal/platform"
|
|
"bee/audit/internal/runtimeenv"
|
|
"bee/audit/internal/schema"
|
|
)
|
|
|
|
// Default locations of exported artifacts. Every path is derived from
// DefaultExportDir at package initialisation time; reassigning
// DefaultExportDir later does not update the derived paths.
var (
	DefaultExportDir       = "/appdata/bee/export"
	DefaultAuditJSONPath   = DefaultExportDir + "/bee-audit.json"
	DefaultAuditLogPath    = DefaultExportDir + "/bee-audit.log"
	DefaultWebLogPath      = DefaultExportDir + "/bee-web.log"
	DefaultNetworkLogPath  = DefaultExportDir + "/bee-network.log"
	DefaultNvidiaLogPath   = DefaultExportDir + "/bee-nvidia.log"
	DefaultSSHLogPath      = DefaultExportDir + "/bee-sshsetup.log"
	DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json"
	DefaultRuntimeLogPath  = DefaultExportDir + "/runtime-health.log"
	DefaultTechDumpDir     = DefaultExportDir + "/techdump"
	DefaultSATBaseDir      = DefaultExportDir + "/bee-sat"
)
type App struct {
|
|
network networkManager
|
|
services serviceManager
|
|
exports exportManager
|
|
tools toolManager
|
|
sat satRunner
|
|
runtime runtimeChecker
|
|
installer installer
|
|
}
|
|
|
|
// ActionResult is the displayable outcome of an action: a short title and
// a free-form text body.
type ActionResult struct {
	Title string
	Body  string
}
type networkManager interface {
|
|
ListInterfaces() ([]platform.InterfaceInfo, error)
|
|
DefaultRoute() string
|
|
DHCPOne(iface string) (string, error)
|
|
DHCPAll() (string, error)
|
|
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
|
|
}
|
|
|
|
type serviceManager interface {
|
|
ListBeeServices() ([]string, error)
|
|
ServiceStatus(name string) (string, error)
|
|
ServiceDo(name string, action platform.ServiceAction) (string, error)
|
|
}
|
|
|
|
type exportManager interface {
|
|
ListRemovableTargets() ([]platform.RemovableTarget, error)
|
|
ExportFileToTarget(src string, target platform.RemovableTarget) (string, error)
|
|
}
|
|
|
|
type toolManager interface {
|
|
TailFile(path string, lines int) string
|
|
CheckTools(names []string) []platform.ToolStatus
|
|
}
|
|
|
|
type installer interface {
|
|
ListInstallDisks() ([]platform.InstallDisk, error)
|
|
InstallToDisk(ctx context.Context, device string, logFile string) error
|
|
}
|
|
|
|
type satRunner interface {
|
|
RunNvidiaAcceptancePack(baseDir string) (string, error)
|
|
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int) (string, error)
|
|
RunMemoryAcceptancePack(baseDir string) (string, error)
|
|
RunStorageAcceptancePack(baseDir string) (string, error)
|
|
RunCPUAcceptancePack(baseDir string, durationSec int) (string, error)
|
|
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
|
|
DetectGPUVendor() string
|
|
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
|
RunAMDAcceptancePack(baseDir string) (string, error)
|
|
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
|
RunNCCLTests(ctx context.Context, baseDir string) (string, error)
|
|
}
|
|
|
|
type runtimeChecker interface {
|
|
CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, error)
|
|
CaptureTechnicalDump(baseDir string) error
|
|
}
|
|
|
|
func New(platform *platform.System) *App {
|
|
return &App{
|
|
network: platform,
|
|
services: platform,
|
|
exports: platform,
|
|
tools: platform,
|
|
sat: platform,
|
|
runtime: platform,
|
|
installer: platform,
|
|
}
|
|
}
|
|
|
|
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
|
|
if runtimeMode == runtimeenv.ModeLiveCD {
|
|
if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
|
|
slog.Warn("capture technical dump", "err", err)
|
|
}
|
|
}
|
|
result := collector.Run(runtimeMode)
|
|
applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir)
|
|
if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
|
|
result.Runtime = &health
|
|
}
|
|
data, err := json.MarshalIndent(result, "", " ")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
switch {
|
|
case output == "stdout":
|
|
_, err := os.Stdout.Write(append(data, '\n'))
|
|
return "stdout", err
|
|
case strings.HasPrefix(output, "file:"):
|
|
path := strings.TrimPrefix(output, "file:")
|
|
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
|
return "", err
|
|
}
|
|
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
|
|
return "", err
|
|
}
|
|
return path, nil
|
|
default:
|
|
return "", fmt.Errorf("unknown output destination %q — use stdout or file:<path>", output)
|
|
}
|
|
}
|
|
|
|
func (a *App) RunRuntimePreflight(output string) (string, error) {
|
|
health, err := a.runtime.CollectRuntimeHealth(DefaultExportDir)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
data, err := json.MarshalIndent(health, "", " ")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
switch {
|
|
case output == "stdout":
|
|
_, err := os.Stdout.Write(append(data, '\n'))
|
|
return "stdout", err
|
|
case strings.HasPrefix(output, "file:"):
|
|
path := strings.TrimPrefix(output, "file:")
|
|
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
|
return "", err
|
|
}
|
|
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
|
|
return "", err
|
|
}
|
|
return path, nil
|
|
default:
|
|
return "", fmt.Errorf("unknown output destination %q — use stdout or file:<path>", output)
|
|
}
|
|
}
|
|
|
|
func (a *App) RunRuntimePreflightResult() (ActionResult, error) {
|
|
path, err := a.RunRuntimePreflight("file:" + DefaultRuntimeJSONPath)
|
|
body := "Runtime preflight completed."
|
|
if path != "" {
|
|
body = "Runtime health written to " + path
|
|
}
|
|
return ActionResult{Title: "Run self-check", Body: body}, err
|
|
}
|
|
|
|
func (a *App) RuntimeHealthResult() ActionResult {
|
|
health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath)
|
|
if err != nil {
|
|
return ActionResult{Title: "Runtime issues", Body: "No runtime health found."}
|
|
}
|
|
driverLabel := "Driver ready"
|
|
accelLabel := "CUDA ready"
|
|
switch a.sat.DetectGPUVendor() {
|
|
case "amd":
|
|
driverLabel = "AMDGPU ready"
|
|
accelLabel = "ROCm SMI ready"
|
|
case "nvidia":
|
|
driverLabel = "NVIDIA ready"
|
|
}
|
|
var body strings.Builder
|
|
fmt.Fprintf(&body, "Status: %s\n", firstNonEmpty(health.Status, "UNKNOWN"))
|
|
fmt.Fprintf(&body, "Export dir: %s\n", firstNonEmpty(health.ExportDir, DefaultExportDir))
|
|
fmt.Fprintf(&body, "%s: %t\n", driverLabel, health.DriverReady)
|
|
fmt.Fprintf(&body, "%s: %t\n", accelLabel, health.CUDAReady)
|
|
fmt.Fprintf(&body, "Network: %s", firstNonEmpty(health.NetworkStatus, "UNKNOWN"))
|
|
if len(health.Issues) > 0 {
|
|
body.WriteString("\n\nIssues:\n")
|
|
for _, issue := range health.Issues {
|
|
fmt.Fprintf(&body, "- %s: %s\n", issue.Code, issue.Description)
|
|
}
|
|
}
|
|
return ActionResult{Title: "Runtime issues", Body: strings.TrimSpace(body.String())}
|
|
}
|
|
|
|
func (a *App) RunAuditNow(runtimeMode runtimeenv.Mode) (ActionResult, error) {
|
|
path, err := a.RunAudit(runtimeMode, "file:"+DefaultAuditJSONPath)
|
|
body := "Audit completed."
|
|
if path != "" {
|
|
body = "Audit output: " + path
|
|
}
|
|
return ActionResult{Title: "Run audit", Body: body}, err
|
|
}
|
|
|
|
func (a *App) RunAuditToDefaultFile(runtimeMode runtimeenv.Mode) (string, error) {
|
|
return a.RunAudit(runtimeMode, "file:"+DefaultAuditJSONPath)
|
|
}
|
|
|
|
func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error) {
|
|
if _, err := os.Stat(DefaultAuditJSONPath); err != nil {
|
|
return "", err
|
|
}
|
|
filename := fmt.Sprintf("audit-%s-%s.json", sanitizeFilename(hostnameOr("unknown")), time.Now().UTC().Format("20060102-150405"))
|
|
tmpPath := filepath.Join(os.TempDir(), filename)
|
|
data, err := os.ReadFile(DefaultAuditJSONPath)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
|
return "", err
|
|
}
|
|
defer os.Remove(tmpPath)
|
|
return a.exports.ExportFileToTarget(tmpPath, target)
|
|
}
|
|
|
|
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
|
|
path, err := a.ExportLatestAudit(target)
|
|
body := "Audit export failed."
|
|
if err == nil {
|
|
body = "Audit exported."
|
|
}
|
|
if err == nil && path != "" {
|
|
body = "Audit exported to " + path
|
|
}
|
|
return ActionResult{Title: "Export audit", Body: body}, err
|
|
}
|
|
|
|
func (a *App) ExportSupportBundle(target platform.RemovableTarget) (string, error) {
|
|
archive, err := BuildSupportBundle(DefaultExportDir)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer os.Remove(archive)
|
|
return a.exports.ExportFileToTarget(archive, target)
|
|
}
|
|
|
|
func (a *App) ExportSupportBundleResult(target platform.RemovableTarget) (ActionResult, error) {
|
|
path, err := a.ExportSupportBundle(target)
|
|
body := "Support bundle export failed."
|
|
if err == nil {
|
|
body = "Support bundle exported. USB target unmounted and safe to remove."
|
|
}
|
|
if err == nil && path != "" {
|
|
body = "Support bundle exported to " + path + ".\n\nUSB target unmounted and safe to remove."
|
|
}
|
|
return ActionResult{Title: "Export support bundle", Body: body}, err
|
|
}
|
|
|
|
func (a *App) ListInterfaces() ([]platform.InterfaceInfo, error) {
|
|
return a.network.ListInterfaces()
|
|
}
|
|
|
|
func (a *App) DefaultRoute() string {
|
|
return a.network.DefaultRoute()
|
|
}
|
|
|
|
func (a *App) DHCPOne(iface string) (string, error) {
|
|
return a.network.DHCPOne(iface)
|
|
}
|
|
|
|
func (a *App) DHCPOneResult(iface string) (ActionResult, error) {
|
|
body, err := a.network.DHCPOne(iface)
|
|
return ActionResult{Title: "DHCP: " + iface, Body: bodyOr(body, "DHCP completed.")}, err
|
|
}
|
|
|
|
func (a *App) DHCPAll() (string, error) {
|
|
return a.network.DHCPAll()
|
|
}
|
|
|
|
func (a *App) DHCPAllResult() (ActionResult, error) {
|
|
body, err := a.network.DHCPAll()
|
|
return ActionResult{Title: "DHCP: all interfaces", Body: bodyOr(body, "DHCP completed.")}, err
|
|
}
|
|
|
|
func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
|
|
return a.network.SetStaticIPv4(cfg)
|
|
}
|
|
|
|
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
|
body, err := a.network.SetStaticIPv4(cfg)
|
|
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
|
}
|
|
|
|
func (a *App) NetworkStatus() (ActionResult, error) {
|
|
ifaces, err := a.network.ListInterfaces()
|
|
if err != nil {
|
|
return ActionResult{Title: "Network status"}, err
|
|
}
|
|
if len(ifaces) == 0 {
|
|
return ActionResult{Title: "Network status", Body: "No physical interfaces found."}, nil
|
|
}
|
|
var body strings.Builder
|
|
for _, iface := range ifaces {
|
|
ipv4 := "(no IPv4)"
|
|
if len(iface.IPv4) > 0 {
|
|
ipv4 = strings.Join(iface.IPv4, ", ")
|
|
}
|
|
fmt.Fprintf(&body, "- %s: state=%s ip=%s\n", iface.Name, iface.State, ipv4)
|
|
}
|
|
if gw := a.network.DefaultRoute(); gw != "" {
|
|
fmt.Fprintf(&body, "\nDefault route: %s\n", gw)
|
|
}
|
|
return ActionResult{Title: "Network status", Body: strings.TrimSpace(body.String())}, nil
|
|
}
|
|
|
|
func (a *App) DefaultStaticIPv4FormFields(iface string) []string {
|
|
return []string{
|
|
"",
|
|
"24",
|
|
strings.TrimSpace(a.network.DefaultRoute()),
|
|
"77.88.8.8 77.88.8.1 1.1.1.1 8.8.8.8",
|
|
}
|
|
}
|
|
|
|
func (a *App) ParseStaticIPv4Config(iface string, fields []string) platform.StaticIPv4Config {
|
|
get := func(index int) string {
|
|
if index >= 0 && index < len(fields) {
|
|
return strings.TrimSpace(fields[index])
|
|
}
|
|
return ""
|
|
}
|
|
return platform.StaticIPv4Config{
|
|
Interface: iface,
|
|
Address: get(0),
|
|
Prefix: get(1),
|
|
Gateway: get(2),
|
|
DNS: strings.Fields(get(3)),
|
|
}
|
|
}
|
|
|
|
func (a *App) ListBeeServices() ([]string, error) {
|
|
return a.services.ListBeeServices()
|
|
}
|
|
|
|
func (a *App) ServiceStatus(name string) (string, error) {
|
|
return a.services.ServiceStatus(name)
|
|
}
|
|
|
|
func (a *App) ServiceStatusResult(name string) (ActionResult, error) {
|
|
body, err := a.services.ServiceStatus(name)
|
|
return ActionResult{Title: "service status: " + name, Body: bodyOr(body, "No status output.")}, err
|
|
}
|
|
|
|
func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, error) {
|
|
return a.services.ServiceDo(name, action)
|
|
}
|
|
|
|
func (a *App) ServiceActionResult(name string, action platform.ServiceAction) (ActionResult, error) {
|
|
body, err := a.services.ServiceDo(name, action)
|
|
return ActionResult{Title: "service " + string(action) + ": " + name, Body: bodyOr(body, "Action completed.")}, err
|
|
}
|
|
|
|
func (a *App) ListRemovableTargets() ([]platform.RemovableTarget, error) {
|
|
return a.exports.ListRemovableTargets()
|
|
}
|
|
|
|
func (a *App) TailFile(path string, lines int) string {
|
|
return a.tools.TailFile(path, lines)
|
|
}
|
|
|
|
func (a *App) CheckTools(names []string) []platform.ToolStatus {
|
|
return a.tools.CheckTools(names)
|
|
}
|
|
|
|
func (a *App) ToolCheckResult(names []string) ActionResult {
|
|
if len(names) == 0 {
|
|
return ActionResult{Title: "Required tools", Body: "No tools checked."}
|
|
}
|
|
var body strings.Builder
|
|
for _, tool := range a.tools.CheckTools(names) {
|
|
status := "MISSING"
|
|
if tool.OK {
|
|
status = "OK (" + tool.Path + ")"
|
|
}
|
|
fmt.Fprintf(&body, "- %s: %s\n", tool.Name, status)
|
|
}
|
|
return ActionResult{Title: "Required tools", Body: strings.TrimSpace(body.String())}
|
|
}
|
|
|
|
func (a *App) AuditLogTailResult() ActionResult {
|
|
logTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditLogPath, 40))
|
|
jsonTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditJSONPath, 20))
|
|
body := strings.TrimSpace(logTail + "\n\n" + jsonTail)
|
|
if body == "" {
|
|
body = "No audit logs found."
|
|
}
|
|
return ActionResult{Title: "Audit log tail", Body: body}
|
|
}
|
|
|
|
func (a *App) RunNvidiaAcceptancePack(baseDir string) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunNvidiaAcceptancePack(baseDir)
|
|
}
|
|
|
|
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|
path, err := a.RunNvidiaAcceptancePack(baseDir)
|
|
body := "Archive written."
|
|
if path != "" {
|
|
body = "Archive written to " + path
|
|
}
|
|
return ActionResult{Title: "NVIDIA SAT", Body: body}, err
|
|
}
|
|
|
|
func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
|
return a.sat.ListNvidiaGPUs()
|
|
}
|
|
|
|
func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int) (ActionResult, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, diagLevel, gpuIndices)
|
|
body := "Archive written."
|
|
if path != "" {
|
|
body = "Archive written to " + path
|
|
}
|
|
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
|
|
}
|
|
|
|
func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunMemoryAcceptancePack(baseDir)
|
|
}
|
|
|
|
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|
path, err := a.RunMemoryAcceptancePack(baseDir)
|
|
return ActionResult{Title: "Memory SAT", Body: satResultBody(path)}, err
|
|
}
|
|
|
|
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunCPUAcceptancePack(baseDir, durationSec)
|
|
}
|
|
|
|
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
|
|
path, err := a.RunCPUAcceptancePack(baseDir, durationSec)
|
|
return ActionResult{Title: "CPU SAT", Body: satResultBody(path)}, err
|
|
}
|
|
|
|
func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunStorageAcceptancePack(baseDir)
|
|
}
|
|
|
|
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|
path, err := a.RunStorageAcceptancePack(baseDir)
|
|
return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err
|
|
}
|
|
|
|
func (a *App) DetectGPUVendor() string {
|
|
return a.sat.DetectGPUVendor()
|
|
}
|
|
|
|
func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
|
return a.sat.ListAMDGPUs()
|
|
}
|
|
|
|
func (a *App) RunAMDAcceptancePack(baseDir string) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunAMDAcceptancePack(baseDir)
|
|
}
|
|
|
|
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|
path, err := a.RunAMDAcceptancePack(baseDir)
|
|
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
|
}
|
|
|
|
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
|
|
if strings.TrimSpace(baseDir) == "" {
|
|
baseDir = DefaultSATBaseDir
|
|
}
|
|
return a.sat.RunFanStressTest(ctx, baseDir, opts)
|
|
}
|
|
|
|
func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
|
|
path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir)
|
|
body := "Results: " + path
|
|
if err != nil && err != context.Canceled {
|
|
body += "\nERROR: " + err.Error()
|
|
}
|
|
return ActionResult{Title: "NCCL bandwidth test", Body: body}, err
|
|
}
|
|
|
|
func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) {
|
|
path, err := a.RunFanStressTest(ctx, "", opts)
|
|
body := formatFanStressResult(path)
|
|
if err != nil && err != context.Canceled {
|
|
body += "\nERROR: " + err.Error()
|
|
}
|
|
return ActionResult{Title: "GPU Platform Stress Test", Body: body}, err
|
|
}
|
|
|
|
// formatFanStressResult formats the summary.txt from a fan-stress run, including
|
|
// the per-step pass/fail display and the analysis section (throttling, max temps, fan response).
|
|
func formatFanStressResult(archivePath string) string {
|
|
if archivePath == "" {
|
|
return "No output produced."
|
|
}
|
|
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
|
|
raw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
|
|
if err != nil {
|
|
return "Archive written to " + archivePath
|
|
}
|
|
content := strings.TrimSpace(string(raw))
|
|
kv := parseKeyValueSummary(content)
|
|
|
|
var b strings.Builder
|
|
b.WriteString(formatSATDetail(content))
|
|
|
|
// Append analysis section.
|
|
var analysis []string
|
|
if v, ok := kv["throttling_detected"]; ok {
|
|
label := "NO"
|
|
if v == "true" {
|
|
label = "YES ← throttling detected during load"
|
|
}
|
|
analysis = append(analysis, "Throttling: "+label)
|
|
}
|
|
if v, ok := kv["max_gpu_temp_c"]; ok && v != "0.0" {
|
|
analysis = append(analysis, "Max GPU temp: "+v+"°C")
|
|
}
|
|
if v, ok := kv["max_cpu_temp_c"]; ok && v != "0.0" {
|
|
analysis = append(analysis, "Max CPU temp: "+v+"°C")
|
|
}
|
|
if v, ok := kv["fan_response_sec"]; ok && v != "N/A" && v != "-1.0" {
|
|
analysis = append(analysis, "Fan response: "+v+"s")
|
|
}
|
|
|
|
if len(analysis) > 0 {
|
|
b.WriteString("\n\n=== Analysis ===\n")
|
|
for _, line := range analysis {
|
|
b.WriteString(line + "\n")
|
|
}
|
|
}
|
|
return strings.TrimSpace(b.String())
|
|
}
|
|
|
|
// satResultBody reads summary.txt from the SAT run directory (archive path without .tar.gz)
|
|
// and returns a formatted human-readable result. Falls back to a plain message if unreadable.
|
|
func satResultBody(archivePath string) string {
|
|
if archivePath == "" {
|
|
return "No output produced."
|
|
}
|
|
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
|
|
raw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
|
|
if err != nil {
|
|
return "Archive written to " + archivePath
|
|
}
|
|
return formatSATDetail(strings.TrimSpace(string(raw)))
|
|
}
|
|
|
|
func (a *App) HealthSummaryResult() ActionResult {
|
|
raw, err := os.ReadFile(DefaultAuditJSONPath)
|
|
if err != nil {
|
|
return ActionResult{Title: "Health summary", Body: "No audit JSON found."}
|
|
}
|
|
var snapshot schema.HardwareIngestRequest
|
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
|
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
|
}
|
|
|
|
summary := collector.BuildHealthSummary(snapshot.Hardware)
|
|
var body strings.Builder
|
|
status := summary.Status
|
|
if status == "" {
|
|
status = "Unknown"
|
|
}
|
|
fmt.Fprintf(&body, "Overall: %s\n", status)
|
|
fmt.Fprintf(&body, "Storage: warn=%d fail=%d\n", summary.StorageWarn, summary.StorageFail)
|
|
fmt.Fprintf(&body, "PCIe: warn=%d fail=%d\n", summary.PCIeWarn, summary.PCIeFail)
|
|
fmt.Fprintf(&body, "PSU: warn=%d fail=%d\n", summary.PSUWarn, summary.PSUFail)
|
|
fmt.Fprintf(&body, "Memory: warn=%d fail=%d\n", summary.MemoryWarn, summary.MemoryFail)
|
|
for _, item := range latestSATSummaries() {
|
|
fmt.Fprintf(&body, "\n\n%s", item)
|
|
}
|
|
if len(summary.Failures) > 0 {
|
|
fmt.Fprintf(&body, "\n\nFailures:\n- %s", strings.Join(summary.Failures, "\n- "))
|
|
}
|
|
if len(summary.Warnings) > 0 {
|
|
fmt.Fprintf(&body, "\n\nWarnings:\n- %s", strings.Join(summary.Warnings, "\n- "))
|
|
}
|
|
return ActionResult{Title: "Health summary", Body: strings.TrimSpace(body.String())}
|
|
}
|
|
|
|
func (a *App) MainBanner() string {
|
|
raw, err := os.ReadFile(DefaultAuditJSONPath)
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
|
|
var snapshot schema.HardwareIngestRequest
|
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
|
return ""
|
|
}
|
|
|
|
var lines []string
|
|
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
|
lines = append(lines, system)
|
|
}
|
|
if cpu := formatCPULine(snapshot.Hardware.CPUs); cpu != "" {
|
|
lines = append(lines, cpu)
|
|
}
|
|
if memory := formatMemoryLine(snapshot.Hardware.Memory); memory != "" {
|
|
lines = append(lines, memory)
|
|
}
|
|
if storage := formatStorageLine(snapshot.Hardware.Storage); storage != "" {
|
|
lines = append(lines, storage)
|
|
}
|
|
if gpu := formatGPULine(snapshot.Hardware.PCIeDevices); gpu != "" {
|
|
lines = append(lines, gpu)
|
|
}
|
|
if ip := formatIPLine(a.network.ListInterfaces); ip != "" {
|
|
lines = append(lines, ip)
|
|
}
|
|
|
|
return strings.TrimSpace(strings.Join(lines, "\n"))
|
|
}
|
|
|
|
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
|
|
var body strings.Builder
|
|
for _, tool := range statuses {
|
|
status := "MISSING"
|
|
if tool.OK {
|
|
status = "OK (" + tool.Path + ")"
|
|
}
|
|
fmt.Fprintf(&body, "- %s: %s\n", tool.Name, status)
|
|
}
|
|
return strings.TrimSpace(body.String())
|
|
}
|
|
|
|
func (a *App) ParsePrefix(raw string, fallback int) int {
|
|
value, err := strconv.Atoi(strings.TrimSpace(raw))
|
|
if err != nil || value <= 0 {
|
|
return fallback
|
|
}
|
|
return value
|
|
}
|
|
|
|
// hostnameOr returns the OS hostname, or fallback when the hostname cannot
// be determined or is blank.
func hostnameOr(fallback string) string {
	name, err := os.Hostname()
	if err != nil || strings.TrimSpace(name) == "" {
		return fallback
	}
	return name
}
// sanitizeFilename replaces every rune outside [A-Za-z0-9._-] with '-'.
// An empty input yields "unknown".
func sanitizeFilename(v string) string {
	if v == "" {
		return "unknown"
	}
	return strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-', r == '_', r == '.':
			return r
		default:
			return '-'
		}
	}, v)
}
// bodyOr returns body with surrounding whitespace removed, or fallback
// when nothing remains.
func bodyOr(body, fallback string) string {
	if trimmed := strings.TrimSpace(body); trimmed != "" {
		return trimmed
	}
	return fallback
}
func ReadRuntimeHealth(path string) (schema.RuntimeHealth, error) {
|
|
raw, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return schema.RuntimeHealth{}, err
|
|
}
|
|
var health schema.RuntimeHealth
|
|
if err := json.Unmarshal(raw, &health); err != nil {
|
|
return schema.RuntimeHealth{}, err
|
|
}
|
|
return health, nil
|
|
}
|
|
|
|
func latestSATSummaries() []string {
|
|
patterns := []struct {
|
|
label string
|
|
prefix string
|
|
}{
|
|
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
|
|
{label: "Memory SAT", prefix: "memory-"},
|
|
{label: "Storage SAT", prefix: "storage-"},
|
|
{label: "CPU SAT", prefix: "cpu-"},
|
|
}
|
|
var out []string
|
|
for _, item := range patterns {
|
|
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
|
|
if err != nil || len(matches) == 0 {
|
|
continue
|
|
}
|
|
sort.Strings(matches)
|
|
raw, err := os.ReadFile(matches[len(matches)-1])
|
|
if err != nil {
|
|
continue
|
|
}
|
|
out = append(out, formatSATSummary(item.label, string(raw)))
|
|
}
|
|
return out
|
|
}
|
|
|
|
func formatSATSummary(label, raw string) string {
|
|
values := parseKeyValueSummary(raw)
|
|
var body strings.Builder
|
|
fmt.Fprintf(&body, "%s:", label)
|
|
if overall := firstNonEmpty(values["overall_status"], "UNKNOWN"); overall != "" {
|
|
fmt.Fprintf(&body, " %s", overall)
|
|
}
|
|
if ok := firstNonEmpty(values["job_ok"], "0"); ok != "" {
|
|
fmt.Fprintf(&body, " ok=%s", ok)
|
|
}
|
|
if failed := firstNonEmpty(values["job_failed"], "0"); failed != "" {
|
|
fmt.Fprintf(&body, " failed=%s", failed)
|
|
}
|
|
if unsupported := firstNonEmpty(values["job_unsupported"], "0"); unsupported != "" && unsupported != "0" {
|
|
fmt.Fprintf(&body, " unsupported=%s", unsupported)
|
|
}
|
|
if devices := strings.TrimSpace(values["devices"]); devices != "" {
|
|
fmt.Fprintf(&body, "\nDevices: %s", devices)
|
|
}
|
|
return body.String()
|
|
}
|
|
|
|
func formatSystemLine(board schema.HardwareBoard) string {
|
|
model := strings.TrimSpace(strings.Join([]string{
|
|
trimPtr(board.Manufacturer),
|
|
trimPtr(board.ProductName),
|
|
}, " "))
|
|
serial := strings.TrimSpace(board.SerialNumber)
|
|
switch {
|
|
case model != "" && serial != "":
|
|
return fmt.Sprintf("System: %s | S/N %s", model, serial)
|
|
case model != "":
|
|
return "System: " + model
|
|
case serial != "":
|
|
return "System S/N: " + serial
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
func formatCPULine(cpus []schema.HardwareCPU) string {
|
|
if len(cpus) == 0 {
|
|
return ""
|
|
}
|
|
modelCounts := map[string]int{}
|
|
unknown := 0
|
|
for _, cpu := range cpus {
|
|
model := trimPtr(cpu.Model)
|
|
if model == "" {
|
|
unknown++
|
|
continue
|
|
}
|
|
modelCounts[model]++
|
|
}
|
|
if len(modelCounts) == 1 && unknown == 0 {
|
|
for model, count := range modelCounts {
|
|
return fmt.Sprintf("CPU: %d x %s", count, model)
|
|
}
|
|
}
|
|
parts := make([]string, 0, len(modelCounts)+1)
|
|
if len(modelCounts) > 0 {
|
|
keys := make([]string, 0, len(modelCounts))
|
|
for key := range modelCounts {
|
|
keys = append(keys, key)
|
|
}
|
|
sort.Strings(keys)
|
|
for _, key := range keys {
|
|
parts = append(parts, fmt.Sprintf("%d x %s", modelCounts[key], key))
|
|
}
|
|
}
|
|
if unknown > 0 {
|
|
parts = append(parts, fmt.Sprintf("%d x unknown", unknown))
|
|
}
|
|
return "CPU: " + strings.Join(parts, ", ")
|
|
}
|
|
|
|
func formatMemoryLine(dimms []schema.HardwareMemory) string {
|
|
totalMB := 0
|
|
present := 0
|
|
types := map[string]struct{}{}
|
|
for _, dimm := range dimms {
|
|
if dimm.Present != nil && !*dimm.Present {
|
|
continue
|
|
}
|
|
if dimm.SizeMB == nil || *dimm.SizeMB <= 0 {
|
|
continue
|
|
}
|
|
present++
|
|
totalMB += *dimm.SizeMB
|
|
if value := trimPtr(dimm.Type); value != "" {
|
|
types[value] = struct{}{}
|
|
}
|
|
}
|
|
if totalMB == 0 {
|
|
return ""
|
|
}
|
|
typeText := joinSortedKeys(types)
|
|
line := fmt.Sprintf("Memory: %s", humanizeMB(totalMB))
|
|
if typeText != "" {
|
|
line += " " + typeText
|
|
}
|
|
if present > 0 {
|
|
line += fmt.Sprintf(" (%d DIMMs)", present)
|
|
}
|
|
return line
|
|
}
|
|
|
|
func formatStorageLine(disks []schema.HardwareStorage) string {
|
|
count := 0
|
|
totalGB := 0
|
|
for _, disk := range disks {
|
|
if disk.Present != nil && !*disk.Present {
|
|
continue
|
|
}
|
|
count++
|
|
if disk.SizeGB != nil && *disk.SizeGB > 0 {
|
|
totalGB += *disk.SizeGB
|
|
}
|
|
}
|
|
if count == 0 {
|
|
return ""
|
|
}
|
|
line := fmt.Sprintf("Storage: %d drives", count)
|
|
if totalGB > 0 {
|
|
line += fmt.Sprintf(" / %s", humanizeGB(totalGB))
|
|
}
|
|
return line
|
|
}
|
|
|
|
func formatGPULine(devices []schema.HardwarePCIeDevice) string {
|
|
gpus := map[string]int{}
|
|
for _, dev := range devices {
|
|
if !isGPUDevice(dev) {
|
|
continue
|
|
}
|
|
name := firstNonEmpty(trimPtr(dev.Model), trimPtr(dev.Manufacturer), "unknown")
|
|
gpus[name]++
|
|
}
|
|
if len(gpus) == 0 {
|
|
return ""
|
|
}
|
|
keys := make([]string, 0, len(gpus))
|
|
for key := range gpus {
|
|
keys = append(keys, key)
|
|
}
|
|
sort.Strings(keys)
|
|
parts := make([]string, 0, len(keys))
|
|
for _, key := range keys {
|
|
parts = append(parts, fmt.Sprintf("%d x %s", gpus[key], key))
|
|
}
|
|
return "GPU: " + strings.Join(parts, ", ")
|
|
}
|
|
|
|
func formatIPLine(list func() ([]platform.InterfaceInfo, error)) string {
|
|
if list == nil {
|
|
return ""
|
|
}
|
|
ifaces, err := list()
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
seen := map[string]struct{}{}
|
|
var ips []string
|
|
for _, iface := range ifaces {
|
|
for _, ip := range iface.IPv4 {
|
|
ip = strings.TrimSpace(ip)
|
|
if ip == "" {
|
|
continue
|
|
}
|
|
if _, ok := seen[ip]; ok {
|
|
continue
|
|
}
|
|
seen[ip] = struct{}{}
|
|
ips = append(ips, ip)
|
|
}
|
|
}
|
|
if len(ips) == 0 {
|
|
return ""
|
|
}
|
|
sort.Strings(ips)
|
|
return "IP: " + strings.Join(ips, ", ")
|
|
}
|
|
|
|
func isGPUDevice(dev schema.HardwarePCIeDevice) bool {
|
|
class := trimPtr(dev.DeviceClass)
|
|
model := strings.ToLower(trimPtr(dev.Model))
|
|
vendor := strings.ToLower(trimPtr(dev.Manufacturer))
|
|
// Exclude ASPEED (BMC VGA adapter, not a compute GPU)
|
|
if strings.Contains(vendor, "aspeed") || strings.Contains(model, "aspeed") {
|
|
return false
|
|
}
|
|
// AMD Instinct / Radeon compute GPUs have class ProcessingAccelerator or DisplayController.
|
|
// Do NOT match by AMD vendor alone — chipset/CPU PCIe devices share that vendor.
|
|
return class == "VideoController" ||
|
|
class == "DisplayController" ||
|
|
class == "ProcessingAccelerator" ||
|
|
strings.Contains(model, "nvidia") ||
|
|
strings.Contains(vendor, "nvidia")
|
|
}
|
|
|
|
// trimPtr returns *value with surrounding whitespace removed, or "" when
// value is nil.
func trimPtr(value *string) string {
	if value == nil {
		return ""
	}
	return strings.TrimSpace(*value)
}
// joinSortedKeys returns the keys of values in sorted order joined by "/",
// or "" for an empty or nil set.
func joinSortedKeys(values map[string]struct{}) string {
	if len(values) == 0 {
		return ""
	}
	keys := make([]string, 0, len(values))
	for key := range values {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	return strings.Join(keys, "/")
}
// humanizeMB formats a size given in MB using 1024-based units: "N.N TB"
// from 1024 GB upwards, "N GB" for whole gigabytes and "N.N GB" otherwise.
// Non-positive sizes yield "".
func humanizeMB(totalMB int) string {
	if totalMB <= 0 {
		return ""
	}
	gb := float64(totalMB) / 1024.0
	switch {
	case gb >= 1024.0:
		return fmt.Sprintf("%.1f TB", gb/1024.0)
	case gb == float64(int64(gb)):
		// Whole number of gigabytes: drop the fractional digit.
		return fmt.Sprintf("%.0f GB", gb)
	default:
		return fmt.Sprintf("%.1f GB", gb)
	}
}
// humanizeGB formats a size given in GB: "N.N TB" from 1024 GB upwards
// (1024-based), "N GB" below that. Non-positive sizes yield "".
func humanizeGB(totalGB int) string {
	if totalGB <= 0 {
		return ""
	}
	if tb := float64(totalGB) / 1024.0; tb >= 1.0 {
		return fmt.Sprintf("%.1f TB", tb)
	}
	return fmt.Sprintf("%d GB", totalGB)
}
// parseKeyValueSummary parses "key=value" lines into a map.
//
// Each line is split at its first '='; keys and values are
// whitespace-trimmed. Blank lines and lines without '=' are ignored, and a
// repeated key keeps its last value.
func parseKeyValueSummary(raw string) map[string]string {
	out := map[string]string{}
	for _, line := range strings.Split(raw, "\n") {
		key, value, found := strings.Cut(strings.TrimSpace(line), "=")
		if !found {
			// Covers blank lines too: they contain no '='.
			continue
		}
		out[strings.TrimSpace(key)] = strings.TrimSpace(value)
	}
	return out
}
// firstNonEmpty returns the first argument that is non-empty after
// trimming whitespace, in its trimmed form, or "" when there is none.
func firstNonEmpty(values ...string) string {
	for _, value := range values {
		if trimmed := strings.TrimSpace(value); trimmed != "" {
			return trimmed
		}
	}
	return ""
}
func (a *App) ListInstallDisks() ([]platform.InstallDisk, error) {
|
|
return a.installer.ListInstallDisks()
|
|
}
|
|
|
|
func (a *App) InstallToDisk(ctx context.Context, device string, logFile string) error {
|
|
return a.installer.InstallToDisk(ctx, device, logFile)
|
|
}
|