Add health verdicts and acceptance tests

This commit is contained in:
Mikhail Chusavitin
2026-03-14 17:53:58 +03:00
parent 17f0bda45e
commit b483e2ce35
28 changed files with 1688 additions and 82 deletions

View File

@@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
@@ -58,6 +59,8 @@ type toolManager interface {
// satRunner is the seam through which App runs acceptance-test packs.
//
// Each method takes a base directory and returns a string plus an error.
// The *Result wrappers in this file present a non-empty returned string as the
// location an archive was written to.
type satRunner interface {
// RunNvidiaAcceptancePack runs the NVIDIA pack for baseDir.
RunNvidiaAcceptancePack(baseDir string) (string, error)
// RunMemoryAcceptancePack runs the memory pack for baseDir.
RunMemoryAcceptancePack(baseDir string) (string, error)
// RunStorageAcceptancePack runs the storage pack for baseDir.
RunStorageAcceptancePack(baseDir string) (string, error)
}
func New(platform *platform.System) *App {
@@ -124,7 +127,11 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
// ExportLatestAuditResult exports the latest audit to target via
// ExportLatestAudit and wraps the outcome in an ActionResult.
//
// The body names the destination when a path is returned and falls back to a
// generic message when the path is empty. The error from ExportLatestAudit is
// returned unchanged alongside the result.
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
	path, err := a.ExportLatestAudit(target)
	// No early return here: a leftover one made the empty-path fallback
	// below unreachable and produced a dangling "Audit exported to " body.
	body := "Audit exported."
	if path != "" {
		body = "Audit exported to " + path
	}
	return ActionResult{Title: "Export audit", Body: body}, err
}
func (a *App) ListInterfaces() ([]platform.InterfaceInfo, error) {
@@ -141,7 +148,7 @@ func (a *App) DHCPOne(iface string) (string, error) {
// DHCPOneResult runs DHCP on a single interface and wraps the output in an
// ActionResult.
//
// Blank output is replaced with a fixed completion message so the result
// always has a body. The underlying error is returned unchanged.
func (a *App) DHCPOneResult(iface string) (ActionResult, error) {
	body, err := a.network.DHCPOne(iface)
	return ActionResult{Title: "DHCP: " + iface, Body: bodyOr(body, "DHCP completed.")}, err
}
func (a *App) DHCPAll() (string, error) {
@@ -150,7 +157,7 @@ func (a *App) DHCPAll() (string, error) {
// DHCPAllResult runs DHCP on all interfaces and wraps the output in an
// ActionResult.
//
// Blank output is replaced with a fixed completion message so the result
// always has a body. The underlying error is returned unchanged.
func (a *App) DHCPAllResult() (ActionResult, error) {
	body, err := a.network.DHCPAll()
	return ActionResult{Title: "DHCP: all interfaces", Body: bodyOr(body, "DHCP completed.")}, err
}
func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
@@ -159,7 +166,7 @@ func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
// SetStaticIPv4Result applies cfg through the network layer and wraps the
// output in an ActionResult titled with the configured interface.
//
// Blank output is replaced with a fixed confirmation message. The underlying
// error is returned unchanged.
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
	body, err := a.network.SetStaticIPv4(cfg)
	return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
}
func (a *App) NetworkStatus() (ActionResult, error) {
@@ -167,6 +174,9 @@ func (a *App) NetworkStatus() (ActionResult, error) {
if err != nil {
return ActionResult{Title: "Network status"}, err
}
if len(ifaces) == 0 {
return ActionResult{Title: "Network status", Body: "No physical interfaces found."}, nil
}
var body strings.Builder
for _, iface := range ifaces {
ipv4 := "(no IPv4)"
@@ -216,7 +226,7 @@ func (a *App) ServiceStatus(name string) (string, error) {
// ServiceStatusResult queries the status of the named service and wraps the
// output in an ActionResult titled "service status: <name>".
//
// Blank output is replaced with "No status output.". The underlying error is
// returned unchanged.
func (a *App) ServiceStatusResult(name string) (ActionResult, error) {
	body, err := a.services.ServiceStatus(name)
	return ActionResult{Title: "service status: " + name, Body: bodyOr(body, "No status output.")}, err
}
func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, error) {
@@ -225,7 +235,7 @@ func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, err
// ServiceActionResult performs action on the named service and wraps the
// output in an ActionResult titled "service <action>: <name>".
//
// Blank output is replaced with "Action completed.". The underlying error is
// returned unchanged.
func (a *App) ServiceActionResult(name string, action platform.ServiceAction) (ActionResult, error) {
	body, err := a.services.ServiceDo(name, action)
	return ActionResult{Title: "service " + string(action) + ": " + name, Body: bodyOr(body, "Action completed.")}, err
}
func (a *App) ListRemovableTargets() ([]platform.RemovableTarget, error) {
@@ -241,6 +251,9 @@ func (a *App) CheckTools(names []string) []platform.ToolStatus {
}
func (a *App) ToolCheckResult(names []string) ActionResult {
if len(names) == 0 {
return ActionResult{Title: "Required tools", Body: "No tools checked."}
}
var body strings.Builder
for _, tool := range a.tools.CheckTools(names) {
status := "MISSING"
@@ -253,7 +266,12 @@ func (a *App) ToolCheckResult(names []string) ActionResult {
}
// AuditLogTailResult returns the tail of the audit log (40 lines) followed by
// the tail of the audit JSON (20 lines), separated by a blank line.
//
// Each tail is trimmed individually and the combined text is trimmed again,
// so a missing half leaves no stray separator. When both are blank the body
// is a fixed "No audit logs found." message.
func (a *App) AuditLogTailResult() ActionResult {
	logTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditLogPath, 40))
	jsonTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditJSONPath, 20))
	body := strings.TrimSpace(logTail + "\n\n" + jsonTail)
	if body == "" {
		body = "No audit logs found."
	}
	return ActionResult{Title: "Audit log tail", Body: body}
}
@@ -263,7 +281,88 @@ func (a *App) RunNvidiaAcceptancePack(baseDir string) (string, error) {
// RunNvidiaAcceptancePackResult runs the NVIDIA acceptance pack under baseDir
// and wraps the outcome in an ActionResult titled "NVIDIA SAT".
//
// The body names the archive path when one is returned and falls back to a
// generic message when the path is empty. The runner's error is returned
// unchanged alongside the result.
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
	path, err := a.sat.RunNvidiaAcceptancePack(baseDir)
	// No early return here: a leftover one made the empty-path fallback
	// below unreachable.
	body := "Archive written."
	if path != "" {
		body = "Archive written to " + path
	}
	return ActionResult{Title: "NVIDIA SAT", Body: body}, err
}
// RunMemoryAcceptancePack delegates to the configured SAT runner's memory
// pack and returns its string result and error unchanged.
func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
	archive, err := a.sat.RunMemoryAcceptancePack(baseDir)
	return archive, err
}
// RunMemoryAcceptancePackResult runs the memory acceptance pack under baseDir
// and reports the outcome as an ActionResult titled "Memory SAT".
//
// A non-empty returned path is included in the body; otherwise a generic
// message is used. The runner's error is passed through.
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
	archive, err := a.sat.RunMemoryAcceptancePack(baseDir)
	result := ActionResult{Title: "Memory SAT", Body: "Archive written."}
	if archive != "" {
		result.Body = "Archive written to " + archive
	}
	return result, err
}
// RunStorageAcceptancePack delegates to the configured SAT runner's storage
// pack and returns its string result and error unchanged.
func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
	archive, err := a.sat.RunStorageAcceptancePack(baseDir)
	return archive, err
}
// RunStorageAcceptancePackResult runs the storage acceptance pack under
// baseDir and reports the outcome as an ActionResult titled "Storage SAT".
//
// A non-empty returned path is included in the body; otherwise a generic
// message is used. The runner's error is passed through.
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
	archive, err := a.sat.RunStorageAcceptancePack(baseDir)
	result := ActionResult{Title: "Storage SAT", Body: "Archive written."}
	if archive != "" {
		result.Body = "Archive written to " + archive
	}
	return result, err
}
// HealthSummaryResult renders a plain-text health report from the audit JSON
// at DefaultAuditJSONPath.
//
// The report lists the overall status ("UNKNOWN" when the field is empty),
// warn/fail counters for storage, PCIe, PSU and memory, the entries returned
// by latestSATSummaries, and finally any failure and warning lists as bullet
// points. If the file cannot be read or decoded, a fixed explanatory body is
// returned instead; no error is surfaced to the caller.
func (a *App) HealthSummaryResult() ActionResult {
	const title = "Health summary"

	// Only the hardware summary section of the audit document is decoded.
	type hardwareSummary struct {
		Status      string   `json:"status"`
		Warnings    []string `json:"warnings"`
		Failures    []string `json:"failures"`
		StorageWarn int      `json:"storage_warn"`
		StorageFail int      `json:"storage_fail"`
		PCIeWarn    int      `json:"pcie_warn"`
		PCIeFail    int      `json:"pcie_fail"`
		PSUWarn     int      `json:"psu_warn"`
		PSUFail     int      `json:"psu_fail"`
		MemoryWarn  int      `json:"memory_warn"`
		MemoryFail  int      `json:"memory_fail"`
	}
	type auditDoc struct {
		Hardware struct {
			Summary hardwareSummary `json:"summary"`
		} `json:"hardware"`
	}

	data, readErr := os.ReadFile(DefaultAuditJSONPath)
	if readErr != nil {
		return ActionResult{Title: title, Body: "No audit JSON found."}
	}

	var doc auditDoc
	if decodeErr := json.Unmarshal(data, &doc); decodeErr != nil {
		return ActionResult{Title: title, Body: "Audit JSON is unreadable."}
	}

	hw := doc.Hardware.Summary
	overall := hw.Status
	if overall == "" {
		overall = "UNKNOWN"
	}

	var report strings.Builder
	fmt.Fprintf(&report, "Overall: %s\n", overall)
	fmt.Fprintf(&report, "Storage: warn=%d fail=%d\n", hw.StorageWarn, hw.StorageFail)
	fmt.Fprintf(&report, "PCIe: warn=%d fail=%d\n", hw.PCIeWarn, hw.PCIeFail)
	fmt.Fprintf(&report, "PSU: warn=%d fail=%d\n", hw.PSUWarn, hw.PSUFail)
	fmt.Fprintf(&report, "Memory: warn=%d fail=%d\n", hw.MemoryWarn, hw.MemoryFail)

	// Each SAT summary is set off from the preceding text by a newline.
	for _, sat := range latestSATSummaries() {
		fmt.Fprintf(&report, "\n%s", sat)
	}

	if len(hw.Failures) != 0 {
		fmt.Fprintf(&report, "\n\nFailures:\n- %s", strings.Join(hw.Failures, "\n- "))
	}
	if len(hw.Warnings) != 0 {
		fmt.Fprintf(&report, "\n\nWarnings:\n- %s", strings.Join(hw.Warnings, "\n- "))
	}

	return ActionResult{Title: title, Body: strings.TrimSpace(report.String())}
}
func (a *App) FormatToolStatuses(statuses []platform.ToolStatus) string {
@@ -309,3 +408,37 @@ func sanitizeFilename(v string) string {
}
return string(out)
}
// bodyOr returns body with surrounding whitespace removed, or fallback when
// nothing but whitespace remains. The fallback is returned as given.
func bodyOr(body, fallback string) string {
	if trimmed := strings.TrimSpace(body); trimmed != "" {
		return trimmed
	}
	return fallback
}
// latestSATSummaries collects one summary per SAT kind from /var/log/bee-sat.
//
// For each kind it globs "<prefix>*/summary.txt", sorts the matches as
// strings, and reads the last one. Kinds with no match, a glob error, or an
// unreadable file are skipped silently. Each entry is "<label>:\n" followed
// by the trimmed file contents; the result is nil when nothing was found.
func latestSATSummaries() []string {
	const root = "/var/log/bee-sat"

	type satKind struct {
		label  string
		prefix string
	}
	kinds := [...]satKind{
		{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
		{label: "Memory SAT", prefix: "memory-"},
		{label: "Storage SAT", prefix: "storage-"},
	}

	var summaries []string
	for _, kind := range kinds {
		found, globErr := filepath.Glob(filepath.Join(root, kind.prefix+"*/summary.txt"))
		if globErr != nil || len(found) == 0 {
			continue
		}

		// The last path in string order is the one that gets read.
		sort.Strings(found)
		newest := found[len(found)-1]

		content, readErr := os.ReadFile(newest)
		if readErr != nil {
			continue
		}
		summaries = append(summaries, kind.label+":\n"+strings.TrimSpace(string(content)))
	}
	return summaries
}

View File

@@ -76,11 +76,21 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
}
// fakeSAT is a test double whose behaviour is supplied per acceptance pack
// through function fields. Every field a test exercises must be set; calling
// a method whose field is nil panics.
type fakeSAT struct {
	runNvidiaFn  func(string) (string, error)
	runMemoryFn  func(string) (string, error)
	runStorageFn func(string) (string, error)
}

// RunNvidiaAcceptancePack forwards to runNvidiaFn.
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
	return f.runNvidiaFn(baseDir)
}

// RunMemoryAcceptancePack forwards to runMemoryFn.
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string) (string, error) {
	return f.runMemoryFn(baseDir)
}

// RunStorageAcceptancePack forwards to runStorageFn.
func (f fakeSAT) RunStorageAcceptancePack(baseDir string) (string, error) {
	return f.runStorageFn(baseDir)
}
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
@@ -116,6 +126,25 @@ func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
}
}
func TestNetworkStatusHandlesNoInterfaces(t *testing.T) {
t.Parallel()
a := &App{
network: fakeNetwork{
listInterfacesFn: func() ([]platform.InterfaceInfo, error) { return nil, nil },
defaultRouteFn: func() string { return "" },
},
}
result, err := a.NetworkStatus()
if err != nil {
t.Fatalf("NetworkStatus error: %v", err)
}
if result.Body != "No physical interfaces found." {
t.Fatalf("body=%q want %q", result.Body, "No physical interfaces found.")
}
}
func TestNetworkStatusPropagatesListError(t *testing.T) {
t.Parallel()
@@ -192,7 +221,7 @@ func TestServiceActionResults(t *testing.T) {
if err != nil {
t.Fatalf("ServiceStatusResult error: %v", err)
}
if statusResult.Title != "service: bee-audit" || statusResult.Body != "active" {
if statusResult.Title != "service status: bee-audit" || statusResult.Body != "active" {
t.Fatalf("unexpected status result: %#v", statusResult)
}
@@ -200,7 +229,7 @@ func TestServiceActionResults(t *testing.T) {
if err != nil {
t.Fatalf("ServiceActionResult error: %v", err)
}
if actionResult.Title != "service: bee-audit" || actionResult.Body != "restart ok" {
if actionResult.Title != "service restart: bee-audit" || actionResult.Body != "restart ok" {
t.Fatalf("unexpected action result: %#v", actionResult)
}
}
@@ -242,17 +271,79 @@ func TestToolCheckAndLogTailResults(t *testing.T) {
}
}
func TestActionResultsUseFallbackBody(t *testing.T) {
t.Parallel()
a := &App{
network: fakeNetwork{
dhcpOneFn: func(string) (string, error) { return " ", nil },
dhcpAllFn: func() (string, error) { return "", nil },
setStaticIPv4Fn: func(platform.StaticIPv4Config) (string, error) { return "", nil },
listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
return nil, nil
},
defaultRouteFn: func() string { return "" },
},
services: fakeServices{
serviceStatusFn: func(string) (string, error) { return "", nil },
serviceDoFn: func(string, platform.ServiceAction) (string, error) { return "", nil },
},
tools: fakeTools{
tailFileFn: func(string, int) string { return " " },
checkToolsFn: func([]string) []platform.ToolStatus { return nil },
},
sat: fakeSAT{
runNvidiaFn: func(string) (string, error) { return "", nil },
runMemoryFn: func(string) (string, error) { return "", nil },
runStorageFn: func(string) (string, error) { return "", nil },
},
}
if got, _ := a.DHCPOneResult("eth0"); got.Body != "DHCP completed." {
t.Fatalf("dhcp one body=%q", got.Body)
}
if got, _ := a.DHCPAllResult(); got.Body != "DHCP completed." {
t.Fatalf("dhcp all body=%q", got.Body)
}
if got, _ := a.SetStaticIPv4Result(platform.StaticIPv4Config{Interface: "eth0"}); got.Body != "Static IPv4 updated." {
t.Fatalf("static body=%q", got.Body)
}
if got, _ := a.ServiceStatusResult("bee-audit"); got.Body != "No status output." {
t.Fatalf("status body=%q", got.Body)
}
if got, _ := a.ServiceActionResult("bee-audit", platform.ServiceRestart); got.Body != "Action completed." {
t.Fatalf("action body=%q", got.Body)
}
if got := a.ToolCheckResult(nil); got.Body != "No tools checked." {
t.Fatalf("tool body=%q", got.Body)
}
if got := a.AuditLogTailResult(); got.Body != "No audit logs found." {
t.Fatalf("log body=%q", got.Body)
}
if got, _ := a.RunNvidiaAcceptancePackResult(""); got.Body != "Archive written." {
t.Fatalf("sat body=%q", got.Body)
}
if got, _ := a.RunMemoryAcceptancePackResult(""); got.Body != "Archive written." {
t.Fatalf("memory sat body=%q", got.Body)
}
if got, _ := a.RunStorageAcceptancePackResult(""); got.Body != "Archive written." {
t.Fatalf("storage sat body=%q", got.Body)
}
}
func TestRunNvidiaAcceptancePackResult(t *testing.T) {
t.Parallel()
a := &App{
sat: fakeSAT{
runFn: func(baseDir string) (string, error) {
runNvidiaFn: func(baseDir string) (string, error) {
if baseDir != "/tmp/sat" {
t.Fatalf("baseDir=%q want %q", baseDir, "/tmp/sat")
}
return "/tmp/sat/out.tar.gz", nil
},
runMemoryFn: func(string) (string, error) { return "", nil },
runStorageFn: func(string) (string, error) { return "", nil },
},
}