Bring codebase into compliance with bible contracts (A–E)

A (hardware-ingest-json v2.8-2.9): remove sensor location fields from schema
and collector; tag HardwareMemory.Location as json:"-"; add PlatformConfig to
HardwareSnapshot.

B (no-hardcoded-vendors): consolidate PCI vendor IDs into collector/pci_vendors.go;
replace all vendor-name string checks in isGPUDevice, isNVIDIADevice, isMellanoxDevice,
isAMDGPUDevice, matchesGPUVendor (sat_overlay), and validateIsVendorGPU (page_validate)
with numeric vendor_id comparisons.

C (module-structure): split app/app.go (1413 lines) into app.go + app_format.go,
app_network.go, app_services.go, app_packs.go, app_install.go — no logic changes.

D (go-code-style): wrap bare return err in interfaceAdminState and
interfaceIPv4Addrs (platform/network.go) with fmt.Errorf context including
the interface name.

E (go-project-bible): add bible-local/architecture/data-model.md and
bible-local/architecture/api-surface.md.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 14:32:08 +03:00
parent 2320925433
commit 7d2e904d14
20 changed files with 1420 additions and 1062 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,405 @@
package app
import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"bee/audit/internal/collector"
"bee/audit/internal/platform"
"bee/audit/internal/schema"
)
func hostnameOr(fallback string) string {
hn, err := os.Hostname()
if err != nil || strings.TrimSpace(hn) == "" {
return fallback
}
return hn
}
func sanitizeFilename(v string) string {
var out []rune
for _, r := range v {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '-', r == '_', r == '.':
out = append(out, r)
default:
out = append(out, '-')
}
}
if len(out) == 0 {
return "unknown"
}
return string(out)
}
func bodyOr(body, fallback string) string {
body = strings.TrimSpace(body)
if body == "" {
return fallback
}
return body
}
func trimPtr(value *string) string {
if value == nil {
return ""
}
return strings.TrimSpace(*value)
}
func joinSortedKeys(values map[string]struct{}) string {
if len(values) == 0 {
return ""
}
keys := make([]string, 0, len(values))
for key := range values {
keys = append(keys, key)
}
sort.Strings(keys)
return strings.Join(keys, "/")
}
func humanizeMB(totalMB int) string {
if totalMB <= 0 {
return ""
}
gb := float64(totalMB) / 1024.0
if gb >= 1024.0 {
tb := gb / 1024.0
return fmt.Sprintf("%.1f TB", tb)
}
if gb == float64(int64(gb)) {
return fmt.Sprintf("%.0f GB", gb)
}
return fmt.Sprintf("%.1f GB", gb)
}
func humanizeGB(totalGB int) string {
if totalGB <= 0 {
return ""
}
tb := float64(totalGB) / 1024.0
if tb >= 1.0 {
return fmt.Sprintf("%.1f TB", tb)
}
return fmt.Sprintf("%d GB", totalGB)
}
func parseKeyValueSummary(raw string) map[string]string {
out := map[string]string{}
for _, line := range strings.Split(raw, "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
key, value, ok := strings.Cut(line, "=")
if !ok {
continue
}
out[strings.TrimSpace(key)] = strings.TrimSpace(value)
}
return out
}
func firstNonEmpty(values ...string) string {
for _, value := range values {
value = strings.TrimSpace(value)
if value != "" {
return value
}
}
return ""
}
func cleanSummaryKey(key string) string {
idx := strings.Index(key, "-")
if idx <= 0 {
return key
}
prefix := key[:idx]
for _, c := range prefix {
if c < '0' || c > '9' {
return key
}
}
return key[idx+1:]
}
func isGPUDevice(dev schema.HardwarePCIeDevice) bool {
// Exclude Aspeed BMC VGA adapters (not compute GPUs).
if dev.VendorID != nil && *dev.VendorID == collector.AspeedVendorID {
return false
}
class := trimPtr(dev.DeviceClass)
// AMD Instinct / Radeon compute GPUs always carry ProcessingAccelerator or DisplayController.
// Do NOT match AMD vendor alone — CPU chipset PCIe devices share that vendor ID.
if class == "VideoController" || class == "DisplayController" || class == "ProcessingAccelerator" {
return true
}
// NVIDIA devices sometimes expose class values outside the standard GPU set.
return dev.VendorID != nil && *dev.VendorID == collector.NvidiaVendorID
}
func formatSystemLine(board schema.HardwareBoard) string {
model := strings.TrimSpace(strings.Join([]string{
trimPtr(board.Manufacturer),
trimPtr(board.ProductName),
}, " "))
serial := strings.TrimSpace(board.SerialNumber)
switch {
case model != "" && serial != "":
return fmt.Sprintf("System: %s | S/N %s", model, serial)
case model != "":
return "System: " + model
case serial != "":
return "System S/N: " + serial
default:
return ""
}
}
func formatCPULine(cpus []schema.HardwareCPU) string {
if len(cpus) == 0 {
return ""
}
modelCounts := map[string]int{}
unknown := 0
for _, cpu := range cpus {
model := trimPtr(cpu.Model)
if model == "" {
unknown++
continue
}
modelCounts[model]++
}
if len(modelCounts) == 1 && unknown == 0 {
for model, count := range modelCounts {
return fmt.Sprintf("CPU: %d x %s", count, model)
}
}
parts := make([]string, 0, len(modelCounts)+1)
if len(modelCounts) > 0 {
keys := make([]string, 0, len(modelCounts))
for key := range modelCounts {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%d x %s", modelCounts[key], key))
}
}
if unknown > 0 {
parts = append(parts, fmt.Sprintf("%d x unknown", unknown))
}
return "CPU: " + strings.Join(parts, ", ")
}
func formatMemoryLine(dimms []schema.HardwareMemory) string {
totalMB := 0
present := 0
types := map[string]struct{}{}
for _, dimm := range dimms {
if dimm.Present != nil && !*dimm.Present {
continue
}
if dimm.SizeMB == nil || *dimm.SizeMB <= 0 {
continue
}
present++
totalMB += *dimm.SizeMB
if value := trimPtr(dimm.Type); value != "" {
types[value] = struct{}{}
}
}
if totalMB == 0 {
return ""
}
typeText := joinSortedKeys(types)
line := fmt.Sprintf("Memory: %s", humanizeMB(totalMB))
if typeText != "" {
line += " " + typeText
}
if present > 0 {
line += fmt.Sprintf(" (%d DIMMs)", present)
}
return line
}
func formatStorageLine(disks []schema.HardwareStorage) string {
count := 0
totalGB := 0
for _, disk := range disks {
if disk.Present != nil && !*disk.Present {
continue
}
count++
if disk.SizeGB != nil && *disk.SizeGB > 0 {
totalGB += *disk.SizeGB
}
}
if count == 0 {
return ""
}
line := fmt.Sprintf("Storage: %d drives", count)
if totalGB > 0 {
line += fmt.Sprintf(" / %s", humanizeGB(totalGB))
}
return line
}
func formatGPULine(devices []schema.HardwarePCIeDevice) string {
gpus := map[string]int{}
for _, dev := range devices {
if !isGPUDevice(dev) {
continue
}
name := firstNonEmpty(trimPtr(dev.Model), trimPtr(dev.Manufacturer), "unknown")
gpus[name]++
}
if len(gpus) == 0 {
return ""
}
keys := make([]string, 0, len(gpus))
for key := range gpus {
keys = append(keys, key)
}
sort.Strings(keys)
parts := make([]string, 0, len(keys))
for _, key := range keys {
parts = append(parts, fmt.Sprintf("%d x %s", gpus[key], key))
}
return "GPU: " + strings.Join(parts, ", ")
}
func formatIPLine(list func() ([]platform.InterfaceInfo, error)) string {
if list == nil {
return ""
}
ifaces, err := list()
if err != nil {
return ""
}
seen := map[string]struct{}{}
var ips []string
for _, iface := range ifaces {
for _, ip := range iface.IPv4 {
ip = strings.TrimSpace(ip)
if ip == "" {
continue
}
if _, ok := seen[ip]; ok {
continue
}
seen[ip] = struct{}{}
ips = append(ips, ip)
}
}
if len(ips) == 0 {
return ""
}
sort.Strings(ips)
return "IP: " + strings.Join(ips, ", ")
}
func formatSATDetail(raw string) string {
var b strings.Builder
kv := parseKeyValueSummary(raw)
if t, ok := kv["run_at_utc"]; ok {
fmt.Fprintf(&b, "Run: %s\n\n", t)
}
lines := strings.Split(raw, "\n")
var stepKeys []string
seenStep := map[string]bool{}
for _, line := range lines {
if idx := strings.Index(line, "_status="); idx >= 0 {
key := line[:idx]
if !seenStep[key] && key != "overall" {
seenStep[key] = true
stepKeys = append(stepKeys, key)
}
}
}
for _, key := range stepKeys {
status := kv[key+"_status"]
display := cleanSummaryKey(key)
switch status {
case "OK":
fmt.Fprintf(&b, "PASS %s\n", display)
case "FAILED":
fmt.Fprintf(&b, "FAIL %s\n", display)
case "UNSUPPORTED":
fmt.Fprintf(&b, "SKIP %s\n", display)
default:
fmt.Fprintf(&b, "? %s\n", display)
}
}
if overall, ok := kv["overall_status"]; ok {
ok2 := kv["job_ok"]
failed := kv["job_failed"]
fmt.Fprintf(&b, "\nOverall: %s (ok=%s failed=%s)", overall, ok2, failed)
}
return strings.TrimSpace(b.String())
}
func formatSATSummary(label, raw string) string {
values := parseKeyValueSummary(raw)
var body strings.Builder
fmt.Fprintf(&body, "%s:", label)
if overall := firstNonEmpty(values["overall_status"], "UNKNOWN"); overall != "" {
fmt.Fprintf(&body, " %s", overall)
}
if ok := firstNonEmpty(values["job_ok"], "0"); ok != "" {
fmt.Fprintf(&body, " ok=%s", ok)
}
if failed := firstNonEmpty(values["job_failed"], "0"); failed != "" {
fmt.Fprintf(&body, " failed=%s", failed)
}
if unsupported := firstNonEmpty(values["job_unsupported"], "0"); unsupported != "" && unsupported != "0" {
fmt.Fprintf(&body, " unsupported=%s", unsupported)
}
if devices := strings.TrimSpace(values["devices"]); devices != "" {
fmt.Fprintf(&body, "\nDevices: %s", devices)
}
return body.String()
}
func latestSATSummaries() []string {
patterns := []struct {
label string
prefix string
}{
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
{label: "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)", prefix: "gpu-nvidia-targeted-stress-"},
{label: "NVIDIA Max Compute Load (dcgmproftester)", prefix: "gpu-nvidia-compute-"},
{label: "NVIDIA Targeted Power (dcgmi diag targeted_power)", prefix: "gpu-nvidia-targeted-power-"},
{label: "NVIDIA Pulse Test (dcgmi diag pulse_test)", prefix: "gpu-nvidia-pulse-"},
{label: "NVIDIA Interconnect Test (NCCL all_reduce_perf)", prefix: "gpu-nvidia-nccl-"},
{label: "NVIDIA Bandwidth Test (NVBandwidth)", prefix: "gpu-nvidia-bandwidth-"},
{label: "Memory SAT", prefix: "memory-"},
{label: "Storage SAT", prefix: "storage-"},
{label: "CPU SAT", prefix: "cpu-"},
}
var out []string
for _, item := range patterns {
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
if err != nil || len(matches) == 0 {
continue
}
sort.Strings(matches)
raw, err := os.ReadFile(matches[len(matches)-1])
if err != nil {
continue
}
out = append(out, formatSATSummary(item.label, string(raw)))
}
return out
}

View File

@@ -0,0 +1,76 @@
package app
import (
"context"
"fmt"
"os"
"path/filepath"
"time"
"bee/audit/internal/platform"
)
func (a *App) ListRemovableTargets() ([]platform.RemovableTarget, error) {
return a.exports.ListRemovableTargets()
}
func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error) {
if _, err := os.Stat(DefaultAuditJSONPath); err != nil {
return "", err
}
filename := fmt.Sprintf("audit-%s-%s.json", sanitizeFilename(hostnameOr("unknown")), time.Now().UTC().Format("20060102-150405"))
tmpPath := filepath.Join(os.TempDir(), filename)
data, err := readFileLimited(DefaultAuditJSONPath, 100<<20)
if err != nil {
return "", err
}
if normalized, normErr := ApplySATOverlay(data); normErr == nil {
data = normalized
}
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
return "", err
}
defer os.Remove(tmpPath)
return a.exports.ExportFileToTarget(tmpPath, target)
}
func (a *App) ExportLatestAuditResult(target platform.RemovableTarget) (ActionResult, error) {
path, err := a.ExportLatestAudit(target)
body := "Audit export failed."
if err == nil {
body = "Audit exported."
}
if err == nil && path != "" {
body = "Audit exported to " + path
}
return ActionResult{Title: "Export audit", Body: body}, err
}
func (a *App) ExportSupportBundle(target platform.RemovableTarget) (string, error) {
archive, err := BuildSupportBundle(DefaultExportDir)
if err != nil {
return "", err
}
defer os.Remove(archive)
return a.exports.ExportFileToTarget(archive, target)
}
func (a *App) ExportSupportBundleResult(target platform.RemovableTarget) (ActionResult, error) {
path, err := a.ExportSupportBundle(target)
body := "Support bundle export failed."
if err == nil {
body = "Support bundle exported. USB target unmounted and safe to remove."
}
if err == nil && path != "" {
body = "Support bundle exported to " + path + ".\n\nUSB target unmounted and safe to remove."
}
return ActionResult{Title: "Export support bundle", Body: body}, err
}
func (a *App) ListInstallDisks() ([]platform.InstallDisk, error) {
return a.installer.ListInstallDisks()
}
func (a *App) InstallToDisk(ctx context.Context, device string, logFile string) error {
return a.installer.InstallToDisk(ctx, device, logFile)
}

View File

@@ -0,0 +1,106 @@
package app
import (
"fmt"
"strings"
"bee/audit/internal/platform"
)
func (a *App) ListInterfaces() ([]platform.InterfaceInfo, error) {
return a.network.ListInterfaces()
}
func (a *App) DefaultRoute() string {
return a.network.DefaultRoute()
}
func (a *App) DHCPOne(iface string) (string, error) {
return a.network.DHCPOne(iface)
}
func (a *App) DHCPOneResult(iface string) (ActionResult, error) {
body, err := a.network.DHCPOne(iface)
return ActionResult{Title: "DHCP: " + iface, Body: bodyOr(body, "DHCP completed.")}, err
}
func (a *App) DHCPAll() (string, error) {
return a.network.DHCPAll()
}
func (a *App) DHCPAllResult() (ActionResult, error) {
body, err := a.network.DHCPAll()
return ActionResult{Title: "DHCP: all interfaces", Body: bodyOr(body, "DHCP completed.")}, err
}
func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
return a.network.SetStaticIPv4(cfg)
}
func (a *App) SetInterfaceState(iface string, up bool) error {
return a.network.SetInterfaceState(iface, up)
}
func (a *App) GetInterfaceState(iface string) (bool, error) {
return a.network.GetInterfaceState(iface)
}
func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
return a.network.CaptureNetworkSnapshot()
}
func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
return a.network.RestoreNetworkSnapshot(snapshot)
}
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
body, err := a.network.SetStaticIPv4(cfg)
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
}
func (a *App) NetworkStatus() (ActionResult, error) {
ifaces, err := a.network.ListInterfaces()
if err != nil {
return ActionResult{Title: "Network status"}, err
}
if len(ifaces) == 0 {
return ActionResult{Title: "Network status", Body: "No physical interfaces found."}, nil
}
var body strings.Builder
for _, iface := range ifaces {
ipv4 := "(no IPv4)"
if len(iface.IPv4) > 0 {
ipv4 = strings.Join(iface.IPv4, ", ")
}
fmt.Fprintf(&body, "- %s: state=%s ip=%s\n", iface.Name, iface.State, ipv4)
}
if gw := a.network.DefaultRoute(); gw != "" {
fmt.Fprintf(&body, "\nDefault route: %s\n", gw)
}
return ActionResult{Title: "Network status", Body: strings.TrimSpace(body.String())}, nil
}
func (a *App) DefaultStaticIPv4FormFields(iface string) []string {
return []string{
"",
"24",
strings.TrimSpace(a.network.DefaultRoute()),
"77.88.8.8 77.88.8.1 1.1.1.1 8.8.8.8",
}
}
func (a *App) ParseStaticIPv4Config(iface string, fields []string) platform.StaticIPv4Config {
get := func(index int) string {
if index >= 0 && index < len(fields) {
return strings.TrimSpace(fields[index])
}
return ""
}
return platform.StaticIPv4Config{
Interface: iface,
Address: get(0),
Prefix: get(1),
Gateway: get(2),
DNS: strings.Fields(get(3)),
}
}

View File

@@ -0,0 +1,370 @@
package app
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"bee/audit/internal/platform"
)
func (a *App) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaAcceptancePack(baseDir, logFunc)
}
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunNvidiaAcceptancePack(baseDir, nil)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
}
return ActionResult{Title: "NVIDIA SAT", Body: body}, err
}
func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
return a.sat.ListNvidiaGPUs()
}
func (a *App) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
return a.sat.ListNvidiaGPUStatuses()
}
func (a *App) ResetNvidiaGPU(index int) (ActionResult, error) {
out, err := a.sat.ResetNvidiaGPU(index)
return ActionResult{Title: fmt.Sprintf("Reset NVIDIA GPU %d", index), Body: strings.TrimSpace(out)}, err
}
func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (ActionResult, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, diagLevel, gpuIndices, logFunc)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
}
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
}
func (a *App) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaTargetedStressValidatePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
}
func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
return a.RunNvidiaBenchmarkCtx(context.Background(), baseDir, opts, logFunc)
}
func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultBeeBenchPerfDir
}
resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "performance", logFunc)
if err != nil {
return "", err
}
opts.ServerPowerSource = resolved.SelectedSource
return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
}
func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultBeeBenchPowerDir
}
resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "power-fit", logFunc)
if err != nil {
return "", err
}
opts.ServerPowerSource = resolved.SelectedSource
return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
}
func (a *App) RunNvidiaPowerSourceAutotuneCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultBeeBenchAutotuneDir
}
return a.sat.RunNvidiaPowerSourceAutotune(ctx, baseDir, opts, benchmarkKind, logFunc)
}
func (a *App) LoadBenchmarkPowerAutotune() (*platform.BenchmarkPowerAutotuneConfig, error) {
return platform.LoadBenchmarkPowerAutotuneConfig(DefaultBeeBenchPowerSourceConfigPath)
}
func (a *App) ensureBenchmarkPowerAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (platform.BenchmarkPowerAutotuneConfig, error) {
cfgPath := platform.BenchmarkPowerSourceConfigPath(baseDir)
if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath); err == nil {
if logFunc != nil {
logFunc(fmt.Sprintf("benchmark autotune: using saved server power source %s", cfg.SelectedSource))
}
return *cfg, nil
}
if logFunc != nil {
logFunc("benchmark autotune: no saved power source config, running autotune first")
}
autotuneDir := filepath.Join(filepath.Dir(baseDir), "autotune")
if _, err := a.RunNvidiaPowerSourceAutotuneCtx(ctx, autotuneDir, opts, benchmarkKind, logFunc); err != nil {
return platform.BenchmarkPowerAutotuneConfig{}, err
}
cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
if err != nil {
return platform.BenchmarkPowerAutotuneConfig{}, err
}
return *cfg, nil
}
func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, staggerSec, logFunc)
}
func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaTargetedPowerPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaPulseTestPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaBandwidthPack(ctx, baseDir, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaStressPack(ctx, baseDir, opts, logFunc)
}
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, 256, 1, logFunc)
}
func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunMemoryAcceptancePack(ctx, baseDir, sizeMB, passes, logFunc)
}
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunMemoryAcceptancePack(baseDir, nil)
return ActionResult{Title: "Memory SAT", Body: satResultBody(path)}, err
}
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
path, err := a.RunCPUAcceptancePack(baseDir, durationSec, nil)
return ActionResult{Title: "CPU SAT", Body: satResultBody(path)}, err
}
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, false, logFunc)
}
func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunStorageAcceptancePack(ctx, baseDir, extended, logFunc)
}
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunStorageAcceptancePack(baseDir, nil)
return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err
}
func (a *App) DetectGPUVendor() string {
return a.sat.DetectGPUVendor()
}
func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
return a.sat.ListAMDGPUs()
}
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
}
func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
}
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
path, err := a.RunAMDAcceptancePack(baseDir, nil)
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
}
func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
}
func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
}
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunFanStressTest(ctx, baseDir, opts)
}
func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
}
func (a *App) RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNCCLTests(ctx, baseDir, gpuIndices, logFunc)
}
func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
path, err := a.RunNCCLTests(ctx, DefaultSATBaseDir, nil, nil)
body := "Results: " + path
if err != nil && err != context.Canceled {
body += "\nERROR: " + err.Error()
}
return ActionResult{Title: "NCCL bandwidth test", Body: body}, err
}
func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) {
path, err := a.RunFanStressTest(ctx, "", opts)
body := formatFanStressResult(path)
if err != nil && err != context.Canceled {
body += "\nERROR: " + err.Error()
}
return ActionResult{Title: "GPU Platform Stress Test", Body: body}, err
}
// formatFanStressResult formats the summary.txt from a fan-stress run, including
// the per-step pass/fail display and the analysis section (throttling, max temps, fan response).
func formatFanStressResult(archivePath string) string {
if archivePath == "" {
return "No output produced."
}
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
raw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
if err != nil {
return "Archive written to " + archivePath
}
content := strings.TrimSpace(string(raw))
kv := parseKeyValueSummary(content)
var b strings.Builder
b.WriteString(formatSATDetail(content))
// Append analysis section.
var analysis []string
if v, ok := kv["throttling_detected"]; ok {
label := "NO"
if v == "true" {
label = "YES ← throttling detected during load"
}
analysis = append(analysis, "Throttling: "+label)
}
if v, ok := kv["max_gpu_temp_c"]; ok && v != "0.0" {
analysis = append(analysis, "Max GPU temp: "+v+"°C")
}
if v, ok := kv["max_cpu_temp_c"]; ok && v != "0.0" {
analysis = append(analysis, "Max CPU temp: "+v+"°C")
}
if v, ok := kv["fan_response_sec"]; ok && v != "N/A" && v != "-1.0" {
analysis = append(analysis, "Fan response: "+v+"s")
}
if len(analysis) > 0 {
b.WriteString("\n\n=== Analysis ===\n")
for _, line := range analysis {
b.WriteString(line + "\n")
}
}
return strings.TrimSpace(b.String())
}
// satResultBody reads summary.txt from the SAT run directory (archive path without .tar.gz)
// and returns a formatted human-readable result. Falls back to a plain message if unreadable.
func satResultBody(archivePath string) string {
if archivePath == "" {
return "No output produced."
}
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
raw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
if err != nil {
return "Archive written to " + archivePath
}
return formatSATDetail(strings.TrimSpace(string(raw)))
}

View File

@@ -0,0 +1,67 @@
package app
import (
"fmt"
"strings"
"bee/audit/internal/platform"
)
func (a *App) ListBeeServices() ([]string, error) {
return a.services.ListBeeServices()
}
func (a *App) ServiceState(name string) string {
return a.services.ServiceState(name)
}
func (a *App) ServiceStatus(name string) (string, error) {
return a.services.ServiceStatus(name)
}
func (a *App) ServiceStatusResult(name string) (ActionResult, error) {
body, err := a.services.ServiceStatus(name)
return ActionResult{Title: "service status: " + name, Body: bodyOr(body, "No status output.")}, err
}
func (a *App) ServiceDo(name string, action platform.ServiceAction) (string, error) {
return a.services.ServiceDo(name, action)
}
func (a *App) ServiceActionResult(name string, action platform.ServiceAction) (ActionResult, error) {
body, err := a.services.ServiceDo(name, action)
return ActionResult{Title: "service " + string(action) + ": " + name, Body: bodyOr(body, "Action completed.")}, err
}
func (a *App) TailFile(path string, lines int) string {
return a.tools.TailFile(path, lines)
}
func (a *App) CheckTools(names []string) []platform.ToolStatus {
return a.tools.CheckTools(names)
}
func (a *App) ToolCheckResult(names []string) ActionResult {
if len(names) == 0 {
return ActionResult{Title: "Required tools", Body: "No tools checked."}
}
var body strings.Builder
for _, tool := range a.tools.CheckTools(names) {
status := "MISSING"
if tool.OK {
status = "OK (" + tool.Path + ")"
}
fmt.Fprintf(&body, "- %s: %s\n", tool.Name, status)
}
return ActionResult{Title: "Required tools", Body: strings.TrimSpace(body.String())}
}
func (a *App) AuditLogTailResult() ActionResult {
logTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditLogPath, 40))
jsonTail := strings.TrimSpace(a.tools.TailFile(DefaultAuditJSONPath, 20))
body := strings.TrimSpace(logTail + "\n\n" + jsonTail)
if body == "" {
body = "No audit logs found."
}
return ActionResult{Title: "Audit log tail", Body: body}
}

View File

@@ -3,10 +3,11 @@ package app
import (
"os"
"path/filepath"
"strconv"
"sort"
"strconv"
"strings"
"bee/audit/internal/collector"
"bee/audit/internal/schema"
)
@@ -313,17 +314,20 @@ func statusSeverity(status string) int {
}
func matchesGPUVendor(dev schema.HardwarePCIeDevice, vendor string) bool {
if dev.DeviceClass == nil || !strings.Contains(strings.TrimSpace(*dev.DeviceClass), "Controller") && !strings.Contains(strings.TrimSpace(*dev.DeviceClass), "Accelerator") {
if dev.DeviceClass == nil || !strings.Contains(strings.TrimSpace(*dev.DeviceClass), "Display") && !strings.Contains(strings.TrimSpace(*dev.DeviceClass), "Video") {
return false
}
if dev.DeviceClass == nil {
return false
}
class := strings.TrimSpace(*dev.DeviceClass)
isGPUClass := strings.Contains(class, "Controller") || strings.Contains(class, "Accelerator") ||
strings.Contains(class, "Display") || strings.Contains(class, "Video")
if !isGPUClass {
return false
}
manufacturer := strings.ToLower(strings.TrimSpace(ptrString(dev.Manufacturer)))
switch vendor {
case "amd":
return strings.Contains(manufacturer, "advanced micro devices") || strings.Contains(manufacturer, "amd/ati")
return dev.VendorID != nil && *dev.VendorID == collector.AMDVendorID
case "nvidia":
return strings.Contains(manufacturer, "nvidia")
return dev.VendorID != nil && *dev.VendorID == collector.NvidiaVendorID
default:
return false
}

View File

@@ -5,6 +5,7 @@ import (
"path/filepath"
"testing"
"bee/audit/internal/collector"
"bee/audit/internal/schema"
)
@@ -46,10 +47,12 @@ func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
class := "DisplayController"
manufacturer := "Advanced Micro Devices, Inc. [AMD/ATI]"
amdVendorID := collector.AMDVendorID
snap := schema.HardwareSnapshot{
PCIeDevices: []schema.HardwarePCIeDevice{{
DeviceClass: &class,
Manufacturer: &manufacturer,
VendorID: &amdVendorID,
}},
}