release: v3.1

This commit is contained in:
2026-03-28 22:51:36 +03:00
parent 0dbfaf6121
commit 59a1d4b209
22 changed files with 1560 additions and 269 deletions

View File

@@ -1,11 +1,13 @@
package main package main
import ( import (
"context"
"flag" "flag"
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
"os" "os"
"runtime/debug"
"strings" "strings"
"bee/audit/internal/app" "bee/audit/internal/app"
@@ -16,6 +18,37 @@ import (
var Version = "dev" var Version = "dev"
func buildLabel() string {
label := strings.TrimSpace(Version)
if label == "" {
label = "dev"
}
if info, ok := debug.ReadBuildInfo(); ok {
var revision string
var modified bool
for _, setting := range info.Settings {
switch setting.Key {
case "vcs.revision":
revision = setting.Value
case "vcs.modified":
modified = setting.Value == "true"
}
}
if revision != "" {
short := revision
if len(short) > 12 {
short = short[:12]
}
label += " (" + short
if modified {
label += "+"
}
label += ")"
}
}
return label
}
func main() { func main() {
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
} }
@@ -139,7 +172,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
return 0 return 0
} }
func runExport(args []string, stdout, stderr io.Writer) int { func runExport(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("export", flag.ContinueOnError) fs := flag.NewFlagSet("export", flag.ContinueOnError)
fs.SetOutput(stderr) fs.SetOutput(stderr)
@@ -299,6 +331,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{ if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
Title: *title, Title: *title,
BuildLabel: buildLabel(),
AuditPath: *auditPath, AuditPath: *auditPath,
ExportDir: *exportDir, ExportDir: *exportDir,
App: app.New(platform.New()), App: app.New(platform.New()),
@@ -351,15 +384,15 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
case "nvidia": case "nvidia":
archive, err = application.RunNvidiaAcceptancePack("", logLine) archive, err = application.RunNvidiaAcceptancePack("", logLine)
case "memory": case "memory":
archive, err = application.RunMemoryAcceptancePack("", logLine) archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
case "storage": case "storage":
archive, err = application.RunStorageAcceptancePack("", logLine) archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", logLine)
case "cpu": case "cpu":
dur := *duration dur := *duration
if dur <= 0 { if dur <= 0 {
dur = 60 dur = 60
} }
archive, err = application.RunCPUAcceptancePack("", dur, logLine) archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine)
} }
if err != nil { if err != nil {
slog.Error("run sat", "target", target, "err", err) slog.Error("run sat", "target", target, "err", err)

View File

@@ -55,6 +55,8 @@ type networkManager interface {
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
SetInterfaceState(iface string, up bool) error SetInterfaceState(iface string, up bool) error
GetInterfaceState(iface string) (bool, error) GetInterfaceState(iface string) (bool, error)
CaptureNetworkSnapshot() (platform.NetworkSnapshot, error)
RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error
} }
type serviceManager interface { type serviceManager interface {
@@ -78,7 +80,7 @@ type installer interface {
ListInstallDisks() ([]platform.InstallDisk, error) ListInstallDisks() ([]platform.InstallDisk, error)
InstallToDisk(ctx context.Context, device string, logFile string) error InstallToDisk(ctx context.Context, device string, logFile string) error
IsLiveMediaInRAM() bool IsLiveMediaInRAM() bool
RunInstallToRAM(logFunc func(string)) error RunInstallToRAM(ctx context.Context, logFunc func(string)) error
} }
type GPUPresenceResult struct { type GPUPresenceResult struct {
@@ -98,23 +100,23 @@ func (a *App) IsLiveMediaInRAM() bool {
return a.installer.IsLiveMediaInRAM() return a.installer.IsLiveMediaInRAM()
} }
func (a *App) RunInstallToRAM(logFunc func(string)) error { func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
return a.installer.RunInstallToRAM(logFunc) return a.installer.RunInstallToRAM(ctx, logFunc)
} }
type satRunner interface { type satRunner interface {
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
ListNvidiaGPUs() ([]platform.NvidiaGPU, error) ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
DetectGPUVendor() string DetectGPUVendor() string
ListAMDGPUs() ([]platform.AMDGPUInfo, error) ListAMDGPUs() ([]platform.AMDGPUInfo, error)
RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunSATStressPack(baseDir string, logFunc func(string)) (string, error) RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
} }
@@ -348,6 +350,14 @@ func (a *App) GetInterfaceState(iface string) (bool, error) {
return a.network.GetInterfaceState(iface) return a.network.GetInterfaceState(iface)
} }
func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
return a.network.CaptureNetworkSnapshot()
}
func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
return a.network.RestoreNetworkSnapshot(snapshot)
}
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) { func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
body, err := a.network.SetStaticIPv4(cfg) body, err := a.network.SetStaticIPv4(cfg)
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
@@ -496,10 +506,14 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
} }
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
}
func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
} }
return a.sat.RunMemoryAcceptancePack(baseDir, logFunc) return a.sat.RunMemoryAcceptancePack(ctx, baseDir, logFunc)
} }
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) { func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
@@ -508,10 +522,14 @@ func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error
} }
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) { func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
} }
return a.sat.RunCPUAcceptancePack(baseDir, durationSec, logFunc) return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
} }
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) { func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
@@ -520,10 +538,14 @@ func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (Actio
} }
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, logFunc)
}
func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
} }
return a.sat.RunStorageAcceptancePack(baseDir, logFunc) return a.sat.RunStorageAcceptancePack(ctx, baseDir, logFunc)
} }
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) { func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
@@ -540,10 +562,14 @@ func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
} }
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
}
func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
} }
return a.sat.RunAMDAcceptancePack(baseDir, logFunc) return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
} }
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) { func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
@@ -551,19 +577,31 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
} }
func (a *App) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunMemoryStressPack(baseDir, logFunc) return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
} }
func (a *App) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunSATStressPack(baseDir, logFunc) return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
} }
func (a *App) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) { func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
} }
return a.sat.RunAMDStressPack(baseDir, logFunc) return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
} }
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) { func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {

View File

@@ -43,8 +43,12 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
return f.setStaticIPv4Fn(cfg) return f.setStaticIPv4Fn(cfg)
} }
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil } func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil } func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
return platform.NetworkSnapshot{}, nil
}
func (f fakeNetwork) RestoreNetworkSnapshot(platform.NetworkSnapshot) error { return nil }
type fakeServices struct { type fakeServices struct {
serviceStatusFn func(string) (string, error) serviceStatusFn func(string) (string, error)
@@ -141,15 +145,15 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
return nil, nil return nil, nil
} }
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string, _ func(string)) (string, error) { func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
return f.runMemoryFn(baseDir) return f.runMemoryFn(baseDir)
} }
func (f fakeSAT) RunStorageAcceptancePack(baseDir string, _ func(string)) (string, error) { func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
return f.runStorageFn(baseDir) return f.runStorageFn(baseDir)
} }
func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int, _ func(string)) (string, error) { func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
if f.runCPUFn != nil { if f.runCPUFn != nil {
return f.runCPUFn(baseDir, durationSec) return f.runCPUFn(baseDir, durationSec)
} }
@@ -170,16 +174,22 @@ func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
return nil, nil return nil, nil
} }
func (f fakeSAT) RunAMDAcceptancePack(baseDir string, _ func(string)) (string, error) { func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
if f.runAMDPackFn != nil { if f.runAMDPackFn != nil {
return f.runAMDPackFn(baseDir) return f.runAMDPackFn(baseDir)
} }
return "", nil return "", nil
} }
func (f fakeSAT) RunAMDStressPack(_ string, _ func(string)) (string, error) { return "", nil } func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
func (f fakeSAT) RunMemoryStressPack(_ string, _ func(string)) (string, error) { return "", nil } return "", nil
func (f fakeSAT) RunSATStressPack(_ string, _ func(string)) (string, error) { return "", nil } }
func (f fakeSAT) RunMemoryStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunSATStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) { func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
return "", nil return "", nil

View File

@@ -1,6 +1,7 @@
package platform package platform
import ( import (
"context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@@ -18,7 +19,7 @@ func (s *System) IsLiveMediaInRAM() bool {
return strings.TrimSpace(string(out)) == "tmpfs" return strings.TrimSpace(string(out)) == "tmpfs"
} }
func (s *System) RunInstallToRAM(logFunc func(string)) error { func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
log := func(msg string) { log := func(msg string) {
if logFunc != nil { if logFunc != nil {
logFunc(msg) logFunc(msg)
@@ -56,10 +57,13 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
} }
for _, sf := range squashfsFiles { for _, sf := range squashfsFiles {
if err := ctx.Err(); err != nil {
return err
}
base := filepath.Base(sf) base := filepath.Base(sf)
dst := filepath.Join(dstDir, base) dst := filepath.Join(dstDir, base)
log(fmt.Sprintf("Copying %s to RAM...", base)) log(fmt.Sprintf("Copying %s to RAM...", base))
if err := copyFileLarge(sf, dst, log); err != nil { if err := copyFileLarge(ctx, sf, dst, log); err != nil {
return fmt.Errorf("copy %s: %v", base, err) return fmt.Errorf("copy %s: %v", base, err)
} }
log(fmt.Sprintf("Copied %s.", base)) log(fmt.Sprintf("Copied %s.", base))
@@ -77,9 +81,12 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
} }
log("Copying remaining medium files...") log("Copying remaining medium files...")
if err := cpDir("/run/live/medium", dstDir, log); err != nil { if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
log(fmt.Sprintf("Warning: partial copy: %v", err)) log(fmt.Sprintf("Warning: partial copy: %v", err))
} }
if err := ctx.Err(); err != nil {
return err
}
if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil { if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err)) log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
} }
@@ -88,7 +95,7 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
return nil return nil
} }
func copyFileLarge(src, dst string, logFunc func(string)) error { func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
in, err := os.Open(src) in, err := os.Open(src)
if err != nil { if err != nil {
return err return err
@@ -107,6 +114,9 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
var copied int64 var copied int64
buf := make([]byte, 4*1024*1024) buf := make([]byte, 4*1024*1024)
for { for {
if err := ctx.Err(); err != nil {
return err
}
n, err := in.Read(buf) n, err := in.Read(buf)
if n > 0 { if n > 0 {
if _, werr := out.Write(buf[:n]); werr != nil { if _, werr := out.Write(buf[:n]); werr != nil {
@@ -128,8 +138,11 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
return out.Sync() return out.Sync()
} }
func cpDir(src, dst string, logFunc func(string)) error { func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error { return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
if ctx.Err() != nil {
return ctx.Err()
}
if err != nil { if err != nil {
return nil return nil
} }
@@ -144,7 +157,7 @@ func cpDir(src, dst string, logFunc func(string)) error {
if _, err := os.Stat(target); err == nil { if _, err := os.Stat(target); err == nil {
return nil return nil
} }
return copyFileLarge(path, target, nil) return copyFileLarge(ctx, path, target, nil)
}) })
} }

View File

@@ -2,7 +2,10 @@ package platform
import ( import (
"bufio" "bufio"
"encoding/json"
"os" "os"
"os/exec"
"sort"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -23,6 +26,7 @@ type LiveMetricSample struct {
// TempReading is a named temperature sensor value. // TempReading is a named temperature sensor value.
type TempReading struct { type TempReading struct {
Name string `json:"name"` Name string `json:"name"`
Group string `json:"group,omitempty"`
Celsius float64 `json:"celsius"` Celsius float64 `json:"celsius"`
} }
@@ -43,10 +47,11 @@ func SampleLiveMetrics() LiveMetricSample {
fans, _ := sampleFanSpeeds() fans, _ := sampleFanSpeeds()
s.Fans = fans s.Fans = fans
// CPU/system temperature — returns 0 if unavailable s.Temps = append(s.Temps, sampleLiveTemperatureReadings()...)
cpuTemp := sampleCPUMaxTemp() if !hasTempGroup(s.Temps, "cpu") {
if cpuTemp > 0 { if cpuTemp := sampleCPUMaxTemp(); cpuTemp > 0 {
s.Temps = append(s.Temps, TempReading{Name: "CPU", Celsius: cpuTemp}) s.Temps = append(s.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: cpuTemp})
}
} }
// System power — returns 0 if unavailable // System power — returns 0 if unavailable
@@ -140,3 +145,181 @@ func sampleMemLoadPct() float64 {
used := total - avail used := total - avail
return float64(used) / float64(total) * 100 return float64(used) / float64(total) * 100
} }
func hasTempGroup(temps []TempReading, group string) bool {
for _, t := range temps {
if t.Group == group {
return true
}
}
return false
}
func sampleLiveTemperatureReadings() []TempReading {
if temps := sampleLiveTempsViaSensorsJSON(); len(temps) > 0 {
return temps
}
return sampleLiveTempsViaIPMI()
}
func sampleLiveTempsViaSensorsJSON() []TempReading {
out, err := exec.Command("sensors", "-j").Output()
if err != nil || len(out) == 0 {
return nil
}
var doc map[string]map[string]any
if err := json.Unmarshal(out, &doc); err != nil {
return nil
}
chips := make([]string, 0, len(doc))
for chip := range doc {
chips = append(chips, chip)
}
sort.Strings(chips)
temps := make([]TempReading, 0, len(chips))
seen := map[string]struct{}{}
for _, chip := range chips {
features := doc[chip]
featureNames := make([]string, 0, len(features))
for name := range features {
featureNames = append(featureNames, name)
}
sort.Strings(featureNames)
for _, name := range featureNames {
if strings.EqualFold(name, "Adapter") {
continue
}
feature, ok := features[name].(map[string]any)
if !ok {
continue
}
value, ok := firstTempInputValue(feature)
if !ok || value <= 0 || value > 150 {
continue
}
group := classifyLiveTempGroup(chip, name)
if group == "gpu" {
continue
}
label := strings.TrimSpace(name)
if label == "" {
continue
}
if group == "ambient" {
label = compactAmbientTempName(chip, label)
}
key := group + "\x00" + label
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
}
}
return temps
}
func sampleLiveTempsViaIPMI() []TempReading {
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
if err != nil || len(out) == 0 {
return nil
}
var temps []TempReading
seen := map[string]struct{}{}
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
parts := strings.Split(line, "|")
if len(parts) < 3 {
continue
}
name := strings.TrimSpace(parts[0])
if name == "" {
continue
}
unit := strings.ToLower(strings.TrimSpace(parts[2]))
if !strings.Contains(unit, "degrees") {
continue
}
raw := strings.TrimSpace(parts[1])
if raw == "" || strings.EqualFold(raw, "na") {
continue
}
value, err := strconv.ParseFloat(raw, 64)
if err != nil || value <= 0 || value > 150 {
continue
}
group := classifyLiveTempGroup("", name)
if group == "gpu" {
continue
}
label := name
if group == "ambient" {
label = compactAmbientTempName("", label)
}
key := group + "\x00" + label
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
}
return temps
}
func firstTempInputValue(feature map[string]any) (float64, bool) {
keys := make([]string, 0, len(feature))
for key := range feature {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
lower := strings.ToLower(key)
if !strings.Contains(lower, "temp") || !strings.HasSuffix(lower, "_input") {
continue
}
switch value := feature[key].(type) {
case float64:
return value, true
case string:
f, err := strconv.ParseFloat(value, 64)
if err == nil {
return f, true
}
}
}
return 0, false
}
func classifyLiveTempGroup(chip, name string) string {
text := strings.ToLower(strings.TrimSpace(chip + " " + name))
switch {
case strings.Contains(text, "gpu"), strings.Contains(text, "amdgpu"), strings.Contains(text, "nvidia"), strings.Contains(text, "adeon"):
return "gpu"
case strings.Contains(text, "coretemp"),
strings.Contains(text, "k10temp"),
strings.Contains(text, "zenpower"),
strings.Contains(text, "package id"),
strings.Contains(text, "x86_pkg_temp"),
strings.Contains(text, "tctl"),
strings.Contains(text, "tdie"),
strings.Contains(text, "cpu"),
strings.Contains(text, "peci"):
return "cpu"
default:
return "ambient"
}
}
func compactAmbientTempName(chip, name string) string {
chip = strings.TrimSpace(chip)
name = strings.TrimSpace(name)
if chip == "" || strings.EqualFold(chip, name) {
return name
}
if strings.Contains(strings.ToLower(name), strings.ToLower(chip)) {
return name
}
return chip + " / " + name
}

View File

@@ -0,0 +1,44 @@
package platform
import "testing"
func TestFirstTempInputValue(t *testing.T) {
feature := map[string]any{
"temp1_input": 61.5,
"temp1_max": 80.0,
}
got, ok := firstTempInputValue(feature)
if !ok {
t.Fatal("expected value")
}
if got != 61.5 {
t.Fatalf("got %v want 61.5", got)
}
}
func TestClassifyLiveTempGroup(t *testing.T) {
tests := []struct {
chip string
name string
want string
}{
{chip: "coretemp-isa-0000", name: "Package id 0", want: "cpu"},
{chip: "amdgpu-pci-4300", name: "edge", want: "gpu"},
{chip: "nvme-pci-0100", name: "Composite", want: "ambient"},
{chip: "acpitz-acpi-0", name: "temp1", want: "ambient"},
}
for _, tc := range tests {
if got := classifyLiveTempGroup(tc.chip, tc.name); got != tc.want {
t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", tc.chip, tc.name, got, tc.want)
}
}
}
func TestCompactAmbientTempName(t *testing.T) {
if got := compactAmbientTempName("nvme-pci-0100", "Composite"); got != "nvme-pci-0100 / Composite" {
t.Fatalf("got %q", got)
}
if got := compactAmbientTempName("", "Inlet Temp"); got != "Inlet Temp" {
t.Fatalf("got %q", got)
}
}

View File

@@ -2,6 +2,7 @@ package platform
import ( import (
"bytes" "bytes"
"errors"
"fmt" "fmt"
"os" "os"
"os/exec" "os/exec"
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
out := make([]InterfaceInfo, 0, len(names)) out := make([]InterfaceInfo, 0, len(names))
for _, name := range names { for _, name := range names {
state := "unknown" state := "unknown"
if raw, err := exec.Command("ip", "-o", "link", "show", name).Output(); err == nil { if up, err := interfaceAdminState(name); err == nil {
fields := strings.Fields(string(raw)) if up {
if len(fields) >= 9 { state = "up"
state = fields[8] } else {
state = "down"
} }
} }
var ipv4 []string ipv4, err := interfaceIPv4Addrs(name)
if raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", name).Output(); err == nil { if err != nil {
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") { ipv4 = nil
fields := strings.Fields(line)
if len(fields) >= 4 {
ipv4 = append(ipv4, fields[3])
}
}
} }
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4}) out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
@@ -55,6 +52,109 @@ func (s *System) DefaultRoute() string {
return "" return ""
} }
func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
names, err := listInterfaceNames()
if err != nil {
return NetworkSnapshot{}, err
}
snapshot := NetworkSnapshot{
Interfaces: make([]NetworkInterfaceSnapshot, 0, len(names)),
}
for _, name := range names {
up, err := interfaceAdminState(name)
if err != nil {
return NetworkSnapshot{}, err
}
ipv4, err := interfaceIPv4Addrs(name)
if err != nil {
return NetworkSnapshot{}, err
}
snapshot.Interfaces = append(snapshot.Interfaces, NetworkInterfaceSnapshot{
Name: name,
Up: up,
IPv4: ipv4,
})
}
if raw, err := exec.Command("ip", "route", "show", "default").Output(); err == nil {
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
line = strings.TrimSpace(line)
if line != "" {
snapshot.DefaultRoutes = append(snapshot.DefaultRoutes, line)
}
}
}
if raw, err := os.ReadFile("/etc/resolv.conf"); err == nil {
snapshot.ResolvConf = string(raw)
}
return snapshot, nil
}
func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
var errs []string
for _, iface := range snapshot.Interfaces {
if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
continue
}
if err := exec.Command("ip", "addr", "flush", "dev", iface.Name).Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
}
for _, cidr := range iface.IPv4 {
if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
detail := strings.TrimSpace(string(raw))
if detail != "" {
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
} else {
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
}
}
}
state := "down"
if iface.Up {
state = "up"
}
if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
}
}
if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
var exitErr *exec.ExitError
if !errors.As(err, &exitErr) {
errs = append(errs, fmt.Sprintf("clear default route: %v", err))
}
}
for _, route := range snapshot.DefaultRoutes {
fields := strings.Fields(route)
if len(fields) == 0 {
continue
}
args := append([]string{"route", "add"}, fields...)
if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
detail := strings.TrimSpace(string(raw))
if detail != "" {
errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
} else {
errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
}
}
}
if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
}
if len(errs) > 0 {
return errors.New(strings.Join(errs, "; "))
}
return nil
}
func (s *System) DHCPOne(iface string) (string, error) { func (s *System) DHCPOne(iface string) (string, error) {
var out bytes.Buffer var out bytes.Buffer
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil { if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
@@ -142,12 +242,52 @@ func (s *System) SetInterfaceState(iface string, up bool) error {
// GetInterfaceState returns true if the interface is UP. // GetInterfaceState returns true if the interface is UP.
func (s *System) GetInterfaceState(iface string) (bool, error) { func (s *System) GetInterfaceState(iface string) (bool, error) {
raw, err := os.ReadFile(fmt.Sprintf("/sys/class/net/%s/operstate", iface)) return interfaceAdminState(iface)
}
func interfaceAdminState(iface string) (bool, error) {
raw, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
if err != nil { if err != nil {
return false, err return false, err
} }
state := strings.TrimSpace(string(raw)) return parseInterfaceAdminState(string(raw))
return state == "up", nil }
func parseInterfaceAdminState(raw string) (bool, error) {
start := strings.IndexByte(raw, '<')
if start == -1 {
return false, fmt.Errorf("ip link output missing flags")
}
end := strings.IndexByte(raw[start+1:], '>')
if end == -1 {
return false, fmt.Errorf("ip link output missing flag terminator")
}
flags := strings.Split(raw[start+1:start+1+end], ",")
for _, flag := range flags {
if strings.TrimSpace(flag) == "UP" {
return true, nil
}
}
return false, nil
}
func interfaceIPv4Addrs(iface string) ([]string, error) {
raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return nil, nil
}
return nil, err
}
var ipv4 []string
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
fields := strings.Fields(line)
if len(fields) >= 4 {
ipv4 = append(ipv4, fields[3])
}
}
return ipv4, nil
} }
func listInterfaceNames() ([]string, error) { func listInterfaceNames() ([]string, error) {

View File

@@ -0,0 +1,46 @@
package platform
import "testing"
func TestParseInterfaceAdminState(t *testing.T) {
tests := []struct {
name string
raw string
want bool
wantErr bool
}{
{
name: "admin up with no carrier",
raw: "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
want: true,
},
{
name: "admin down",
raw: "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
want: false,
},
{
name: "malformed output",
raw: "2: enp1s0: mtu 1500 state DOWN\n",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := parseInterfaceAdminState(tt.raw)
if tt.wantErr {
if err == nil {
t.Fatal("expected error")
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != tt.want {
t.Fatalf("got %v want %v", got, tt.want)
}
})
}
}

View File

@@ -90,6 +90,12 @@ func (s *System) DetectGPUVendor() string {
if _, err := os.Stat("/dev/kfd"); err == nil { if _, err := os.Stat("/dev/kfd"); err == nil {
return "amd" return "amd"
} }
if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
text := strings.ToLower(string(raw))
if strings.Contains(text, "advanced micro devices") || strings.Contains(text, "amd/ati") {
return "amd"
}
}
return "" return ""
} }
@@ -117,8 +123,8 @@ func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
} }
// RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi. // RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
return runAcceptancePack(baseDir, "gpu-amd", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "gpu-amd", []satJob{
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}}, {name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}}, {name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}}, {name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
@@ -128,14 +134,20 @@ func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (str
// RunAMDStressPack runs an AMD GPU burn-in pack. // RunAMDStressPack runs an AMD GPU burn-in pack.
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern. // Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
func (s *System) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
seconds := envInt("BEE_AMD_STRESS_SECONDS", 300) seconds := durationSec
return runAcceptancePack(baseDir, "gpu-amd-stress", []satJob{ if seconds <= 0 {
seconds = envInt("BEE_AMD_STRESS_SECONDS", 300)
}
if err := ensureAMDRuntimeReady(); err != nil {
return "", err
}
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}}, {name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}}, {name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{ {name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
"rocm-smi", "--showtemp", "--showpower", "bash", "-lc",
fmt.Sprintf("--duration=%d", seconds), fmt.Sprintf("end=$((SECONDS+%d)); while [ \"$SECONDS\" -lt \"$end\" ]; do rocm-smi --showtemp --showpower --csv; sleep 1; done", seconds),
}}, }},
}, logFunc) }, logFunc)
} }
@@ -191,7 +203,7 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
} }
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc) return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
} }
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM. // RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
@@ -202,24 +214,27 @@ func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc) return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
} }
func (s *System) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128) sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
passes := envInt("BEE_MEMTESTER_PASSES", 1) passes := envInt("BEE_MEMTESTER_PASSES", 1)
return runAcceptancePack(baseDir, "memory", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
{name: "01-free-before.log", cmd: []string{"free", "-h"}}, {name: "01-free-before.log", cmd: []string{"free", "-h"}},
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}}, {name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
{name: "03-free-after.log", cmd: []string{"free", "-h"}}, {name: "03-free-after.log", cmd: []string{"free", "-h"}},
}, logFunc) }, logFunc)
} }
func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
seconds := envInt("BEE_VM_STRESS_SECONDS", 300) seconds := durationSec
if seconds <= 0 {
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
}
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB. // Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
sizeArg := "80%" sizeArg := "80%"
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 { if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
sizeArg = fmt.Sprintf("%dM", mb) sizeArg = fmt.Sprintf("%dM", mb)
} }
return runAcceptancePack(baseDir, "memory-stress", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
{name: "01-free-before.log", cmd: []string{"free", "-h"}}, {name: "01-free-before.log", cmd: []string{"free", "-h"}},
{name: "02-stress-ng-vm.log", cmd: []string{ {name: "02-stress-ng-vm.log", cmd: []string{
"stress-ng", "--vm", "1", "stress-ng", "--vm", "1",
@@ -232,24 +247,27 @@ func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (stri
}, logFunc) }, logFunc)
} }
func (s *System) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
seconds := envInt("BEE_SAT_STRESS_SECONDS", 300) seconds := durationSec
if seconds <= 0 {
seconds = envInt("BEE_SAT_STRESS_SECONDS", 300)
}
cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"} cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"}
if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 { if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 {
cmd = append(cmd, "-M", fmt.Sprintf("%d", mb)) cmd = append(cmd, "-M", fmt.Sprintf("%d", mb))
} }
return runAcceptancePack(baseDir, "sat-stress", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "sat-stress", []satJob{
{name: "01-free-before.log", cmd: []string{"free", "-h"}}, {name: "01-free-before.log", cmd: []string{"free", "-h"}},
{name: "02-stressapptest.log", cmd: cmd}, {name: "02-stressapptest.log", cmd: cmd},
{name: "03-free-after.log", cmd: []string{"free", "-h"}}, {name: "03-free-after.log", cmd: []string{"free", "-h"}},
}, logFunc) }, logFunc)
} }
func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) { func (s *System) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if durationSec <= 0 { if durationSec <= 0 {
durationSec = 60 durationSec = 60
} }
return runAcceptancePack(baseDir, "cpu", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "cpu", []satJob{
{name: "01-lscpu.log", cmd: []string{"lscpu"}}, {name: "01-lscpu.log", cmd: []string{"lscpu"}},
{name: "02-sensors-before.log", cmd: []string{"sensors"}}, {name: "02-sensors-before.log", cmd: []string{"sensors"}},
{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}}, {name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
@@ -257,7 +275,7 @@ func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc f
}, logFunc) }, logFunc)
} }
func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if baseDir == "" { if baseDir == "" {
baseDir = "/var/log/bee-sat" baseDir = "/var/log/bee-sat"
} }
@@ -285,11 +303,17 @@ func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string))
} }
for index, devPath := range devices { for index, devPath := range devices {
if ctx.Err() != nil {
break
}
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath)) prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
commands := storageSATCommands(devPath) commands := storageSATCommands(devPath)
for cmdIndex, job := range commands { for cmdIndex, job := range commands {
if ctx.Err() != nil {
break
}
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name) name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
out, err := runSATCommand(verboseLog, job.name, job.cmd, logFunc) out, err := runSATCommandCtx(ctx, verboseLog, job.name, job.cmd, nil, logFunc)
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil { if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
return "", writeErr return "", writeErr
} }
@@ -338,49 +362,6 @@ func nvidiaSATJobs() []satJob {
} }
} }
func runAcceptancePack(baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
if baseDir == "" {
baseDir = "/var/log/bee-sat"
}
ts := time.Now().UTC().Format("20060102-150405")
runDir := filepath.Join(baseDir, prefix+"-"+ts)
if err := os.MkdirAll(runDir, 0755); err != nil {
return "", err
}
verboseLog := filepath.Join(runDir, "verbose.log")
var summary strings.Builder
stats := satStats{}
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
for _, job := range jobs {
var out []byte
var err error
cmd := make([]string, 0, len(job.cmd))
for _, arg := range job.cmd {
cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
}
out, err = runSATCommand(verboseLog, job.name, cmd, logFunc)
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
return "", writeErr
}
status, rc := classifySATResult(job.name, out, err)
stats.Add(status)
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
}
writeSATStats(&summary, stats)
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
return "", err
}
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
if err := createTarGz(archive, runDir); err != nil {
return "", err
}
return archive, nil
}
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob { func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
if diagLevel < 1 || diagLevel > 4 { if diagLevel < 1 || diagLevel > 4 {
diagLevel = 3 diagLevel = 3
@@ -402,6 +383,9 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
} }
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) { func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
if ctx == nil {
ctx = context.Background()
}
if baseDir == "" { if baseDir == "" {
baseDir = "/var/log/bee-sat" baseDir = "/var/log/bee-sat"
} }
@@ -649,6 +633,20 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm") return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
} }
func ensureAMDRuntimeReady() error {
if _, err := os.Stat("/dev/kfd"); err == nil {
return nil
}
if raw, err := os.ReadFile("/sys/module/amdgpu/initstate"); err == nil {
state := strings.TrimSpace(string(raw))
if strings.EqualFold(state, "live") {
return nil
}
return fmt.Errorf("AMD driver is present but not initialized: amdgpu initstate=%q", state)
}
return errors.New("AMD GPUs are present but the runtime is not initialized: /dev/kfd is missing and amdgpu is not loaded")
}
func rocmSMIExecutableCandidates() []string { func rocmSMIExecutableCandidates() []string {
return expandExistingPaths(rocmSMIExecutableGlobs) return expandExistingPaths(rocmSMIExecutableGlobs)
} }

View File

@@ -2,10 +2,12 @@ package platform
import ( import (
"context" "context"
"encoding/json"
"fmt" "fmt"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"sort"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -304,10 +306,19 @@ func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
// sampleFanSpeeds reads fan RPM values from ipmitool sdr. // sampleFanSpeeds reads fan RPM values from ipmitool sdr.
func sampleFanSpeeds() ([]FanReading, error) { func sampleFanSpeeds() ([]FanReading, error) {
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output() out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
if err == nil {
if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
return fans, nil
}
}
fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
if len(fans) > 0 {
return fans, nil
}
if err != nil { if err != nil {
return nil, err return nil, err
} }
return parseFanSpeeds(string(out)), nil return nil, sensorsErr
} }
// parseFanSpeeds parses "ipmitool sdr type Fan" output. // parseFanSpeeds parses "ipmitool sdr type Fan" output.
@@ -316,18 +327,21 @@ func parseFanSpeeds(raw string) []FanReading {
var fans []FanReading var fans []FanReading
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") { for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
parts := strings.Split(line, "|") parts := strings.Split(line, "|")
if len(parts) < 3 { if len(parts) < 2 {
continue continue
} }
unit := strings.TrimSpace(parts[2]) unit := ""
if !strings.EqualFold(unit, "RPM") { if len(parts) >= 3 {
continue unit = strings.TrimSpace(parts[2])
} }
valStr := strings.TrimSpace(parts[1]) valStr := strings.TrimSpace(parts[1])
if !strings.EqualFold(unit, "RPM") && !strings.Contains(strings.ToUpper(valStr), "RPM") {
continue
}
if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" { if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
continue continue
} }
val, err := strconv.ParseFloat(valStr, 64) val, err := parseFanRPMValue(valStr)
if err != nil { if err != nil {
continue continue
} }
@@ -339,6 +353,84 @@ func parseFanSpeeds(raw string) []FanReading {
return fans return fans
} }
func parseFanRPMValue(raw string) (float64, error) {
fields := strings.Fields(strings.TrimSpace(strings.ReplaceAll(raw, ",", "")))
if len(fields) == 0 {
return 0, strconv.ErrSyntax
}
return strconv.ParseFloat(fields[0], 64)
}
func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
out, err := exec.Command("sensors", "-j").Output()
if err != nil || len(out) == 0 {
return nil, err
}
var doc map[string]map[string]any
if err := json.Unmarshal(out, &doc); err != nil {
return nil, err
}
chips := make([]string, 0, len(doc))
for chip := range doc {
chips = append(chips, chip)
}
sort.Strings(chips)
var fans []FanReading
seen := map[string]struct{}{}
for _, chip := range chips {
features := doc[chip]
names := make([]string, 0, len(features))
for name := range features {
names = append(names, name)
}
sort.Strings(names)
for _, name := range names {
feature, ok := features[name].(map[string]any)
if !ok {
continue
}
rpm, ok := firstFanInputValue(feature)
if !ok || rpm <= 0 {
continue
}
label := strings.TrimSpace(name)
if chip != "" && !strings.Contains(strings.ToLower(label), strings.ToLower(chip)) {
label = chip + " / " + label
}
if _, ok := seen[label]; ok {
continue
}
seen[label] = struct{}{}
fans = append(fans, FanReading{Name: label, RPM: rpm})
}
}
return fans, nil
}
func firstFanInputValue(feature map[string]any) (float64, bool) {
keys := make([]string, 0, len(feature))
for key := range feature {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
lower := strings.ToLower(key)
if !strings.Contains(lower, "fan") || !strings.HasSuffix(lower, "_input") {
continue
}
switch value := feature[key].(type) {
case float64:
return value, true
case string:
f, err := strconv.ParseFloat(value, 64)
if err == nil {
return f, true
}
}
}
return 0, false
}
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors. // sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
func sampleCPUMaxTemp() float64 { func sampleCPUMaxTemp() float64 {
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output() out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()

View File

@@ -0,0 +1,27 @@
package platform
import "testing"
func TestParseFanSpeeds(t *testing.T) {
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
got := parseFanSpeeds(raw)
if len(got) != 2 {
t.Fatalf("fans=%d want 2 (%v)", len(got), got)
}
if got[0].Name != "FAN1" || got[0].RPM != 2400 {
t.Fatalf("fan0=%+v", got[0])
}
if got[1].Name != "FAN2" || got[1].RPM != 1800 {
t.Fatalf("fan1=%+v", got[1])
}
}
func TestFirstFanInputValue(t *testing.T) {
feature := map[string]any{
"fan1_input": 9200.0,
}
got, ok := firstFanInputValue(feature)
if !ok || got != 9200 {
t.Fatalf("got=%v ok=%v", got, ok)
}
}

View File

@@ -8,6 +8,18 @@ type InterfaceInfo struct {
IPv4 []string IPv4 []string
} }
type NetworkInterfaceSnapshot struct {
Name string
Up bool
IPv4 []string
}
type NetworkSnapshot struct {
Interfaces []NetworkInterfaceSnapshot
DefaultRoutes []string
ResolvConf string
}
type ServiceAction string type ServiceAction string
const ( const (

View File

@@ -9,6 +9,7 @@ import (
"net/http" "net/http"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings"
"sync/atomic" "sync/atomic"
"time" "time"
@@ -152,11 +153,12 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
} }
var body struct { var body struct {
Duration int `json:"duration"` Duration int `json:"duration"`
DiagLevel int `json:"diag_level"` DiagLevel int `json:"diag_level"`
GPUIndices []int `json:"gpu_indices"` GPUIndices []int `json:"gpu_indices"`
Profile string `json:"profile"`
DisplayName string `json:"display_name"`
} }
body.DiagLevel = 1
if r.ContentLength > 0 { if r.ContentLength > 0 {
_ = json.NewDecoder(r.Body).Decode(&body) _ = json.NewDecoder(r.Body).Decode(&body)
} }
@@ -172,11 +174,16 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
Status: TaskPending, Status: TaskPending,
CreatedAt: time.Now(), CreatedAt: time.Now(),
params: taskParams{ params: taskParams{
Duration: body.Duration, Duration: body.Duration,
DiagLevel: body.DiagLevel, DiagLevel: body.DiagLevel,
GPUIndices: body.GPUIndices, GPUIndices: body.GPUIndices,
BurnProfile: body.Profile,
DisplayName: body.DisplayName,
}, },
} }
if strings.TrimSpace(body.DisplayName) != "" {
t.Name = body.DisplayName
}
globalQueue.enqueue(t) globalQueue.enqueue(t)
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID}) writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
} }
@@ -320,18 +327,21 @@ func (h *handler) handleAPINetworkDHCP(w http.ResponseWriter, r *http.Request) {
} }
_ = json.NewDecoder(r.Body).Decode(&req) _ = json.NewDecoder(r.Body).Decode(&req)
var result app.ActionResult result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
var err error if req.Interface == "" || req.Interface == "all" {
if req.Interface == "" || req.Interface == "all" { return h.opts.App.DHCPAllResult()
result, err = h.opts.App.DHCPAllResult() }
} else { return h.opts.App.DHCPOneResult(req.Interface)
result, err = h.opts.App.DHCPOneResult(req.Interface) })
}
if err != nil { if err != nil {
writeError(w, http.StatusInternalServerError, err.Error()) writeError(w, http.StatusInternalServerError, err.Error())
return return
} }
writeJSON(w, map[string]string{"status": "ok", "output": result.Body}) writeJSON(w, map[string]any{
"status": "ok",
"output": result.Body,
"rollback_in": int(netRollbackTimeout.Seconds()),
})
} }
func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
@@ -357,12 +367,18 @@ func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request)
Gateway: req.Gateway, Gateway: req.Gateway,
DNS: req.DNS, DNS: req.DNS,
} }
result, err := h.opts.App.SetStaticIPv4Result(cfg) result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
return h.opts.App.SetStaticIPv4Result(cfg)
})
if err != nil { if err != nil {
writeError(w, http.StatusInternalServerError, err.Error()) writeError(w, http.StatusInternalServerError, err.Error())
return return
} }
writeJSON(w, map[string]string{"status": "ok", "output": result.Body}) writeJSON(w, map[string]any{
"status": "ok",
"output": result.Body,
"rollback_in": int(netRollbackTimeout.Seconds()),
})
} }
// ── Export ──────────────────────────────────────────────────────────────────── // ── Export ────────────────────────────────────────────────────────────────────
@@ -421,6 +437,13 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
writeError(w, http.StatusServiceUnavailable, "app not configured") writeError(w, http.StatusServiceUnavailable, "app not configured")
return return
} }
h.installMu.Lock()
installRunning := h.installJob != nil && !h.installJob.isDone()
h.installMu.Unlock()
if installRunning {
writeError(w, http.StatusConflict, "install to disk is already running")
return
}
t := &Task{ t := &Task{
ID: newJobID("install-to-ram"), ID: newJobID("install-to-ram"),
Name: "Install to RAM", Name: "Install to RAM",
@@ -528,6 +551,10 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
writeError(w, http.StatusBadRequest, "device not in install candidate list") writeError(w, http.StatusBadRequest, "device not in install candidate list")
return return
} }
if globalQueue.hasActiveTarget("install-to-ram") {
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
return
}
h.installMu.Lock() h.installMu.Lock()
if h.installJob != nil && !h.installJob.isDone() { if h.installJob != nil && !h.installJob.isDone() {
@@ -576,9 +603,11 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
// Feed server ring buffers // Feed server ring buffers
for _, t := range sample.Temps { for _, t := range sample.Temps {
if t.Name == "CPU" { switch t.Group {
h.ringCPUTemp.push(t.Celsius) case "cpu":
break h.pushNamedMetricRing(&h.cpuTempRings, t.Name, t.Celsius)
case "ambient":
h.pushNamedMetricRing(&h.ambientTempRings, t.Name, t.Celsius)
} }
} }
h.ringPower.push(sample.PowerW) h.ringPower.push(sample.PowerW)
@@ -623,6 +652,23 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
} }
} }
func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
if name == "" {
return
}
for _, item := range *dst {
if item != nil && item.Name == name && item.Ring != nil {
item.Ring.push(value)
return
}
}
*dst = append(*dst, &namedMetricsRing{
Name: name,
Ring: newMetricsRing(120),
})
(*dst)[len(*dst)-1].Ring.push(value)
}
// ── Network toggle ──────────────────────────────────────────────────────────── // ── Network toggle ────────────────────────────────────────────────────────────
const netRollbackTimeout = 60 * time.Second const netRollbackTimeout = 60 * time.Second
@@ -646,33 +692,14 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
return return
} }
if err := h.opts.App.SetInterfaceState(req.Iface, !wasUp); err != nil { if _, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
err := h.opts.App.SetInterfaceState(req.Iface, !wasUp)
return app.ActionResult{}, err
}); err != nil {
writeError(w, http.StatusInternalServerError, err.Error()) writeError(w, http.StatusInternalServerError, err.Error())
return return
} }
// Cancel any existing pending change (rollback it first).
h.pendingNetMu.Lock()
if h.pendingNet != nil {
prev := h.pendingNet
prev.mu.Lock()
prev.timer.Stop()
_ = h.opts.App.SetInterfaceState(prev.iface, prev.wasUp)
prev.mu.Unlock()
}
pnc := &pendingNetChange{iface: req.Iface, wasUp: wasUp}
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
_ = h.opts.App.SetInterfaceState(req.Iface, wasUp)
h.pendingNetMu.Lock()
if h.pendingNet == pnc {
h.pendingNet = nil
}
h.pendingNetMu.Unlock()
})
h.pendingNet = pnc
h.pendingNetMu.Unlock()
newState := "up" newState := "up"
if wasUp { if wasUp {
newState = "down" newState = "down"
@@ -684,6 +711,42 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
}) })
} }
func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, error)) (app.ActionResult, error) {
if h.opts.App == nil {
return app.ActionResult{}, fmt.Errorf("app not configured")
}
if err := h.rollbackPendingNetworkChange(); err != nil && err.Error() != "no pending network change" {
return app.ActionResult{}, err
}
snapshot, err := h.opts.App.CaptureNetworkSnapshot()
if err != nil {
return app.ActionResult{}, err
}
result, err := apply()
if err != nil {
return result, err
}
pnc := &pendingNetChange{snapshot: snapshot}
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
_ = h.opts.App.RestoreNetworkSnapshot(snapshot)
h.pendingNetMu.Lock()
if h.pendingNet == pnc {
h.pendingNet = nil
}
h.pendingNetMu.Unlock()
})
h.pendingNetMu.Lock()
h.pendingNet = pnc
h.pendingNetMu.Unlock()
return result, nil
}
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) { func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
h.pendingNetMu.Lock() h.pendingNetMu.Lock()
pnc := h.pendingNet pnc := h.pendingNet
@@ -698,19 +761,30 @@ func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request
} }
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) { func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
if err := h.rollbackPendingNetworkChange(); err != nil {
if err.Error() == "no pending network change" {
writeError(w, http.StatusConflict, err.Error())
return
}
writeError(w, http.StatusInternalServerError, err.Error())
return
}
writeJSON(w, map[string]string{"status": "rolled back"})
}
func (h *handler) rollbackPendingNetworkChange() error {
h.pendingNetMu.Lock() h.pendingNetMu.Lock()
pnc := h.pendingNet pnc := h.pendingNet
h.pendingNet = nil h.pendingNet = nil
h.pendingNetMu.Unlock() h.pendingNetMu.Unlock()
if pnc == nil { if pnc == nil {
writeError(w, http.StatusConflict, "no pending network change") return fmt.Errorf("no pending network change")
return
} }
pnc.mu.Lock() pnc.mu.Lock()
pnc.timer.Stop() pnc.timer.Stop()
pnc.mu.Unlock() pnc.mu.Unlock()
if h.opts.App != nil { if h.opts.App != nil {
_ = h.opts.App.SetInterfaceState(pnc.iface, pnc.wasUp) return h.opts.App.RestoreNetworkSnapshot(pnc.snapshot)
} }
writeJSON(w, map[string]string{"status": "rolled back"}) return nil
} }

View File

@@ -1,18 +1,21 @@
package webui package webui
import ( import (
"os"
"strings"
"sync" "sync"
"time" "time"
) )
// jobState holds the output lines and completion status of an async job. // jobState holds the output lines and completion status of an async job.
type jobState struct { type jobState struct {
lines []string lines []string
done bool done bool
err string err string
mu sync.Mutex mu sync.Mutex
subs []chan string subs []chan string
cancel func() // optional cancel function; nil if job is not cancellable cancel func() // optional cancel function; nil if job is not cancellable
logPath string
} }
// abort cancels the job if it has a cancel function and is not yet done. // abort cancels the job if it has a cancel function and is not yet done.
@@ -30,6 +33,9 @@ func (j *jobState) append(line string) {
j.mu.Lock() j.mu.Lock()
defer j.mu.Unlock() defer j.mu.Unlock()
j.lines = append(j.lines, line) j.lines = append(j.lines, line)
if j.logPath != "" {
appendJobLog(j.logPath, line)
}
for _, ch := range j.subs { for _, ch := range j.subs {
select { select {
case ch <- line: case ch <- line:
@@ -100,3 +106,32 @@ func (m *jobManager) get(id string) (*jobState, bool) {
j, ok := m.jobs[id] j, ok := m.jobs[id]
return j, ok return j, ok
} }
func newTaskJobState(logPath string) *jobState {
j := &jobState{logPath: logPath}
if logPath == "" {
return j
}
data, err := os.ReadFile(logPath)
if err != nil || len(data) == 0 {
return j
}
lines := strings.Split(strings.ReplaceAll(string(data), "\r\n", "\n"), "\n")
if len(lines) > 0 && lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
j.lines = append(j.lines, lines...)
return j
}
func appendJobLog(path, line string) {
if path == "" {
return
}
f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return
}
defer f.Close()
_, _ = f.WriteString(line + "\n")
}

View File

@@ -83,10 +83,10 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
` `
} }
func layoutNav(active string) string { func layoutNav(active string, buildLabel string) string {
items := []struct{ id, label, href, onclick string }{ items := []struct{ id, label, href, onclick string }{
{"dashboard", "Dashboard", "/", ""}, {"dashboard", "Dashboard", "/", ""},
{"audit", "Audit", "#", "openAuditModal();return false;"}, {"audit", "Audit", "/audit", ""},
{"validate", "Validate", "/validate", ""}, {"validate", "Validate", "/validate", ""},
{"burn", "Burn", "/burn", ""}, {"burn", "Burn", "/burn", ""},
{"tasks", "Tasks", "/tasks", ""}, {"tasks", "Tasks", "/tasks", ""},
@@ -109,7 +109,12 @@ func layoutNav(active string) string {
cls, item.href, item.label)) cls, item.href, item.label))
} }
} }
b.WriteString(`</nav></aside>`) if strings.TrimSpace(buildLabel) == "" {
buildLabel = "dev"
}
b.WriteString(`</nav>`)
b.WriteString(`<div style="padding:12px 16px;border-top:1px solid rgba(255,255,255,.08);font-size:11px;color:rgba(255,255,255,.45)">Build ` + html.EscapeString(buildLabel) + `</div>`)
b.WriteString(`</aside>`)
return b.String() return b.String()
} }
@@ -121,6 +126,10 @@ func renderPage(page string, opts HandlerOptions) string {
pageID = "dashboard" pageID = "dashboard"
title = "Dashboard" title = "Dashboard"
body = renderDashboard(opts) body = renderDashboard(opts)
case "audit":
pageID = "audit"
title = "Audit"
body = renderAudit()
case "validate": case "validate":
pageID = "validate" pageID = "validate"
title = "Validate" title = "Validate"
@@ -173,7 +182,7 @@ func renderPage(page string, opts HandlerOptions) string {
} }
return layoutHead(opts.Title+" — "+title) + return layoutHead(opts.Title+" — "+title) +
layoutNav(pageID) + layoutNav(pageID, opts.BuildLabel) +
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` + `<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
body + body +
`</div></div>` + `</div></div>` +
@@ -191,6 +200,10 @@ func renderDashboard(opts HandlerOptions) string {
return b.String() return b.String()
} }
func renderAudit() string {
return `<div class="card"><div class="card-head">Audit Viewer <button class="btn btn-sm btn-secondary" style="margin-left:auto" onclick="openAuditModal()">Actions</button></div><div class="card-body" style="padding:0"><iframe class="viewer-frame" src="/viewer" title="Audit viewer"></iframe></div></div>`
}
func renderHardwareSummaryCard(opts HandlerOptions) string { func renderHardwareSummaryCard(opts HandlerOptions) string {
data, err := loadSnapshot(opts.AuditPath) data, err := loadSnapshot(opts.AuditPath)
if err != nil { if err != nil {
@@ -298,14 +311,14 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {
func renderAuditModal() string { func renderAuditModal() string {
return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center"> return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center">
<div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:700px;position:relative"> <div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:1100px;width:min(1100px,92vw);max-height:92vh;overflow:auto;position:relative">
<div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div> <div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div>
<div style="margin-bottom:12px;display:flex;gap:8px"> <div style="margin-bottom:12px;display:flex;gap:8px">
<button class="btn btn-primary" onclick="auditModalRun()">&#9654; Re-run Audit</button> <button class="btn btn-primary" onclick="auditModalRun()">&#9654; Re-run Audit</button>
<a class="btn btn-secondary" href="/audit.json" download>&#8595; Download</a> <a class="btn btn-secondary" href="/audit.json" download>&#8595; Download</a>
<a class="btn btn-secondary" href="/viewer" target="_blank">Open Viewer</a>
</div> </div>
<div id="audit-modal-terminal" class="terminal" style="display:none;max-height:300px"></div> <div id="audit-modal-terminal" class="terminal" style="display:none;max-height:220px;margin-bottom:12px"></div>
<iframe class="viewer-frame" src="/viewer" title="Audit viewer in modal" style="height:min(70vh,720px)"></iframe>
<button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">&#10005;</button> <button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">&#10005;</button>
</div> </div>
</div> </div>
@@ -373,9 +386,23 @@ func renderMetrics() string {
</div> </div>
<div class="card" style="margin-bottom:16px"> <div class="card" style="margin-bottom:16px">
<div class="card-head">Server — Temperature</div> <div class="card-head">Temperature — CPU</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-temp" src="/api/metrics/chart/server-temp.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature"> <img id="chart-server-temp-cpu" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Temperature — GPUs</div>
<div class="card-body" style="padding:8px">
<img id="chart-server-temp-gpu" src="/api/metrics/chart/server-temp-gpu.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Temperature — Ambient Sensors</div>
<div class="card-body" style="padding:8px">
<img id="chart-server-temp-ambient" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
</div> </div>
</div> </div>
@@ -383,6 +410,13 @@ func renderMetrics() string {
<div class="card-head">Server — Power</div> <div class="card-head">Server — Power</div>
<div class="card-body" style="padding:8px"> <div class="card-body" style="padding:8px">
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power"> <img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
</div>
</div>
<div class="card" style="margin-bottom:16px">
<div class="card-head">Server — Fan RPM</div>
<div class="card-body" style="padding:8px">
<img id="chart-server-fans" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
<div id="sys-table" style="margin-top:8px;font-size:12px"></div> <div id="sys-table" style="margin-top:8px;font-size:12px"></div>
</div> </div>
</div> </div>
@@ -394,12 +428,12 @@ let knownGPUs = [];
function refreshCharts() { function refreshCharts() {
const t = '?t=' + Date.now(); const t = '?t=' + Date.now();
['chart-server-load','chart-server-temp','chart-server-power'].forEach(id => { ['chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans'].forEach(id => {
const el = document.getElementById(id); const el = document.getElementById(id);
if (el) el.src = el.src.split('?')[0] + t; if (el) el.src = el.src.split('?')[0] + t;
}); });
knownGPUs.forEach(idx => { knownGPUs.forEach(idx => {
['load','temp','power'].forEach(kind => { ['load','power'].forEach(kind => {
const el = document.getElementById('chart-gpu-' + idx + '-' + kind); const el = document.getElementById('chart-gpu-' + idx + '-' + kind);
if (el) el.src = el.src.split('?')[0] + t; if (el) el.src = el.src.split('?')[0] + t;
}); });
@@ -423,10 +457,6 @@ es.addEventListener('metrics', e => {
'<div class="card-body" style="padding:8px">' + '<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' + '<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' +
'</div>' + '</div>' +
'<div class="card-head">GPU ' + g.index + ' — Temperature</div>' +
'<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + g.index + '-temp" src="/api/metrics/chart/gpu/' + g.index + '-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' temp">' +
'</div>' +
'<div class="card-head">GPU ' + g.index + ' — Power</div>' + '<div class="card-head">GPU ' + g.index + ' — Power</div>' +
'<div class="card-body" style="padding:8px">' + '<div class="card-body" style="padding:8px">' +
'<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' + '<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' +
@@ -437,8 +467,9 @@ es.addEventListener('metrics', e => {
// Update numeric tables // Update numeric tables
let sysHTML = ''; let sysHTML = '';
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU'); (d.temps||[]).filter(t => t.group === 'cpu').forEach(t => {
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>'; sysHTML += '<tr><td>'+t.name+'</td><td>'+t.celsius.toFixed(1)+'°C</td></tr>';
});
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>'; if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>'; if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>'); (d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
@@ -491,6 +522,8 @@ let satES = null;
function runSAT(target) { function runSAT(target) {
if (satES) { satES.close(); satES = null; } if (satES) { satES.close(); satES = null; }
const body = {}; const body = {};
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
body.display_name = labels[target] || ('Validate ' + target);
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1; if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60; if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
document.getElementById('sat-output').style.display='block'; document.getElementById('sat-output').style.display='block';
@@ -520,6 +553,8 @@ function runAllSAT() {
const btn = document.getElementById('sat-btn-' + target); const btn = document.getElementById('sat-btn-' + target);
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; } if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
const body = {}; const body = {};
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
body.display_name = labels[target] || ('Validate ' + target);
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1; if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60; if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)}) fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
@@ -568,13 +603,15 @@ func renderSATCard(id, label, extra string) string {
func renderBurn() string { func renderBurn() string {
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div> return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p> <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
<div class="card"><div class="card-head">Burn Profile</div><div class="card-body">
<div class="form-row" style="max-width:320px"><label>Preset</label><select id="burn-profile"><option value="smoke">Smoke: 5 minutes</option><option value="acceptance">Acceptance: 1 hour</option><option value="overnight">Overnight: 8 hours</option></select></div>
<p style="color:var(--muted);font-size:12px">Applied to all tests on this page. NVIDIA uses mapped DCGM levels: smoke=quick, acceptance=targeted stress, overnight=extended stress.</p>
</div></div>
<div class="grid3"> <div class="grid3">
<div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body"> <div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
<div class="form-row"><label>Duration</label><select id="bi-dur"><option value="600">10 minutes</option><option value="3600">1 hour</option><option value="28800">8 hours</option><option value="86400">24 hours</option></select></div>
<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">&#9654; Start NVIDIA Stress</button> <button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">&#9654; Start NVIDIA Stress</button>
</div></div> </div></div>
<div class="card"><div class="card-head">CPU Stress</div><div class="card-body"> <div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
<div class="form-row"><label>Duration (seconds)</label><input type="number" id="bi-cpu-dur" value="300" min="60"></div>
<button class="btn btn-primary" onclick="runBurnIn('cpu')">&#9654; Start CPU Stress</button> <button class="btn btn-primary" onclick="runBurnIn('cpu')">&#9654; Start CPU Stress</button>
</div></div> </div></div>
<div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body"> <div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
@@ -598,11 +635,9 @@ func renderBurn() string {
let biES = null; let biES = null;
function runBurnIn(target) { function runBurnIn(target) {
if (biES) { biES.close(); biES = null; } if (biES) { biES.close(); biES = null; }
const body = {}; const body = { profile: document.getElementById('burn-profile').value || 'smoke' };
if (target === 'nvidia') body.duration = parseInt(document.getElementById('bi-dur').value)||600;
if (target === 'cpu') body.duration = parseInt(document.getElementById('bi-cpu-dur').value)||300;
document.getElementById('bi-output').style.display='block'; document.getElementById('bi-output').style.display='block';
document.getElementById('bi-title').textContent = '— ' + target; document.getElementById('bi-title').textContent = '— ' + target + ' [' + body.profile + ']';
const term = document.getElementById('bi-terminal'); const term = document.getElementById('bi-terminal');
term.textContent = 'Enqueuing ' + target + ' stress...\n'; term.textContent = 'Enqueuing ' + target + ' stress...\n';
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)}) fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
@@ -672,7 +707,7 @@ var _netCountdownTimer = null;
function loadNetwork() { function loadNetwork() {
fetch('/api/network').then(r=>r.json()).then(d => { fetch('/api/network').then(r=>r.json()).then(d => {
const rows = (d.interfaces||[]).map(i => const rows = (d.interfaces||[]).map(i =>
'<tr><td>'+i.Name+'</td>' + '<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
'<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' + '<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
'<td>'+(i.IPv4||[]).join(', ')+'</td></tr>' '<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
).join(''); ).join('');
@@ -681,6 +716,10 @@ function loadNetwork() {
(d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : ''); (d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
}); });
} }
function selectIface(iface) {
document.getElementById('dhcp-iface').value = iface;
document.getElementById('st-iface').value = iface;
}
function toggleIface(iface, currentState) { function toggleIface(iface, currentState) {
fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})}) fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
.then(r=>r.json()).then(d => { .then(r=>r.json()).then(d => {
@@ -716,6 +755,7 @@ function runDHCP() {
fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})}) fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
.then(r=>r.json()).then(d => { .then(r=>r.json()).then(d => {
document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.'; document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
if (!d.error) showNetPending(d.rollback_in || 60);
loadNetwork(); loadNetwork();
}); });
} }
@@ -729,6 +769,7 @@ function setStatic() {
dns: dns, dns: dns,
})}).then(r=>r.json()).then(d => { })}).then(r=>r.json()).then(d => {
document.getElementById('static-out').textContent = d.output || d.error || 'Done.'; document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
if (!d.error) showNetPending(d.rollback_in || 60);
loadNetwork(); loadNetwork();
}); });
} }
@@ -846,10 +887,17 @@ func listExportFiles(exportDir string) ([]string, error) {
func renderTools() string { func renderTools() string {
return `<div class="card" style="margin-bottom:16px"> return `<div class="card" style="margin-bottom:16px">
<div class="card-head">Install to RAM</div> <div class="card-head">System Install</div>
<div class="card-body"> <div class="card-body">
<div style="margin-bottom:20px">
<div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p> <p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">&#9654; Copy to RAM</button> <button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">&#9654; Copy to RAM</button>
</div>
<div style="border-top:1px solid var(--line);padding-top:20px">
<div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
renderInstallInline() + `
</div>
</div> </div>
</div> </div>
<script> <script>
@@ -886,9 +934,6 @@ function installToRAM() {
<div class="card"><div class="card-head">Services</div><div class="card-body">` + <div class="card"><div class="card-head">Services</div><div class="card-body">` +
renderServicesInline() + `</div></div> renderServicesInline() + `</div></div>
<div class="card"><div class="card-head">Install to Disk</div><div class="card-body">` +
renderInstallInline() + `</div></div>
<script> <script>
function checkTools() { function checkTools() {
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>'; document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
@@ -939,8 +984,6 @@ func renderInstallInline() string {
<div id="install-terminal" class="terminal" style="max-height:500px"></div> <div id="install-terminal" class="terminal" style="max-height:500px"></div>
<div id="install-status" style="margin-top:12px;font-size:13px"></div> <div id="install-status" style="margin-top:12px;font-size:13px"></div>
</div> </div>
</div>
</div>
<style> <style>
#install-disk-tbody tr{cursor:pointer} #install-disk-tbody tr{cursor:pointer}

View File

@@ -4,6 +4,7 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"html"
"mime" "mime"
"net/http" "net/http"
"os" "os"
@@ -13,6 +14,7 @@ import (
"time" "time"
"bee/audit/internal/app" "bee/audit/internal/app"
"bee/audit/internal/platform"
"bee/audit/internal/runtimeenv" "bee/audit/internal/runtimeenv"
gocharts "github.com/go-analyze/charts" gocharts "github.com/go-analyze/charts"
"reanimator/chart/viewer" "reanimator/chart/viewer"
@@ -35,6 +37,7 @@ func init() {
// HandlerOptions configures the web UI handler. // HandlerOptions configures the web UI handler.
type HandlerOptions struct { type HandlerOptions struct {
Title string Title string
BuildLabel string
AuditPath string AuditPath string
ExportDir string ExportDir string
App *app.App App *app.App
@@ -84,7 +87,7 @@ func relAgeLabel(age time.Duration) string {
if age < time.Hour { if age < time.Hour {
m := int(age.Minutes()) m := int(age.Minutes())
if m == 0 { if m == 0 {
return "-<1m" return "-1m"
} }
return fmt.Sprintf("-%dm", m) return fmt.Sprintf("-%dm", m)
} }
@@ -102,31 +105,36 @@ type gpuRings struct {
Power *metricsRing Power *metricsRing
} }
type namedMetricsRing struct {
Name string
Ring *metricsRing
}
// pendingNetChange tracks a network state change awaiting confirmation. // pendingNetChange tracks a network state change awaiting confirmation.
type pendingNetChange struct { type pendingNetChange struct {
iface string snapshot platform.NetworkSnapshot
wasUp bool timer *time.Timer
timer *time.Timer mu sync.Mutex
mu sync.Mutex
} }
// handler is the HTTP handler for the web UI. // handler is the HTTP handler for the web UI.
type handler struct { type handler struct {
opts HandlerOptions opts HandlerOptions
mux *http.ServeMux mux *http.ServeMux
// server rings // server rings
ringCPUTemp *metricsRing ringCPULoad *metricsRing
ringCPULoad *metricsRing ringMemLoad *metricsRing
ringMemLoad *metricsRing ringPower *metricsRing
ringPower *metricsRing ringFans []*metricsRing
ringFans []*metricsRing fanNames []string
fanNames []string cpuTempRings []*namedMetricsRing
ambientTempRings []*namedMetricsRing
// per-GPU rings (index = GPU index) // per-GPU rings (index = GPU index)
gpuRings []*gpuRings gpuRings []*gpuRings
ringsMu sync.Mutex ringsMu sync.Mutex
// install job (at most one at a time) // install job (at most one at a time)
installJob *jobState installJob *jobState
installMu sync.Mutex installMu sync.Mutex
// pending network change (rollback on timeout) // pending network change (rollback on timeout)
pendingNet *pendingNetChange pendingNet *pendingNetChange
pendingNetMu sync.Mutex pendingNetMu sync.Mutex
@@ -146,7 +154,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
h := &handler{ h := &handler{
opts: opts, opts: opts,
ringCPUTemp: newMetricsRing(120),
ringCPULoad: newMetricsRing(120), ringCPULoad: newMetricsRing(120),
ringMemLoad: newMetricsRing(120), ringMemLoad: newMetricsRing(120),
ringPower: newMetricsRing(120), ringPower: newMetricsRing(120),
@@ -176,9 +183,9 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage")) mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu")) mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd")) mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress")) mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress")) mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress")) mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream) mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort) mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
@@ -382,21 +389,51 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = floatPtr(100) yMax = floatPtr(100)
case path == "server-temp": case path == "server-temp", path == "server-temp-cpu":
title = "CPU Temperature" title = "CPU Temperature"
vCPUTemp, l := h.ringCPUTemp.snapshot() h.ringsMu.Lock()
labels = l datasets, names, labels = snapshotNamedRings(h.cpuTempRings)
datasets = [][]float64{vCPUTemp} h.ringsMu.Unlock()
names = []string{"CPU Temp °C"}
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = autoMax120(vCPUTemp) yMax = autoMax120(datasets...)
case path == "server-temp-gpu":
title = "GPU Temperature"
h.ringsMu.Lock()
for idx, gr := range h.gpuRings {
if gr == nil {
continue
}
vTemp, l := gr.Temp.snapshot()
datasets = append(datasets, vTemp)
names = append(names, fmt.Sprintf("GPU %d", idx))
if len(labels) == 0 {
labels = l
}
}
h.ringsMu.Unlock()
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "server-temp-ambient":
title = "Ambient / Other Sensors"
h.ringsMu.Lock()
datasets, names, labels = snapshotNamedRings(h.ambientTempRings)
h.ringsMu.Unlock()
yMin = floatPtr(0)
yMax = autoMax120(datasets...)
case path == "server-power": case path == "server-power":
title = "Power & Fans" title = "System Power"
vPower, l := h.ringPower.snapshot() vPower, l := h.ringPower.snapshot()
labels = l labels = l
datasets = [][]float64{vPower} datasets = [][]float64{vPower}
names = []string{"Power W"} names = []string{"Power W"}
yMin = floatPtr(0)
yMax = autoMax120(vPower)
case path == "server-fans":
title = "Fan RPM"
h.ringsMu.Lock() h.ringsMu.Lock()
for i, fr := range h.ringFans { for i, fr := range h.ringFans {
fv, _ := fr.snapshot() fv, _ := fr.snapshot()
@@ -507,14 +544,20 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
datasets[i] = make([]float64, n) datasets[i] = make([]float64, n)
} }
} }
sparse := sparseLabels(labels, 6) title = sanitizeChartText(title)
names = sanitizeChartTexts(names)
sparse := sanitizeChartTexts(sparseLabels(labels, 6))
opt := gocharts.NewLineChartOptionWithData(datasets) opt := gocharts.NewLineChartOptionWithData(datasets)
opt.Title = gocharts.TitleOption{Text: title} opt.Title = gocharts.TitleOption{Text: title}
opt.XAxis.Labels = sparse opt.XAxis.Labels = sparse
opt.Legend = gocharts.LegendOption{SeriesNames: names} opt.Legend = gocharts.LegendOption{SeriesNames: names}
if yMin != nil || yMax != nil { if yMin != nil || yMax != nil {
opt.YAxis = []gocharts.YAxisOption{{Min: yMin, Max: yMax}} opt.YAxis = []gocharts.YAxisOption{{
Min: yMin,
Max: yMax,
ValueFormatter: chartLegendNumber,
}}
} }
p := gocharts.NewPainter(gocharts.PainterOptions{ p := gocharts.NewPainter(gocharts.PainterOptions{
@@ -528,6 +571,26 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
return p.Bytes() return p.Bytes()
} }
func sanitizeChartText(s string) string {
if s == "" {
return ""
}
return html.EscapeString(strings.Map(func(r rune) rune {
if r < 0x20 && r != '\t' && r != '\n' && r != '\r' {
return -1
}
return r
}, s))
}
func sanitizeChartTexts(in []string) []string {
out := make([]string, len(in))
for i, s := range in {
out[i] = sanitizeChartText(s)
}
return out
}
func safeIdx(s []float64, i int) float64 { func safeIdx(s []float64, i int) float64 {
if i < len(s) { if i < len(s) {
return s[i] return s[i]
@@ -535,6 +598,46 @@ func safeIdx(s []float64, i int) float64 {
return 0 return 0
} }
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
var datasets [][]float64
var names []string
var labels []string
for _, item := range rings {
if item == nil || item.Ring == nil {
continue
}
vals, l := item.Ring.snapshot()
datasets = append(datasets, vals)
names = append(names, item.Name)
if len(labels) == 0 {
labels = l
}
}
return datasets, names, labels
}
func chartLegendNumber(v float64) string {
neg := v < 0
if v < 0 {
v = -v
}
var out string
switch {
case v >= 10000:
out = fmt.Sprintf("%dk", int((v+500)/1000))
case v >= 1000:
s := fmt.Sprintf("%.2f", v/1000)
s = strings.TrimRight(strings.TrimRight(s, "0"), ".")
out = strings.ReplaceAll(s, ".", ",") + "k"
default:
out = fmt.Sprintf("%.0f", v)
}
if neg {
return "-" + out
}
return out
}
func sparseLabels(labels []string, n int) []string { func sparseLabels(labels []string, n int) []string {
out := make([]string, len(labels)) out := make([]string, len(labels))
step := len(labels) / n step := len(labels) / n

View File

@@ -9,6 +9,28 @@ import (
"testing" "testing"
) )
func TestChartLegendNumber(t *testing.T) {
tests := []struct {
in float64
want string
}{
{in: 0.4, want: "0"},
{in: 61.5, want: "62"},
{in: 999.4, want: "999"},
{in: 1200, want: "1,2k"},
{in: 1250, want: "1,25k"},
{in: 1310, want: "1,31k"},
{in: 1500, want: "1,5k"},
{in: 2600, want: "2,6k"},
{in: 10200, want: "10k"},
}
for _, tc := range tests {
if got := chartLegendNumber(tc.in); got != tc.want {
t.Fatalf("chartLegendNumber(%v)=%q want %q", tc.in, got, tc.want)
}
}
}
func TestRootRendersDashboard(t *testing.T) { func TestRootRendersDashboard(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
path := filepath.Join(dir, "audit.json") path := filepath.Join(dir, "audit.json")
@@ -31,9 +53,9 @@ func TestRootRendersDashboard(t *testing.T) {
if first.Code != http.StatusOK { if first.Code != http.StatusOK {
t.Fatalf("first status=%d", first.Code) t.Fatalf("first status=%d", first.Code)
} }
// Dashboard should contain the audit modal (with viewer link) and hardware summary // Dashboard should contain the audit nav link and hardware summary
if !strings.Contains(first.Body.String(), `openAuditModal`) { if !strings.Contains(first.Body.String(), `href="/audit"`) {
t.Fatalf("first body missing audit modal trigger: %s", first.Body.String()) t.Fatalf("first body missing audit nav link: %s", first.Body.String())
} }
if !strings.Contains(first.Body.String(), `/viewer`) { if !strings.Contains(first.Body.String(), `/viewer`) {
t.Fatalf("first body missing viewer link: %s", first.Body.String()) t.Fatalf("first body missing viewer link: %s", first.Body.String())
@@ -56,6 +78,28 @@ func TestRootRendersDashboard(t *testing.T) {
} }
} }
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "audit.json")
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
t.Fatal(err)
}
handler := NewHandler(HandlerOptions{AuditPath: path})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/audit", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
if !strings.Contains(body, `iframe class="viewer-frame" src="/viewer"`) {
t.Fatalf("audit page missing viewer frame: %s", body)
}
if !strings.Contains(body, `openAuditModal()`) {
t.Fatalf("audit page missing action modal trigger: %s", body)
}
}
func TestViewerRendersLatestSnapshot(t *testing.T) { func TestViewerRendersLatestSnapshot(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
path := filepath.Join(dir, "audit.json") path := filepath.Join(dir, "audit.json")

View File

@@ -10,6 +10,8 @@ import (
"sort" "sort"
"sync" "sync"
"time" "time"
"bee/audit/internal/app"
) )
// Task statuses. // Task statuses.
@@ -23,10 +25,10 @@ const (
// taskNames maps target → human-readable name. // taskNames maps target → human-readable name.
var taskNames = map[string]string{ var taskNames = map[string]string{
"nvidia": "NVIDIA SAT", "nvidia": "NVIDIA SAT",
"memory": "Memory SAT", "memory": "Memory SAT",
"storage": "Storage SAT", "storage": "Storage SAT",
"cpu": "CPU SAT", "cpu": "CPU SAT",
"amd": "AMD GPU SAT", "amd": "AMD GPU SAT",
"amd-stress": "AMD GPU Burn-in", "amd-stress": "AMD GPU Burn-in",
"memory-stress": "Memory Burn-in", "memory-stress": "Memory Burn-in",
@@ -47,6 +49,7 @@ type Task struct {
StartedAt *time.Time `json:"started_at,omitempty"` StartedAt *time.Time `json:"started_at,omitempty"`
DoneAt *time.Time `json:"done_at,omitempty"` DoneAt *time.Time `json:"done_at,omitempty"`
ErrMsg string `json:"error,omitempty"` ErrMsg string `json:"error,omitempty"`
LogPath string `json:"log_path,omitempty"`
// runtime fields (not serialised) // runtime fields (not serialised)
job *jobState job *jobState
@@ -55,29 +58,90 @@ type Task struct {
// taskParams holds optional parameters parsed from the run request. // taskParams holds optional parameters parsed from the run request.
type taskParams struct { type taskParams struct {
Duration int Duration int `json:"duration,omitempty"`
DiagLevel int DiagLevel int `json:"diag_level,omitempty"`
GPUIndices []int GPUIndices []int `json:"gpu_indices,omitempty"`
Device string // for install BurnProfile string `json:"burn_profile,omitempty"`
DisplayName string `json:"display_name,omitempty"`
Device string `json:"device,omitempty"` // for install
}
type persistedTask struct {
ID string `json:"id"`
Name string `json:"name"`
Target string `json:"target"`
Priority int `json:"priority"`
Status string `json:"status"`
CreatedAt time.Time `json:"created_at"`
StartedAt *time.Time `json:"started_at,omitempty"`
DoneAt *time.Time `json:"done_at,omitempty"`
ErrMsg string `json:"error,omitempty"`
LogPath string `json:"log_path,omitempty"`
Params taskParams `json:"params,omitempty"`
}
type burnPreset struct {
NvidiaDiag int
DurationSec int
}
func resolveBurnPreset(profile string) burnPreset {
switch profile {
case "overnight":
return burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}
case "acceptance":
return burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}
default:
return burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}
}
} }
// taskQueue manages a priority-ordered list of tasks and runs them one at a time. // taskQueue manages a priority-ordered list of tasks and runs them one at a time.
type taskQueue struct { type taskQueue struct {
mu sync.Mutex mu sync.Mutex
tasks []*Task tasks []*Task
trigger chan struct{} trigger chan struct{}
opts *HandlerOptions // set by startWorker opts *HandlerOptions // set by startWorker
statePath string
logsDir string
started bool
} }
var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)} var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
const maxTaskHistory = 50 const maxTaskHistory = 50
var (
runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
return a.RunMemoryAcceptancePackCtx(ctx, baseDir, logFunc)
}
runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
return a.RunStorageAcceptancePackCtx(ctx, baseDir, logFunc)
}
runCPUAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunCPUAcceptancePackCtx(ctx, baseDir, durationSec, logFunc)
}
runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
}
runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
}
runMemoryStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunMemoryStressPackCtx(ctx, baseDir, durationSec, logFunc)
}
runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
}
)
// enqueue adds a task to the queue and notifies the worker. // enqueue adds a task to the queue and notifies the worker.
func (q *taskQueue) enqueue(t *Task) { func (q *taskQueue) enqueue(t *Task) {
q.mu.Lock() q.mu.Lock()
q.assignTaskLogPathLocked(t)
q.tasks = append(q.tasks, t) q.tasks = append(q.tasks, t)
q.prune() q.prune()
q.persistLocked()
q.mu.Unlock() q.mu.Unlock()
select { select {
case q.trigger <- struct{}{}: case q.trigger <- struct{}{}:
@@ -139,6 +203,20 @@ func (q *taskQueue) findJob(id string) (*jobState, bool) {
return t.job, true return t.job, true
} }
func (q *taskQueue) hasActiveTarget(target string) bool {
q.mu.Lock()
defer q.mu.Unlock()
for _, t := range q.tasks {
if t.Target != target {
continue
}
if t.Status == TaskPending || t.Status == TaskRunning {
return true
}
}
return false
}
// snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc). // snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc).
func (q *taskQueue) snapshot() []Task { func (q *taskQueue) snapshot() []Task {
q.mu.Lock() q.mu.Lock()
@@ -174,8 +252,24 @@ func statusOrder(s string) int {
// startWorker launches the queue runner goroutine. // startWorker launches the queue runner goroutine.
func (q *taskQueue) startWorker(opts *HandlerOptions) { func (q *taskQueue) startWorker(opts *HandlerOptions) {
q.mu.Lock()
q.opts = opts q.opts = opts
go q.worker() q.statePath = filepath.Join(opts.ExportDir, "tasks-state.json")
q.logsDir = filepath.Join(opts.ExportDir, "tasks")
_ = os.MkdirAll(q.logsDir, 0755)
if !q.started {
q.loadLocked()
q.started = true
go q.worker()
}
hasPending := q.nextPending() != nil
q.mu.Unlock()
if hasPending {
select {
case q.trigger <- struct{}{}:
default:
}
}
} }
func (q *taskQueue) worker() { func (q *taskQueue) worker() {
@@ -192,10 +286,13 @@ func (q *taskQueue) worker() {
now := time.Now() now := time.Now()
t.Status = TaskRunning t.Status = TaskRunning
t.StartedAt = &now t.StartedAt = &now
j := &jobState{} t.DoneAt = nil
t.ErrMsg = ""
j := newTaskJobState(t.LogPath)
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
j.cancel = cancel j.cancel = cancel
t.job = j t.job = j
q.persistLocked()
q.mu.Unlock() q.mu.Unlock()
q.runTask(t, j, ctx) q.runTask(t, j, ctx)
@@ -212,6 +309,7 @@ func (q *taskQueue) worker() {
} }
} }
q.prune() q.prune()
q.persistLocked()
q.mu.Unlock() q.mu.Unlock()
} }
setCPUGovernor("powersave") setCPUGovernor("powersave")
@@ -240,6 +338,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
a := q.opts.App a := q.opts.App
j.append(fmt.Sprintf("Starting %s...", t.Name)) j.append(fmt.Sprintf("Starting %s...", t.Name))
if len(j.lines) > 0 {
j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
}
var ( var (
archive string archive string
@@ -248,9 +349,13 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
switch t.Target { switch t.Target {
case "nvidia": case "nvidia":
if len(t.params.GPUIndices) > 0 || t.params.DiagLevel > 0 { diagLevel := t.params.DiagLevel
if t.params.BurnProfile != "" && diagLevel <= 0 {
diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
}
if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
result, e := a.RunNvidiaAcceptancePackWithOptions( result, e := a.RunNvidiaAcceptancePackWithOptions(
ctx, "", t.params.DiagLevel, t.params.GPUIndices, j.append, ctx, "", diagLevel, t.params.GPUIndices, j.append,
) )
if e != nil { if e != nil {
err = e err = e
@@ -261,23 +366,38 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
archive, err = a.RunNvidiaAcceptancePack("", j.append) archive, err = a.RunNvidiaAcceptancePack("", j.append)
} }
case "memory": case "memory":
archive, err = a.RunMemoryAcceptancePack("", j.append) archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
case "storage": case "storage":
archive, err = a.RunStorageAcceptancePack("", j.append) archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
case "cpu": case "cpu":
dur := t.params.Duration dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
if dur <= 0 { if dur <= 0 {
dur = 60 dur = 60
} }
archive, err = a.RunCPUAcceptancePack("", dur, j.append) archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
case "amd": case "amd":
archive, err = a.RunAMDAcceptancePack("", j.append) archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
case "amd-stress": case "amd-stress":
archive, err = a.RunAMDStressPack("", j.append) dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
case "memory-stress": case "memory-stress":
archive, err = a.RunMemoryStressPack("", j.append) dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
case "sat-stress": case "sat-stress":
archive, err = a.RunSATStressPack("", j.append) dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
case "audit": case "audit":
result, e := a.RunAuditNow(q.opts.RuntimeMode) result, e := a.RunAuditNow(q.opts.RuntimeMode)
if e != nil { if e != nil {
@@ -288,7 +408,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
} }
} }
case "install-to-ram": case "install-to-ram":
err = a.RunInstallToRAM(j.append) err = a.RunInstallToRAM(ctx, j.append)
default: default:
j.append("ERROR: unknown target: " + t.Target) j.append("ERROR: unknown target: " + t.Target)
j.finish("unknown target") j.finish("unknown target")
@@ -355,6 +475,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
t.Status = TaskCancelled t.Status = TaskCancelled
now := time.Now() now := time.Now()
t.DoneAt = &now t.DoneAt = &now
globalQueue.persistLocked()
writeJSON(w, map[string]string{"status": "cancelled"}) writeJSON(w, map[string]string{"status": "cancelled"})
case TaskRunning: case TaskRunning:
if t.job != nil { if t.job != nil {
@@ -363,6 +484,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
t.Status = TaskCancelled t.Status = TaskCancelled
now := time.Now() now := time.Now()
t.DoneAt = &now t.DoneAt = &now
globalQueue.persistLocked()
writeJSON(w, map[string]string{"status": "cancelled"}) writeJSON(w, map[string]string{"status": "cancelled"})
default: default:
writeError(w, http.StatusConflict, "task is not running or pending") writeError(w, http.StatusConflict, "task is not running or pending")
@@ -390,6 +512,7 @@ func (h *handler) handleAPITasksPriority(w http.ResponseWriter, r *http.Request)
return return
} }
t.Priority += req.Delta t.Priority += req.Delta
globalQueue.persistLocked()
writeJSON(w, map[string]int{"priority": t.Priority}) writeJSON(w, map[string]int{"priority": t.Priority})
} }
@@ -412,6 +535,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
n++ n++
} }
} }
globalQueue.persistLocked()
globalQueue.mu.Unlock() globalQueue.mu.Unlock()
writeJSON(w, map[string]int{"cancelled": n}) writeJSON(w, map[string]int{"cancelled": n})
} }
@@ -434,3 +558,79 @@ func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
} }
streamJob(w, r, j) streamJob(w, r, j)
} }
func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
if t.LogPath != "" || q.logsDir == "" || t.ID == "" {
return
}
t.LogPath = filepath.Join(q.logsDir, t.ID+".log")
}
func (q *taskQueue) loadLocked() {
if q.statePath == "" {
return
}
data, err := os.ReadFile(q.statePath)
if err != nil || len(data) == 0 {
return
}
var persisted []persistedTask
if err := json.Unmarshal(data, &persisted); err != nil {
return
}
for _, pt := range persisted {
t := &Task{
ID: pt.ID,
Name: pt.Name,
Target: pt.Target,
Priority: pt.Priority,
Status: pt.Status,
CreatedAt: pt.CreatedAt,
StartedAt: pt.StartedAt,
DoneAt: pt.DoneAt,
ErrMsg: pt.ErrMsg,
LogPath: pt.LogPath,
params: pt.Params,
}
q.assignTaskLogPathLocked(t)
if t.Status == TaskPending || t.Status == TaskRunning {
t.Status = TaskPending
t.DoneAt = nil
t.ErrMsg = ""
}
q.tasks = append(q.tasks, t)
}
q.prune()
q.persistLocked()
}
func (q *taskQueue) persistLocked() {
if q.statePath == "" {
return
}
state := make([]persistedTask, 0, len(q.tasks))
for _, t := range q.tasks {
state = append(state, persistedTask{
ID: t.ID,
Name: t.Name,
Target: t.Target,
Priority: t.Priority,
Status: t.Status,
CreatedAt: t.CreatedAt,
StartedAt: t.StartedAt,
DoneAt: t.DoneAt,
ErrMsg: t.ErrMsg,
LogPath: t.LogPath,
Params: t.params,
})
}
data, err := json.MarshalIndent(state, "", " ")
if err != nil {
return
}
tmp := q.statePath + ".tmp"
if err := os.WriteFile(tmp, data, 0644); err != nil {
return
}
_ = os.Rename(tmp, q.statePath)
}

View File

@@ -0,0 +1,156 @@
package webui
import (
"context"
"os"
"path/filepath"
"testing"
"time"
"bee/audit/internal/app"
)
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
dir := t.TempDir()
q := &taskQueue{
statePath: filepath.Join(dir, "tasks-state.json"),
logsDir: filepath.Join(dir, "tasks"),
trigger: make(chan struct{}, 1),
}
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
t.Fatal(err)
}
started := time.Now().Add(-time.Minute)
task := &Task{
ID: "task-1",
Name: "Memory Burn-in",
Target: "memory-stress",
Priority: 2,
Status: TaskRunning,
CreatedAt: time.Now().Add(-2 * time.Minute),
StartedAt: &started,
params: taskParams{
Duration: 300,
BurnProfile: "smoke",
},
}
q.tasks = append(q.tasks, task)
q.assignTaskLogPathLocked(task)
q.persistLocked()
recovered := &taskQueue{
statePath: q.statePath,
logsDir: q.logsDir,
trigger: make(chan struct{}, 1),
}
recovered.loadLocked()
if len(recovered.tasks) != 1 {
t.Fatalf("tasks=%d want 1", len(recovered.tasks))
}
got := recovered.tasks[0]
if got.Status != TaskPending {
t.Fatalf("status=%q want %q", got.Status, TaskPending)
}
if got.params.Duration != 300 || got.params.BurnProfile != "smoke" {
t.Fatalf("params=%+v", got.params)
}
if got.LogPath == "" {
t.Fatal("expected log path")
}
}
func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "task.log")
if err := os.WriteFile(path, []byte("line1\nline2\n"), 0644); err != nil {
t.Fatal(err)
}
j := newTaskJobState(path)
existing, ch := j.subscribe()
if ch == nil {
t.Fatal("expected live subscription channel")
}
if len(existing) != 2 || existing[0] != "line1" || existing[1] != "line2" {
t.Fatalf("existing=%v", existing)
}
}
func TestResolveBurnPreset(t *testing.T) {
tests := []struct {
profile string
want burnPreset
}{
{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
}
for _, tc := range tests {
if got := resolveBurnPreset(tc.profile); got != tc.want {
t.Fatalf("resolveBurnPreset(%q)=%+v want %+v", tc.profile, got, tc.want)
}
}
}
func TestRunTaskHonorsCancel(t *testing.T) {
t.Parallel()
blocked := make(chan struct{})
released := make(chan struct{})
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
close(blocked)
select {
case <-ctx.Done():
close(released)
return "", ctx.Err()
case <-time.After(5 * time.Second):
close(released)
return "unexpected", nil
}
}
q := &taskQueue{
opts: &HandlerOptions{App: &app.App{}},
}
tk := &Task{
ID: "cpu-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{Duration: 60},
}
j := &jobState{}
ctx, cancel := context.WithCancel(context.Background())
j.cancel = cancel
tk.job = j
orig := runCPUAcceptancePackCtx
runCPUAcceptancePackCtx = func(_ *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return aRun(nil, ctx, baseDir, durationSec, logFunc)
}
defer func() { runCPUAcceptancePackCtx = orig }()
done := make(chan struct{})
go func() {
q.runTask(tk, j, ctx)
close(done)
}()
<-blocked
j.abort()
select {
case <-released:
case <-time.After(2 * time.Second):
t.Fatal("task did not observe cancel")
}
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatal("runTask did not return after cancel")
}
}

View File

@@ -32,7 +32,7 @@ lb config noauto \
--memtest none \ --memtest none \
--iso-volume "EASY-BEE" \ --iso-volume "EASY-BEE" \
--iso-application "EASY-BEE" \ --iso-application "EASY-BEE" \
--bootappend-live "boot=live components nomodeset video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \ --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
--apt-recommends false \ --apt-recommends false \
--chroot-squashfs-compression-type zstd \ --chroot-squashfs-compression-type zstd \
"${@}" "${@}"

View File

@@ -20,7 +20,7 @@ menuentry "EASY-BEE (load to RAM)" {
} }
menuentry "EASY-BEE (NVIDIA GSP=off)" { menuentry "EASY-BEE (NVIDIA GSP=off)" {
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
initrd @INITRD_LIVE@ initrd @INITRD_LIVE@
} }

View File

@@ -15,7 +15,7 @@ label live-@FLAVOUR@-gsp-off
menu label EASY-BEE (^NVIDIA GSP=off) menu label EASY-BEE (^NVIDIA GSP=off)
linux @LINUX@ linux @LINUX@
initrd @INITRD@ initrd @INITRD@
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
label live-@FLAVOUR@-failsafe label live-@FLAVOUR@-failsafe
menu label EASY-BEE (^fail-safe) menu label EASY-BEE (^fail-safe)