release: v3.1
This commit is contained in:
@@ -1,11 +1,13 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
@@ -16,6 +18,37 @@ import (
|
|||||||
|
|
||||||
var Version = "dev"
|
var Version = "dev"
|
||||||
|
|
||||||
|
func buildLabel() string {
|
||||||
|
label := strings.TrimSpace(Version)
|
||||||
|
if label == "" {
|
||||||
|
label = "dev"
|
||||||
|
}
|
||||||
|
if info, ok := debug.ReadBuildInfo(); ok {
|
||||||
|
var revision string
|
||||||
|
var modified bool
|
||||||
|
for _, setting := range info.Settings {
|
||||||
|
switch setting.Key {
|
||||||
|
case "vcs.revision":
|
||||||
|
revision = setting.Value
|
||||||
|
case "vcs.modified":
|
||||||
|
modified = setting.Value == "true"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if revision != "" {
|
||||||
|
short := revision
|
||||||
|
if len(short) > 12 {
|
||||||
|
short = short[:12]
|
||||||
|
}
|
||||||
|
label += " (" + short
|
||||||
|
if modified {
|
||||||
|
label += "+"
|
||||||
|
}
|
||||||
|
label += ")"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return label
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
||||||
}
|
}
|
||||||
@@ -139,7 +172,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func runExport(args []string, stdout, stderr io.Writer) int {
|
func runExport(args []string, stdout, stderr io.Writer) int {
|
||||||
fs := flag.NewFlagSet("export", flag.ContinueOnError)
|
fs := flag.NewFlagSet("export", flag.ContinueOnError)
|
||||||
fs.SetOutput(stderr)
|
fs.SetOutput(stderr)
|
||||||
@@ -299,6 +331,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
|
|||||||
|
|
||||||
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
|
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
|
||||||
Title: *title,
|
Title: *title,
|
||||||
|
BuildLabel: buildLabel(),
|
||||||
AuditPath: *auditPath,
|
AuditPath: *auditPath,
|
||||||
ExportDir: *exportDir,
|
ExportDir: *exportDir,
|
||||||
App: app.New(platform.New()),
|
App: app.New(platform.New()),
|
||||||
@@ -351,15 +384,15 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
case "nvidia":
|
case "nvidia":
|
||||||
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
||||||
case "memory":
|
case "memory":
|
||||||
archive, err = application.RunMemoryAcceptancePack("", logLine)
|
archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
|
||||||
case "storage":
|
case "storage":
|
||||||
archive, err = application.RunStorageAcceptancePack("", logLine)
|
archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", logLine)
|
||||||
case "cpu":
|
case "cpu":
|
||||||
dur := *duration
|
dur := *duration
|
||||||
if dur <= 0 {
|
if dur <= 0 {
|
||||||
dur = 60
|
dur = 60
|
||||||
}
|
}
|
||||||
archive, err = application.RunCPUAcceptancePack("", dur, logLine)
|
archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error("run sat", "target", target, "err", err)
|
slog.Error("run sat", "target", target, "err", err)
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ type networkManager interface {
|
|||||||
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
|
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
|
||||||
SetInterfaceState(iface string, up bool) error
|
SetInterfaceState(iface string, up bool) error
|
||||||
GetInterfaceState(iface string) (bool, error)
|
GetInterfaceState(iface string) (bool, error)
|
||||||
|
CaptureNetworkSnapshot() (platform.NetworkSnapshot, error)
|
||||||
|
RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type serviceManager interface {
|
type serviceManager interface {
|
||||||
@@ -78,7 +80,7 @@ type installer interface {
|
|||||||
ListInstallDisks() ([]platform.InstallDisk, error)
|
ListInstallDisks() ([]platform.InstallDisk, error)
|
||||||
InstallToDisk(ctx context.Context, device string, logFile string) error
|
InstallToDisk(ctx context.Context, device string, logFile string) error
|
||||||
IsLiveMediaInRAM() bool
|
IsLiveMediaInRAM() bool
|
||||||
RunInstallToRAM(logFunc func(string)) error
|
RunInstallToRAM(ctx context.Context, logFunc func(string)) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type GPUPresenceResult struct {
|
type GPUPresenceResult struct {
|
||||||
@@ -98,23 +100,23 @@ func (a *App) IsLiveMediaInRAM() bool {
|
|||||||
return a.installer.IsLiveMediaInRAM()
|
return a.installer.IsLiveMediaInRAM()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunInstallToRAM(logFunc func(string)) error {
|
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
return a.installer.RunInstallToRAM(logFunc)
|
return a.installer.RunInstallToRAM(ctx, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
type satRunner interface {
|
type satRunner interface {
|
||||||
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
||||||
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
|
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
|
||||||
DetectGPUVendor() string
|
DetectGPUVendor() string
|
||||||
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
||||||
RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunAMDStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunSATStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
||||||
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
}
|
}
|
||||||
@@ -348,6 +350,14 @@ func (a *App) GetInterfaceState(iface string) (bool, error) {
|
|||||||
return a.network.GetInterfaceState(iface)
|
return a.network.GetInterfaceState(iface)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
|
||||||
|
return a.network.CaptureNetworkSnapshot()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
|
||||||
|
return a.network.RestoreNetworkSnapshot(snapshot)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
||||||
body, err := a.network.SetStaticIPv4(cfg)
|
body, err := a.network.SetStaticIPv4(cfg)
|
||||||
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
||||||
@@ -496,10 +506,14 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunMemoryAcceptancePack(baseDir, logFunc)
|
return a.sat.RunMemoryAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -508,10 +522,14 @@ func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunCPUAcceptancePack(baseDir, durationSec, logFunc)
|
return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
|
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
|
||||||
@@ -520,10 +538,14 @@ func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (Actio
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunStorageAcceptancePack(baseDir, logFunc)
|
return a.sat.RunStorageAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -540,10 +562,14 @@ func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunAMDAcceptancePack(baseDir, logFunc)
|
return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -551,19 +577,31 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|||||||
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
return a.sat.RunMemoryStressPack(baseDir, logFunc)
|
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
return a.sat.RunSATStressPack(baseDir, logFunc)
|
return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunAMDStressPack(baseDir, logFunc)
|
return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
|
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
|
||||||
|
|||||||
@@ -43,8 +43,12 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
|
|||||||
return f.setStaticIPv4Fn(cfg)
|
return f.setStaticIPv4Fn(cfg)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
|
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
|
||||||
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
|
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
|
||||||
|
func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
|
||||||
|
return platform.NetworkSnapshot{}, nil
|
||||||
|
}
|
||||||
|
// RestoreNetworkSnapshot is a test stub: it ignores the snapshot and always
// succeeds.
func (f fakeNetwork) RestoreNetworkSnapshot(_ platform.NetworkSnapshot) error {
	return nil
}
|
||||||
|
|
||||||
type fakeServices struct {
|
type fakeServices struct {
|
||||||
serviceStatusFn func(string) (string, error)
|
serviceStatusFn func(string) (string, error)
|
||||||
@@ -141,15 +145,15 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
return f.runMemoryFn(baseDir)
|
return f.runMemoryFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunStorageAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
return f.runStorageFn(baseDir)
|
return f.runStorageFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int, _ func(string)) (string, error) {
|
func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
|
||||||
if f.runCPUFn != nil {
|
if f.runCPUFn != nil {
|
||||||
return f.runCPUFn(baseDir, durationSec)
|
return f.runCPUFn(baseDir, durationSec)
|
||||||
}
|
}
|
||||||
@@ -170,16 +174,22 @@ func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunAMDAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
if f.runAMDPackFn != nil {
|
if f.runAMDPackFn != nil {
|
||||||
return f.runAMDPackFn(baseDir)
|
return f.runAMDPackFn(baseDir)
|
||||||
}
|
}
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunAMDStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
func (f fakeSAT) RunMemoryStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
return "", nil
|
||||||
func (f fakeSAT) RunSATStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
}
|
||||||
|
func (f fakeSAT) RunMemoryStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
func (f fakeSAT) RunSATStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
|
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -18,7 +19,7 @@ func (s *System) IsLiveMediaInRAM() bool {
|
|||||||
return strings.TrimSpace(string(out)) == "tmpfs"
|
return strings.TrimSpace(string(out)) == "tmpfs"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
log := func(msg string) {
|
log := func(msg string) {
|
||||||
if logFunc != nil {
|
if logFunc != nil {
|
||||||
logFunc(msg)
|
logFunc(msg)
|
||||||
@@ -56,10 +57,13 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, sf := range squashfsFiles {
|
for _, sf := range squashfsFiles {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
base := filepath.Base(sf)
|
base := filepath.Base(sf)
|
||||||
dst := filepath.Join(dstDir, base)
|
dst := filepath.Join(dstDir, base)
|
||||||
log(fmt.Sprintf("Copying %s to RAM...", base))
|
log(fmt.Sprintf("Copying %s to RAM...", base))
|
||||||
if err := copyFileLarge(sf, dst, log); err != nil {
|
if err := copyFileLarge(ctx, sf, dst, log); err != nil {
|
||||||
return fmt.Errorf("copy %s: %v", base, err)
|
return fmt.Errorf("copy %s: %v", base, err)
|
||||||
}
|
}
|
||||||
log(fmt.Sprintf("Copied %s.", base))
|
log(fmt.Sprintf("Copied %s.", base))
|
||||||
@@ -77,9 +81,12 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log("Copying remaining medium files...")
|
log("Copying remaining medium files...")
|
||||||
if err := cpDir("/run/live/medium", dstDir, log); err != nil {
|
if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
|
||||||
log(fmt.Sprintf("Warning: partial copy: %v", err))
|
log(fmt.Sprintf("Warning: partial copy: %v", err))
|
||||||
}
|
}
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
|
if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
|
||||||
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
||||||
}
|
}
|
||||||
@@ -88,7 +95,7 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyFileLarge(src, dst string, logFunc func(string)) error {
|
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
in, err := os.Open(src)
|
in, err := os.Open(src)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -107,6 +114,9 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
|
|||||||
var copied int64
|
var copied int64
|
||||||
buf := make([]byte, 4*1024*1024)
|
buf := make([]byte, 4*1024*1024)
|
||||||
for {
|
for {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
n, err := in.Read(buf)
|
n, err := in.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
if _, werr := out.Write(buf[:n]); werr != nil {
|
if _, werr := out.Write(buf[:n]); werr != nil {
|
||||||
@@ -128,8 +138,11 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
|
|||||||
return out.Sync()
|
return out.Sync()
|
||||||
}
|
}
|
||||||
|
|
||||||
func cpDir(src, dst string, logFunc func(string)) error {
|
func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
|
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -144,7 +157,7 @@ func cpDir(src, dst string, logFunc func(string)) error {
|
|||||||
if _, err := os.Stat(target); err == nil {
|
if _, err := os.Stat(target); err == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return copyFileLarge(path, target, nil)
|
return copyFileLarge(ctx, path, target, nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,10 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -23,6 +26,7 @@ type LiveMetricSample struct {
|
|||||||
// TempReading is a named temperature sensor value.
//
// Group is left out of the JSON encoding when it is empty.
type TempReading struct {
	// Name is the sensor label.
	Name string `json:"name"`
	// Group is the sensor category assigned by the sampler
	// (for example "cpu" or "ambient").
	Group string `json:"group,omitempty"`
	// Celsius is the reading value.
	Celsius float64 `json:"celsius"`
}
|
||||||
|
|
||||||
@@ -43,10 +47,11 @@ func SampleLiveMetrics() LiveMetricSample {
|
|||||||
fans, _ := sampleFanSpeeds()
|
fans, _ := sampleFanSpeeds()
|
||||||
s.Fans = fans
|
s.Fans = fans
|
||||||
|
|
||||||
// CPU/system temperature — returns 0 if unavailable
|
s.Temps = append(s.Temps, sampleLiveTemperatureReadings()...)
|
||||||
cpuTemp := sampleCPUMaxTemp()
|
if !hasTempGroup(s.Temps, "cpu") {
|
||||||
if cpuTemp > 0 {
|
if cpuTemp := sampleCPUMaxTemp(); cpuTemp > 0 {
|
||||||
s.Temps = append(s.Temps, TempReading{Name: "CPU", Celsius: cpuTemp})
|
s.Temps = append(s.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: cpuTemp})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// System power — returns 0 if unavailable
|
// System power — returns 0 if unavailable
|
||||||
@@ -140,3 +145,181 @@ func sampleMemLoadPct() float64 {
|
|||||||
used := total - avail
|
used := total - avail
|
||||||
return float64(used) / float64(total) * 100
|
return float64(used) / float64(total) * 100
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func hasTempGroup(temps []TempReading, group string) bool {
|
||||||
|
for _, t := range temps {
|
||||||
|
if t.Group == group {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTemperatureReadings() []TempReading {
|
||||||
|
if temps := sampleLiveTempsViaSensorsJSON(); len(temps) > 0 {
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
return sampleLiveTempsViaIPMI()
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTempsViaSensorsJSON() []TempReading {
|
||||||
|
out, err := exec.Command("sensors", "-j").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var doc map[string]map[string]any
|
||||||
|
if err := json.Unmarshal(out, &doc); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
chips := make([]string, 0, len(doc))
|
||||||
|
for chip := range doc {
|
||||||
|
chips = append(chips, chip)
|
||||||
|
}
|
||||||
|
sort.Strings(chips)
|
||||||
|
|
||||||
|
temps := make([]TempReading, 0, len(chips))
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, chip := range chips {
|
||||||
|
features := doc[chip]
|
||||||
|
featureNames := make([]string, 0, len(features))
|
||||||
|
for name := range features {
|
||||||
|
featureNames = append(featureNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(featureNames)
|
||||||
|
for _, name := range featureNames {
|
||||||
|
if strings.EqualFold(name, "Adapter") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
feature, ok := features[name].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, ok := firstTempInputValue(feature)
|
||||||
|
if !ok || value <= 0 || value > 150 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
group := classifyLiveTempGroup(chip, name)
|
||||||
|
if group == "gpu" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := strings.TrimSpace(name)
|
||||||
|
if label == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if group == "ambient" {
|
||||||
|
label = compactAmbientTempName(chip, label)
|
||||||
|
}
|
||||||
|
key := group + "\x00" + label
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTempsViaIPMI() []TempReading {
|
||||||
|
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var temps []TempReading
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||||
|
parts := strings.Split(line, "|")
|
||||||
|
if len(parts) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(parts[0])
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
unit := strings.ToLower(strings.TrimSpace(parts[2]))
|
||||||
|
if !strings.Contains(unit, "degrees") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
raw := strings.TrimSpace(parts[1])
|
||||||
|
if raw == "" || strings.EqualFold(raw, "na") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, err := strconv.ParseFloat(raw, 64)
|
||||||
|
if err != nil || value <= 0 || value > 150 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
group := classifyLiveTempGroup("", name)
|
||||||
|
if group == "gpu" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := name
|
||||||
|
if group == "ambient" {
|
||||||
|
label = compactAmbientTempName("", label)
|
||||||
|
}
|
||||||
|
key := group + "\x00" + label
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
|
||||||
|
}
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstTempInputValue returns the temperature input value of one sensors
// feature object.
//
// A key qualifies when, case-insensitively, it contains "temp" and ends in
// "_input". Values may be float64 or a numeric string. When several keys
// qualify, the one that sorts first (plain string order) and carries a usable
// value wins, so the result does not depend on map iteration order.
//
// Non-finite values (NaN, ±Inf) are treated as unusable: strconv.ParseFloat
// accepts "NaN" and "Inf" without error, and a NaN would pass a caller's
// `v <= 0 || v > 150` range check because every comparison with NaN is false.
//
// It returns (0, false) when no usable value exists, including for a nil map.
func firstTempInputValue(feature map[string]any) (float64, bool) {
	var (
		bestKey string
		best    float64
		found   bool
	)
	for key, raw := range feature {
		lower := strings.ToLower(key)
		if !strings.Contains(lower, "temp") || !strings.HasSuffix(lower, "_input") {
			continue
		}
		// Only a key that sorts before the current winner can replace it.
		if found && key >= bestKey {
			continue
		}

		var value float64
		switch v := raw.(type) {
		case float64:
			value = v
		case string:
			parsed, err := strconv.ParseFloat(strings.TrimSpace(v), 64)
			if err != nil {
				continue
			}
			value = parsed
		default:
			continue
		}
		// x-x is 0 for every finite x and NaN for NaN and ±Inf.
		if value-value != 0 {
			continue
		}
		bestKey, best, found = key, value, true
	}
	return best, found
}
|
||||||
|
|
||||||
|
// classifyLiveTempGroup assigns a sensor to "gpu", "cpu" or "ambient" by
// substring matching on the lower-cased "chip name" text. GPU markers are
// checked first, then CPU markers; anything else is "ambient".
func classifyLiveTempGroup(chip, name string) string {
	haystack := strings.ToLower(strings.TrimSpace(chip + " " + name))
	containsAny := func(markers ...string) bool {
		for _, marker := range markers {
			if strings.Contains(haystack, marker) {
				return true
			}
		}
		return false
	}

	if containsAny("gpu", "amdgpu", "nvidia", "adeon") {
		return "gpu"
	}
	if containsAny(
		"coretemp",
		"k10temp",
		"zenpower",
		"package id",
		"x86_pkg_temp",
		"tctl",
		"tdie",
		"cpu",
		"peci",
	) {
		return "cpu"
	}
	return "ambient"
}
|
||||||
|
|
||||||
|
// compactAmbientTempName builds the display label for an ambient sensor.
// Both inputs are trimmed. The trimmed name is returned on its own when the
// chip is blank, equals the name (ignoring case), or is already contained in
// the name (ignoring case); otherwise the result is "chip / name".
func compactAmbientTempName(chip, name string) string {
	chipName, sensor := strings.TrimSpace(chip), strings.TrimSpace(name)
	switch {
	case chipName == "":
		return sensor
	case strings.EqualFold(chipName, sensor):
		return sensor
	case strings.Contains(strings.ToLower(sensor), strings.ToLower(chipName)):
		return sensor
	}
	return chipName + " / " + sensor
}
|
||||||
|
|||||||
44
audit/internal/platform/live_metrics_test.go
Normal file
44
audit/internal/platform/live_metrics_test.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFirstTempInputValue(t *testing.T) {
|
||||||
|
feature := map[string]any{
|
||||||
|
"temp1_input": 61.5,
|
||||||
|
"temp1_max": 80.0,
|
||||||
|
}
|
||||||
|
got, ok := firstTempInputValue(feature)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("expected value")
|
||||||
|
}
|
||||||
|
if got != 61.5 {
|
||||||
|
t.Fatalf("got %v want 61.5", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClassifyLiveTempGroup(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
chip string
|
||||||
|
name string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{chip: "coretemp-isa-0000", name: "Package id 0", want: "cpu"},
|
||||||
|
{chip: "amdgpu-pci-4300", name: "edge", want: "gpu"},
|
||||||
|
{chip: "nvme-pci-0100", name: "Composite", want: "ambient"},
|
||||||
|
{chip: "acpitz-acpi-0", name: "temp1", want: "ambient"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := classifyLiveTempGroup(tc.chip, tc.name); got != tc.want {
|
||||||
|
t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", tc.chip, tc.name, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompactAmbientTempName(t *testing.T) {
|
||||||
|
if got := compactAmbientTempName("nvme-pci-0100", "Composite"); got != "nvme-pci-0100 / Composite" {
|
||||||
|
t.Fatalf("got %q", got)
|
||||||
|
}
|
||||||
|
if got := compactAmbientTempName("", "Inlet Temp"); got != "Inlet Temp" {
|
||||||
|
t.Fatalf("got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,7 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
|
|||||||
out := make([]InterfaceInfo, 0, len(names))
|
out := make([]InterfaceInfo, 0, len(names))
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
state := "unknown"
|
state := "unknown"
|
||||||
if raw, err := exec.Command("ip", "-o", "link", "show", name).Output(); err == nil {
|
if up, err := interfaceAdminState(name); err == nil {
|
||||||
fields := strings.Fields(string(raw))
|
if up {
|
||||||
if len(fields) >= 9 {
|
state = "up"
|
||||||
state = fields[8]
|
} else {
|
||||||
|
state = "down"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var ipv4 []string
|
ipv4, err := interfaceIPv4Addrs(name)
|
||||||
if raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", name).Output(); err == nil {
|
if err != nil {
|
||||||
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
|
ipv4 = nil
|
||||||
fields := strings.Fields(line)
|
|
||||||
if len(fields) >= 4 {
|
|
||||||
ipv4 = append(ipv4, fields[3])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
|
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
|
||||||
@@ -55,6 +52,109 @@ func (s *System) DefaultRoute() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
|
||||||
|
names, err := listInterfaceNames()
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := NetworkSnapshot{
|
||||||
|
Interfaces: make([]NetworkInterfaceSnapshot, 0, len(names)),
|
||||||
|
}
|
||||||
|
for _, name := range names {
|
||||||
|
up, err := interfaceAdminState(name)
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
ipv4, err := interfaceIPv4Addrs(name)
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
snapshot.Interfaces = append(snapshot.Interfaces, NetworkInterfaceSnapshot{
|
||||||
|
Name: name,
|
||||||
|
Up: up,
|
||||||
|
IPv4: ipv4,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := exec.Command("ip", "route", "show", "default").Output(); err == nil {
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line != "" {
|
||||||
|
snapshot.DefaultRoutes = append(snapshot.DefaultRoutes, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := os.ReadFile("/etc/resolv.conf"); err == nil {
|
||||||
|
snapshot.ResolvConf = string(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
return snapshot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
|
||||||
|
var errs []string
|
||||||
|
|
||||||
|
for _, iface := range snapshot.Interfaces {
|
||||||
|
if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := exec.Command("ip", "addr", "flush", "dev", iface.Name).Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
|
||||||
|
}
|
||||||
|
for _, cidr := range iface.IPv4 {
|
||||||
|
if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
|
||||||
|
detail := strings.TrimSpace(string(raw))
|
||||||
|
if detail != "" {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
|
||||||
|
} else {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state := "down"
|
||||||
|
if iface.Up {
|
||||||
|
state = "up"
|
||||||
|
}
|
||||||
|
if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
|
||||||
|
var exitErr *exec.ExitError
|
||||||
|
if !errors.As(err, &exitErr) {
|
||||||
|
errs = append(errs, fmt.Sprintf("clear default route: %v", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, route := range snapshot.DefaultRoutes {
|
||||||
|
fields := strings.Fields(route)
|
||||||
|
if len(fields) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
args := append([]string{"route", "add"}, fields...)
|
||||||
|
if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
|
||||||
|
detail := strings.TrimSpace(string(raw))
|
||||||
|
if detail != "" {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
|
||||||
|
} else {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return errors.New(strings.Join(errs, "; "))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *System) DHCPOne(iface string) (string, error) {
|
func (s *System) DHCPOne(iface string) (string, error) {
|
||||||
var out bytes.Buffer
|
var out bytes.Buffer
|
||||||
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
|
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
|
||||||
@@ -142,12 +242,52 @@ func (s *System) SetInterfaceState(iface string, up bool) error {
|
|||||||
|
|
||||||
// GetInterfaceState returns true if the interface is UP.
|
// GetInterfaceState returns true if the interface is UP.
|
||||||
func (s *System) GetInterfaceState(iface string) (bool, error) {
|
func (s *System) GetInterfaceState(iface string) (bool, error) {
|
||||||
raw, err := os.ReadFile(fmt.Sprintf("/sys/class/net/%s/operstate", iface))
|
return interfaceAdminState(iface)
|
||||||
|
}
|
||||||
|
|
||||||
|
func interfaceAdminState(iface string) (bool, error) {
|
||||||
|
raw, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
state := strings.TrimSpace(string(raw))
|
return parseInterfaceAdminState(string(raw))
|
||||||
return state == "up", nil
|
}
|
||||||
|
|
||||||
|
// parseInterfaceAdminState extracts the administrative state from the output
// of `ip -o link show dev <iface>`.
//
// It looks at the comma-separated flag list between the first '<' and the
// next '>' and reports true when one of the flags is exactly "UP" (so
// "LOWER_UP" alone does not count). An error is returned when the flag list
// cannot be located in raw.
func parseInterfaceAdminState(raw string) (bool, error) {
	_, afterOpen, found := strings.Cut(raw, "<")
	if !found {
		return false, fmt.Errorf("ip link output missing flags")
	}

	flagList, _, found := strings.Cut(afterOpen, ">")
	if !found {
		return false, fmt.Errorf("ip link output missing flag terminator")
	}

	for _, name := range strings.Split(flagList, ",") {
		if strings.TrimSpace(name) == "UP" {
			return true, nil
		}
	}
	return false, nil
}
|
||||||
|
|
||||||
|
// interfaceIPv4Addrs lists the IPv4 addresses of iface by running
// `ip -o -4 addr show dev <iface>` and collecting the fourth
// whitespace-separated field of every output line.
//
// When the command runs but exits with a non-zero status the interface is
// treated as having no addresses and (nil, nil) is returned; any other
// failure to run the command is returned as an error.
func interfaceIPv4Addrs(iface string) ([]string, error) {
	out, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
	if err != nil {
		if errors.As(err, new(*exec.ExitError)) {
			return nil, nil
		}
		return nil, err
	}

	var addrs []string
	for _, row := range strings.Split(strings.TrimSpace(string(out)), "\n") {
		// Rows with fewer than four columns carry no address field.
		if cols := strings.Fields(row); len(cols) >= 4 {
			addrs = append(addrs, cols[3])
		}
	}
	return addrs, nil
}
|
||||||
|
|
||||||
func listInterfaceNames() ([]string, error) {
|
func listInterfaceNames() ([]string, error) {
|
||||||
|
|||||||
46
audit/internal/platform/network_test.go
Normal file
46
audit/internal/platform/network_test.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseInterfaceAdminState(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
raw string
|
||||||
|
want bool
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "admin up with no carrier",
|
||||||
|
raw: "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "admin down",
|
||||||
|
raw: "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "malformed output",
|
||||||
|
raw: "2: enp1s0: mtu 1500 state DOWN\n",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got, err := parseInterfaceAdminState(tt.raw)
|
||||||
|
if tt.wantErr {
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Fatalf("got %v want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -90,6 +90,12 @@ func (s *System) DetectGPUVendor() string {
|
|||||||
if _, err := os.Stat("/dev/kfd"); err == nil {
|
if _, err := os.Stat("/dev/kfd"); err == nil {
|
||||||
return "amd"
|
return "amd"
|
||||||
}
|
}
|
||||||
|
if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
|
||||||
|
text := strings.ToLower(string(raw))
|
||||||
|
if strings.Contains(text, "advanced micro devices") || strings.Contains(text, "amd/ati") {
|
||||||
|
return "amd"
|
||||||
|
}
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,8 +123,8 @@ func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
|
// RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
|
||||||
func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePack(baseDir, "gpu-amd", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd", []satJob{
|
||||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
|
{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
|
||||||
{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
@@ -128,14 +134,20 @@ func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (str
|
|||||||
|
|
||||||
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
||||||
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
||||||
func (s *System) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_AMD_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
return runAcceptancePack(baseDir, "gpu-amd-stress", []satJob{
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_AMD_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
|
if err := ensureAMDRuntimeReady(); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
|
||||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
||||||
{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
|
{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
|
||||||
"rocm-smi", "--showtemp", "--showpower",
|
"bash", "-lc",
|
||||||
fmt.Sprintf("--duration=%d", seconds),
|
fmt.Sprintf("end=$((SECONDS+%d)); while [ \"$SECONDS\" -lt \"$end\" ]; do rocm-smi --showtemp --showpower --csv; sleep 1; done", seconds),
|
||||||
}},
|
}},
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
@@ -191,7 +203,7 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
|
return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
|
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
|
||||||
@@ -202,24 +214,27 @@ func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir
|
|||||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
||||||
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
||||||
return runAcceptancePack(baseDir, "memory", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
|
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
|
||||||
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_VM_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
||||||
sizeArg := "80%"
|
sizeArg := "80%"
|
||||||
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
||||||
sizeArg = fmt.Sprintf("%dM", mb)
|
sizeArg = fmt.Sprintf("%dM", mb)
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "memory-stress", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-stress-ng-vm.log", cmd: []string{
|
{name: "02-stress-ng-vm.log", cmd: []string{
|
||||||
"stress-ng", "--vm", "1",
|
"stress-ng", "--vm", "1",
|
||||||
@@ -232,24 +247,27 @@ func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (stri
|
|||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_SAT_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_SAT_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"}
|
cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"}
|
||||||
if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 {
|
if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 {
|
||||||
cmd = append(cmd, "-M", fmt.Sprintf("%d", mb))
|
cmd = append(cmd, "-M", fmt.Sprintf("%d", mb))
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "sat-stress", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "sat-stress", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-stressapptest.log", cmd: cmd},
|
{name: "02-stressapptest.log", cmd: cmd},
|
||||||
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (s *System) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if durationSec <= 0 {
|
if durationSec <= 0 {
|
||||||
durationSec = 60
|
durationSec = 60
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "cpu", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "cpu", []satJob{
|
||||||
{name: "01-lscpu.log", cmd: []string{"lscpu"}},
|
{name: "01-lscpu.log", cmd: []string{"lscpu"}},
|
||||||
{name: "02-sensors-before.log", cmd: []string{"sensors"}},
|
{name: "02-sensors-before.log", cmd: []string{"sensors"}},
|
||||||
{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
|
{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
|
||||||
@@ -257,7 +275,7 @@ func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc f
|
|||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if baseDir == "" {
|
if baseDir == "" {
|
||||||
baseDir = "/var/log/bee-sat"
|
baseDir = "/var/log/bee-sat"
|
||||||
}
|
}
|
||||||
@@ -285,11 +303,17 @@ func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string))
|
|||||||
}
|
}
|
||||||
|
|
||||||
for index, devPath := range devices {
|
for index, devPath := range devices {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
||||||
commands := storageSATCommands(devPath)
|
commands := storageSATCommands(devPath)
|
||||||
for cmdIndex, job := range commands {
|
for cmdIndex, job := range commands {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
||||||
out, err := runSATCommand(verboseLog, job.name, job.cmd, logFunc)
|
out, err := runSATCommandCtx(ctx, verboseLog, job.name, job.cmd, nil, logFunc)
|
||||||
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
@@ -338,49 +362,6 @@ func nvidiaSATJobs() []satJob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func runAcceptancePack(baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
|
||||||
if baseDir == "" {
|
|
||||||
baseDir = "/var/log/bee-sat"
|
|
||||||
}
|
|
||||||
ts := time.Now().UTC().Format("20060102-150405")
|
|
||||||
runDir := filepath.Join(baseDir, prefix+"-"+ts)
|
|
||||||
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
verboseLog := filepath.Join(runDir, "verbose.log")
|
|
||||||
|
|
||||||
var summary strings.Builder
|
|
||||||
stats := satStats{}
|
|
||||||
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
|
||||||
for _, job := range jobs {
|
|
||||||
var out []byte
|
|
||||||
var err error
|
|
||||||
cmd := make([]string, 0, len(job.cmd))
|
|
||||||
for _, arg := range job.cmd {
|
|
||||||
cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
|
|
||||||
}
|
|
||||||
out, err = runSATCommand(verboseLog, job.name, cmd, logFunc)
|
|
||||||
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
|
||||||
return "", writeErr
|
|
||||||
}
|
|
||||||
status, rc := classifySATResult(job.name, out, err)
|
|
||||||
stats.Add(status)
|
|
||||||
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
|
|
||||||
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
|
|
||||||
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
|
|
||||||
}
|
|
||||||
writeSATStats(&summary, stats)
|
|
||||||
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
|
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
||||||
if diagLevel < 1 || diagLevel > 4 {
|
if diagLevel < 1 || diagLevel > 4 {
|
||||||
diagLevel = 3
|
diagLevel = 3
|
||||||
@@ -402,6 +383,9 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
if baseDir == "" {
|
if baseDir == "" {
|
||||||
baseDir = "/var/log/bee-sat"
|
baseDir = "/var/log/bee-sat"
|
||||||
}
|
}
|
||||||
@@ -649,6 +633,20 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
|
|||||||
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureAMDRuntimeReady reports whether the AMD GPU runtime looks usable.
//
// It returns nil when /dev/kfd exists, or when
// /sys/module/amdgpu/initstate reads "live" (case-insensitive). If the
// initstate file is readable but holds another value, the error includes that
// value; if it cannot be read at all, the error states that /dev/kfd is
// missing and amdgpu is not loaded.
func ensureAMDRuntimeReady() error {
	if _, statErr := os.Stat("/dev/kfd"); statErr == nil {
		return nil
	}

	raw, readErr := os.ReadFile("/sys/module/amdgpu/initstate")
	if readErr != nil {
		return errors.New("AMD GPUs are present but the runtime is not initialized: /dev/kfd is missing and amdgpu is not loaded")
	}

	state := strings.TrimSpace(string(raw))
	if !strings.EqualFold(state, "live") {
		return fmt.Errorf("AMD driver is present but not initialized: amdgpu initstate=%q", state)
	}
	return nil
}
|
||||||
|
|
||||||
func rocmSMIExecutableCandidates() []string {
|
func rocmSMIExecutableCandidates() []string {
|
||||||
return expandExistingPaths(rocmSMIExecutableGlobs)
|
return expandExistingPaths(rocmSMIExecutableGlobs)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,10 +2,12 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -304,10 +306,19 @@ func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
|
|||||||
// sampleFanSpeeds reads fan RPM values from ipmitool sdr.
|
// sampleFanSpeeds reads fan RPM values from ipmitool sdr.
|
||||||
func sampleFanSpeeds() ([]FanReading, error) {
|
func sampleFanSpeeds() ([]FanReading, error) {
|
||||||
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
|
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
|
||||||
|
if err == nil {
|
||||||
|
if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
|
||||||
|
if len(fans) > 0 {
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return parseFanSpeeds(string(out)), nil
|
return nil, sensorsErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
||||||
@@ -316,18 +327,21 @@ func parseFanSpeeds(raw string) []FanReading {
|
|||||||
var fans []FanReading
|
var fans []FanReading
|
||||||
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
||||||
parts := strings.Split(line, "|")
|
parts := strings.Split(line, "|")
|
||||||
if len(parts) < 3 {
|
if len(parts) < 2 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
unit := strings.TrimSpace(parts[2])
|
unit := ""
|
||||||
if !strings.EqualFold(unit, "RPM") {
|
if len(parts) >= 3 {
|
||||||
continue
|
unit = strings.TrimSpace(parts[2])
|
||||||
}
|
}
|
||||||
valStr := strings.TrimSpace(parts[1])
|
valStr := strings.TrimSpace(parts[1])
|
||||||
|
if !strings.EqualFold(unit, "RPM") && !strings.Contains(strings.ToUpper(valStr), "RPM") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
|
if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
val, err := strconv.ParseFloat(valStr, 64)
|
val, err := parseFanRPMValue(valStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -339,6 +353,84 @@ func parseFanSpeeds(raw string) []FanReading {
|
|||||||
return fans
|
return fans
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseFanRPMValue parses the numeric part of a fan reading such as
// "2400.000", "1800 RPM" or "1,800 RPM".
//
// Commas are removed, the text is split on whitespace and the first token is
// parsed as a float64. strconv.ErrSyntax is returned when no token is left;
// otherwise the result (and error) of strconv.ParseFloat is passed through.
func parseFanRPMValue(raw string) (float64, error) {
	withoutCommas := strings.ReplaceAll(raw, ",", "")
	// strings.Fields already ignores leading and trailing whitespace.
	tokens := strings.Fields(withoutCommas)
	if len(tokens) < 1 {
		return 0, strconv.ErrSyntax
	}
	return strconv.ParseFloat(tokens[0], 64)
}
|
||||||
|
|
||||||
|
func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
|
||||||
|
out, err := exec.Command("sensors", "-j").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var doc map[string]map[string]any
|
||||||
|
if err := json.Unmarshal(out, &doc); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
chips := make([]string, 0, len(doc))
|
||||||
|
for chip := range doc {
|
||||||
|
chips = append(chips, chip)
|
||||||
|
}
|
||||||
|
sort.Strings(chips)
|
||||||
|
var fans []FanReading
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, chip := range chips {
|
||||||
|
features := doc[chip]
|
||||||
|
names := make([]string, 0, len(features))
|
||||||
|
for name := range features {
|
||||||
|
names = append(names, name)
|
||||||
|
}
|
||||||
|
sort.Strings(names)
|
||||||
|
for _, name := range names {
|
||||||
|
feature, ok := features[name].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rpm, ok := firstFanInputValue(feature)
|
||||||
|
if !ok || rpm <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := strings.TrimSpace(name)
|
||||||
|
if chip != "" && !strings.Contains(strings.ToLower(label), strings.ToLower(chip)) {
|
||||||
|
label = chip + " / " + label
|
||||||
|
}
|
||||||
|
if _, ok := seen[label]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[label] = struct{}{}
|
||||||
|
fans = append(fans, FanReading{Name: label, RPM: rpm})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstFanInputValue returns the first usable fan reading found in a decoded
// sensor feature object.
//
// Only keys whose lower-cased name contains "fan" and ends in "_input" are
// considered, in sorted key order. A float64 value is returned as is; a
// string value is returned if it parses as a float. Keys holding anything
// else (or an unparsable string) are skipped. The boolean result is false
// when no key yields a value.
func firstFanInputValue(feature map[string]any) (float64, bool) {
	var candidates []string
	for key := range feature {
		name := strings.ToLower(key)
		if strings.Contains(name, "fan") && strings.HasSuffix(name, "_input") {
			candidates = append(candidates, key)
		}
	}
	// Map iteration order is random; sort so the same key always wins.
	sort.Strings(candidates)

	for _, key := range candidates {
		if num, isFloat := feature[key].(float64); isFloat {
			return num, true
		}
		if text, isString := feature[key].(string); isString {
			if parsed, err := strconv.ParseFloat(text, 64); err == nil {
				return parsed, true
			}
		}
	}
	return 0, false
}
|
||||||
|
|
||||||
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
|
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
|
||||||
func sampleCPUMaxTemp() float64 {
|
func sampleCPUMaxTemp() float64 {
|
||||||
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
||||||
|
|||||||
27
audit/internal/platform/sat_fan_stress_test.go
Normal file
27
audit/internal/platform/sat_fan_stress_test.go
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseFanSpeeds(t *testing.T) {
|
||||||
|
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
|
||||||
|
got := parseFanSpeeds(raw)
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("fans=%d want 2 (%v)", len(got), got)
|
||||||
|
}
|
||||||
|
if got[0].Name != "FAN1" || got[0].RPM != 2400 {
|
||||||
|
t.Fatalf("fan0=%+v", got[0])
|
||||||
|
}
|
||||||
|
if got[1].Name != "FAN2" || got[1].RPM != 1800 {
|
||||||
|
t.Fatalf("fan1=%+v", got[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFirstFanInputValue(t *testing.T) {
|
||||||
|
feature := map[string]any{
|
||||||
|
"fan1_input": 9200.0,
|
||||||
|
}
|
||||||
|
got, ok := firstFanInputValue(feature)
|
||||||
|
if !ok || got != 9200 {
|
||||||
|
t.Fatalf("got=%v ok=%v", got, ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,6 +8,18 @@ type InterfaceInfo struct {
|
|||||||
IPv4 []string
|
IPv4 []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NetworkInterfaceSnapshot is one interface entry inside a NetworkSnapshot.
// NOTE(review): field meanings below are read off the field names only —
// confirm against the code that fills the snapshot in.
type NetworkInterfaceSnapshot struct {
	Name string   // interface name
	Up   bool     // presumably whether the link was up when captured
	IPv4 []string // presumably the IPv4 addresses configured at capture time
}
|
||||||
|
|
||||||
|
// NetworkSnapshot groups per-interface entries with route and resolver data.
// NOTE(review): presumably this is the value captured before a network change
// and restored on rollback — confirm against the capture/restore code.
type NetworkSnapshot struct {
	Interfaces    []NetworkInterfaceSnapshot // one entry per interface
	DefaultRoutes []string                   // presumably the default route lines at capture time
	ResolvConf    string                     // presumably the resolv.conf contents at capture time
}
|
||||||
|
|
||||||
type ServiceAction string
|
type ServiceAction string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -152,11 +153,12 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var body struct {
|
var body struct {
|
||||||
Duration int `json:"duration"`
|
Duration int `json:"duration"`
|
||||||
DiagLevel int `json:"diag_level"`
|
DiagLevel int `json:"diag_level"`
|
||||||
GPUIndices []int `json:"gpu_indices"`
|
GPUIndices []int `json:"gpu_indices"`
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
}
|
}
|
||||||
body.DiagLevel = 1
|
|
||||||
if r.ContentLength > 0 {
|
if r.ContentLength > 0 {
|
||||||
_ = json.NewDecoder(r.Body).Decode(&body)
|
_ = json.NewDecoder(r.Body).Decode(&body)
|
||||||
}
|
}
|
||||||
@@ -172,11 +174,16 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
Status: TaskPending,
|
Status: TaskPending,
|
||||||
CreatedAt: time.Now(),
|
CreatedAt: time.Now(),
|
||||||
params: taskParams{
|
params: taskParams{
|
||||||
Duration: body.Duration,
|
Duration: body.Duration,
|
||||||
DiagLevel: body.DiagLevel,
|
DiagLevel: body.DiagLevel,
|
||||||
GPUIndices: body.GPUIndices,
|
GPUIndices: body.GPUIndices,
|
||||||
|
BurnProfile: body.Profile,
|
||||||
|
DisplayName: body.DisplayName,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
if strings.TrimSpace(body.DisplayName) != "" {
|
||||||
|
t.Name = body.DisplayName
|
||||||
|
}
|
||||||
globalQueue.enqueue(t)
|
globalQueue.enqueue(t)
|
||||||
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
||||||
}
|
}
|
||||||
@@ -320,18 +327,21 @@ func (h *handler) handleAPINetworkDHCP(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
_ = json.NewDecoder(r.Body).Decode(&req)
|
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||||
|
|
||||||
var result app.ActionResult
|
result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
var err error
|
if req.Interface == "" || req.Interface == "all" {
|
||||||
if req.Interface == "" || req.Interface == "all" {
|
return h.opts.App.DHCPAllResult()
|
||||||
result, err = h.opts.App.DHCPAllResult()
|
}
|
||||||
} else {
|
return h.opts.App.DHCPOneResult(req.Interface)
|
||||||
result, err = h.opts.App.DHCPOneResult(req.Interface)
|
})
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "ok", "output": result.Body})
|
writeJSON(w, map[string]any{
|
||||||
|
"status": "ok",
|
||||||
|
"output": result.Body,
|
||||||
|
"rollback_in": int(netRollbackTimeout.Seconds()),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -357,12 +367,18 @@ func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request)
|
|||||||
Gateway: req.Gateway,
|
Gateway: req.Gateway,
|
||||||
DNS: req.DNS,
|
DNS: req.DNS,
|
||||||
}
|
}
|
||||||
result, err := h.opts.App.SetStaticIPv4Result(cfg)
|
result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
|
return h.opts.App.SetStaticIPv4Result(cfg)
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "ok", "output": result.Body})
|
writeJSON(w, map[string]any{
|
||||||
|
"status": "ok",
|
||||||
|
"output": result.Body,
|
||||||
|
"rollback_in": int(netRollbackTimeout.Seconds()),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Export ────────────────────────────────────────────────────────────────────
|
// ── Export ────────────────────────────────────────────────────────────────────
|
||||||
@@ -421,6 +437,13 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
|
|||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
h.installMu.Lock()
|
||||||
|
installRunning := h.installJob != nil && !h.installJob.isDone()
|
||||||
|
h.installMu.Unlock()
|
||||||
|
if installRunning {
|
||||||
|
writeError(w, http.StatusConflict, "install to disk is already running")
|
||||||
|
return
|
||||||
|
}
|
||||||
t := &Task{
|
t := &Task{
|
||||||
ID: newJobID("install-to-ram"),
|
ID: newJobID("install-to-ram"),
|
||||||
Name: "Install to RAM",
|
Name: "Install to RAM",
|
||||||
@@ -528,6 +551,10 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusBadRequest, "device not in install candidate list")
|
writeError(w, http.StatusBadRequest, "device not in install candidate list")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if globalQueue.hasActiveTarget("install-to-ram") {
|
||||||
|
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
h.installMu.Lock()
|
h.installMu.Lock()
|
||||||
if h.installJob != nil && !h.installJob.isDone() {
|
if h.installJob != nil && !h.installJob.isDone() {
|
||||||
@@ -576,9 +603,11 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
|
|
||||||
// Feed server ring buffers
|
// Feed server ring buffers
|
||||||
for _, t := range sample.Temps {
|
for _, t := range sample.Temps {
|
||||||
if t.Name == "CPU" {
|
switch t.Group {
|
||||||
h.ringCPUTemp.push(t.Celsius)
|
case "cpu":
|
||||||
break
|
h.pushNamedMetricRing(&h.cpuTempRings, t.Name, t.Celsius)
|
||||||
|
case "ambient":
|
||||||
|
h.pushNamedMetricRing(&h.ambientTempRings, t.Name, t.Celsius)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
h.ringPower.push(sample.PowerW)
|
h.ringPower.push(sample.PowerW)
|
||||||
@@ -623,6 +652,23 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
|
||||||
|
if name == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, item := range *dst {
|
||||||
|
if item != nil && item.Name == name && item.Ring != nil {
|
||||||
|
item.Ring.push(value)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*dst = append(*dst, &namedMetricsRing{
|
||||||
|
Name: name,
|
||||||
|
Ring: newMetricsRing(120),
|
||||||
|
})
|
||||||
|
(*dst)[len(*dst)-1].Ring.push(value)
|
||||||
|
}
|
||||||
|
|
||||||
// ── Network toggle ────────────────────────────────────────────────────────────
|
// ── Network toggle ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
const netRollbackTimeout = 60 * time.Second
|
const netRollbackTimeout = 60 * time.Second
|
||||||
@@ -646,33 +692,14 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.opts.App.SetInterfaceState(req.Iface, !wasUp); err != nil {
|
if _, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
|
err := h.opts.App.SetInterfaceState(req.Iface, !wasUp)
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}); err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cancel any existing pending change (rollback it first).
|
|
||||||
h.pendingNetMu.Lock()
|
|
||||||
if h.pendingNet != nil {
|
|
||||||
prev := h.pendingNet
|
|
||||||
prev.mu.Lock()
|
|
||||||
prev.timer.Stop()
|
|
||||||
_ = h.opts.App.SetInterfaceState(prev.iface, prev.wasUp)
|
|
||||||
prev.mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
pnc := &pendingNetChange{iface: req.Iface, wasUp: wasUp}
|
|
||||||
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
|
||||||
_ = h.opts.App.SetInterfaceState(req.Iface, wasUp)
|
|
||||||
h.pendingNetMu.Lock()
|
|
||||||
if h.pendingNet == pnc {
|
|
||||||
h.pendingNet = nil
|
|
||||||
}
|
|
||||||
h.pendingNetMu.Unlock()
|
|
||||||
})
|
|
||||||
h.pendingNet = pnc
|
|
||||||
h.pendingNetMu.Unlock()
|
|
||||||
|
|
||||||
newState := "up"
|
newState := "up"
|
||||||
if wasUp {
|
if wasUp {
|
||||||
newState = "down"
|
newState = "down"
|
||||||
@@ -684,6 +711,42 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, error)) (app.ActionResult, error) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
return app.ActionResult{}, fmt.Errorf("app not configured")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.rollbackPendingNetworkChange(); err != nil && err.Error() != "no pending network change" {
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot, err := h.opts.App.CaptureNetworkSnapshot()
|
||||||
|
if err != nil {
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := apply()
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pnc := &pendingNetChange{snapshot: snapshot}
|
||||||
|
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
||||||
|
_ = h.opts.App.RestoreNetworkSnapshot(snapshot)
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
if h.pendingNet == pnc {
|
||||||
|
h.pendingNet = nil
|
||||||
|
}
|
||||||
|
h.pendingNetMu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
h.pendingNet = pnc
|
||||||
|
h.pendingNetMu.Unlock()
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
pnc := h.pendingNet
|
pnc := h.pendingNet
|
||||||
@@ -698,19 +761,30 @@ func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if err := h.rollbackPendingNetworkChange(); err != nil {
|
||||||
|
if err.Error() == "no pending network change" {
|
||||||
|
writeError(w, http.StatusConflict, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "rolled back"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) rollbackPendingNetworkChange() error {
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
pnc := h.pendingNet
|
pnc := h.pendingNet
|
||||||
h.pendingNet = nil
|
h.pendingNet = nil
|
||||||
h.pendingNetMu.Unlock()
|
h.pendingNetMu.Unlock()
|
||||||
if pnc == nil {
|
if pnc == nil {
|
||||||
writeError(w, http.StatusConflict, "no pending network change")
|
return fmt.Errorf("no pending network change")
|
||||||
return
|
|
||||||
}
|
}
|
||||||
pnc.mu.Lock()
|
pnc.mu.Lock()
|
||||||
pnc.timer.Stop()
|
pnc.timer.Stop()
|
||||||
pnc.mu.Unlock()
|
pnc.mu.Unlock()
|
||||||
if h.opts.App != nil {
|
if h.opts.App != nil {
|
||||||
_ = h.opts.App.SetInterfaceState(pnc.iface, pnc.wasUp)
|
return h.opts.App.RestoreNetworkSnapshot(pnc.snapshot)
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "rolled back"})
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,18 +1,21 @@
|
|||||||
package webui
|
package webui
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// jobState holds the output lines and completion status of an async job.
|
// jobState holds the output lines and completion status of an async job.
|
||||||
type jobState struct {
	lines   []string      // output lines collected so far; append adds to this while holding mu
	done    bool          // completion flag
	err     string        // error text; presumably empty when the job succeeded — confirm
	mu      sync.Mutex    // held by append while it updates lines and offers the line to subs
	subs    []chan string // subscriber channels; append offers every new line to each of them
	cancel  func()        // optional cancel function; nil if job is not cancellable
	logPath string        // when non-empty, append also writes each line to this file via appendJobLog
}
|
||||||
|
|
||||||
// abort cancels the job if it has a cancel function and is not yet done.
|
// abort cancels the job if it has a cancel function and is not yet done.
|
||||||
@@ -30,6 +33,9 @@ func (j *jobState) append(line string) {
|
|||||||
j.mu.Lock()
|
j.mu.Lock()
|
||||||
defer j.mu.Unlock()
|
defer j.mu.Unlock()
|
||||||
j.lines = append(j.lines, line)
|
j.lines = append(j.lines, line)
|
||||||
|
if j.logPath != "" {
|
||||||
|
appendJobLog(j.logPath, line)
|
||||||
|
}
|
||||||
for _, ch := range j.subs {
|
for _, ch := range j.subs {
|
||||||
select {
|
select {
|
||||||
case ch <- line:
|
case ch <- line:
|
||||||
@@ -100,3 +106,32 @@ func (m *jobManager) get(id string) (*jobState, bool) {
|
|||||||
j, ok := m.jobs[id]
|
j, ok := m.jobs[id]
|
||||||
return j, ok
|
return j, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newTaskJobState(logPath string) *jobState {
|
||||||
|
j := &jobState{logPath: logPath}
|
||||||
|
if logPath == "" {
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(logPath)
|
||||||
|
if err != nil || len(data) == 0 {
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
lines := strings.Split(strings.ReplaceAll(string(data), "\r\n", "\n"), "\n")
|
||||||
|
if len(lines) > 0 && lines[len(lines)-1] == "" {
|
||||||
|
lines = lines[:len(lines)-1]
|
||||||
|
}
|
||||||
|
j.lines = append(j.lines, lines...)
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
|
||||||
|
// appendJobLog appends line plus a newline to the file at path, creating the
// file (mode 0644) if it does not exist.
//
// Logging is best effort: an empty path is a no-op, and open or write failures
// are silently ignored.
func appendJobLog(path, line string) {
	if len(path) == 0 {
		return
	}

	const flags = os.O_APPEND | os.O_CREATE | os.O_WRONLY
	file, err := os.OpenFile(path, flags, 0644)
	if err != nil {
		return
	}
	defer file.Close()

	_, _ = file.WriteString(line + "\n")
}
|
||||||
|
|||||||
@@ -83,10 +83,10 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
|||||||
`
|
`
|
||||||
}
|
}
|
||||||
|
|
||||||
func layoutNav(active string) string {
|
func layoutNav(active string, buildLabel string) string {
|
||||||
items := []struct{ id, label, href, onclick string }{
|
items := []struct{ id, label, href, onclick string }{
|
||||||
{"dashboard", "Dashboard", "/", ""},
|
{"dashboard", "Dashboard", "/", ""},
|
||||||
{"audit", "Audit", "#", "openAuditModal();return false;"},
|
{"audit", "Audit", "/audit", ""},
|
||||||
{"validate", "Validate", "/validate", ""},
|
{"validate", "Validate", "/validate", ""},
|
||||||
{"burn", "Burn", "/burn", ""},
|
{"burn", "Burn", "/burn", ""},
|
||||||
{"tasks", "Tasks", "/tasks", ""},
|
{"tasks", "Tasks", "/tasks", ""},
|
||||||
@@ -109,7 +109,12 @@ func layoutNav(active string) string {
|
|||||||
cls, item.href, item.label))
|
cls, item.href, item.label))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b.WriteString(`</nav></aside>`)
|
if strings.TrimSpace(buildLabel) == "" {
|
||||||
|
buildLabel = "dev"
|
||||||
|
}
|
||||||
|
b.WriteString(`</nav>`)
|
||||||
|
b.WriteString(`<div style="padding:12px 16px;border-top:1px solid rgba(255,255,255,.08);font-size:11px;color:rgba(255,255,255,.45)">Build ` + html.EscapeString(buildLabel) + `</div>`)
|
||||||
|
b.WriteString(`</aside>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,6 +126,10 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
pageID = "dashboard"
|
pageID = "dashboard"
|
||||||
title = "Dashboard"
|
title = "Dashboard"
|
||||||
body = renderDashboard(opts)
|
body = renderDashboard(opts)
|
||||||
|
case "audit":
|
||||||
|
pageID = "audit"
|
||||||
|
title = "Audit"
|
||||||
|
body = renderAudit()
|
||||||
case "validate":
|
case "validate":
|
||||||
pageID = "validate"
|
pageID = "validate"
|
||||||
title = "Validate"
|
title = "Validate"
|
||||||
@@ -173,7 +182,7 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return layoutHead(opts.Title+" — "+title) +
|
return layoutHead(opts.Title+" — "+title) +
|
||||||
layoutNav(pageID) +
|
layoutNav(pageID, opts.BuildLabel) +
|
||||||
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
|
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
|
||||||
body +
|
body +
|
||||||
`</div></div>` +
|
`</div></div>` +
|
||||||
@@ -191,6 +200,10 @@ func renderDashboard(opts HandlerOptions) string {
|
|||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renderAudit returns the body of the Audit page: a single card whose header
// carries an "Actions" button (opening the audit modal) and whose body embeds
// /viewer in an iframe.
func renderAudit() string {
	const (
		head = `<div class="card-head">Audit Viewer <button class="btn btn-sm btn-secondary" style="margin-left:auto" onclick="openAuditModal()">Actions</button></div>`
		body = `<div class="card-body" style="padding:0"><iframe class="viewer-frame" src="/viewer" title="Audit viewer"></iframe></div>`
	)
	return `<div class="card">` + head + body + `</div>`
}
|
||||||
|
|
||||||
func renderHardwareSummaryCard(opts HandlerOptions) string {
|
func renderHardwareSummaryCard(opts HandlerOptions) string {
|
||||||
data, err := loadSnapshot(opts.AuditPath)
|
data, err := loadSnapshot(opts.AuditPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -298,14 +311,14 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {
|
|||||||
|
|
||||||
func renderAuditModal() string {
|
func renderAuditModal() string {
|
||||||
return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center">
|
return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center">
|
||||||
<div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:700px;position:relative">
|
<div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:1100px;width:min(1100px,92vw);max-height:92vh;overflow:auto;position:relative">
|
||||||
<div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div>
|
<div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div>
|
||||||
<div style="margin-bottom:12px;display:flex;gap:8px">
|
<div style="margin-bottom:12px;display:flex;gap:8px">
|
||||||
<button class="btn btn-primary" onclick="auditModalRun()">▶ Re-run Audit</button>
|
<button class="btn btn-primary" onclick="auditModalRun()">▶ Re-run Audit</button>
|
||||||
<a class="btn btn-secondary" href="/audit.json" download>↓ Download</a>
|
<a class="btn btn-secondary" href="/audit.json" download>↓ Download</a>
|
||||||
<a class="btn btn-secondary" href="/viewer" target="_blank">Open Viewer</a>
|
|
||||||
</div>
|
</div>
|
||||||
<div id="audit-modal-terminal" class="terminal" style="display:none;max-height:300px"></div>
|
<div id="audit-modal-terminal" class="terminal" style="display:none;max-height:220px;margin-bottom:12px"></div>
|
||||||
|
<iframe class="viewer-frame" src="/viewer" title="Audit viewer in modal" style="height:min(70vh,720px)"></iframe>
|
||||||
<button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">✕</button>
|
<button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">✕</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -373,9 +386,23 @@ func renderMetrics() string {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="card" style="margin-bottom:16px">
|
<div class="card" style="margin-bottom:16px">
|
||||||
<div class="card-head">Server — Temperature</div>
|
<div class="card-head">Temperature — CPU</div>
|
||||||
<div class="card-body" style="padding:8px">
|
<div class="card-body" style="padding:8px">
|
||||||
<img id="chart-server-temp" src="/api/metrics/chart/server-temp.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
|
<img id="chart-server-temp-cpu" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">Temperature — GPUs</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-server-temp-gpu" src="/api/metrics/chart/server-temp-gpu.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">Temperature — Ambient Sensors</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-server-temp-ambient" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -383,6 +410,13 @@ func renderMetrics() string {
|
|||||||
<div class="card-head">Server — Power</div>
|
<div class="card-head">Server — Power</div>
|
||||||
<div class="card-body" style="padding:8px">
|
<div class="card-body" style="padding:8px">
|
||||||
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
|
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">Server — Fan RPM</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-server-fans" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
|
||||||
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
|
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -394,12 +428,12 @@ let knownGPUs = [];
|
|||||||
|
|
||||||
function refreshCharts() {
|
function refreshCharts() {
|
||||||
const t = '?t=' + Date.now();
|
const t = '?t=' + Date.now();
|
||||||
['chart-server-load','chart-server-temp','chart-server-power'].forEach(id => {
|
['chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans'].forEach(id => {
|
||||||
const el = document.getElementById(id);
|
const el = document.getElementById(id);
|
||||||
if (el) el.src = el.src.split('?')[0] + t;
|
if (el) el.src = el.src.split('?')[0] + t;
|
||||||
});
|
});
|
||||||
knownGPUs.forEach(idx => {
|
knownGPUs.forEach(idx => {
|
||||||
['load','temp','power'].forEach(kind => {
|
['load','power'].forEach(kind => {
|
||||||
const el = document.getElementById('chart-gpu-' + idx + '-' + kind);
|
const el = document.getElementById('chart-gpu-' + idx + '-' + kind);
|
||||||
if (el) el.src = el.src.split('?')[0] + t;
|
if (el) el.src = el.src.split('?')[0] + t;
|
||||||
});
|
});
|
||||||
@@ -423,10 +457,6 @@ es.addEventListener('metrics', e => {
|
|||||||
'<div class="card-body" style="padding:8px">' +
|
'<div class="card-body" style="padding:8px">' +
|
||||||
'<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' +
|
'<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' +
|
||||||
'</div>' +
|
'</div>' +
|
||||||
'<div class="card-head">GPU ' + g.index + ' — Temperature</div>' +
|
|
||||||
'<div class="card-body" style="padding:8px">' +
|
|
||||||
'<img id="chart-gpu-' + g.index + '-temp" src="/api/metrics/chart/gpu/' + g.index + '-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' temp">' +
|
|
||||||
'</div>' +
|
|
||||||
'<div class="card-head">GPU ' + g.index + ' — Power</div>' +
|
'<div class="card-head">GPU ' + g.index + ' — Power</div>' +
|
||||||
'<div class="card-body" style="padding:8px">' +
|
'<div class="card-body" style="padding:8px">' +
|
||||||
'<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' +
|
'<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' +
|
||||||
@@ -437,8 +467,9 @@ es.addEventListener('metrics', e => {
|
|||||||
|
|
||||||
// Update numeric tables
|
// Update numeric tables
|
||||||
let sysHTML = '';
|
let sysHTML = '';
|
||||||
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
|
(d.temps||[]).filter(t => t.group === 'cpu').forEach(t => {
|
||||||
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
|
sysHTML += '<tr><td>'+t.name+'</td><td>'+t.celsius.toFixed(1)+'°C</td></tr>';
|
||||||
|
});
|
||||||
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
|
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
|
||||||
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
|
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
|
||||||
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
|
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
|
||||||
@@ -491,6 +522,8 @@ let satES = null;
|
|||||||
function runSAT(target) {
|
function runSAT(target) {
|
||||||
if (satES) { satES.close(); satES = null; }
|
if (satES) { satES.close(); satES = null; }
|
||||||
const body = {};
|
const body = {};
|
||||||
|
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
|
||||||
|
body.display_name = labels[target] || ('Validate ' + target);
|
||||||
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
||||||
document.getElementById('sat-output').style.display='block';
|
document.getElementById('sat-output').style.display='block';
|
||||||
@@ -520,6 +553,8 @@ function runAllSAT() {
|
|||||||
const btn = document.getElementById('sat-btn-' + target);
|
const btn = document.getElementById('sat-btn-' + target);
|
||||||
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
|
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
|
||||||
const body = {};
|
const body = {};
|
||||||
|
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
|
||||||
|
body.display_name = labels[target] || ('Validate ' + target);
|
||||||
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
||||||
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
||||||
@@ -568,13 +603,15 @@ func renderSATCard(id, label, extra string) string {
|
|||||||
func renderBurn() string {
|
func renderBurn() string {
|
||||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
|
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
|
||||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||||
|
<div class="card"><div class="card-head">Burn Profile</div><div class="card-body">
|
||||||
|
<div class="form-row" style="max-width:320px"><label>Preset</label><select id="burn-profile"><option value="smoke">Smoke: 5 minutes</option><option value="acceptance">Acceptance: 1 hour</option><option value="overnight">Overnight: 8 hours</option></select></div>
|
||||||
|
<p style="color:var(--muted);font-size:12px">Applied to all tests on this page. NVIDIA uses mapped DCGM levels: smoke=quick, acceptance=targeted stress, overnight=extended stress.</p>
|
||||||
|
</div></div>
|
||||||
<div class="grid3">
|
<div class="grid3">
|
||||||
<div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
|
||||||
<div class="form-row"><label>Duration</label><select id="bi-dur"><option value="600">10 minutes</option><option value="3600">1 hour</option><option value="28800">8 hours</option><option value="86400">24 hours</option></select></div>
|
|
||||||
<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">▶ Start NVIDIA Stress</button>
|
<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">▶ Start NVIDIA Stress</button>
|
||||||
</div></div>
|
</div></div>
|
||||||
<div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
|
||||||
<div class="form-row"><label>Duration (seconds)</label><input type="number" id="bi-cpu-dur" value="300" min="60"></div>
|
|
||||||
<button class="btn btn-primary" onclick="runBurnIn('cpu')">▶ Start CPU Stress</button>
|
<button class="btn btn-primary" onclick="runBurnIn('cpu')">▶ Start CPU Stress</button>
|
||||||
</div></div>
|
</div></div>
|
||||||
<div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
|
||||||
@@ -598,11 +635,9 @@ func renderBurn() string {
|
|||||||
let biES = null;
|
let biES = null;
|
||||||
function runBurnIn(target) {
|
function runBurnIn(target) {
|
||||||
if (biES) { biES.close(); biES = null; }
|
if (biES) { biES.close(); biES = null; }
|
||||||
const body = {};
|
const body = { profile: document.getElementById('burn-profile').value || 'smoke' };
|
||||||
if (target === 'nvidia') body.duration = parseInt(document.getElementById('bi-dur').value)||600;
|
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('bi-cpu-dur').value)||300;
|
|
||||||
document.getElementById('bi-output').style.display='block';
|
document.getElementById('bi-output').style.display='block';
|
||||||
document.getElementById('bi-title').textContent = '— ' + target;
|
document.getElementById('bi-title').textContent = '— ' + target + ' [' + body.profile + ']';
|
||||||
const term = document.getElementById('bi-terminal');
|
const term = document.getElementById('bi-terminal');
|
||||||
term.textContent = 'Enqueuing ' + target + ' stress...\n';
|
term.textContent = 'Enqueuing ' + target + ' stress...\n';
|
||||||
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
||||||
@@ -672,7 +707,7 @@ var _netCountdownTimer = null;
|
|||||||
function loadNetwork() {
|
function loadNetwork() {
|
||||||
fetch('/api/network').then(r=>r.json()).then(d => {
|
fetch('/api/network').then(r=>r.json()).then(d => {
|
||||||
const rows = (d.interfaces||[]).map(i =>
|
const rows = (d.interfaces||[]).map(i =>
|
||||||
'<tr><td>'+i.Name+'</td>' +
|
'<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
|
||||||
'<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
|
'<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
|
||||||
'<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
|
'<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
|
||||||
).join('');
|
).join('');
|
||||||
@@ -681,6 +716,10 @@ function loadNetwork() {
|
|||||||
(d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
|
(d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
function selectIface(iface) {
|
||||||
|
document.getElementById('dhcp-iface').value = iface;
|
||||||
|
document.getElementById('st-iface').value = iface;
|
||||||
|
}
|
||||||
function toggleIface(iface, currentState) {
|
function toggleIface(iface, currentState) {
|
||||||
fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
|
fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
|
||||||
.then(r=>r.json()).then(d => {
|
.then(r=>r.json()).then(d => {
|
||||||
@@ -716,6 +755,7 @@ function runDHCP() {
|
|||||||
fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
|
fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
|
||||||
.then(r=>r.json()).then(d => {
|
.then(r=>r.json()).then(d => {
|
||||||
document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
|
document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
|
||||||
|
if (!d.error) showNetPending(d.rollback_in || 60);
|
||||||
loadNetwork();
|
loadNetwork();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -729,6 +769,7 @@ function setStatic() {
|
|||||||
dns: dns,
|
dns: dns,
|
||||||
})}).then(r=>r.json()).then(d => {
|
})}).then(r=>r.json()).then(d => {
|
||||||
document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
|
document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
|
||||||
|
if (!d.error) showNetPending(d.rollback_in || 60);
|
||||||
loadNetwork();
|
loadNetwork();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -846,10 +887,17 @@ func listExportFiles(exportDir string) ([]string, error) {
|
|||||||
|
|
||||||
func renderTools() string {
|
func renderTools() string {
|
||||||
return `<div class="card" style="margin-bottom:16px">
|
return `<div class="card" style="margin-bottom:16px">
|
||||||
<div class="card-head">Install to RAM</div>
|
<div class="card-head">System Install</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
|
<div style="margin-bottom:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
|
||||||
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
||||||
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
||||||
|
</div>
|
||||||
|
<div style="border-top:1px solid var(--line);padding-top:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
|
||||||
|
renderInstallInline() + `
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
@@ -886,9 +934,6 @@ function installToRAM() {
|
|||||||
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
||||||
renderServicesInline() + `</div></div>
|
renderServicesInline() + `</div></div>
|
||||||
|
|
||||||
<div class="card"><div class="card-head">Install to Disk</div><div class="card-body">` +
|
|
||||||
renderInstallInline() + `</div></div>
|
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
function checkTools() {
|
function checkTools() {
|
||||||
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
||||||
@@ -939,8 +984,6 @@ func renderInstallInline() string {
|
|||||||
<div id="install-terminal" class="terminal" style="max-height:500px"></div>
|
<div id="install-terminal" class="terminal" style="max-height:500px"></div>
|
||||||
<div id="install-status" style="margin-top:12px;font-size:13px"></div>
|
<div id="install-status" style="margin-top:12px;font-size:13px"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
#install-disk-tbody tr{cursor:pointer}
|
#install-disk-tbody tr{cursor:pointer}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"html"
|
||||||
"mime"
|
"mime"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@@ -13,6 +14,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
|
"bee/audit/internal/platform"
|
||||||
"bee/audit/internal/runtimeenv"
|
"bee/audit/internal/runtimeenv"
|
||||||
gocharts "github.com/go-analyze/charts"
|
gocharts "github.com/go-analyze/charts"
|
||||||
"reanimator/chart/viewer"
|
"reanimator/chart/viewer"
|
||||||
@@ -35,6 +37,7 @@ func init() {
|
|||||||
// HandlerOptions configures the web UI handler.
|
// HandlerOptions configures the web UI handler.
|
||||||
type HandlerOptions struct {
|
type HandlerOptions struct {
|
||||||
Title string
|
Title string
|
||||||
|
BuildLabel string
|
||||||
AuditPath string
|
AuditPath string
|
||||||
ExportDir string
|
ExportDir string
|
||||||
App *app.App
|
App *app.App
|
||||||
@@ -84,7 +87,7 @@ func relAgeLabel(age time.Duration) string {
|
|||||||
if age < time.Hour {
|
if age < time.Hour {
|
||||||
m := int(age.Minutes())
|
m := int(age.Minutes())
|
||||||
if m == 0 {
|
if m == 0 {
|
||||||
return "-<1m"
|
return "-1m"
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("-%dm", m)
|
return fmt.Sprintf("-%dm", m)
|
||||||
}
|
}
|
||||||
@@ -102,31 +105,36 @@ type gpuRings struct {
|
|||||||
Power *metricsRing
|
Power *metricsRing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type namedMetricsRing struct {
|
||||||
|
Name string
|
||||||
|
Ring *metricsRing
|
||||||
|
}
|
||||||
|
|
||||||
// pendingNetChange tracks a network state change awaiting confirmation.
|
// pendingNetChange tracks a network state change awaiting confirmation.
|
||||||
type pendingNetChange struct {
|
type pendingNetChange struct {
|
||||||
iface string
|
snapshot platform.NetworkSnapshot
|
||||||
wasUp bool
|
timer *time.Timer
|
||||||
timer *time.Timer
|
mu sync.Mutex
|
||||||
mu sync.Mutex
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// handler is the HTTP handler for the web UI.
|
// handler is the HTTP handler for the web UI.
|
||||||
type handler struct {
|
type handler struct {
|
||||||
opts HandlerOptions
|
opts HandlerOptions
|
||||||
mux *http.ServeMux
|
mux *http.ServeMux
|
||||||
// server rings
|
// server rings
|
||||||
ringCPUTemp *metricsRing
|
ringCPULoad *metricsRing
|
||||||
ringCPULoad *metricsRing
|
ringMemLoad *metricsRing
|
||||||
ringMemLoad *metricsRing
|
ringPower *metricsRing
|
||||||
ringPower *metricsRing
|
ringFans []*metricsRing
|
||||||
ringFans []*metricsRing
|
fanNames []string
|
||||||
fanNames []string
|
cpuTempRings []*namedMetricsRing
|
||||||
|
ambientTempRings []*namedMetricsRing
|
||||||
// per-GPU rings (index = GPU index)
|
// per-GPU rings (index = GPU index)
|
||||||
gpuRings []*gpuRings
|
gpuRings []*gpuRings
|
||||||
ringsMu sync.Mutex
|
ringsMu sync.Mutex
|
||||||
// install job (at most one at a time)
|
// install job (at most one at a time)
|
||||||
installJob *jobState
|
installJob *jobState
|
||||||
installMu sync.Mutex
|
installMu sync.Mutex
|
||||||
// pending network change (rollback on timeout)
|
// pending network change (rollback on timeout)
|
||||||
pendingNet *pendingNetChange
|
pendingNet *pendingNetChange
|
||||||
pendingNetMu sync.Mutex
|
pendingNetMu sync.Mutex
|
||||||
@@ -146,7 +154,6 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
|
|
||||||
h := &handler{
|
h := &handler{
|
||||||
opts: opts,
|
opts: opts,
|
||||||
ringCPUTemp: newMetricsRing(120),
|
|
||||||
ringCPULoad: newMetricsRing(120),
|
ringCPULoad: newMetricsRing(120),
|
||||||
ringMemLoad: newMetricsRing(120),
|
ringMemLoad: newMetricsRing(120),
|
||||||
ringPower: newMetricsRing(120),
|
ringPower: newMetricsRing(120),
|
||||||
@@ -176,9 +183,9 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
|
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
|
||||||
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
|
mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
|
||||||
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
|
mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
|
||||||
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
|
mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
|
||||||
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
|
mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
|
||||||
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
|
mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
|
||||||
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
|
||||||
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
||||||
|
|
||||||
@@ -382,21 +389,51 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
yMin = floatPtr(0)
|
yMin = floatPtr(0)
|
||||||
yMax = floatPtr(100)
|
yMax = floatPtr(100)
|
||||||
|
|
||||||
case path == "server-temp":
|
case path == "server-temp", path == "server-temp-cpu":
|
||||||
title = "CPU Temperature"
|
title = "CPU Temperature"
|
||||||
vCPUTemp, l := h.ringCPUTemp.snapshot()
|
h.ringsMu.Lock()
|
||||||
labels = l
|
datasets, names, labels = snapshotNamedRings(h.cpuTempRings)
|
||||||
datasets = [][]float64{vCPUTemp}
|
h.ringsMu.Unlock()
|
||||||
names = []string{"CPU Temp °C"}
|
|
||||||
yMin = floatPtr(0)
|
yMin = floatPtr(0)
|
||||||
yMax = autoMax120(vCPUTemp)
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
case path == "server-temp-gpu":
|
||||||
|
title = "GPU Temperature"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vTemp, l := gr.Temp.snapshot()
|
||||||
|
datasets = append(datasets, vTemp)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
case path == "server-temp-ambient":
|
||||||
|
title = "Ambient / Other Sensors"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
datasets, names, labels = snapshotNamedRings(h.ambientTempRings)
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
case path == "server-power":
|
case path == "server-power":
|
||||||
title = "Power & Fans"
|
title = "System Power"
|
||||||
vPower, l := h.ringPower.snapshot()
|
vPower, l := h.ringPower.snapshot()
|
||||||
labels = l
|
labels = l
|
||||||
datasets = [][]float64{vPower}
|
datasets = [][]float64{vPower}
|
||||||
names = []string{"Power W"}
|
names = []string{"Power W"}
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(vPower)
|
||||||
|
|
||||||
|
case path == "server-fans":
|
||||||
|
title = "Fan RPM"
|
||||||
h.ringsMu.Lock()
|
h.ringsMu.Lock()
|
||||||
for i, fr := range h.ringFans {
|
for i, fr := range h.ringFans {
|
||||||
fv, _ := fr.snapshot()
|
fv, _ := fr.snapshot()
|
||||||
@@ -507,14 +544,20 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
|
|||||||
datasets[i] = make([]float64, n)
|
datasets[i] = make([]float64, n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sparse := sparseLabels(labels, 6)
|
title = sanitizeChartText(title)
|
||||||
|
names = sanitizeChartTexts(names)
|
||||||
|
sparse := sanitizeChartTexts(sparseLabels(labels, 6))
|
||||||
|
|
||||||
opt := gocharts.NewLineChartOptionWithData(datasets)
|
opt := gocharts.NewLineChartOptionWithData(datasets)
|
||||||
opt.Title = gocharts.TitleOption{Text: title}
|
opt.Title = gocharts.TitleOption{Text: title}
|
||||||
opt.XAxis.Labels = sparse
|
opt.XAxis.Labels = sparse
|
||||||
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
||||||
if yMin != nil || yMax != nil {
|
if yMin != nil || yMax != nil {
|
||||||
opt.YAxis = []gocharts.YAxisOption{{Min: yMin, Max: yMax}}
|
opt.YAxis = []gocharts.YAxisOption{{
|
||||||
|
Min: yMin,
|
||||||
|
Max: yMax,
|
||||||
|
ValueFormatter: chartLegendNumber,
|
||||||
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
p := gocharts.NewPainter(gocharts.PainterOptions{
|
p := gocharts.NewPainter(gocharts.PainterOptions{
|
||||||
@@ -528,6 +571,26 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
|
|||||||
return p.Bytes()
|
return p.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sanitizeChartText prepares s for use as chart text (title, series name or
// axis label) in the rendered SVG.
//
// It drops characters that are not legal in an XML 1.0 document: the C0
// control characters other than tab, line feed and carriage return, and the
// non-characters U+FFFE and U+FFFF. The remaining text is passed through
// html.EscapeString so that '<', '>', '&' and quotes cannot be interpreted as
// markup. An empty input yields an empty result.
func sanitizeChartText(s string) string {
	cleaned := strings.Map(func(r rune) rune {
		switch {
		case r == '\t', r == '\n', r == '\r':
			// The only C0 controls XML permits.
			return r
		case r < 0x20, r == 0xFFFE, r == 0xFFFF:
			// A negative result tells strings.Map to drop the rune.
			return -1
		}
		return r
	}, s)
	return html.EscapeString(cleaned)
}
|
||||||
|
|
||||||
|
func sanitizeChartTexts(in []string) []string {
|
||||||
|
out := make([]string, len(in))
|
||||||
|
for i, s := range in {
|
||||||
|
out[i] = sanitizeChartText(s)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func safeIdx(s []float64, i int) float64 {
|
func safeIdx(s []float64, i int) float64 {
|
||||||
if i < len(s) {
|
if i < len(s) {
|
||||||
return s[i]
|
return s[i]
|
||||||
@@ -535,6 +598,46 @@ func safeIdx(s []float64, i int) float64 {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
|
||||||
|
var datasets [][]float64
|
||||||
|
var names []string
|
||||||
|
var labels []string
|
||||||
|
for _, item := range rings {
|
||||||
|
if item == nil || item.Ring == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vals, l := item.Ring.snapshot()
|
||||||
|
datasets = append(datasets, vals)
|
||||||
|
names = append(names, item.Name)
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return datasets, names, labels
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartLegendNumber formats an axis value as a compact label.
//
//   - Magnitudes below 1000 are printed as a whole number ("62").
//   - Magnitudes from 1000 up to 10000 are printed in thousands with at most
//     two decimals, trailing zeros removed and a comma as the decimal
//     separator ("1,25k", "2,6k", "1k").
//   - Magnitudes of 10000 and above are rounded to whole thousands ("10k").
//
// Negative inputs are prefixed with "-", unless the magnitude rounds to zero,
// in which case plain "0" is returned.
func chartLegendNumber(v float64) string {
	neg := v < 0
	if neg {
		v = -v
	}
	var out string
	switch {
	case v >= 10000:
		out = fmt.Sprintf("%dk", int((v+500)/1000))
	case v >= 1000:
		s := fmt.Sprintf("%.2f", v/1000)
		// Drop trailing zeros first, then a dangling decimal point.
		s = strings.TrimRight(strings.TrimRight(s, "0"), ".")
		out = strings.ReplaceAll(s, ".", ",") + "k"
	default:
		out = fmt.Sprintf("%.0f", v)
		if out == "1000" {
			// Values just under 1000 round up across the unit boundary;
			// keep the label consistent with the thousands branch.
			out = "1k"
		}
	}
	// Avoid a "-0" label for small negatives that round to zero.
	if neg && out != "0" {
		return "-" + out
	}
	return out
}
|
||||||
|
|
||||||
func sparseLabels(labels []string, n int) []string {
|
func sparseLabels(labels []string, n int) []string {
|
||||||
out := make([]string, len(labels))
|
out := make([]string, len(labels))
|
||||||
step := len(labels) / n
|
step := len(labels) / n
|
||||||
|
|||||||
@@ -9,6 +9,28 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestChartLegendNumber(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
in float64
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{in: 0.4, want: "0"},
|
||||||
|
{in: 61.5, want: "62"},
|
||||||
|
{in: 999.4, want: "999"},
|
||||||
|
{in: 1200, want: "1,2k"},
|
||||||
|
{in: 1250, want: "1,25k"},
|
||||||
|
{in: 1310, want: "1,31k"},
|
||||||
|
{in: 1500, want: "1,5k"},
|
||||||
|
{in: 2600, want: "2,6k"},
|
||||||
|
{in: 10200, want: "10k"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := chartLegendNumber(tc.in); got != tc.want {
|
||||||
|
t.Fatalf("chartLegendNumber(%v)=%q want %q", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRootRendersDashboard(t *testing.T) {
|
func TestRootRendersDashboard(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
@@ -31,9 +53,9 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
if first.Code != http.StatusOK {
|
if first.Code != http.StatusOK {
|
||||||
t.Fatalf("first status=%d", first.Code)
|
t.Fatalf("first status=%d", first.Code)
|
||||||
}
|
}
|
||||||
// Dashboard should contain the audit modal (with viewer link) and hardware summary
|
// Dashboard should contain the audit nav link and hardware summary
|
||||||
if !strings.Contains(first.Body.String(), `openAuditModal`) {
|
if !strings.Contains(first.Body.String(), `href="/audit"`) {
|
||||||
t.Fatalf("first body missing audit modal trigger: %s", first.Body.String())
|
t.Fatalf("first body missing audit nav link: %s", first.Body.String())
|
||||||
}
|
}
|
||||||
if !strings.Contains(first.Body.String(), `/viewer`) {
|
if !strings.Contains(first.Body.String(), `/viewer`) {
|
||||||
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
||||||
@@ -56,6 +78,28 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
handler := NewHandler(HandlerOptions{AuditPath: path})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/audit", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `iframe class="viewer-frame" src="/viewer"`) {
|
||||||
|
t.Fatalf("audit page missing viewer frame: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `openAuditModal()`) {
|
||||||
|
t.Fatalf("audit page missing action modal trigger: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
|||||||
@@ -10,6 +10,8 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Task statuses.
|
// Task statuses.
|
||||||
@@ -23,10 +25,10 @@ const (
|
|||||||
|
|
||||||
// taskNames maps target → human-readable name.
|
// taskNames maps target → human-readable name.
|
||||||
var taskNames = map[string]string{
|
var taskNames = map[string]string{
|
||||||
"nvidia": "NVIDIA SAT",
|
"nvidia": "NVIDIA SAT",
|
||||||
"memory": "Memory SAT",
|
"memory": "Memory SAT",
|
||||||
"storage": "Storage SAT",
|
"storage": "Storage SAT",
|
||||||
"cpu": "CPU SAT",
|
"cpu": "CPU SAT",
|
||||||
"amd": "AMD GPU SAT",
|
"amd": "AMD GPU SAT",
|
||||||
"amd-stress": "AMD GPU Burn-in",
|
"amd-stress": "AMD GPU Burn-in",
|
||||||
"memory-stress": "Memory Burn-in",
|
"memory-stress": "Memory Burn-in",
|
||||||
@@ -47,6 +49,7 @@ type Task struct {
|
|||||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||||
ErrMsg string `json:"error,omitempty"`
|
ErrMsg string `json:"error,omitempty"`
|
||||||
|
LogPath string `json:"log_path,omitempty"`
|
||||||
|
|
||||||
// runtime fields (not serialised)
|
// runtime fields (not serialised)
|
||||||
job *jobState
|
job *jobState
|
||||||
@@ -55,29 +58,90 @@ type Task struct {
|
|||||||
|
|
||||||
// taskParams holds optional parameters parsed from the run request.
|
// taskParams holds optional parameters parsed from the run request.
|
||||||
type taskParams struct {
|
type taskParams struct {
|
||||||
Duration int
|
Duration int `json:"duration,omitempty"`
|
||||||
DiagLevel int
|
DiagLevel int `json:"diag_level,omitempty"`
|
||||||
GPUIndices []int
|
GPUIndices []int `json:"gpu_indices,omitempty"`
|
||||||
Device string // for install
|
BurnProfile string `json:"burn_profile,omitempty"`
|
||||||
|
DisplayName string `json:"display_name,omitempty"`
|
||||||
|
Device string `json:"device,omitempty"` // for install
|
||||||
|
}
|
||||||
|
|
||||||
|
type persistedTask struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Target string `json:"target"`
|
||||||
|
Priority int `json:"priority"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
CreatedAt time.Time `json:"created_at"`
|
||||||
|
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||||
|
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||||
|
ErrMsg string `json:"error,omitempty"`
|
||||||
|
LogPath string `json:"log_path,omitempty"`
|
||||||
|
Params taskParams `json:"params,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// burnPreset is the pair of run parameters that a burn profile expands to.
type burnPreset struct {
	// NvidiaDiag is the diagnostic level for NVIDIA runs.
	// NOTE(review): presumably a DCGM diag level, per the Burn page text; the
	// consumer is not visible here, so confirm.
	NvidiaDiag int
	// DurationSec is the run length in seconds.
	DurationSec int
}

// resolveBurnPreset maps a burn profile name to its preset:
//
//	"overnight"  -> diag 4, 8 hours
//	"acceptance" -> diag 3, 1 hour
//	anything else (including "smoke" and "") -> diag 1, 5 minutes
//
// Matching is exact and case-sensitive.
func resolveBurnPreset(profile string) burnPreset {
	const (
		minute = 60
		hour   = 60 * minute
	)
	if profile == "overnight" {
		return burnPreset{NvidiaDiag: 4, DurationSec: 8 * hour}
	}
	if profile == "acceptance" {
		return burnPreset{NvidiaDiag: 3, DurationSec: hour}
	}
	// Unknown or empty profiles fall back to the shortest preset.
	return burnPreset{NvidiaDiag: 1, DurationSec: 5 * minute}
}
|
||||||
|
|
||||||
// taskQueue manages a priority-ordered list of tasks and runs them one at a time.
|
// taskQueue manages a priority-ordered list of tasks and runs them one at a time.
|
||||||
type taskQueue struct {
|
type taskQueue struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
tasks []*Task
|
tasks []*Task
|
||||||
trigger chan struct{}
|
trigger chan struct{}
|
||||||
opts *HandlerOptions // set by startWorker
|
opts *HandlerOptions // set by startWorker
|
||||||
|
statePath string
|
||||||
|
logsDir string
|
||||||
|
started bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
|
var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
|
||||||
|
|
||||||
const maxTaskHistory = 50
|
const maxTaskHistory = 50
|
||||||
|
|
||||||
|
var (
|
||||||
|
runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunStorageAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runCPUAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunCPUAcceptancePackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runMemoryStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
// enqueue adds a task to the queue and notifies the worker.
|
// enqueue adds a task to the queue and notifies the worker.
|
||||||
func (q *taskQueue) enqueue(t *Task) {
|
func (q *taskQueue) enqueue(t *Task) {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
|
q.assignTaskLogPathLocked(t)
|
||||||
q.tasks = append(q.tasks, t)
|
q.tasks = append(q.tasks, t)
|
||||||
q.prune()
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
select {
|
select {
|
||||||
case q.trigger <- struct{}{}:
|
case q.trigger <- struct{}{}:
|
||||||
@@ -139,6 +203,20 @@ func (q *taskQueue) findJob(id string) (*jobState, bool) {
|
|||||||
return t.job, true
|
return t.job, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) hasActiveTarget(target string) bool {
|
||||||
|
q.mu.Lock()
|
||||||
|
defer q.mu.Unlock()
|
||||||
|
for _, t := range q.tasks {
|
||||||
|
if t.Target != target {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.Status == TaskPending || t.Status == TaskRunning {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc).
|
// snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc).
|
||||||
func (q *taskQueue) snapshot() []Task {
|
func (q *taskQueue) snapshot() []Task {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
@@ -174,8 +252,24 @@ func statusOrder(s string) int {
|
|||||||
|
|
||||||
// startWorker launches the queue runner goroutine.
|
// startWorker launches the queue runner goroutine.
|
||||||
func (q *taskQueue) startWorker(opts *HandlerOptions) {
|
func (q *taskQueue) startWorker(opts *HandlerOptions) {
|
||||||
|
q.mu.Lock()
|
||||||
q.opts = opts
|
q.opts = opts
|
||||||
go q.worker()
|
q.statePath = filepath.Join(opts.ExportDir, "tasks-state.json")
|
||||||
|
q.logsDir = filepath.Join(opts.ExportDir, "tasks")
|
||||||
|
_ = os.MkdirAll(q.logsDir, 0755)
|
||||||
|
if !q.started {
|
||||||
|
q.loadLocked()
|
||||||
|
q.started = true
|
||||||
|
go q.worker()
|
||||||
|
}
|
||||||
|
hasPending := q.nextPending() != nil
|
||||||
|
q.mu.Unlock()
|
||||||
|
if hasPending {
|
||||||
|
select {
|
||||||
|
case q.trigger <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *taskQueue) worker() {
|
func (q *taskQueue) worker() {
|
||||||
@@ -192,10 +286,13 @@ func (q *taskQueue) worker() {
|
|||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.Status = TaskRunning
|
t.Status = TaskRunning
|
||||||
t.StartedAt = &now
|
t.StartedAt = &now
|
||||||
j := &jobState{}
|
t.DoneAt = nil
|
||||||
|
t.ErrMsg = ""
|
||||||
|
j := newTaskJobState(t.LogPath)
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
j.cancel = cancel
|
j.cancel = cancel
|
||||||
t.job = j
|
t.job = j
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
|
|
||||||
q.runTask(t, j, ctx)
|
q.runTask(t, j, ctx)
|
||||||
@@ -212,6 +309,7 @@ func (q *taskQueue) worker() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
q.prune()
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
}
|
}
|
||||||
setCPUGovernor("powersave")
|
setCPUGovernor("powersave")
|
||||||
@@ -240,6 +338,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
a := q.opts.App
|
a := q.opts.App
|
||||||
|
|
||||||
j.append(fmt.Sprintf("Starting %s...", t.Name))
|
j.append(fmt.Sprintf("Starting %s...", t.Name))
|
||||||
|
if len(j.lines) > 0 {
|
||||||
|
j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
archive string
|
archive string
|
||||||
@@ -248,9 +349,13 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
|
|
||||||
switch t.Target {
|
switch t.Target {
|
||||||
case "nvidia":
|
case "nvidia":
|
||||||
if len(t.params.GPUIndices) > 0 || t.params.DiagLevel > 0 {
|
diagLevel := t.params.DiagLevel
|
||||||
|
if t.params.BurnProfile != "" && diagLevel <= 0 {
|
||||||
|
diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
|
||||||
|
}
|
||||||
|
if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
|
||||||
result, e := a.RunNvidiaAcceptancePackWithOptions(
|
result, e := a.RunNvidiaAcceptancePackWithOptions(
|
||||||
ctx, "", t.params.DiagLevel, t.params.GPUIndices, j.append,
|
ctx, "", diagLevel, t.params.GPUIndices, j.append,
|
||||||
)
|
)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
err = e
|
err = e
|
||||||
@@ -261,23 +366,38 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
||||||
}
|
}
|
||||||
case "memory":
|
case "memory":
|
||||||
archive, err = a.RunMemoryAcceptancePack("", j.append)
|
archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "storage":
|
case "storage":
|
||||||
archive, err = a.RunStorageAcceptancePack("", j.append)
|
archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "cpu":
|
case "cpu":
|
||||||
dur := t.params.Duration
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
if dur <= 0 {
|
if dur <= 0 {
|
||||||
dur = 60
|
dur = 60
|
||||||
}
|
}
|
||||||
archive, err = a.RunCPUAcceptancePack("", dur, j.append)
|
archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
|
||||||
case "amd":
|
case "amd":
|
||||||
archive, err = a.RunAMDAcceptancePack("", j.append)
|
archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "amd-stress":
|
case "amd-stress":
|
||||||
archive, err = a.RunAMDStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "memory-stress":
|
case "memory-stress":
|
||||||
archive, err = a.RunMemoryStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "sat-stress":
|
case "sat-stress":
|
||||||
archive, err = a.RunSATStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "audit":
|
case "audit":
|
||||||
result, e := a.RunAuditNow(q.opts.RuntimeMode)
|
result, e := a.RunAuditNow(q.opts.RuntimeMode)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
@@ -288,7 +408,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "install-to-ram":
|
case "install-to-ram":
|
||||||
err = a.RunInstallToRAM(j.append)
|
err = a.RunInstallToRAM(ctx, j.append)
|
||||||
default:
|
default:
|
||||||
j.append("ERROR: unknown target: " + t.Target)
|
j.append("ERROR: unknown target: " + t.Target)
|
||||||
j.finish("unknown target")
|
j.finish("unknown target")
|
||||||
@@ -355,6 +475,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
|||||||
t.Status = TaskCancelled
|
t.Status = TaskCancelled
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.DoneAt = &now
|
t.DoneAt = &now
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||||
case TaskRunning:
|
case TaskRunning:
|
||||||
if t.job != nil {
|
if t.job != nil {
|
||||||
@@ -363,6 +484,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
|||||||
t.Status = TaskCancelled
|
t.Status = TaskCancelled
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.DoneAt = &now
|
t.DoneAt = &now
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||||
default:
|
default:
|
||||||
writeError(w, http.StatusConflict, "task is not running or pending")
|
writeError(w, http.StatusConflict, "task is not running or pending")
|
||||||
@@ -390,6 +512,7 @@ func (h *handler) handleAPITasksPriority(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.Priority += req.Delta
|
t.Priority += req.Delta
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]int{"priority": t.Priority})
|
writeJSON(w, map[string]int{"priority": t.Priority})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -412,6 +535,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
|
|||||||
n++
|
n++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
globalQueue.persistLocked()
|
||||||
globalQueue.mu.Unlock()
|
globalQueue.mu.Unlock()
|
||||||
writeJSON(w, map[string]int{"cancelled": n})
|
writeJSON(w, map[string]int{"cancelled": n})
|
||||||
}
|
}
|
||||||
@@ -434,3 +558,79 @@ func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
streamJob(w, r, j)
|
streamJob(w, r, j)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
|
||||||
|
if t.LogPath != "" || q.logsDir == "" || t.ID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.LogPath = filepath.Join(q.logsDir, t.ID+".log")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) loadLocked() {
|
||||||
|
if q.statePath == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(q.statePath)
|
||||||
|
if err != nil || len(data) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var persisted []persistedTask
|
||||||
|
if err := json.Unmarshal(data, &persisted); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, pt := range persisted {
|
||||||
|
t := &Task{
|
||||||
|
ID: pt.ID,
|
||||||
|
Name: pt.Name,
|
||||||
|
Target: pt.Target,
|
||||||
|
Priority: pt.Priority,
|
||||||
|
Status: pt.Status,
|
||||||
|
CreatedAt: pt.CreatedAt,
|
||||||
|
StartedAt: pt.StartedAt,
|
||||||
|
DoneAt: pt.DoneAt,
|
||||||
|
ErrMsg: pt.ErrMsg,
|
||||||
|
LogPath: pt.LogPath,
|
||||||
|
params: pt.Params,
|
||||||
|
}
|
||||||
|
q.assignTaskLogPathLocked(t)
|
||||||
|
if t.Status == TaskPending || t.Status == TaskRunning {
|
||||||
|
t.Status = TaskPending
|
||||||
|
t.DoneAt = nil
|
||||||
|
t.ErrMsg = ""
|
||||||
|
}
|
||||||
|
q.tasks = append(q.tasks, t)
|
||||||
|
}
|
||||||
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) persistLocked() {
|
||||||
|
if q.statePath == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
state := make([]persistedTask, 0, len(q.tasks))
|
||||||
|
for _, t := range q.tasks {
|
||||||
|
state = append(state, persistedTask{
|
||||||
|
ID: t.ID,
|
||||||
|
Name: t.Name,
|
||||||
|
Target: t.Target,
|
||||||
|
Priority: t.Priority,
|
||||||
|
Status: t.Status,
|
||||||
|
CreatedAt: t.CreatedAt,
|
||||||
|
StartedAt: t.StartedAt,
|
||||||
|
DoneAt: t.DoneAt,
|
||||||
|
ErrMsg: t.ErrMsg,
|
||||||
|
LogPath: t.LogPath,
|
||||||
|
Params: t.params,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
data, err := json.MarshalIndent(state, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tmp := q.statePath + ".tmp"
|
||||||
|
if err := os.WriteFile(tmp, data, 0644); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = os.Rename(tmp, q.statePath)
|
||||||
|
}
|
||||||
|
|||||||
156
audit/internal/webui/tasks_test.go
Normal file
156
audit/internal/webui/tasks_test.go
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
q := &taskQueue{
|
||||||
|
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||||
|
logsDir: filepath.Join(dir, "tasks"),
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now().Add(-time.Minute)
|
||||||
|
task := &Task{
|
||||||
|
ID: "task-1",
|
||||||
|
Name: "Memory Burn-in",
|
||||||
|
Target: "memory-stress",
|
||||||
|
Priority: 2,
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now().Add(-2 * time.Minute),
|
||||||
|
StartedAt: &started,
|
||||||
|
params: taskParams{
|
||||||
|
Duration: 300,
|
||||||
|
BurnProfile: "smoke",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
q.tasks = append(q.tasks, task)
|
||||||
|
q.assignTaskLogPathLocked(task)
|
||||||
|
q.persistLocked()
|
||||||
|
|
||||||
|
recovered := &taskQueue{
|
||||||
|
statePath: q.statePath,
|
||||||
|
logsDir: q.logsDir,
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
recovered.loadLocked()
|
||||||
|
|
||||||
|
if len(recovered.tasks) != 1 {
|
||||||
|
t.Fatalf("tasks=%d want 1", len(recovered.tasks))
|
||||||
|
}
|
||||||
|
got := recovered.tasks[0]
|
||||||
|
if got.Status != TaskPending {
|
||||||
|
t.Fatalf("status=%q want %q", got.Status, TaskPending)
|
||||||
|
}
|
||||||
|
if got.params.Duration != 300 || got.params.BurnProfile != "smoke" {
|
||||||
|
t.Fatalf("params=%+v", got.params)
|
||||||
|
}
|
||||||
|
if got.LogPath == "" {
|
||||||
|
t.Fatal("expected log path")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "task.log")
|
||||||
|
if err := os.WriteFile(path, []byte("line1\nline2\n"), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
j := newTaskJobState(path)
|
||||||
|
existing, ch := j.subscribe()
|
||||||
|
if ch == nil {
|
||||||
|
t.Fatal("expected live subscription channel")
|
||||||
|
}
|
||||||
|
if len(existing) != 2 || existing[0] != "line1" || existing[1] != "line2" {
|
||||||
|
t.Fatalf("existing=%v", existing)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveBurnPreset(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
profile string
|
||||||
|
want burnPreset
|
||||||
|
}{
|
||||||
|
{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
||||||
|
{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
|
||||||
|
{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
|
||||||
|
{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := resolveBurnPreset(tc.profile); got != tc.want {
|
||||||
|
t.Fatalf("resolveBurnPreset(%q)=%+v want %+v", tc.profile, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunTaskHonorsCancel(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
blocked := make(chan struct{})
|
||||||
|
released := make(chan struct{})
|
||||||
|
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
close(blocked)
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
close(released)
|
||||||
|
return "", ctx.Err()
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
close(released)
|
||||||
|
return "unexpected", nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{App: &app.App{}},
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "cpu-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{Duration: 60},
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
j.cancel = cancel
|
||||||
|
tk.job = j
|
||||||
|
|
||||||
|
orig := runCPUAcceptancePackCtx
|
||||||
|
runCPUAcceptancePackCtx = func(_ *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return aRun(nil, ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
defer func() { runCPUAcceptancePackCtx = orig }()
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
q.runTask(tk, j, ctx)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
<-blocked
|
||||||
|
j.abort()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-released:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("task did not observe cancel")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("runTask did not return after cancel")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -32,7 +32,7 @@ lb config noauto \
|
|||||||
--memtest none \
|
--memtest none \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY-BEE" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE" \
|
||||||
--bootappend-live "boot=live components nomodeset video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
--chroot-squashfs-compression-type zstd \
|
--chroot-squashfs-compression-type zstd \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ menuentry "EASY-BEE (load to RAM)" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ label live-@FLAVOUR@-gsp-off
|
|||||||
menu label EASY-BEE (^NVIDIA GSP=off)
|
menu label EASY-BEE (^NVIDIA GSP=off)
|
||||||
linux @LINUX@
|
linux @LINUX@
|
||||||
initrd @INITRD@
|
initrd @INITRD@
|
||||||
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off
|
append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
|
||||||
|
|
||||||
label live-@FLAVOUR@-failsafe
|
label live-@FLAVOUR@-failsafe
|
||||||
menu label EASY-BEE (^fail-safe)
|
menu label EASY-BEE (^fail-safe)
|
||||||
|
|||||||
Reference in New Issue
Block a user