Compare commits
27 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4db89e9773 | |||
| 3fda18f708 | |||
| ea518abf30 | |||
| 744de588bb | |||
| a3ed9473a3 | |||
| a714c45f10 | |||
| 349e026cfa | |||
| 889fe1dc2f | |||
| befdbf3768 | |||
| ec6a0b292d | |||
| a03312c286 | |||
| e69e9109da | |||
| 413869809d | |||
| f9bd38572a | |||
| 662e3d2cdd | |||
| 126af96780 | |||
| ada15ac777 | |||
| dfb94f9ca6 | |||
| 5857805518 | |||
| 59a1d4b209 | |||
| 0dbfaf6121 | |||
| 5d72d48714 | |||
| 096b4a09ca | |||
| 5d42a92e4c | |||
| 3e54763367 | |||
| f91bce8661 | |||
| 585e6d7311 |
@@ -1,11 +1,13 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
@@ -16,6 +18,37 @@ import (
|
|||||||
|
|
||||||
var Version = "dev"
|
var Version = "dev"
|
||||||
|
|
||||||
|
func buildLabel() string {
|
||||||
|
label := strings.TrimSpace(Version)
|
||||||
|
if label == "" {
|
||||||
|
label = "dev"
|
||||||
|
}
|
||||||
|
if info, ok := debug.ReadBuildInfo(); ok {
|
||||||
|
var revision string
|
||||||
|
var modified bool
|
||||||
|
for _, setting := range info.Settings {
|
||||||
|
switch setting.Key {
|
||||||
|
case "vcs.revision":
|
||||||
|
revision = setting.Value
|
||||||
|
case "vcs.modified":
|
||||||
|
modified = setting.Value == "true"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if revision != "" {
|
||||||
|
short := revision
|
||||||
|
if len(short) > 12 {
|
||||||
|
short = short[:12]
|
||||||
|
}
|
||||||
|
label += " (" + short
|
||||||
|
if modified {
|
||||||
|
label += "+"
|
||||||
|
}
|
||||||
|
label += ")"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return label
|
||||||
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
||||||
}
|
}
|
||||||
@@ -139,7 +172,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
func runExport(args []string, stdout, stderr io.Writer) int {
|
func runExport(args []string, stdout, stderr io.Writer) int {
|
||||||
fs := flag.NewFlagSet("export", flag.ContinueOnError)
|
fs := flag.NewFlagSet("export", flag.ContinueOnError)
|
||||||
fs.SetOutput(stderr)
|
fs.SetOutput(stderr)
|
||||||
@@ -299,6 +331,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
|
|||||||
|
|
||||||
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
|
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
|
||||||
Title: *title,
|
Title: *title,
|
||||||
|
BuildLabel: buildLabel(),
|
||||||
AuditPath: *auditPath,
|
AuditPath: *auditPath,
|
||||||
ExportDir: *exportDir,
|
ExportDir: *exportDir,
|
||||||
App: app.New(platform.New()),
|
App: app.New(platform.New()),
|
||||||
@@ -351,15 +384,15 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
case "nvidia":
|
case "nvidia":
|
||||||
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
||||||
case "memory":
|
case "memory":
|
||||||
archive, err = application.RunMemoryAcceptancePack("", logLine)
|
archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
|
||||||
case "storage":
|
case "storage":
|
||||||
archive, err = application.RunStorageAcceptancePack("", logLine)
|
archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", logLine)
|
||||||
case "cpu":
|
case "cpu":
|
||||||
dur := *duration
|
dur := *duration
|
||||||
if dur <= 0 {
|
if dur <= 0 {
|
||||||
dur = 60
|
dur = 60
|
||||||
}
|
}
|
||||||
archive, err = application.RunCPUAcceptancePack("", dur, logLine)
|
archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Error("run sat", "target", target, "err", err)
|
slog.Error("run sat", "target", target, "err", err)
|
||||||
|
|||||||
11
audit/go.mod
11
audit/go.mod
@@ -1,6 +1,6 @@
|
|||||||
module bee/audit
|
module bee/audit
|
||||||
|
|
||||||
go 1.24.0
|
go 1.25.0
|
||||||
|
|
||||||
replace reanimator/chart => ../internal/chart
|
replace reanimator/chart => ../internal/chart
|
||||||
|
|
||||||
@@ -13,5 +13,14 @@ require (
|
|||||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||||
github.com/go-analyze/bulk v0.1.3 // indirect
|
github.com/go-analyze/bulk v0.1.3 // indirect
|
||||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
|
||||||
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||||
|
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
golang.org/x/image v0.24.0 // indirect
|
golang.org/x/image v0.24.0 // indirect
|
||||||
|
golang.org/x/sys v0.42.0 // indirect
|
||||||
|
modernc.org/libc v1.70.0 // indirect
|
||||||
|
modernc.org/mathutil v1.7.1 // indirect
|
||||||
|
modernc.org/memory v1.11.0 // indirect
|
||||||
|
modernc.org/sqlite v1.48.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
19
audit/go.sum
19
audit/go.sum
@@ -8,11 +8,30 @@ github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00
|
|||||||
github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
|
github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
|
||||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
|
||||||
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
|
||||||
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
|
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||||
|
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||||
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
|
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
|
||||||
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
|
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
|
||||||
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||||
|
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
|
||||||
|
modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
|
||||||
|
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||||
|
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||||
|
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||||
|
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
||||||
|
modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
|
||||||
|
modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
|
||||||
|
|||||||
@@ -55,6 +55,8 @@ type networkManager interface {
|
|||||||
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
|
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
|
||||||
SetInterfaceState(iface string, up bool) error
|
SetInterfaceState(iface string, up bool) error
|
||||||
GetInterfaceState(iface string) (bool, error)
|
GetInterfaceState(iface string) (bool, error)
|
||||||
|
CaptureNetworkSnapshot() (platform.NetworkSnapshot, error)
|
||||||
|
RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type serviceManager interface {
|
type serviceManager interface {
|
||||||
@@ -78,7 +80,7 @@ type installer interface {
|
|||||||
ListInstallDisks() ([]platform.InstallDisk, error)
|
ListInstallDisks() ([]platform.InstallDisk, error)
|
||||||
InstallToDisk(ctx context.Context, device string, logFile string) error
|
InstallToDisk(ctx context.Context, device string, logFile string) error
|
||||||
IsLiveMediaInRAM() bool
|
IsLiveMediaInRAM() bool
|
||||||
RunInstallToRAM(logFunc func(string)) error
|
RunInstallToRAM(ctx context.Context, logFunc func(string)) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type GPUPresenceResult struct {
|
type GPUPresenceResult struct {
|
||||||
@@ -98,23 +100,23 @@ func (a *App) IsLiveMediaInRAM() bool {
|
|||||||
return a.installer.IsLiveMediaInRAM()
|
return a.installer.IsLiveMediaInRAM()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunInstallToRAM(logFunc func(string)) error {
|
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
return a.installer.RunInstallToRAM(logFunc)
|
return a.installer.RunInstallToRAM(ctx, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
type satRunner interface {
|
type satRunner interface {
|
||||||
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
||||||
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
|
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
|
||||||
DetectGPUVendor() string
|
DetectGPUVendor() string
|
||||||
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
||||||
RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunAMDStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunSATStressPack(baseDir string, logFunc func(string)) (string, error)
|
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
||||||
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
}
|
}
|
||||||
@@ -348,6 +350,14 @@ func (a *App) GetInterfaceState(iface string) (bool, error) {
|
|||||||
return a.network.GetInterfaceState(iface)
|
return a.network.GetInterfaceState(iface)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
|
||||||
|
return a.network.CaptureNetworkSnapshot()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
|
||||||
|
return a.network.RestoreNetworkSnapshot(snapshot)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
|
||||||
body, err := a.network.SetStaticIPv4(cfg)
|
body, err := a.network.SetStaticIPv4(cfg)
|
||||||
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
|
||||||
@@ -496,10 +506,14 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunMemoryAcceptancePack(baseDir, logFunc)
|
return a.sat.RunMemoryAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -508,10 +522,14 @@ func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunCPUAcceptancePack(baseDir, durationSec, logFunc)
|
return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
|
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
|
||||||
@@ -520,10 +538,14 @@ func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (Actio
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunStorageAcceptancePack(baseDir, logFunc)
|
return a.sat.RunStorageAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -540,10 +562,14 @@ func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunAMDAcceptancePack(baseDir, logFunc)
|
return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
||||||
@@ -551,19 +577,31 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|||||||
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
return a.sat.RunMemoryStressPack(baseDir, logFunc)
|
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
return a.sat.RunSATStressPack(baseDir, logFunc)
|
return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if strings.TrimSpace(baseDir) == "" {
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
baseDir = DefaultSATBaseDir
|
baseDir = DefaultSATBaseDir
|
||||||
}
|
}
|
||||||
return a.sat.RunAMDStressPack(baseDir, logFunc)
|
return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
|
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
|
||||||
|
|||||||
@@ -45,6 +45,10 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
|
|||||||
|
|
||||||
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
|
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
|
||||||
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
|
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
|
||||||
|
func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
|
||||||
|
return platform.NetworkSnapshot{}, nil
|
||||||
|
}
|
||||||
|
func (f fakeNetwork) RestoreNetworkSnapshot(platform.NetworkSnapshot) error { return nil }
|
||||||
|
|
||||||
type fakeServices struct {
|
type fakeServices struct {
|
||||||
serviceStatusFn func(string) (string, error)
|
serviceStatusFn func(string) (string, error)
|
||||||
@@ -141,15 +145,15 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunMemoryAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
return f.runMemoryFn(baseDir)
|
return f.runMemoryFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunStorageAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
return f.runStorageFn(baseDir)
|
return f.runStorageFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int, _ func(string)) (string, error) {
|
func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
|
||||||
if f.runCPUFn != nil {
|
if f.runCPUFn != nil {
|
||||||
return f.runCPUFn(baseDir, durationSec)
|
return f.runCPUFn(baseDir, durationSec)
|
||||||
}
|
}
|
||||||
@@ -170,16 +174,22 @@ func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunAMDAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
|
||||||
if f.runAMDPackFn != nil {
|
if f.runAMDPackFn != nil {
|
||||||
return f.runAMDPackFn(baseDir)
|
return f.runAMDPackFn(baseDir)
|
||||||
}
|
}
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunAMDStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
func (f fakeSAT) RunMemoryStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
return "", nil
|
||||||
func (f fakeSAT) RunSATStressPack(_ string, _ func(string)) (string, error) { return "", nil }
|
}
|
||||||
|
func (f fakeSAT) RunMemoryStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
func (f fakeSAT) RunSATStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
|
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
|
|||||||
@@ -78,48 +78,56 @@ func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
|||||||
|
|
||||||
// sampleAMDGPUMetrics queries rocm-smi for live GPU metrics.
|
// sampleAMDGPUMetrics queries rocm-smi for live GPU metrics.
|
||||||
func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
|
func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
|
||||||
// --showtemp --showuse --showpower --csv — one row per GPU
|
|
||||||
out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
|
out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
|
||||||
|
if len(lines) < 2 {
|
||||||
|
return nil, fmt.Errorf("rocm-smi: insufficient output")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse header to find column indices by name.
|
||||||
|
headers := strings.Split(lines[0], ",")
|
||||||
|
colIdx := func(keywords ...string) int {
|
||||||
|
for i, h := range headers {
|
||||||
|
hl := strings.ToLower(strings.TrimSpace(h))
|
||||||
|
for _, kw := range keywords {
|
||||||
|
if strings.Contains(hl, kw) {
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
idxTemp := colIdx("sensor edge", "temperature (c)", "temp")
|
||||||
|
idxUse := colIdx("gpu use (%)")
|
||||||
|
idxMem := colIdx("vram%", "memory allocated")
|
||||||
|
idxPow := colIdx("average graphics package power", "power (w)")
|
||||||
|
|
||||||
var rows []GPUMetricRow
|
var rows []GPUMetricRow
|
||||||
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
for _, line := range lines[1:] {
|
||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
if line == "" || strings.HasPrefix(strings.ToLower(line), "device") {
|
if line == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// CSV format: device,temp_c,gpu_use%,mem_use%,power_w (order may vary by rocm-smi version)
|
|
||||||
// We parse by column header from the first line.
|
|
||||||
parts := strings.Split(line, ",")
|
parts := strings.Split(line, ",")
|
||||||
if len(parts) < 2 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
idx := len(rows)
|
idx := len(rows)
|
||||||
row := GPUMetricRow{GPUIndex: idx}
|
row := GPUMetricRow{GPUIndex: idx}
|
||||||
// rocm-smi CSV columns vary; extract what we can
|
get := func(i int) float64 {
|
||||||
for i, p := range parts {
|
if i < 0 || i >= len(parts) {
|
||||||
p = strings.TrimSpace(p)
|
return 0
|
||||||
switch {
|
|
||||||
case i == 0:
|
|
||||||
// device index like "card0" or "0"
|
|
||||||
case strings.Contains(strings.ToLower(p), "n/a"):
|
|
||||||
// skip N/A
|
|
||||||
default:
|
|
||||||
// Try to match by position heuristic: temp, use%, memuse%, power
|
|
||||||
v := parseGPUFloat(p)
|
|
||||||
switch {
|
|
||||||
case i == 1 && row.TempC == 0:
|
|
||||||
row.TempC = v
|
|
||||||
case i == 2 && row.UsagePct == 0:
|
|
||||||
row.UsagePct = v
|
|
||||||
case i == 3 && row.MemUsagePct == 0:
|
|
||||||
row.MemUsagePct = v
|
|
||||||
case i == 4 && row.PowerW == 0:
|
|
||||||
row.PowerW = v
|
|
||||||
}
|
}
|
||||||
|
v := strings.TrimSpace(parts[i])
|
||||||
|
if strings.EqualFold(v, "n/a") {
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
return parseGPUFloat(v)
|
||||||
}
|
}
|
||||||
|
row.TempC = get(idxTemp)
|
||||||
|
row.UsagePct = get(idxUse)
|
||||||
|
row.MemUsagePct = get(idxMem)
|
||||||
|
row.PowerW = get(idxPow)
|
||||||
rows = append(rows, row)
|
rows = append(rows, row)
|
||||||
}
|
}
|
||||||
if len(rows) == 0 {
|
if len(rows) == 0 {
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
@@ -18,7 +19,7 @@ func (s *System) IsLiveMediaInRAM() bool {
|
|||||||
return strings.TrimSpace(string(out)) == "tmpfs"
|
return strings.TrimSpace(string(out)) == "tmpfs"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
log := func(msg string) {
|
log := func(msg string) {
|
||||||
if logFunc != nil {
|
if logFunc != nil {
|
||||||
logFunc(msg)
|
logFunc(msg)
|
||||||
@@ -56,10 +57,13 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, sf := range squashfsFiles {
|
for _, sf := range squashfsFiles {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
base := filepath.Base(sf)
|
base := filepath.Base(sf)
|
||||||
dst := filepath.Join(dstDir, base)
|
dst := filepath.Join(dstDir, base)
|
||||||
log(fmt.Sprintf("Copying %s to RAM...", base))
|
log(fmt.Sprintf("Copying %s to RAM...", base))
|
||||||
if err := copyFileLarge(sf, dst, log); err != nil {
|
if err := copyFileLarge(ctx, sf, dst, log); err != nil {
|
||||||
return fmt.Errorf("copy %s: %v", base, err)
|
return fmt.Errorf("copy %s: %v", base, err)
|
||||||
}
|
}
|
||||||
log(fmt.Sprintf("Copied %s.", base))
|
log(fmt.Sprintf("Copied %s.", base))
|
||||||
@@ -77,9 +81,12 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log("Copying remaining medium files...")
|
log("Copying remaining medium files...")
|
||||||
if err := cpDir("/run/live/medium", dstDir, log); err != nil {
|
if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
|
||||||
log(fmt.Sprintf("Warning: partial copy: %v", err))
|
log(fmt.Sprintf("Warning: partial copy: %v", err))
|
||||||
}
|
}
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
|
if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
|
||||||
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
||||||
}
|
}
|
||||||
@@ -88,7 +95,7 @@ func (s *System) RunInstallToRAM(logFunc func(string)) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyFileLarge(src, dst string, logFunc func(string)) error {
|
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
in, err := os.Open(src)
|
in, err := os.Open(src)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -107,6 +114,9 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
|
|||||||
var copied int64
|
var copied int64
|
||||||
buf := make([]byte, 4*1024*1024)
|
buf := make([]byte, 4*1024*1024)
|
||||||
for {
|
for {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
n, err := in.Read(buf)
|
n, err := in.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
if _, werr := out.Write(buf[:n]); werr != nil {
|
if _, werr := out.Write(buf[:n]); werr != nil {
|
||||||
@@ -128,8 +138,11 @@ func copyFileLarge(src, dst string, logFunc func(string)) error {
|
|||||||
return out.Sync()
|
return out.Sync()
|
||||||
}
|
}
|
||||||
|
|
||||||
func cpDir(src, dst string, logFunc func(string)) error {
|
func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
|
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -144,7 +157,7 @@ func cpDir(src, dst string, logFunc func(string)) error {
|
|||||||
if _, err := os.Stat(target); err == nil {
|
if _, err := os.Stat(target); err == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return copyFileLarge(path, target, nil)
|
return copyFileLarge(ctx, path, target, nil)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,10 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -23,6 +26,7 @@ type LiveMetricSample struct {
|
|||||||
// TempReading is a named temperature sensor value.
|
// TempReading is a named temperature sensor value.
|
||||||
type TempReading struct {
|
type TempReading struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
Group string `json:"group,omitempty"`
|
||||||
Celsius float64 `json:"celsius"`
|
Celsius float64 `json:"celsius"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,10 +47,11 @@ func SampleLiveMetrics() LiveMetricSample {
|
|||||||
fans, _ := sampleFanSpeeds()
|
fans, _ := sampleFanSpeeds()
|
||||||
s.Fans = fans
|
s.Fans = fans
|
||||||
|
|
||||||
// CPU/system temperature — returns 0 if unavailable
|
s.Temps = append(s.Temps, sampleLiveTemperatureReadings()...)
|
||||||
cpuTemp := sampleCPUMaxTemp()
|
if !hasTempGroup(s.Temps, "cpu") {
|
||||||
if cpuTemp > 0 {
|
if cpuTemp := sampleCPUMaxTemp(); cpuTemp > 0 {
|
||||||
s.Temps = append(s.Temps, TempReading{Name: "CPU", Celsius: cpuTemp})
|
s.Temps = append(s.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: cpuTemp})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// System power — returns 0 if unavailable
|
// System power — returns 0 if unavailable
|
||||||
@@ -140,3 +145,182 @@ func sampleMemLoadPct() float64 {
|
|||||||
used := total - avail
|
used := total - avail
|
||||||
return float64(used) / float64(total) * 100
|
return float64(used) / float64(total) * 100
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func hasTempGroup(temps []TempReading, group string) bool {
|
||||||
|
for _, t := range temps {
|
||||||
|
if t.Group == group {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTemperatureReadings() []TempReading {
|
||||||
|
if temps := sampleLiveTempsViaSensorsJSON(); len(temps) > 0 {
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
return sampleLiveTempsViaIPMI()
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTempsViaSensorsJSON() []TempReading {
|
||||||
|
out, err := exec.Command("sensors", "-j").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var doc map[string]map[string]any
|
||||||
|
if err := json.Unmarshal(out, &doc); err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
chips := make([]string, 0, len(doc))
|
||||||
|
for chip := range doc {
|
||||||
|
chips = append(chips, chip)
|
||||||
|
}
|
||||||
|
sort.Strings(chips)
|
||||||
|
|
||||||
|
temps := make([]TempReading, 0, len(chips))
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, chip := range chips {
|
||||||
|
features := doc[chip]
|
||||||
|
featureNames := make([]string, 0, len(features))
|
||||||
|
for name := range features {
|
||||||
|
featureNames = append(featureNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(featureNames)
|
||||||
|
for _, name := range featureNames {
|
||||||
|
if strings.EqualFold(name, "Adapter") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
feature, ok := features[name].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, ok := firstTempInputValue(feature)
|
||||||
|
if !ok || value <= 0 || value > 150 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
group := classifyLiveTempGroup(chip, name)
|
||||||
|
if group == "gpu" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := strings.TrimSpace(name)
|
||||||
|
if label == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if group == "ambient" {
|
||||||
|
label = compactAmbientTempName(chip, label)
|
||||||
|
}
|
||||||
|
key := group + "\x00" + label
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleLiveTempsViaIPMI() []TempReading {
|
||||||
|
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
var temps []TempReading
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||||
|
parts := strings.Split(line, "|")
|
||||||
|
if len(parts) < 3 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(parts[0])
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
unit := strings.ToLower(strings.TrimSpace(parts[2]))
|
||||||
|
if !strings.Contains(unit, "degrees") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
raw := strings.TrimSpace(parts[1])
|
||||||
|
if raw == "" || strings.EqualFold(raw, "na") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, err := strconv.ParseFloat(raw, 64)
|
||||||
|
if err != nil || value <= 0 || value > 150 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
group := classifyLiveTempGroup("", name)
|
||||||
|
if group == "gpu" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := name
|
||||||
|
if group == "ambient" {
|
||||||
|
label = compactAmbientTempName("", label)
|
||||||
|
}
|
||||||
|
key := group + "\x00" + label
|
||||||
|
if _, ok := seen[key]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[key] = struct{}{}
|
||||||
|
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
|
||||||
|
}
|
||||||
|
return temps
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstTempInputValue returns the value of the first usable temperature
// input in a sensor feature map.
//
// A key qualifies when, compared case-insensitively, it contains "temp" and
// ends in "_input". Qualifying keys are tried in sorted order; the first one
// holding a float64, or a string that parses as a float, wins. The boolean is
// false when no key yields a value.
func firstTempInputValue(feature map[string]any) (float64, bool) {
	var candidates []string
	for name := range feature {
		lc := strings.ToLower(name)
		if strings.Contains(lc, "temp") && strings.HasSuffix(lc, "_input") {
			candidates = append(candidates, name)
		}
	}
	sort.Strings(candidates)

	for _, name := range candidates {
		if num, isNum := feature[name].(float64); isNum {
			return num, true
		}
		if text, isText := feature[name].(string); isText {
			if parsed, err := strconv.ParseFloat(text, 64); err == nil {
				return parsed, true
			}
		}
	}
	return 0, false
}
|
||||||
|
|
||||||
|
// classifyLiveTempGroup assigns a sensor to the "gpu", "cpu" or "ambient"
// group by looking for known keywords in the lower-cased "chip name" text.
// GPU keywords are checked before CPU keywords; anything matching neither is
// reported as "ambient".
func classifyLiveTempGroup(chip, name string) string {
	haystack := strings.ToLower(strings.TrimSpace(chip + " " + name))
	containsAny := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(haystack, needle) {
				return true
			}
		}
		return false
	}

	if containsAny("gpu", "amdgpu", "nvidia", "adeon") {
		return "gpu"
	}
	if containsAny(
		"coretemp",
		"k10temp",
		"zenpower",
		"package id",
		"x86_pkg_temp",
		"tctl",
		"tdie",
		"tccd",
		"cpu",
		"peci",
	) {
		return "cpu"
	}
	return "ambient"
}
|
||||||
|
|
||||||
|
// compactAmbientTempName builds the display label for an ambient sensor.
// Both inputs are trimmed first. The result is "chip / name" unless chip is
// empty, equals name ignoring case, or already appears inside name ignoring
// case; in those cases the trimmed name is returned on its own.
func compactAmbientTempName(chip, name string) string {
	chip, name = strings.TrimSpace(chip), strings.TrimSpace(name)
	switch {
	case chip == "":
		return name
	case strings.EqualFold(chip, name):
		return name
	case strings.Contains(strings.ToLower(name), strings.ToLower(chip)):
		return name
	}
	return chip + " / " + name
}
|
||||||
|
|||||||
44
audit/internal/platform/live_metrics_test.go
Normal file
44
audit/internal/platform/live_metrics_test.go
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestFirstTempInputValue(t *testing.T) {
|
||||||
|
feature := map[string]any{
|
||||||
|
"temp1_input": 61.5,
|
||||||
|
"temp1_max": 80.0,
|
||||||
|
}
|
||||||
|
got, ok := firstTempInputValue(feature)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("expected value")
|
||||||
|
}
|
||||||
|
if got != 61.5 {
|
||||||
|
t.Fatalf("got %v want 61.5", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestClassifyLiveTempGroup(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
chip string
|
||||||
|
name string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{chip: "coretemp-isa-0000", name: "Package id 0", want: "cpu"},
|
||||||
|
{chip: "amdgpu-pci-4300", name: "edge", want: "gpu"},
|
||||||
|
{chip: "nvme-pci-0100", name: "Composite", want: "ambient"},
|
||||||
|
{chip: "acpitz-acpi-0", name: "temp1", want: "ambient"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := classifyLiveTempGroup(tc.chip, tc.name); got != tc.want {
|
||||||
|
t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", tc.chip, tc.name, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCompactAmbientTempName(t *testing.T) {
|
||||||
|
if got := compactAmbientTempName("nvme-pci-0100", "Composite"); got != "nvme-pci-0100 / Composite" {
|
||||||
|
t.Fatalf("got %q", got)
|
||||||
|
}
|
||||||
|
if got := compactAmbientTempName("", "Inlet Temp"); got != "Inlet Temp" {
|
||||||
|
t.Fatalf("got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,6 +2,7 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
|
|||||||
out := make([]InterfaceInfo, 0, len(names))
|
out := make([]InterfaceInfo, 0, len(names))
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
state := "unknown"
|
state := "unknown"
|
||||||
if raw, err := exec.Command("ip", "-o", "link", "show", name).Output(); err == nil {
|
if up, err := interfaceAdminState(name); err == nil {
|
||||||
fields := strings.Fields(string(raw))
|
if up {
|
||||||
if len(fields) >= 9 {
|
state = "up"
|
||||||
state = fields[8]
|
} else {
|
||||||
|
state = "down"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var ipv4 []string
|
ipv4, err := interfaceIPv4Addrs(name)
|
||||||
if raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", name).Output(); err == nil {
|
if err != nil {
|
||||||
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
|
ipv4 = nil
|
||||||
fields := strings.Fields(line)
|
|
||||||
if len(fields) >= 4 {
|
|
||||||
ipv4 = append(ipv4, fields[3])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
|
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
|
||||||
@@ -55,6 +52,119 @@ func (s *System) DefaultRoute() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
|
||||||
|
names, err := listInterfaceNames()
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot := NetworkSnapshot{
|
||||||
|
Interfaces: make([]NetworkInterfaceSnapshot, 0, len(names)),
|
||||||
|
}
|
||||||
|
for _, name := range names {
|
||||||
|
up, err := interfaceAdminState(name)
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
ipv4, err := interfaceIPv4Addrs(name)
|
||||||
|
if err != nil {
|
||||||
|
return NetworkSnapshot{}, err
|
||||||
|
}
|
||||||
|
snapshot.Interfaces = append(snapshot.Interfaces, NetworkInterfaceSnapshot{
|
||||||
|
Name: name,
|
||||||
|
Up: up,
|
||||||
|
IPv4: ipv4,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := exec.Command("ip", "route", "show", "default").Output(); err == nil {
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line != "" {
|
||||||
|
snapshot.DefaultRoutes = append(snapshot.DefaultRoutes, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if raw, err := os.ReadFile("/etc/resolv.conf"); err == nil {
|
||||||
|
snapshot.ResolvConf = string(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
return snapshot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
|
||||||
|
var errs []string
|
||||||
|
|
||||||
|
for _, iface := range snapshot.Interfaces {
|
||||||
|
if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := exec.Command("ip", "addr", "flush", "dev", iface.Name).Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
|
||||||
|
}
|
||||||
|
for _, cidr := range iface.IPv4 {
|
||||||
|
if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
|
||||||
|
detail := strings.TrimSpace(string(raw))
|
||||||
|
if detail != "" {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
|
||||||
|
} else {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state := "down"
|
||||||
|
if iface.Up {
|
||||||
|
state = "up"
|
||||||
|
}
|
||||||
|
if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
|
||||||
|
var exitErr *exec.ExitError
|
||||||
|
if !errors.As(err, &exitErr) {
|
||||||
|
errs = append(errs, fmt.Sprintf("clear default route: %v", err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, route := range snapshot.DefaultRoutes {
|
||||||
|
fields := strings.Fields(route)
|
||||||
|
if len(fields) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Strip state flags that ip-route(8) does not accept as add arguments.
|
||||||
|
filtered := fields[:0]
|
||||||
|
for _, f := range fields {
|
||||||
|
switch f {
|
||||||
|
case "linkdown", "dead", "onlink", "pervasive":
|
||||||
|
// skip
|
||||||
|
default:
|
||||||
|
filtered = append(filtered, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
args := append([]string{"route", "add"}, filtered...)
|
||||||
|
if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
|
||||||
|
detail := strings.TrimSpace(string(raw))
|
||||||
|
if detail != "" {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
|
||||||
|
} else {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
|
||||||
|
errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return errors.New(strings.Join(errs, "; "))
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *System) DHCPOne(iface string) (string, error) {
|
func (s *System) DHCPOne(iface string) (string, error) {
|
||||||
var out bytes.Buffer
|
var out bytes.Buffer
|
||||||
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
|
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
|
||||||
@@ -142,12 +252,52 @@ func (s *System) SetInterfaceState(iface string, up bool) error {
|
|||||||
|
|
||||||
// GetInterfaceState returns true if the interface is UP.
|
// GetInterfaceState returns true if the interface is UP.
|
||||||
func (s *System) GetInterfaceState(iface string) (bool, error) {
|
func (s *System) GetInterfaceState(iface string) (bool, error) {
|
||||||
raw, err := os.ReadFile(fmt.Sprintf("/sys/class/net/%s/operstate", iface))
|
return interfaceAdminState(iface)
|
||||||
|
}
|
||||||
|
|
||||||
|
func interfaceAdminState(iface string) (bool, error) {
|
||||||
|
raw, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
state := strings.TrimSpace(string(raw))
|
return parseInterfaceAdminState(string(raw))
|
||||||
return state == "up", nil
|
}
|
||||||
|
|
||||||
|
// parseInterfaceAdminState reports whether the flag list of an "ip link"
// output line (the comma-separated text between the first '<' and the next
// '>') contains the UP flag. Flags are compared after trimming whitespace, so
// LOWER_UP does not count. An error is returned when either delimiter is
// missing.
func parseInterfaceAdminState(raw string) (bool, error) {
	_, afterOpen, opened := strings.Cut(raw, "<")
	if !opened {
		return false, fmt.Errorf("ip link output missing flags")
	}
	flagList, _, closed := strings.Cut(afterOpen, ">")
	if !closed {
		return false, fmt.Errorf("ip link output missing flag terminator")
	}

	adminUp := false
	for _, item := range strings.Split(flagList, ",") {
		if strings.TrimSpace(item) == "UP" {
			adminUp = true
			break
		}
	}
	return adminUp, nil
}
|
||||||
|
|
||||||
|
// interfaceIPv4Addrs returns the IPv4 addresses reported by
// "ip -o -4 addr show dev <iface>", taking the fourth whitespace-separated
// field of every output line that has at least four fields.
//
// When ip runs but exits with a failure status the result is (nil, nil); any
// other error from running the command is returned as is.
func interfaceIPv4Addrs(iface string) ([]string, error) {
	output, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
	if err != nil {
		if exitErr := (*exec.ExitError)(nil); errors.As(err, &exitErr) {
			return nil, nil
		}
		return nil, err
	}

	var addrs []string
	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
	for i := range lines {
		cols := strings.Fields(lines[i])
		if len(cols) < 4 {
			continue
		}
		addrs = append(addrs, cols[3])
	}
	return addrs, nil
}
|
||||||
|
|
||||||
func listInterfaceNames() ([]string, error) {
|
func listInterfaceNames() ([]string, error) {
|
||||||
|
|||||||
46
audit/internal/platform/network_test.go
Normal file
46
audit/internal/platform/network_test.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseInterfaceAdminState(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
raw string
|
||||||
|
want bool
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "admin up with no carrier",
|
||||||
|
raw: "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
|
||||||
|
want: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "admin down",
|
||||||
|
raw: "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
|
||||||
|
want: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "malformed output",
|
||||||
|
raw: "2: enp1s0: mtu 1500 state DOWN\n",
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got, err := parseInterfaceAdminState(tt.raw)
|
||||||
|
if tt.wantErr {
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Fatalf("got %v want %v", got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -33,6 +33,10 @@ var (
|
|||||||
"/opt/rocm/libexec/rocm_smi/rocm_smi.py",
|
"/opt/rocm/libexec/rocm_smi/rocm_smi.py",
|
||||||
"/opt/rocm-*/libexec/rocm_smi/rocm_smi.py",
|
"/opt/rocm-*/libexec/rocm_smi/rocm_smi.py",
|
||||||
}
|
}
|
||||||
|
rvsExecutableGlobs = []string{
|
||||||
|
"/opt/rocm/bin/rvs",
|
||||||
|
"/opt/rocm-*/bin/rvs",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
|
// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
|
||||||
@@ -90,6 +94,12 @@ func (s *System) DetectGPUVendor() string {
|
|||||||
if _, err := os.Stat("/dev/kfd"); err == nil {
|
if _, err := os.Stat("/dev/kfd"); err == nil {
|
||||||
return "amd"
|
return "amd"
|
||||||
}
|
}
|
||||||
|
if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
|
||||||
|
text := strings.ToLower(string(raw))
|
||||||
|
if strings.Contains(text, "advanced micro devices") || strings.Contains(text, "amd/ati") {
|
||||||
|
return "amd"
|
||||||
|
}
|
||||||
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,8 +127,8 @@ func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
|
// RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
|
||||||
func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePack(baseDir, "gpu-amd", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd", []satJob{
|
||||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
|
{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
|
||||||
{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
@@ -128,15 +138,35 @@ func (s *System) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (str
|
|||||||
|
|
||||||
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
||||||
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
||||||
func (s *System) RunAMDStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_AMD_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
return runAcceptancePack(baseDir, "gpu-amd-stress", []satJob{
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_AMD_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
|
if err := ensureAMDRuntimeReady(); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// Write RVS GST config to a temp file
|
||||||
|
rvsCfg := fmt.Sprintf(`actions:
|
||||||
|
- name: gst_stress
|
||||||
|
device: all
|
||||||
|
module: gst
|
||||||
|
parallel: true
|
||||||
|
duration: %d
|
||||||
|
copy_matrix: false
|
||||||
|
target_stress: 90
|
||||||
|
matrix_size_a: 8640
|
||||||
|
matrix_size_b: 8640
|
||||||
|
matrix_size_c: 8640
|
||||||
|
`, seconds*1000)
|
||||||
|
cfgFile := "/tmp/bee-amd-gst.conf"
|
||||||
|
_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
|
||||||
|
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
|
||||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
||||||
{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
|
{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
|
||||||
"rocm-smi", "--showtemp", "--showpower",
|
{name: fmt.Sprintf("04-rocm-smi-after.log"), cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
|
||||||
fmt.Sprintf("--duration=%d", seconds),
|
|
||||||
}},
|
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,7 +221,7 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
|
return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
|
// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
|
||||||
@@ -202,24 +232,27 @@ func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir
|
|||||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
|
||||||
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
passes := envInt("BEE_MEMTESTER_PASSES", 1)
|
||||||
return runAcceptancePack(baseDir, "memory", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
|
{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
|
||||||
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_VM_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
||||||
sizeArg := "80%"
|
sizeArg := "80%"
|
||||||
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
||||||
sizeArg = fmt.Sprintf("%dM", mb)
|
sizeArg = fmt.Sprintf("%dM", mb)
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "memory-stress", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-stress-ng-vm.log", cmd: []string{
|
{name: "02-stress-ng-vm.log", cmd: []string{
|
||||||
"stress-ng", "--vm", "1",
|
"stress-ng", "--vm", "1",
|
||||||
@@ -232,24 +265,27 @@ func (s *System) RunMemoryStressPack(baseDir string, logFunc func(string)) (stri
|
|||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunSATStressPack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
seconds := envInt("BEE_SAT_STRESS_SECONDS", 300)
|
seconds := durationSec
|
||||||
|
if seconds <= 0 {
|
||||||
|
seconds = envInt("BEE_SAT_STRESS_SECONDS", 300)
|
||||||
|
}
|
||||||
cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"}
|
cmd := []string{"stressapptest", "-s", fmt.Sprintf("%d", seconds), "-W", "--cc_test"}
|
||||||
if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 {
|
if mb := envInt("BEE_SAT_STRESS_MB", 0); mb > 0 {
|
||||||
cmd = append(cmd, "-M", fmt.Sprintf("%d", mb))
|
cmd = append(cmd, "-M", fmt.Sprintf("%d", mb))
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "sat-stress", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "sat-stress", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-stressapptest.log", cmd: cmd},
|
{name: "02-stressapptest.log", cmd: cmd},
|
||||||
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
{name: "03-free-after.log", cmd: []string{"free", "-h"}},
|
||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (s *System) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
if durationSec <= 0 {
|
if durationSec <= 0 {
|
||||||
durationSec = 60
|
durationSec = 60
|
||||||
}
|
}
|
||||||
return runAcceptancePack(baseDir, "cpu", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "cpu", []satJob{
|
||||||
{name: "01-lscpu.log", cmd: []string{"lscpu"}},
|
{name: "01-lscpu.log", cmd: []string{"lscpu"}},
|
||||||
{name: "02-sensors-before.log", cmd: []string{"sensors"}},
|
{name: "02-sensors-before.log", cmd: []string{"sensors"}},
|
||||||
{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
|
{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
|
||||||
@@ -257,7 +293,7 @@ func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc f
|
|||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
if baseDir == "" {
|
if baseDir == "" {
|
||||||
baseDir = "/var/log/bee-sat"
|
baseDir = "/var/log/bee-sat"
|
||||||
}
|
}
|
||||||
@@ -285,11 +321,17 @@ func (s *System) RunStorageAcceptancePack(baseDir string, logFunc func(string))
|
|||||||
}
|
}
|
||||||
|
|
||||||
for index, devPath := range devices {
|
for index, devPath := range devices {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
|
||||||
commands := storageSATCommands(devPath)
|
commands := storageSATCommands(devPath)
|
||||||
for cmdIndex, job := range commands {
|
for cmdIndex, job := range commands {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
|
||||||
out, err := runSATCommand(verboseLog, job.name, job.cmd, logFunc)
|
out, err := runSATCommandCtx(ctx, verboseLog, job.name, job.cmd, nil, logFunc)
|
||||||
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
@@ -338,49 +380,6 @@ func nvidiaSATJobs() []satJob {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func runAcceptancePack(baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
|
||||||
if baseDir == "" {
|
|
||||||
baseDir = "/var/log/bee-sat"
|
|
||||||
}
|
|
||||||
ts := time.Now().UTC().Format("20060102-150405")
|
|
||||||
runDir := filepath.Join(baseDir, prefix+"-"+ts)
|
|
||||||
if err := os.MkdirAll(runDir, 0755); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
verboseLog := filepath.Join(runDir, "verbose.log")
|
|
||||||
|
|
||||||
var summary strings.Builder
|
|
||||||
stats := satStats{}
|
|
||||||
fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
|
|
||||||
for _, job := range jobs {
|
|
||||||
var out []byte
|
|
||||||
var err error
|
|
||||||
cmd := make([]string, 0, len(job.cmd))
|
|
||||||
for _, arg := range job.cmd {
|
|
||||||
cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
|
|
||||||
}
|
|
||||||
out, err = runSATCommand(verboseLog, job.name, cmd, logFunc)
|
|
||||||
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
|
||||||
return "", writeErr
|
|
||||||
}
|
|
||||||
status, rc := classifySATResult(job.name, out, err)
|
|
||||||
stats.Add(status)
|
|
||||||
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
|
|
||||||
fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
|
|
||||||
fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
|
|
||||||
}
|
|
||||||
writeSATStats(&summary, stats)
|
|
||||||
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
|
|
||||||
if err := createTarGz(archive, runDir); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return archive, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
||||||
if diagLevel < 1 || diagLevel > 4 {
|
if diagLevel < 1 || diagLevel > 4 {
|
||||||
diagLevel = 3
|
diagLevel = 3
|
||||||
@@ -402,6 +401,9 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
|
||||||
|
if ctx == nil {
|
||||||
|
ctx = context.Background()
|
||||||
|
}
|
||||||
if baseDir == "" {
|
if baseDir == "" {
|
||||||
baseDir = "/var/log/bee-sat"
|
baseDir = "/var/log/bee-sat"
|
||||||
}
|
}
|
||||||
@@ -622,10 +624,23 @@ func resolveSATCommand(cmd []string) ([]string, error) {
|
|||||||
if len(cmd) == 0 {
|
if len(cmd) == 0 {
|
||||||
return nil, errors.New("empty SAT command")
|
return nil, errors.New("empty SAT command")
|
||||||
}
|
}
|
||||||
if cmd[0] != "rocm-smi" {
|
switch cmd[0] {
|
||||||
|
case "rocm-smi":
|
||||||
|
return resolveROCmSMICommand(cmd[1:]...)
|
||||||
|
case "rvs":
|
||||||
|
return resolveRVSCommand(cmd[1:]...)
|
||||||
|
}
|
||||||
return cmd, nil
|
return cmd, nil
|
||||||
}
|
}
|
||||||
return resolveROCmSMICommand(cmd[1:]...)
|
|
||||||
|
func resolveRVSCommand(args ...string) ([]string, error) {
|
||||||
|
if path, err := satLookPath("rvs"); err == nil {
|
||||||
|
return append([]string{path}, args...), nil
|
||||||
|
}
|
||||||
|
for _, path := range expandExistingPaths(rvsExecutableGlobs) {
|
||||||
|
return append([]string{path}, args...), nil
|
||||||
|
}
|
||||||
|
return nil, errors.New("rvs not found in PATH or under /opt/rocm")
|
||||||
}
|
}
|
||||||
|
|
||||||
func resolveROCmSMICommand(args ...string) ([]string, error) {
|
func resolveROCmSMICommand(args ...string) ([]string, error) {
|
||||||
@@ -649,6 +664,20 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
|
|||||||
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureAMDRuntimeReady reports whether the AMD GPU runtime looks usable.
//
// It returns nil when /dev/kfd exists, or when /sys/module/amdgpu/initstate
// can be read and contains "live" (case-insensitive, surrounding whitespace
// ignored). Otherwise it returns an error describing which check failed.
func ensureAMDRuntimeReady() error {
	_, kfdErr := os.Stat("/dev/kfd")
	if kfdErr == nil {
		return nil
	}

	contents, readErr := os.ReadFile("/sys/module/amdgpu/initstate")
	if readErr != nil {
		// Neither the device node nor the module state file is available.
		return errors.New("AMD GPUs are present but the runtime is not initialized: /dev/kfd is missing and amdgpu is not loaded")
	}

	initState := strings.TrimSpace(string(contents))
	if !strings.EqualFold(initState, "live") {
		return fmt.Errorf("AMD driver is present but not initialized: amdgpu initstate=%q", initState)
	}
	return nil
}
|
||||||
|
|
||||||
func rocmSMIExecutableCandidates() []string {
|
func rocmSMIExecutableCandidates() []string {
|
||||||
return expandExistingPaths(rocmSMIExecutableGlobs)
|
return expandExistingPaths(rocmSMIExecutableGlobs)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,10 +2,12 @@ package platform
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -304,41 +306,147 @@ func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
|
|||||||
// sampleFanSpeeds reads fan RPM values from ipmitool sdr.
|
// sampleFanSpeeds reads fan RPM values from ipmitool sdr.
|
||||||
func sampleFanSpeeds() ([]FanReading, error) {
|
func sampleFanSpeeds() ([]FanReading, error) {
|
||||||
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
|
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
|
||||||
|
if err == nil {
|
||||||
|
if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
|
||||||
|
if len(fans) > 0 {
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return parseFanSpeeds(string(out)), nil
|
return nil, sensorsErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
||||||
// Line format: "FAN1 | 2400.000 | RPM | ok"
|
// Handles two formats:
|
||||||
|
// Old: "FAN1 | 2400.000 | RPM | ok" (value in col[1], unit in col[2])
|
||||||
|
// New: "FAN1 | 41h | ok | 29.1 | 4340 RPM" (value+unit combined in last col)
|
||||||
func parseFanSpeeds(raw string) []FanReading {
|
func parseFanSpeeds(raw string) []FanReading {
|
||||||
var fans []FanReading
|
var fans []FanReading
|
||||||
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
||||||
parts := strings.Split(line, "|")
|
parts := strings.Split(line, "|")
|
||||||
if len(parts) < 3 {
|
if len(parts) < 2 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
unit := strings.TrimSpace(parts[2])
|
name := strings.TrimSpace(parts[0])
|
||||||
if !strings.EqualFold(unit, "RPM") {
|
// Find the first field that contains "RPM" (either as a standalone unit or inline)
|
||||||
|
rpmVal := 0.0
|
||||||
|
found := false
|
||||||
|
for _, p := range parts[1:] {
|
||||||
|
p = strings.TrimSpace(p)
|
||||||
|
if !strings.Contains(strings.ToUpper(p), "RPM") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if strings.EqualFold(p, "RPM") {
|
||||||
|
continue // unit-only column in old format; value is in previous field
|
||||||
|
}
|
||||||
|
val, err := parseFanRPMValue(p)
|
||||||
|
if err == nil {
|
||||||
|
rpmVal = val
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Old format: unit "RPM" is in col[2], value is in col[1]
|
||||||
|
if !found && len(parts) >= 3 && strings.EqualFold(strings.TrimSpace(parts[2]), "RPM") {
|
||||||
valStr := strings.TrimSpace(parts[1])
|
valStr := strings.TrimSpace(parts[1])
|
||||||
if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
|
if !strings.EqualFold(valStr, "na") && !strings.EqualFold(valStr, "disabled") && valStr != "" {
|
||||||
|
if val, err := parseFanRPMValue(valStr); err == nil {
|
||||||
|
rpmVal = val
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
val, err := strconv.ParseFloat(valStr, 64)
|
fans = append(fans, FanReading{Name: name, RPM: rpmVal})
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fans = append(fans, FanReading{
|
|
||||||
Name: strings.TrimSpace(parts[0]),
|
|
||||||
RPM: val,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return fans
|
return fans
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parseFanRPMValue extracts the leading number from a fan reading such as
// "4340 RPM" or "2400.000".
//
// Commas are removed before parsing and only the first whitespace-separated
// token is parsed. Input with no tokens yields strconv.ErrSyntax; otherwise
// the result of strconv.ParseFloat on that token is returned as-is.
func parseFanRPMValue(raw string) (float64, error) {
	withoutCommas := strings.ReplaceAll(raw, ",", "")
	// strings.Fields already ignores leading and trailing whitespace.
	tokens := strings.Fields(withoutCommas)
	if len(tokens) > 0 {
		return strconv.ParseFloat(tokens[0], 64)
	}
	return 0, strconv.ErrSyntax
}
|
||||||
|
|
||||||
|
func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
|
||||||
|
out, err := exec.Command("sensors", "-j").Output()
|
||||||
|
if err != nil || len(out) == 0 {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var doc map[string]map[string]any
|
||||||
|
if err := json.Unmarshal(out, &doc); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
chips := make([]string, 0, len(doc))
|
||||||
|
for chip := range doc {
|
||||||
|
chips = append(chips, chip)
|
||||||
|
}
|
||||||
|
sort.Strings(chips)
|
||||||
|
var fans []FanReading
|
||||||
|
seen := map[string]struct{}{}
|
||||||
|
for _, chip := range chips {
|
||||||
|
features := doc[chip]
|
||||||
|
names := make([]string, 0, len(features))
|
||||||
|
for name := range features {
|
||||||
|
names = append(names, name)
|
||||||
|
}
|
||||||
|
sort.Strings(names)
|
||||||
|
for _, name := range names {
|
||||||
|
feature, ok := features[name].(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rpm, ok := firstFanInputValue(feature)
|
||||||
|
if !ok || rpm <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
label := strings.TrimSpace(name)
|
||||||
|
if chip != "" && !strings.Contains(strings.ToLower(label), strings.ToLower(chip)) {
|
||||||
|
label = chip + " / " + label
|
||||||
|
}
|
||||||
|
if _, ok := seen[label]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[label] = struct{}{}
|
||||||
|
fans = append(fans, FanReading{Name: label, RPM: rpm})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fans, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// firstFanInputValue returns the first usable fan input value in feature.
//
// Keys are visited in sorted order. A key qualifies when its lower-cased form
// contains "fan" and ends with "_input". The value is returned when it is a
// float64, or a string that strconv.ParseFloat accepts. Qualifying keys with
// any other value are skipped. The result is (0, false) when nothing matches.
func firstFanInputValue(feature map[string]any) (float64, bool) {
	names := make([]string, 0, len(feature))
	for name := range feature {
		names = append(names, name)
	}
	sort.Strings(names)

	for _, name := range names {
		lowered := strings.ToLower(name)
		isFanInput := strings.Contains(lowered, "fan") && strings.HasSuffix(lowered, "_input")
		if !isFanInput {
			continue
		}
		if number, isNumber := feature[name].(float64); isNumber {
			return number, true
		}
		if text, isText := feature[name].(string); isText {
			if parsed, err := strconv.ParseFloat(text, 64); err == nil {
				return parsed, true
			}
		}
	}
	return 0, false
}
|
||||||
|
|
||||||
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
|
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
|
||||||
func sampleCPUMaxTemp() float64 {
|
func sampleCPUMaxTemp() float64 {
|
||||||
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
|
||||||
|
|||||||
27
audit/internal/platform/sat_fan_stress_test.go
Normal file
27
audit/internal/platform/sat_fan_stress_test.go
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseFanSpeeds(t *testing.T) {
|
||||||
|
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
|
||||||
|
got := parseFanSpeeds(raw)
|
||||||
|
if len(got) != 2 {
|
||||||
|
t.Fatalf("fans=%d want 2 (%v)", len(got), got)
|
||||||
|
}
|
||||||
|
if got[0].Name != "FAN1" || got[0].RPM != 2400 {
|
||||||
|
t.Fatalf("fan0=%+v", got[0])
|
||||||
|
}
|
||||||
|
if got[1].Name != "FAN2" || got[1].RPM != 1800 {
|
||||||
|
t.Fatalf("fan1=%+v", got[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFirstFanInputValue(t *testing.T) {
|
||||||
|
feature := map[string]any{
|
||||||
|
"fan1_input": 9200.0,
|
||||||
|
}
|
||||||
|
got, ok := firstFanInputValue(feature)
|
||||||
|
if !ok || got != 9200 {
|
||||||
|
t.Fatalf("got=%v ok=%v", got, ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -8,6 +8,18 @@ type InterfaceInfo struct {
|
|||||||
IPv4 []string
|
IPv4 []string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type (
	// NetworkInterfaceSnapshot records one interface's name, an up/down flag
	// and its IPv4 entries.
	// NOTE(review): the IPv4 string format (bare address vs. CIDR) is not
	// visible here; confirm against the code that fills it.
	NetworkInterfaceSnapshot struct {
		Name string
		Up   bool
		IPv4 []string
	}

	// NetworkSnapshot bundles per-interface snapshots with default-route and
	// resolver data.
	// NOTE(review): presumably ResolvConf holds the resolv.conf contents
	// rather than a path, and DefaultRoutes holds route lines; verify against
	// the capture/restore code.
	NetworkSnapshot struct {
		Interfaces    []NetworkInterfaceSnapshot
		DefaultRoutes []string
		ResolvConf    string
	}
)
|
||||||
|
|
||||||
type ServiceAction string
|
type ServiceAction string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -155,8 +156,9 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
Duration int `json:"duration"`
|
Duration int `json:"duration"`
|
||||||
DiagLevel int `json:"diag_level"`
|
DiagLevel int `json:"diag_level"`
|
||||||
GPUIndices []int `json:"gpu_indices"`
|
GPUIndices []int `json:"gpu_indices"`
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
}
|
}
|
||||||
body.DiagLevel = 1
|
|
||||||
if r.ContentLength > 0 {
|
if r.ContentLength > 0 {
|
||||||
_ = json.NewDecoder(r.Body).Decode(&body)
|
_ = json.NewDecoder(r.Body).Decode(&body)
|
||||||
}
|
}
|
||||||
@@ -175,8 +177,13 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
Duration: body.Duration,
|
Duration: body.Duration,
|
||||||
DiagLevel: body.DiagLevel,
|
DiagLevel: body.DiagLevel,
|
||||||
GPUIndices: body.GPUIndices,
|
GPUIndices: body.GPUIndices,
|
||||||
|
BurnProfile: body.Profile,
|
||||||
|
DisplayName: body.DisplayName,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
if strings.TrimSpace(body.DisplayName) != "" {
|
||||||
|
t.Name = body.DisplayName
|
||||||
|
}
|
||||||
globalQueue.enqueue(t)
|
globalQueue.enqueue(t)
|
||||||
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
||||||
}
|
}
|
||||||
@@ -320,18 +327,21 @@ func (h *handler) handleAPINetworkDHCP(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
_ = json.NewDecoder(r.Body).Decode(&req)
|
_ = json.NewDecoder(r.Body).Decode(&req)
|
||||||
|
|
||||||
var result app.ActionResult
|
result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
var err error
|
|
||||||
if req.Interface == "" || req.Interface == "all" {
|
if req.Interface == "" || req.Interface == "all" {
|
||||||
result, err = h.opts.App.DHCPAllResult()
|
return h.opts.App.DHCPAllResult()
|
||||||
} else {
|
|
||||||
result, err = h.opts.App.DHCPOneResult(req.Interface)
|
|
||||||
}
|
}
|
||||||
|
return h.opts.App.DHCPOneResult(req.Interface)
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "ok", "output": result.Body})
|
writeJSON(w, map[string]any{
|
||||||
|
"status": "ok",
|
||||||
|
"output": result.Body,
|
||||||
|
"rollback_in": int(netRollbackTimeout.Seconds()),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -357,12 +367,18 @@ func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request)
|
|||||||
Gateway: req.Gateway,
|
Gateway: req.Gateway,
|
||||||
DNS: req.DNS,
|
DNS: req.DNS,
|
||||||
}
|
}
|
||||||
result, err := h.opts.App.SetStaticIPv4Result(cfg)
|
result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
|
return h.opts.App.SetStaticIPv4Result(cfg)
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "ok", "output": result.Body})
|
writeJSON(w, map[string]any{
|
||||||
|
"status": "ok",
|
||||||
|
"output": result.Body,
|
||||||
|
"rollback_in": int(netRollbackTimeout.Seconds()),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Export ────────────────────────────────────────────────────────────────────
|
// ── Export ────────────────────────────────────────────────────────────────────
|
||||||
@@ -421,6 +437,13 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
|
|||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
h.installMu.Lock()
|
||||||
|
installRunning := h.installJob != nil && !h.installJob.isDone()
|
||||||
|
h.installMu.Unlock()
|
||||||
|
if installRunning {
|
||||||
|
writeError(w, http.StatusConflict, "install to disk is already running")
|
||||||
|
return
|
||||||
|
}
|
||||||
t := &Task{
|
t := &Task{
|
||||||
ID: newJobID("install-to-ram"),
|
ID: newJobID("install-to-ram"),
|
||||||
Name: "Install to RAM",
|
Name: "Install to RAM",
|
||||||
@@ -528,6 +551,10 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusBadRequest, "device not in install candidate list")
|
writeError(w, http.StatusBadRequest, "device not in install candidate list")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if globalQueue.hasActiveTarget("install-to-ram") {
|
||||||
|
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
h.installMu.Lock()
|
h.installMu.Lock()
|
||||||
if h.installJob != nil && !h.installJob.isDone() {
|
if h.installJob != nil && !h.installJob.isDone() {
|
||||||
@@ -565,7 +592,7 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
if !sseStart(w) {
|
if !sseStart(w) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ticker := time.NewTicker(time.Second)
|
ticker := time.NewTicker(1 * time.Second)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -573,19 +600,35 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
sample := platform.SampleLiveMetrics()
|
sample := platform.SampleLiveMetrics()
|
||||||
|
h.feedRings(sample)
|
||||||
|
if h.metricsDB != nil {
|
||||||
|
_ = h.metricsDB.Write(sample)
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(sample)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !sseWrite(w, "metrics", string(b)) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Feed server ring buffers
|
// feedRings pushes one sample into all in-memory ring buffers.
|
||||||
|
func (h *handler) feedRings(sample platform.LiveMetricSample) {
|
||||||
for _, t := range sample.Temps {
|
for _, t := range sample.Temps {
|
||||||
if t.Name == "CPU" {
|
switch t.Group {
|
||||||
h.ringCPUTemp.push(t.Celsius)
|
case "cpu":
|
||||||
break
|
h.pushNamedMetricRing(&h.cpuTempRings, t.Name, t.Celsius)
|
||||||
|
case "ambient":
|
||||||
|
h.pushNamedMetricRing(&h.ambientTempRings, t.Name, t.Celsius)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
h.ringPower.push(sample.PowerW)
|
h.ringPower.push(sample.PowerW)
|
||||||
h.ringCPULoad.push(sample.CPULoadPct)
|
h.ringCPULoad.push(sample.CPULoadPct)
|
||||||
h.ringMemLoad.push(sample.MemLoadPct)
|
h.ringMemLoad.push(sample.MemLoadPct)
|
||||||
|
|
||||||
// Feed fan ring buffers (grow on first sight)
|
|
||||||
h.ringsMu.Lock()
|
h.ringsMu.Lock()
|
||||||
for i, fan := range sample.Fans {
|
for i, fan := range sample.Fans {
|
||||||
for len(h.ringFans) <= i {
|
for len(h.ringFans) <= i {
|
||||||
@@ -594,7 +637,6 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
}
|
}
|
||||||
h.ringFans[i].push(float64(fan.RPM))
|
h.ringFans[i].push(float64(fan.RPM))
|
||||||
}
|
}
|
||||||
// Feed per-GPU ring buffers (grow on first sight)
|
|
||||||
for _, gpu := range sample.GPUs {
|
for _, gpu := range sample.GPUs {
|
||||||
idx := gpu.GPUIndex
|
idx := gpu.GPUIndex
|
||||||
for len(h.gpuRings) <= idx {
|
for len(h.gpuRings) <= idx {
|
||||||
@@ -611,16 +653,23 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
h.gpuRings[idx].Power.push(gpu.PowerW)
|
h.gpuRings[idx].Power.push(gpu.PowerW)
|
||||||
}
|
}
|
||||||
h.ringsMu.Unlock()
|
h.ringsMu.Unlock()
|
||||||
|
|
||||||
b, err := json.Marshal(sample)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
if !sseWrite(w, "metrics", string(b)) {
|
|
||||||
|
func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
|
||||||
|
if name == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, item := range *dst {
|
||||||
|
if item != nil && item.Name == name && item.Ring != nil {
|
||||||
|
item.Ring.push(value)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
*dst = append(*dst, &namedMetricsRing{
|
||||||
|
Name: name,
|
||||||
|
Ring: newMetricsRing(120),
|
||||||
|
})
|
||||||
|
(*dst)[len(*dst)-1].Ring.push(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Network toggle ────────────────────────────────────────────────────────────
|
// ── Network toggle ────────────────────────────────────────────────────────────
|
||||||
@@ -646,33 +695,14 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.opts.App.SetInterfaceState(req.Iface, !wasUp); err != nil {
|
if _, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
|
||||||
|
err := h.opts.App.SetInterfaceState(req.Iface, !wasUp)
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}); err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cancel any existing pending change (rollback it first).
|
|
||||||
h.pendingNetMu.Lock()
|
|
||||||
if h.pendingNet != nil {
|
|
||||||
prev := h.pendingNet
|
|
||||||
prev.mu.Lock()
|
|
||||||
prev.timer.Stop()
|
|
||||||
_ = h.opts.App.SetInterfaceState(prev.iface, prev.wasUp)
|
|
||||||
prev.mu.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
pnc := &pendingNetChange{iface: req.Iface, wasUp: wasUp}
|
|
||||||
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
|
||||||
_ = h.opts.App.SetInterfaceState(req.Iface, wasUp)
|
|
||||||
h.pendingNetMu.Lock()
|
|
||||||
if h.pendingNet == pnc {
|
|
||||||
h.pendingNet = nil
|
|
||||||
}
|
|
||||||
h.pendingNetMu.Unlock()
|
|
||||||
})
|
|
||||||
h.pendingNet = pnc
|
|
||||||
h.pendingNetMu.Unlock()
|
|
||||||
|
|
||||||
newState := "up"
|
newState := "up"
|
||||||
if wasUp {
|
if wasUp {
|
||||||
newState = "down"
|
newState = "down"
|
||||||
@@ -684,6 +714,42 @@ func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request)
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, error)) (app.ActionResult, error) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
return app.ActionResult{}, fmt.Errorf("app not configured")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.rollbackPendingNetworkChange(); err != nil && err.Error() != "no pending network change" {
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
snapshot, err := h.opts.App.CaptureNetworkSnapshot()
|
||||||
|
if err != nil {
|
||||||
|
return app.ActionResult{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := apply()
|
||||||
|
if err != nil {
|
||||||
|
return result, err
|
||||||
|
}
|
||||||
|
|
||||||
|
pnc := &pendingNetChange{snapshot: snapshot}
|
||||||
|
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
||||||
|
_ = h.opts.App.RestoreNetworkSnapshot(snapshot)
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
if h.pendingNet == pnc {
|
||||||
|
h.pendingNet = nil
|
||||||
|
}
|
||||||
|
h.pendingNetMu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
h.pendingNet = pnc
|
||||||
|
h.pendingNetMu.Unlock()
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
pnc := h.pendingNet
|
pnc := h.pendingNet
|
||||||
@@ -698,19 +764,30 @@ func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if err := h.rollbackPendingNetworkChange(); err != nil {
|
||||||
|
if err.Error() == "no pending network change" {
|
||||||
|
writeError(w, http.StatusConflict, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "rolled back"})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) rollbackPendingNetworkChange() error {
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
pnc := h.pendingNet
|
pnc := h.pendingNet
|
||||||
h.pendingNet = nil
|
h.pendingNet = nil
|
||||||
h.pendingNetMu.Unlock()
|
h.pendingNetMu.Unlock()
|
||||||
if pnc == nil {
|
if pnc == nil {
|
||||||
writeError(w, http.StatusConflict, "no pending network change")
|
return fmt.Errorf("no pending network change")
|
||||||
return
|
|
||||||
}
|
}
|
||||||
pnc.mu.Lock()
|
pnc.mu.Lock()
|
||||||
pnc.timer.Stop()
|
pnc.timer.Stop()
|
||||||
pnc.mu.Unlock()
|
pnc.mu.Unlock()
|
||||||
if h.opts.App != nil {
|
if h.opts.App != nil {
|
||||||
_ = h.opts.App.SetInterfaceState(pnc.iface, pnc.wasUp)
|
return h.opts.App.RestoreNetworkSnapshot(pnc.snapshot)
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "rolled back"})
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package webui
|
package webui
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -13,6 +15,7 @@ type jobState struct {
|
|||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
subs []chan string
|
subs []chan string
|
||||||
cancel func() // optional cancel function; nil if job is not cancellable
|
cancel func() // optional cancel function; nil if job is not cancellable
|
||||||
|
logPath string
|
||||||
}
|
}
|
||||||
|
|
||||||
// abort cancels the job if it has a cancel function and is not yet done.
|
// abort cancels the job if it has a cancel function and is not yet done.
|
||||||
@@ -30,6 +33,9 @@ func (j *jobState) append(line string) {
|
|||||||
j.mu.Lock()
|
j.mu.Lock()
|
||||||
defer j.mu.Unlock()
|
defer j.mu.Unlock()
|
||||||
j.lines = append(j.lines, line)
|
j.lines = append(j.lines, line)
|
||||||
|
if j.logPath != "" {
|
||||||
|
appendJobLog(j.logPath, line)
|
||||||
|
}
|
||||||
for _, ch := range j.subs {
|
for _, ch := range j.subs {
|
||||||
select {
|
select {
|
||||||
case ch <- line:
|
case ch <- line:
|
||||||
@@ -100,3 +106,32 @@ func (m *jobManager) get(id string) (*jobState, bool) {
|
|||||||
j, ok := m.jobs[id]
|
j, ok := m.jobs[id]
|
||||||
return j, ok
|
return j, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func newTaskJobState(logPath string) *jobState {
|
||||||
|
j := &jobState{logPath: logPath}
|
||||||
|
if logPath == "" {
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(logPath)
|
||||||
|
if err != nil || len(data) == 0 {
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
lines := strings.Split(strings.ReplaceAll(string(data), "\r\n", "\n"), "\n")
|
||||||
|
if len(lines) > 0 && lines[len(lines)-1] == "" {
|
||||||
|
lines = lines[:len(lines)-1]
|
||||||
|
}
|
||||||
|
j.lines = append(j.lines, lines...)
|
||||||
|
return j
|
||||||
|
}
|
||||||
|
|
||||||
|
// appendJobLog appends line plus a newline to the file at path, creating the
// file with mode 0644 if needed.
//
// Logging is best-effort: an empty path is a no-op, and open or write
// failures are silently ignored.
func appendJobLog(path, line string) {
	if path == "" {
		return
	}
	const openFlags = os.O_CREATE | os.O_APPEND | os.O_WRONLY
	logFile, openErr := os.OpenFile(path, openFlags, 0644)
	if openErr != nil {
		return
	}
	defer logFile.Close()

	entry := line + "\n"
	_, _ = logFile.WriteString(entry)
}
|
||||||
|
|||||||
320
audit/internal/webui/metricsdb.go
Normal file
320
audit/internal/webui/metricsdb.go
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
_ "modernc.org/sqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// metricsDBPath is the filesystem path of the metrics database file.
	// NOTE(review): presumably the path handed to openMetricsDB by the
	// caller; the call site is outside this file view.
	metricsDBPath = "/appdata/bee/metrics.db"

	// metricsKeepDuration is a 24-hour window.
	// NOTE(review): presumably the retention passed to Prune; confirm at the
	// call site.
	metricsKeepDuration = 24 * time.Hour
)

// MetricsDB persists live metric samples to SQLite.
type MetricsDB struct {
	db *sql.DB // handle all queries and writes go through
}
|
||||||
|
|
||||||
|
// openMetricsDB opens (or creates) the metrics database at the given path.
|
||||||
|
func openMetricsDB(path string) (*MetricsDB, error) {
|
||||||
|
db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
db.SetMaxOpenConns(1)
|
||||||
|
if err := initMetricsSchema(db); err != nil {
|
||||||
|
_ = db.Close()
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &MetricsDB{db: db}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// initMetricsSchema creates the four metrics tables if they do not exist yet.
//
// Every table is keyed by the sample timestamp ts: sys_metrics by ts alone,
// gpu_metrics by (ts, gpu_index), fan_metrics and temp_metrics by (ts, name).
// All statements are sent in a single Exec call; its error is returned as is.
func initMetricsSchema(db *sql.DB) error {
	const ddl = `
		CREATE TABLE IF NOT EXISTS sys_metrics (
			ts INTEGER NOT NULL,
			cpu_load_pct REAL,
			mem_load_pct REAL,
			power_w REAL,
			PRIMARY KEY (ts)
		);
		CREATE TABLE IF NOT EXISTS gpu_metrics (
			ts INTEGER NOT NULL,
			gpu_index INTEGER NOT NULL,
			temp_c REAL,
			usage_pct REAL,
			mem_usage_pct REAL,
			power_w REAL,
			PRIMARY KEY (ts, gpu_index)
		);
		CREATE TABLE IF NOT EXISTS fan_metrics (
			ts INTEGER NOT NULL,
			name TEXT NOT NULL,
			rpm REAL,
			PRIMARY KEY (ts, name)
		);
		CREATE TABLE IF NOT EXISTS temp_metrics (
			ts INTEGER NOT NULL,
			name TEXT NOT NULL,
			grp TEXT NOT NULL,
			celsius REAL,
			PRIMARY KEY (ts, name)
		);
	`
	if _, err := db.Exec(ddl); err != nil {
		return err
	}
	return nil
}
|
||||||
|
|
||||||
|
// Write inserts one sample into all relevant tables.
|
||||||
|
func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
|
||||||
|
ts := s.Timestamp.Unix()
|
||||||
|
tx, err := m.db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer func() { _ = tx.Rollback() }()
|
||||||
|
|
||||||
|
_, err = tx.Exec(
|
||||||
|
`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
|
||||||
|
ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, g := range s.GPUs {
|
||||||
|
_, err = tx.Exec(
|
||||||
|
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`,
|
||||||
|
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, f := range s.Fans {
|
||||||
|
_, err = tx.Exec(
|
||||||
|
`INSERT OR REPLACE INTO fan_metrics(ts,name,rpm) VALUES(?,?,?)`,
|
||||||
|
ts, f.Name, f.RPM,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, t := range s.Temps {
|
||||||
|
_, err = tx.Exec(
|
||||||
|
`INSERT OR REPLACE INTO temp_metrics(ts,name,grp,celsius) VALUES(?,?,?,?)`,
|
||||||
|
ts, t.Name, t.Group, t.Celsius,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tx.Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadRecent returns up to n samples in chronological order (oldest first).
|
||||||
|
// It reconstructs LiveMetricSample from the normalized tables.
|
||||||
|
func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
||||||
|
rows, err := m.db.Query(
|
||||||
|
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
type sysRow struct {
|
||||||
|
ts int64
|
||||||
|
cpu, mem, pwr float64
|
||||||
|
}
|
||||||
|
var sysRows []sysRow
|
||||||
|
for rows.Next() {
|
||||||
|
var r sysRow
|
||||||
|
if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
sysRows = append(sysRows, r)
|
||||||
|
}
|
||||||
|
if len(sysRows) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
// Reverse to chronological order
|
||||||
|
for i, j := 0, len(sysRows)-1; i < j; i, j = i+1, j-1 {
|
||||||
|
sysRows[i], sysRows[j] = sysRows[j], sysRows[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect min/max ts for range query
|
||||||
|
minTS := sysRows[0].ts
|
||||||
|
maxTS := sysRows[len(sysRows)-1].ts
|
||||||
|
|
||||||
|
// Load GPU rows in range
|
||||||
|
type gpuKey struct{ ts int64; idx int }
|
||||||
|
gpuData := map[gpuKey]platform.GPUMetricRow{}
|
||||||
|
gRows, err := m.db.Query(
|
||||||
|
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
|
||||||
|
minTS, maxTS,
|
||||||
|
)
|
||||||
|
if err == nil {
|
||||||
|
defer gRows.Close()
|
||||||
|
for gRows.Next() {
|
||||||
|
var ts int64
|
||||||
|
var g platform.GPUMetricRow
|
||||||
|
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil {
|
||||||
|
gpuData[gpuKey{ts, g.GPUIndex}] = g
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load fan rows in range
|
||||||
|
type fanKey struct{ ts int64; name string }
|
||||||
|
fanData := map[fanKey]float64{}
|
||||||
|
fRows, err := m.db.Query(
|
||||||
|
`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
||||||
|
)
|
||||||
|
if err == nil {
|
||||||
|
defer fRows.Close()
|
||||||
|
for fRows.Next() {
|
||||||
|
var ts int64
|
||||||
|
var name string
|
||||||
|
var rpm float64
|
||||||
|
if err := fRows.Scan(&ts, &name, &rpm); err == nil {
|
||||||
|
fanData[fanKey{ts, name}] = rpm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load temp rows in range
|
||||||
|
type tempKey struct{ ts int64; name string }
|
||||||
|
tempData := map[tempKey]platform.TempReading{}
|
||||||
|
tRows, err := m.db.Query(
|
||||||
|
`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
||||||
|
)
|
||||||
|
if err == nil {
|
||||||
|
defer tRows.Close()
|
||||||
|
for tRows.Next() {
|
||||||
|
var ts int64
|
||||||
|
var t platform.TempReading
|
||||||
|
if err := tRows.Scan(&ts, &t.Name, &t.Group, &t.Celsius); err == nil {
|
||||||
|
tempData[tempKey{ts, t.Name}] = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect unique GPU indices and fan names from loaded data (preserve order)
|
||||||
|
seenGPU := map[int]bool{}
|
||||||
|
var gpuIndices []int
|
||||||
|
for k := range gpuData {
|
||||||
|
if !seenGPU[k.idx] {
|
||||||
|
seenGPU[k.idx] = true
|
||||||
|
gpuIndices = append(gpuIndices, k.idx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seenFan := map[string]bool{}
|
||||||
|
var fanNames []string
|
||||||
|
for k := range fanData {
|
||||||
|
if !seenFan[k.name] {
|
||||||
|
seenFan[k.name] = true
|
||||||
|
fanNames = append(fanNames, k.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
seenTemp := map[string]bool{}
|
||||||
|
var tempNames []string
|
||||||
|
for k := range tempData {
|
||||||
|
if !seenTemp[k.name] {
|
||||||
|
seenTemp[k.name] = true
|
||||||
|
tempNames = append(tempNames, k.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
samples := make([]platform.LiveMetricSample, len(sysRows))
|
||||||
|
for i, r := range sysRows {
|
||||||
|
s := platform.LiveMetricSample{
|
||||||
|
Timestamp: time.Unix(r.ts, 0).UTC(),
|
||||||
|
CPULoadPct: r.cpu,
|
||||||
|
MemLoadPct: r.mem,
|
||||||
|
PowerW: r.pwr,
|
||||||
|
}
|
||||||
|
for _, idx := range gpuIndices {
|
||||||
|
if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
|
||||||
|
s.GPUs = append(s.GPUs, g)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, name := range fanNames {
|
||||||
|
if rpm, ok := fanData[fanKey{r.ts, name}]; ok {
|
||||||
|
s.Fans = append(s.Fans, platform.FanReading{Name: name, RPM: rpm})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, name := range tempNames {
|
||||||
|
if t, ok := tempData[tempKey{r.ts, name}]; ok {
|
||||||
|
s.Temps = append(s.Temps, t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
samples[i] = s
|
||||||
|
}
|
||||||
|
return samples, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prune deletes samples older than keepDuration.
|
||||||
|
func (m *MetricsDB) Prune(keepDuration time.Duration) {
|
||||||
|
cutoff := time.Now().Add(-keepDuration).Unix()
|
||||||
|
for _, table := range []string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"} {
|
||||||
|
_, _ = m.db.Exec(fmt.Sprintf("DELETE FROM %s WHERE ts < ?", table), cutoff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExportCSV writes all sys+gpu data as CSV to w.
|
||||||
|
func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
||||||
|
rows, err := m.db.Query(`
|
||||||
|
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
|
||||||
|
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w
|
||||||
|
FROM sys_metrics s
|
||||||
|
LEFT JOIN gpu_metrics g ON g.ts = s.ts
|
||||||
|
ORDER BY s.ts, g.gpu_index
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
cw := csv.NewWriter(w)
|
||||||
|
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"})
|
||||||
|
for rows.Next() {
|
||||||
|
var ts int64
|
||||||
|
var cpu, mem, pwr float64
|
||||||
|
var gpuIdx sql.NullInt64
|
||||||
|
var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64
|
||||||
|
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
row := []string{
|
||||||
|
strconv.FormatInt(ts, 10),
|
||||||
|
strconv.FormatFloat(cpu, 'f', 2, 64),
|
||||||
|
strconv.FormatFloat(mem, 'f', 2, 64),
|
||||||
|
strconv.FormatFloat(pwr, 'f', 1, 64),
|
||||||
|
}
|
||||||
|
if gpuIdx.Valid {
|
||||||
|
row = append(row,
|
||||||
|
strconv.FormatInt(gpuIdx.Int64, 10),
|
||||||
|
strconv.FormatFloat(gpuTemp.Float64, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
row = append(row, "", "", "", "", "")
|
||||||
|
}
|
||||||
|
_ = cw.Write(row)
|
||||||
|
}
|
||||||
|
cw.Flush()
|
||||||
|
return cw.Error()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close closes the database.
|
||||||
|
func (m *MetricsDB) Close() { _ = m.db.Close() }
|
||||||
|
|
||||||
|
// nullFloat wraps v in a sql.NullFloat64 that is marked valid (non-NULL).
func nullFloat(v float64) sql.NullFloat64 {
	var wrapped sql.NullFloat64
	wrapped.Float64 = v
	wrapped.Valid = true
	return wrapped
}
|
||||||
@@ -61,7 +61,8 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
|||||||
.badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
|
.badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
|
||||||
.badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
|
.badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
|
||||||
/* Output terminal */
|
/* Output terminal */
|
||||||
.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all}
|
.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
|
||||||
|
.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
|
||||||
/* Forms */
|
/* Forms */
|
||||||
.form-row{margin-bottom:14px}
|
.form-row{margin-bottom:14px}
|
||||||
.form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
|
.form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
|
||||||
@@ -83,10 +84,10 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
|
|||||||
`
|
`
|
||||||
}
|
}
|
||||||
|
|
||||||
func layoutNav(active string) string {
|
func layoutNav(active string, buildLabel string) string {
|
||||||
items := []struct{ id, label, href, onclick string }{
|
items := []struct{ id, label, href, onclick string }{
|
||||||
{"dashboard", "Dashboard", "/", ""},
|
{"dashboard", "Dashboard", "/", ""},
|
||||||
{"audit", "Audit", "#", "openAuditModal();return false;"},
|
{"audit", "Audit", "/audit", ""},
|
||||||
{"validate", "Validate", "/validate", ""},
|
{"validate", "Validate", "/validate", ""},
|
||||||
{"burn", "Burn", "/burn", ""},
|
{"burn", "Burn", "/burn", ""},
|
||||||
{"tasks", "Tasks", "/tasks", ""},
|
{"tasks", "Tasks", "/tasks", ""},
|
||||||
@@ -109,7 +110,12 @@ func layoutNav(active string) string {
|
|||||||
cls, item.href, item.label))
|
cls, item.href, item.label))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b.WriteString(`</nav></aside>`)
|
if strings.TrimSpace(buildLabel) == "" {
|
||||||
|
buildLabel = "dev"
|
||||||
|
}
|
||||||
|
b.WriteString(`</nav>`)
|
||||||
|
b.WriteString(`<div style="padding:12px 16px;border-top:1px solid rgba(255,255,255,.08);font-size:11px;color:rgba(255,255,255,.45)">Build ` + html.EscapeString(buildLabel) + `</div>`)
|
||||||
|
b.WriteString(`</aside>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,6 +127,10 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
pageID = "dashboard"
|
pageID = "dashboard"
|
||||||
title = "Dashboard"
|
title = "Dashboard"
|
||||||
body = renderDashboard(opts)
|
body = renderDashboard(opts)
|
||||||
|
case "audit":
|
||||||
|
pageID = "audit"
|
||||||
|
title = "Audit"
|
||||||
|
body = renderAudit()
|
||||||
case "validate":
|
case "validate":
|
||||||
pageID = "validate"
|
pageID = "validate"
|
||||||
title = "Validate"
|
title = "Validate"
|
||||||
@@ -173,11 +183,21 @@ func renderPage(page string, opts HandlerOptions) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return layoutHead(opts.Title+" — "+title) +
|
return layoutHead(opts.Title+" — "+title) +
|
||||||
layoutNav(pageID) +
|
layoutNav(pageID, opts.BuildLabel) +
|
||||||
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
|
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
|
||||||
body +
|
body +
|
||||||
`</div></div>` +
|
`</div></div>` +
|
||||||
renderAuditModal() +
|
renderAuditModal() +
|
||||||
|
`<script>
|
||||||
|
// Add copy button to every .terminal on the page
|
||||||
|
document.querySelectorAll('.terminal').forEach(function(t){
|
||||||
|
var w=document.createElement('div');w.className='terminal-wrap';
|
||||||
|
t.parentNode.insertBefore(w,t);w.appendChild(t);
|
||||||
|
var btn=document.createElement('button');btn.className='terminal-copy';btn.textContent='Copy';
|
||||||
|
btn.onclick=function(){navigator.clipboard.writeText(t.textContent).then(function(){btn.textContent='Copied!';setTimeout(function(){btn.textContent='Copy';},1500);});};
|
||||||
|
w.appendChild(btn);
|
||||||
|
});
|
||||||
|
</script>` +
|
||||||
`</body></html>`
|
`</body></html>`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -191,6 +211,10 @@ func renderDashboard(opts HandlerOptions) string {
|
|||||||
return b.String()
|
return b.String()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// renderAudit returns the static HTML body of the Audit page: a card whose
// header carries an "Actions" button (wired to openAuditModal()) and whose
// body embeds the /viewer page in an iframe.
func renderAudit() string {
	const (
		cardHead = `<div class="card-head">Audit Viewer <button class="btn btn-sm btn-secondary" style="margin-left:auto" onclick="openAuditModal()">Actions</button></div>`
		cardBody = `<div class="card-body" style="padding:0"><iframe class="viewer-frame" src="/viewer" title="Audit viewer"></iframe></div>`
	)
	return `<div class="card">` + cardHead + cardBody + `</div>`
}
|
||||||
|
|
||||||
func renderHardwareSummaryCard(opts HandlerOptions) string {
|
func renderHardwareSummaryCard(opts HandlerOptions) string {
|
||||||
data, err := loadSnapshot(opts.AuditPath)
|
data, err := loadSnapshot(opts.AuditPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -298,14 +322,14 @@ func renderHardwareSummaryCard(opts HandlerOptions) string {
|
|||||||
|
|
||||||
func renderAuditModal() string {
|
func renderAuditModal() string {
|
||||||
return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center">
|
return `<div id="audit-modal-overlay" style="display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:100;align-items:center;justify-content:center">
|
||||||
<div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:700px;position:relative">
|
<div style="background:#fff;border-radius:6px;padding:24px;min-width:480px;max-width:1100px;width:min(1100px,92vw);max-height:92vh;overflow:auto;position:relative">
|
||||||
<div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div>
|
<div style="font-weight:700;font-size:16px;margin-bottom:16px">Audit</div>
|
||||||
<div style="margin-bottom:12px;display:flex;gap:8px">
|
<div style="margin-bottom:12px;display:flex;gap:8px">
|
||||||
<button class="btn btn-primary" onclick="auditModalRun()">▶ Re-run Audit</button>
|
<button class="btn btn-primary" onclick="auditModalRun()">▶ Re-run Audit</button>
|
||||||
<a class="btn btn-secondary" href="/audit.json" download>↓ Download</a>
|
<a class="btn btn-secondary" href="/audit.json" download>↓ Download</a>
|
||||||
<a class="btn btn-secondary" href="/viewer" target="_blank">Open Viewer</a>
|
|
||||||
</div>
|
</div>
|
||||||
<div id="audit-modal-terminal" class="terminal" style="display:none;max-height:300px"></div>
|
<div id="audit-modal-terminal" class="terminal" style="display:none;max-height:220px;margin-bottom:12px"></div>
|
||||||
|
<iframe class="viewer-frame" src="/viewer" title="Audit viewer in modal" style="height:min(70vh,720px)"></iframe>
|
||||||
<button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">✕</button>
|
<button class="btn btn-secondary btn-sm" onclick="closeAuditModal()" style="position:absolute;top:12px;right:12px">✕</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -373,9 +397,17 @@ func renderMetrics() string {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="card" style="margin-bottom:16px">
|
<div class="card" style="margin-bottom:16px">
|
||||||
<div class="card-head">Server — Temperature</div>
|
<div class="card-head">Temperature — CPU</div>
|
||||||
<div class="card-body" style="padding:8px">
|
<div class="card-body" style="padding:8px">
|
||||||
<img id="chart-server-temp" src="/api/metrics/chart/server-temp.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
|
<img id="chart-server-temp-cpu" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">Temperature — Ambient Sensors</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-server-temp-ambient" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -383,78 +415,60 @@ func renderMetrics() string {
|
|||||||
<div class="card-head">Server — Power</div>
|
<div class="card-head">Server — Power</div>
|
||||||
<div class="card-body" style="padding:8px">
|
<div class="card-body" style="padding:8px">
|
||||||
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
|
<img id="chart-server-power" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
|
||||||
<div id="sys-table" style="margin-top:8px;font-size:12px"></div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="gpu-charts"></div>
|
<div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
|
||||||
|
<div class="card-head">Server — Fan RPM</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-server-fans" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">GPU — Compute Load</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-gpu-all-load" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">GPU — Memory Load</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-gpu-all-memload" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">GPU — Power</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-gpu-all-power" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="card" style="margin-bottom:16px">
|
||||||
|
<div class="card-head">GPU — Temperature</div>
|
||||||
|
<div class="card-body" style="padding:8px">
|
||||||
|
<img id="chart-gpu-all-temp" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
let knownGPUs = [];
|
|
||||||
|
|
||||||
function refreshCharts() {
|
function refreshCharts() {
|
||||||
const t = '?t=' + Date.now();
|
const t = '?t=' + Date.now();
|
||||||
['chart-server-load','chart-server-temp','chart-server-power'].forEach(id => {
|
['chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans',
|
||||||
|
'chart-gpu-all-load','chart-gpu-all-memload','chart-gpu-all-power','chart-gpu-all-temp'].forEach(id => {
|
||||||
const el = document.getElementById(id);
|
const el = document.getElementById(id);
|
||||||
if (el) el.src = el.src.split('?')[0] + t;
|
if (el) el.src = el.src.split('?')[0] + t;
|
||||||
});
|
});
|
||||||
knownGPUs.forEach(idx => {
|
|
||||||
['load','temp','power'].forEach(kind => {
|
|
||||||
const el = document.getElementById('chart-gpu-' + idx + '-' + kind);
|
|
||||||
if (el) el.src = el.src.split('?')[0] + t;
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
setInterval(refreshCharts, 2000);
|
setInterval(refreshCharts, 3000);
|
||||||
|
|
||||||
const es = new EventSource('/api/metrics/stream');
|
const es = new EventSource('/api/metrics/stream');
|
||||||
es.addEventListener('metrics', e => {
|
es.addEventListener('metrics', e => {
|
||||||
const d = JSON.parse(e.data);
|
const d = JSON.parse(e.data);
|
||||||
|
|
||||||
// Add GPU chart cards as GPUs appear
|
// Show/hide Fan RPM card based on data availability
|
||||||
(d.gpus||[]).forEach(g => {
|
const fanCard = document.getElementById('card-server-fans');
|
||||||
if (knownGPUs.includes(g.index)) return;
|
if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
|
||||||
knownGPUs.push(g.index);
|
|
||||||
const div = document.createElement('div');
|
|
||||||
div.className = 'card';
|
|
||||||
div.style.marginBottom = '16px';
|
|
||||||
div.innerHTML =
|
|
||||||
'<div class="card-head">GPU ' + g.index + ' — Load</div>' +
|
|
||||||
'<div class="card-body" style="padding:8px">' +
|
|
||||||
'<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' +
|
|
||||||
'</div>' +
|
|
||||||
'<div class="card-head">GPU ' + g.index + ' — Temperature</div>' +
|
|
||||||
'<div class="card-body" style="padding:8px">' +
|
|
||||||
'<img id="chart-gpu-' + g.index + '-temp" src="/api/metrics/chart/gpu/' + g.index + '-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' temp">' +
|
|
||||||
'</div>' +
|
|
||||||
'<div class="card-head">GPU ' + g.index + ' — Power</div>' +
|
|
||||||
'<div class="card-body" style="padding:8px">' +
|
|
||||||
'<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' +
|
|
||||||
'<div id="gpu-table-' + g.index + '" style="margin-top:8px;font-size:12px"></div>' +
|
|
||||||
'</div>';
|
|
||||||
document.getElementById('gpu-charts').appendChild(div);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Update numeric tables
|
|
||||||
let sysHTML = '';
|
|
||||||
const cpuTemp = (d.temps||[]).find(t => t.name==='CPU');
|
|
||||||
if (cpuTemp) sysHTML += '<tr><td>CPU Temp</td><td>'+cpuTemp.celsius.toFixed(1)+'°C</td></tr>';
|
|
||||||
if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
|
|
||||||
if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
|
|
||||||
(d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
|
|
||||||
if (d.power_w) sysHTML += '<tr><td>Power</td><td>'+d.power_w.toFixed(0)+' W</td></tr>';
|
|
||||||
const st = document.getElementById('sys-table');
|
|
||||||
if (st) st.innerHTML = sysHTML ? '<table>'+sysHTML+'</table>' : '<p style="color:var(--muted)">No sensor data (ipmitool/sensors required)</p>';
|
|
||||||
|
|
||||||
(d.gpus||[]).forEach(g => {
|
|
||||||
const t = document.getElementById('gpu-table-' + g.index);
|
|
||||||
if (!t) return;
|
|
||||||
t.innerHTML = '<table>' +
|
|
||||||
'<tr><td>Temp</td><td>'+g.temp_c+'°C</td>' +
|
|
||||||
'<td>Load</td><td>'+g.usage_pct+'%</td>' +
|
|
||||||
'<td>Mem</td><td>'+g.mem_usage_pct+'%</td>' +
|
|
||||||
'<td>Power</td><td>'+g.power_w+' W</td></tr></table>';
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
es.onerror = () => {};
|
es.onerror = () => {};
|
||||||
</script>`
|
</script>`
|
||||||
@@ -491,6 +505,8 @@ let satES = null;
|
|||||||
function runSAT(target) {
|
function runSAT(target) {
|
||||||
if (satES) { satES.close(); satES = null; }
|
if (satES) { satES.close(); satES = null; }
|
||||||
const body = {};
|
const body = {};
|
||||||
|
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
|
||||||
|
body.display_name = labels[target] || ('Validate ' + target);
|
||||||
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
||||||
document.getElementById('sat-output').style.display='block';
|
document.getElementById('sat-output').style.display='block';
|
||||||
@@ -520,6 +536,8 @@ function runAllSAT() {
|
|||||||
const btn = document.getElementById('sat-btn-' + target);
|
const btn = document.getElementById('sat-btn-' + target);
|
||||||
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
|
if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
|
||||||
const body = {};
|
const body = {};
|
||||||
|
const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
|
||||||
|
body.display_name = labels[target] || ('Validate ' + target);
|
||||||
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
|
||||||
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
||||||
@@ -568,13 +586,15 @@ func renderSATCard(id, label, extra string) string {
|
|||||||
func renderBurn() string {
|
func renderBurn() string {
|
||||||
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
|
return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>⚠ Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
|
||||||
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
|
||||||
|
<div class="card"><div class="card-head">Burn Profile</div><div class="card-body">
|
||||||
|
<div class="form-row" style="max-width:320px"><label>Preset</label><select id="burn-profile"><option value="smoke">Smoke: 5 minutes</option><option value="acceptance">Acceptance: 1 hour</option><option value="overnight">Overnight: 8 hours</option></select></div>
|
||||||
|
<p style="color:var(--muted);font-size:12px">Applied to all tests on this page. NVIDIA uses mapped DCGM levels: smoke=quick, acceptance=targeted stress, overnight=extended stress.</p>
|
||||||
|
</div></div>
|
||||||
<div class="grid3">
|
<div class="grid3">
|
||||||
<div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
|
||||||
<div class="form-row"><label>Duration</label><select id="bi-dur"><option value="600">10 minutes</option><option value="3600">1 hour</option><option value="28800">8 hours</option><option value="86400">24 hours</option></select></div>
|
|
||||||
<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">▶ Start NVIDIA Stress</button>
|
<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">▶ Start NVIDIA Stress</button>
|
||||||
</div></div>
|
</div></div>
|
||||||
<div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
|
||||||
<div class="form-row"><label>Duration (seconds)</label><input type="number" id="bi-cpu-dur" value="300" min="60"></div>
|
|
||||||
<button class="btn btn-primary" onclick="runBurnIn('cpu')">▶ Start CPU Stress</button>
|
<button class="btn btn-primary" onclick="runBurnIn('cpu')">▶ Start CPU Stress</button>
|
||||||
</div></div>
|
</div></div>
|
||||||
<div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
|
<div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
|
||||||
@@ -598,11 +618,9 @@ func renderBurn() string {
|
|||||||
let biES = null;
|
let biES = null;
|
||||||
function runBurnIn(target) {
|
function runBurnIn(target) {
|
||||||
if (biES) { biES.close(); biES = null; }
|
if (biES) { biES.close(); biES = null; }
|
||||||
const body = {};
|
const body = { profile: document.getElementById('burn-profile').value || 'smoke' };
|
||||||
if (target === 'nvidia') body.duration = parseInt(document.getElementById('bi-dur').value)||600;
|
|
||||||
if (target === 'cpu') body.duration = parseInt(document.getElementById('bi-cpu-dur').value)||300;
|
|
||||||
document.getElementById('bi-output').style.display='block';
|
document.getElementById('bi-output').style.display='block';
|
||||||
document.getElementById('bi-title').textContent = '— ' + target;
|
document.getElementById('bi-title').textContent = '— ' + target + ' [' + body.profile + ']';
|
||||||
const term = document.getElementById('bi-terminal');
|
const term = document.getElementById('bi-terminal');
|
||||||
term.textContent = 'Enqueuing ' + target + ' stress...\n';
|
term.textContent = 'Enqueuing ' + target + ' stress...\n';
|
||||||
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
|
||||||
@@ -672,7 +690,7 @@ var _netCountdownTimer = null;
|
|||||||
function loadNetwork() {
|
function loadNetwork() {
|
||||||
fetch('/api/network').then(r=>r.json()).then(d => {
|
fetch('/api/network').then(r=>r.json()).then(d => {
|
||||||
const rows = (d.interfaces||[]).map(i =>
|
const rows = (d.interfaces||[]).map(i =>
|
||||||
'<tr><td>'+i.Name+'</td>' +
|
'<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
|
||||||
'<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
|
'<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
|
||||||
'<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
|
'<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
|
||||||
).join('');
|
).join('');
|
||||||
@@ -681,6 +699,10 @@ function loadNetwork() {
|
|||||||
(d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
|
(d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
function selectIface(iface) {
|
||||||
|
document.getElementById('dhcp-iface').value = iface;
|
||||||
|
document.getElementById('st-iface').value = iface;
|
||||||
|
}
|
||||||
function toggleIface(iface, currentState) {
|
function toggleIface(iface, currentState) {
|
||||||
fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
|
fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
|
||||||
.then(r=>r.json()).then(d => {
|
.then(r=>r.json()).then(d => {
|
||||||
@@ -716,6 +738,7 @@ function runDHCP() {
|
|||||||
fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
|
fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
|
||||||
.then(r=>r.json()).then(d => {
|
.then(r=>r.json()).then(d => {
|
||||||
document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
|
document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
|
||||||
|
if (!d.error) showNetPending(d.rollback_in || 60);
|
||||||
loadNetwork();
|
loadNetwork();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -729,6 +752,7 @@ function setStatic() {
|
|||||||
dns: dns,
|
dns: dns,
|
||||||
})}).then(r=>r.json()).then(d => {
|
})}).then(r=>r.json()).then(d => {
|
||||||
document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
|
document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
|
||||||
|
if (!d.error) showNetPending(d.rollback_in || 60);
|
||||||
loadNetwork();
|
loadNetwork();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -846,11 +870,18 @@ func listExportFiles(exportDir string) ([]string, error) {
|
|||||||
|
|
||||||
func renderTools() string {
|
func renderTools() string {
|
||||||
return `<div class="card" style="margin-bottom:16px">
|
return `<div class="card" style="margin-bottom:16px">
|
||||||
<div class="card-head">Install to RAM</div>
|
<div class="card-head">System Install</div>
|
||||||
<div class="card-body">
|
<div class="card-body">
|
||||||
|
<div style="margin-bottom:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
|
||||||
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
<p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
|
||||||
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
<button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">▶ Copy to RAM</button>
|
||||||
</div>
|
</div>
|
||||||
|
<div style="border-top:1px solid var(--line);padding-top:20px">
|
||||||
|
<div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
|
||||||
|
renderInstallInline() + `
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
|
fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
|
||||||
@@ -886,9 +917,6 @@ function installToRAM() {
|
|||||||
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
<div class="card"><div class="card-head">Services</div><div class="card-body">` +
|
||||||
renderServicesInline() + `</div></div>
|
renderServicesInline() + `</div></div>
|
||||||
|
|
||||||
<div class="card"><div class="card-head">Install to Disk</div><div class="card-body">` +
|
|
||||||
renderInstallInline() + `</div></div>
|
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
function checkTools() {
|
function checkTools() {
|
||||||
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
|
||||||
@@ -939,8 +967,6 @@ func renderInstallInline() string {
|
|||||||
<div id="install-terminal" class="terminal" style="max-height:500px"></div>
|
<div id="install-terminal" class="terminal" style="max-height:500px"></div>
|
||||||
<div id="install-status" style="margin-top:12px;font-size:13px"></div>
|
<div id="install-status" style="margin-top:12px;font-size:13px"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
#install-disk-tbody tr{cursor:pointer}
|
#install-disk-tbody tr{cursor:pointer}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"html"
|
||||||
"mime"
|
"mime"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
@@ -13,6 +14,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
|
"bee/audit/internal/platform"
|
||||||
"bee/audit/internal/runtimeenv"
|
"bee/audit/internal/runtimeenv"
|
||||||
gocharts "github.com/go-analyze/charts"
|
gocharts "github.com/go-analyze/charts"
|
||||||
"reanimator/chart/viewer"
|
"reanimator/chart/viewer"
|
||||||
@@ -35,6 +37,7 @@ func init() {
|
|||||||
// HandlerOptions configures the web UI handler.
|
// HandlerOptions configures the web UI handler.
|
||||||
type HandlerOptions struct {
|
type HandlerOptions struct {
|
||||||
Title string
|
Title string
|
||||||
|
BuildLabel string
|
||||||
AuditPath string
|
AuditPath string
|
||||||
ExportDir string
|
ExportDir string
|
||||||
App *app.App
|
App *app.App
|
||||||
@@ -84,7 +87,7 @@ func relAgeLabel(age time.Duration) string {
|
|||||||
if age < time.Hour {
|
if age < time.Hour {
|
||||||
m := int(age.Minutes())
|
m := int(age.Minutes())
|
||||||
if m == 0 {
|
if m == 0 {
|
||||||
return "-<1m"
|
return "-1m"
|
||||||
}
|
}
|
||||||
return fmt.Sprintf("-%dm", m)
|
return fmt.Sprintf("-%dm", m)
|
||||||
}
|
}
|
||||||
@@ -102,10 +105,14 @@ type gpuRings struct {
|
|||||||
Power *metricsRing
|
Power *metricsRing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// namedMetricsRing pairs a metrics ring with the series name reported for it
// when the ring is snapshotted for a chart.
type namedMetricsRing struct {
|
||||||
|
// Name is returned as the series name for this ring.
Name string
|
||||||
|
// Ring is the buffer whose snapshot supplies the series values and labels.
Ring *metricsRing
|
||||||
|
}
|
||||||
|
|
||||||
// pendingNetChange tracks a network state change awaiting confirmation.
|
// pendingNetChange tracks a network state change awaiting confirmation.
|
||||||
type pendingNetChange struct {
|
type pendingNetChange struct {
|
||||||
iface string
|
snapshot platform.NetworkSnapshot
|
||||||
wasUp bool
|
|
||||||
timer *time.Timer
|
timer *time.Timer
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
@@ -115,15 +122,18 @@ type handler struct {
|
|||||||
opts HandlerOptions
|
opts HandlerOptions
|
||||||
mux *http.ServeMux
|
mux *http.ServeMux
|
||||||
// server rings
|
// server rings
|
||||||
ringCPUTemp *metricsRing
|
|
||||||
ringCPULoad *metricsRing
|
ringCPULoad *metricsRing
|
||||||
ringMemLoad *metricsRing
|
ringMemLoad *metricsRing
|
||||||
ringPower *metricsRing
|
ringPower *metricsRing
|
||||||
ringFans []*metricsRing
|
ringFans []*metricsRing
|
||||||
fanNames []string
|
fanNames []string
|
||||||
|
cpuTempRings []*namedMetricsRing
|
||||||
|
ambientTempRings []*namedMetricsRing
|
||||||
// per-GPU rings (index = GPU index)
|
// per-GPU rings (index = GPU index)
|
||||||
gpuRings []*gpuRings
|
gpuRings []*gpuRings
|
||||||
ringsMu sync.Mutex
|
ringsMu sync.Mutex
|
||||||
|
// metrics persistence (nil if DB unavailable)
|
||||||
|
metricsDB *MetricsDB
|
||||||
// install job (at most one at a time)
|
// install job (at most one at a time)
|
||||||
installJob *jobState
|
installJob *jobState
|
||||||
installMu sync.Mutex
|
installMu sync.Mutex
|
||||||
@@ -146,16 +156,28 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
|
|
||||||
h := &handler{
|
h := &handler{
|
||||||
opts: opts,
|
opts: opts,
|
||||||
ringCPUTemp: newMetricsRing(120),
|
|
||||||
ringCPULoad: newMetricsRing(120),
|
ringCPULoad: newMetricsRing(120),
|
||||||
ringMemLoad: newMetricsRing(120),
|
ringMemLoad: newMetricsRing(120),
|
||||||
ringPower: newMetricsRing(120),
|
ringPower: newMetricsRing(120),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Open metrics DB and pre-fill ring buffers from history.
|
||||||
|
if db, err := openMetricsDB(metricsDBPath); err == nil {
|
||||||
|
h.metricsDB = db
|
||||||
|
db.Prune(metricsKeepDuration)
|
||||||
|
if samples, err := db.LoadRecent(120); err == nil {
|
||||||
|
for _, s := range samples {
|
||||||
|
h.feedRings(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
globalQueue.startWorker(&opts)
|
globalQueue.startWorker(&opts)
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
// ── Infrastructure ──────────────────────────────────────────────────────
|
// ── Infrastructure ──────────────────────────────────────────────────────
|
||||||
mux.HandleFunc("GET /healthz", h.handleHealthz)
|
mux.HandleFunc("GET /healthz", h.handleHealthz)
|
||||||
|
mux.HandleFunc("GET /api/ready", h.handleReady)
|
||||||
|
|
||||||
// ── Existing read-only endpoints (preserved for compatibility) ──────────
|
// ── Existing read-only endpoints (preserved for compatibility) ──────────
|
||||||
mux.HandleFunc("GET /audit.json", h.handleAuditJSON)
|
mux.HandleFunc("GET /audit.json", h.handleAuditJSON)
|
||||||
@@ -223,9 +245,10 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
|||||||
mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
|
mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
|
||||||
mux.HandleFunc("GET /api/install/stream", h.handleAPIInstallStream)
|
mux.HandleFunc("GET /api/install/stream", h.handleAPIInstallStream)
|
||||||
|
|
||||||
// Metrics — SSE stream of live sensor data + server-side SVG charts
|
// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
|
||||||
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
|
mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
|
||||||
mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
|
mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
|
||||||
|
mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV)
|
||||||
|
|
||||||
// Reanimator chart static assets (viewer template expects /static/*)
|
// Reanimator chart static assets (viewer template expects /static/*)
|
||||||
mux.Handle("GET /static/", http.StripPrefix("/static/", web.Static()))
|
mux.Handle("GET /static/", http.StripPrefix("/static/", web.Static()))
|
||||||
@@ -382,21 +405,51 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
yMin = floatPtr(0)
|
yMin = floatPtr(0)
|
||||||
yMax = floatPtr(100)
|
yMax = floatPtr(100)
|
||||||
|
|
||||||
case path == "server-temp":
|
case path == "server-temp", path == "server-temp-cpu":
|
||||||
title = "CPU Temperature"
|
title = "CPU Temperature"
|
||||||
vCPUTemp, l := h.ringCPUTemp.snapshot()
|
h.ringsMu.Lock()
|
||||||
labels = l
|
datasets, names, labels = snapshotNamedRings(h.cpuTempRings)
|
||||||
datasets = [][]float64{vCPUTemp}
|
h.ringsMu.Unlock()
|
||||||
names = []string{"CPU Temp °C"}
|
|
||||||
yMin = floatPtr(0)
|
yMin = floatPtr(0)
|
||||||
yMax = autoMax120(vCPUTemp)
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
case path == "server-temp-gpu":
|
||||||
|
title = "GPU Temperature"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vTemp, l := gr.Temp.snapshot()
|
||||||
|
datasets = append(datasets, vTemp)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
case path == "server-temp-ambient":
|
||||||
|
title = "Ambient / Other Sensors"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
datasets, names, labels = snapshotNamedRings(h.ambientTempRings)
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
case path == "server-power":
|
case path == "server-power":
|
||||||
title = "Power & Fans"
|
title = "System Power"
|
||||||
vPower, l := h.ringPower.snapshot()
|
vPower, l := h.ringPower.snapshot()
|
||||||
labels = l
|
labels = l
|
||||||
datasets = [][]float64{vPower}
|
datasets = [][]float64{vPower}
|
||||||
names = []string{"Power W"}
|
names = []string{"Power W"}
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(vPower)
|
||||||
|
|
||||||
|
case path == "server-fans":
|
||||||
|
title = "Fan RPM"
|
||||||
h.ringsMu.Lock()
|
h.ringsMu.Lock()
|
||||||
for i, fr := range h.ringFans {
|
for i, fr := range h.ringFans {
|
||||||
fv, _ := fr.snapshot()
|
fv, _ := fr.snapshot()
|
||||||
@@ -411,7 +464,80 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
|
|||||||
yMin = floatPtr(0)
|
yMin = floatPtr(0)
|
||||||
yMax = autoMax120(datasets...)
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
// ── GPU sub-charts ────────────────────────────────────────────────────
|
// ── Combined GPU charts (all GPUs on one chart) ───────────────────────
|
||||||
|
case path == "gpu-all-load":
|
||||||
|
title = "GPU Compute Load"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vUtil, l := gr.Util.snapshot()
|
||||||
|
datasets = append(datasets, vUtil)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = floatPtr(100)
|
||||||
|
|
||||||
|
case path == "gpu-all-memload":
|
||||||
|
title = "GPU Memory Load"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vMem, l := gr.MemUtil.snapshot()
|
||||||
|
datasets = append(datasets, vMem)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = floatPtr(100)
|
||||||
|
|
||||||
|
case path == "gpu-all-power":
|
||||||
|
title = "GPU Power"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vPow, l := gr.Power.snapshot()
|
||||||
|
datasets = append(datasets, vPow)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
case path == "gpu-all-temp":
|
||||||
|
title = "GPU Temperature"
|
||||||
|
h.ringsMu.Lock()
|
||||||
|
for idx, gr := range h.gpuRings {
|
||||||
|
if gr == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vTemp, l := gr.Temp.snapshot()
|
||||||
|
datasets = append(datasets, vTemp)
|
||||||
|
names = append(names, fmt.Sprintf("GPU %d", idx))
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
h.ringsMu.Unlock()
|
||||||
|
yMin = floatPtr(0)
|
||||||
|
yMax = autoMax120(datasets...)
|
||||||
|
|
||||||
|
// ── Per-GPU sub-charts ────────────────────────────────────────────────
|
||||||
case strings.HasPrefix(path, "gpu/"):
|
case strings.HasPrefix(path, "gpu/"):
|
||||||
rest := strings.TrimPrefix(path, "gpu/")
|
rest := strings.TrimPrefix(path, "gpu/")
|
||||||
// rest is either "{idx}-load", "{idx}-temp", "{idx}-power", or legacy "{idx}"
|
// rest is either "{idx}-load", "{idx}-temp", "{idx}-power", or legacy "{idx}"
|
||||||
@@ -507,14 +633,39 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
|
|||||||
datasets[i] = make([]float64, n)
|
datasets[i] = make([]float64, n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sparse := sparseLabels(labels, 6)
|
// Append global min/avg/max to title.
|
||||||
|
mn, avg, mx := globalStats(datasets)
|
||||||
|
if mx > 0 {
|
||||||
|
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
|
||||||
|
title,
|
||||||
|
chartLegendNumber(mn),
|
||||||
|
chartLegendNumber(avg),
|
||||||
|
chartLegendNumber(mx),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
title = sanitizeChartText(title)
|
||||||
|
names = sanitizeChartTexts(names)
|
||||||
|
sparse := sanitizeChartTexts(sparseLabels(labels, 6))
|
||||||
|
|
||||||
opt := gocharts.NewLineChartOptionWithData(datasets)
|
opt := gocharts.NewLineChartOptionWithData(datasets)
|
||||||
opt.Title = gocharts.TitleOption{Text: title}
|
opt.Title = gocharts.TitleOption{Text: title}
|
||||||
opt.XAxis.Labels = sparse
|
opt.XAxis.Labels = sparse
|
||||||
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
opt.Legend = gocharts.LegendOption{SeriesNames: names}
|
||||||
|
opt.Symbol = gocharts.SymbolNone
|
||||||
|
// Right padding: reserve space for the MarkLine label (library recommendation).
|
||||||
|
opt.Padding = gocharts.NewBox(20, 20, 80, 20)
|
||||||
if yMin != nil || yMax != nil {
|
if yMin != nil || yMax != nil {
|
||||||
opt.YAxis = []gocharts.YAxisOption{{Min: yMin, Max: yMax}}
|
opt.YAxis = []gocharts.YAxisOption{{
|
||||||
|
Min: yMin,
|
||||||
|
Max: yMax,
|
||||||
|
ValueFormatter: chartLegendNumber,
|
||||||
|
}}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a single peak mark line on the series that holds the global maximum.
|
||||||
|
peakIdx, _ := globalPeakSeries(datasets)
|
||||||
|
if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
|
||||||
|
opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
|
||||||
}
|
}
|
||||||
|
|
||||||
p := gocharts.NewPainter(gocharts.PainterOptions{
|
p := gocharts.NewPainter(gocharts.PainterOptions{
|
||||||
@@ -528,6 +679,68 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
|
|||||||
return p.Bytes()
|
return p.Bytes()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// globalPeakSeries looks for the largest strictly positive value across all
// datasets. It returns the index of the first series that holds that value and
// the value itself. When no value is greater than zero (including empty or nil
// input) it returns -1 and 0.
func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
	best := -1
	var top float64
	for i := 0; i < len(datasets); i++ {
		series := datasets[i]
		for j := 0; j < len(series); j++ {
			// Strict comparison: on ties the earliest series keeps the peak.
			if cur := series[j]; cur > top {
				top, best = cur, i
			}
		}
	}
	return best, top
}
|
||||||
|
|
||||||
|
// globalStats computes the minimum, arithmetic mean and maximum over every
// value of every dataset. All three results are zero when there are no values.
func globalStats(datasets [][]float64) (mn, avg, mx float64) {
	var (
		total float64
		seen  int
	)
	for _, series := range datasets {
		for _, sample := range series {
			switch {
			case seen == 0:
				// The very first value seeds both extremes.
				mn, mx = sample, sample
			case sample < mn:
				mn = sample
			case sample > mx:
				mx = sample
			}
			total += sample
			seen++
		}
	}
	if seen == 0 {
		return mn, avg, mx
	}
	avg = total / float64(seen)
	return mn, avg, mx
}
|
||||||
|
|
||||||
|
// sanitizeChartText prepares s for use as chart text: control characters below
// U+0020 other than tab, line feed and carriage return are removed, and the
// result is HTML-escaped.
func sanitizeChartText(s string) string {
	if len(s) == 0 {
		return ""
	}
	// keep returns the rune unchanged when it is allowed and -1 to drop it.
	keep := func(r rune) rune {
		if r >= 0x20 || r == '\t' || r == '\n' || r == '\r' {
			return r
		}
		return -1
	}
	cleaned := strings.Map(keep, s)
	return html.EscapeString(cleaned)
}
|
||||||
|
|
||||||
|
func sanitizeChartTexts(in []string) []string {
|
||||||
|
out := make([]string, len(in))
|
||||||
|
for i, s := range in {
|
||||||
|
out[i] = sanitizeChartText(s)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func safeIdx(s []float64, i int) float64 {
|
func safeIdx(s []float64, i int) float64 {
|
||||||
if i < len(s) {
|
if i < len(s) {
|
||||||
return s[i]
|
return s[i]
|
||||||
@@ -535,6 +748,46 @@ func safeIdx(s []float64, i int) float64 {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
|
||||||
|
var datasets [][]float64
|
||||||
|
var names []string
|
||||||
|
var labels []string
|
||||||
|
for _, item := range rings {
|
||||||
|
if item == nil || item.Ring == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vals, l := item.Ring.snapshot()
|
||||||
|
datasets = append(datasets, vals)
|
||||||
|
names = append(names, item.Name)
|
||||||
|
if len(labels) == 0 {
|
||||||
|
labels = l
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return datasets, names, labels
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartLegendNumber formats v as a compact label for chart titles and axes.
//
// Magnitudes below 1000 are rounded to a whole number. Magnitudes from 1000 up
// to (but excluding) 10000 are shown in thousands with at most two decimals and
// a comma as the decimal separator (1250 -> "1,25k"). Larger magnitudes are
// rounded to whole thousands (10200 -> "10k"). Negative inputs get a leading
// "-", except when the magnitude rounds to zero, so the result is never "-0".
func chartLegendNumber(v float64) string {
	neg := v < 0
	if neg {
		v = -v
	}
	var out string
	switch {
	case v >= 10000:
		// Round half up to whole thousands.
		out = fmt.Sprintf("%dk", int((v+500)/1000))
	case v >= 1000:
		s := fmt.Sprintf("%.2f", v/1000)
		// Drop trailing zeros and a dangling decimal point: "1.20" -> "1.2", "1.00" -> "1".
		s = strings.TrimRight(strings.TrimRight(s, "0"), ".")
		out = strings.ReplaceAll(s, ".", ",") + "k"
	default:
		out = fmt.Sprintf("%.0f", v)
	}
	// Small negatives such as -0.4 round to "0"; do not print them as "-0".
	if neg && out != "0" {
		return "-" + out
	}
	return out
}
|
||||||
|
|
||||||
func sparseLabels(labels []string, n int) []string {
|
func sparseLabels(labels []string, n int) []string {
|
||||||
out := make([]string, len(labels))
|
out := make([]string, len(labels))
|
||||||
step := len(labels) / n
|
step := len(labels) / n
|
||||||
@@ -549,11 +802,79 @@ func sparseLabels(labels []string, n int) []string {
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.metricsDB == nil {
|
||||||
|
http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/csv; charset=utf-8")
|
||||||
|
w.Header().Set("Content-Disposition", `attachment; filename="bee-metrics.csv"`)
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
_ = h.metricsDB.ExportCSV(w)
|
||||||
|
}
|
||||||
|
|
||||||
// ── Page handler ─────────────────────────────────────────────────────────────
|
// ── Page handler ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
if _, err := os.Stat(h.opts.AuditPath); err != nil {
|
||||||
|
w.WriteHeader(http.StatusServiceUnavailable)
|
||||||
|
_, _ = w.Write([]byte("starting"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
_, _ = w.Write([]byte("ready"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadingPageHTML is the placeholder page written by handlePage for "/" while
// the audit snapshot file does not exist. Its inline script requests
// /api/ready, retries one second after a non-OK response or a failed request,
// and replaces the location with "/" once the response is OK.
const loadingPageHTML = `<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>EASY-BEE</title>
|
||||||
|
<style>
|
||||||
|
*{margin:0;padding:0;box-sizing:border-box}
|
||||||
|
html,body{height:100%;background:#0f1117;display:flex;align-items:center;justify-content:center;font-family:'Courier New',monospace;color:#e2e8f0}
|
||||||
|
.logo{font-size:13px;line-height:1.4;color:#f6c90e;margin-bottom:48px;white-space:pre}
|
||||||
|
.spinner{width:48px;height:48px;border:4px solid #2d3748;border-top-color:#f6c90e;border-radius:50%;animation:spin .8s linear infinite;margin:0 auto 24px}
|
||||||
|
@keyframes spin{to{transform:rotate(360deg)}}
|
||||||
|
.status{font-size:14px;color:#a0aec0;letter-spacing:.05em}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div style="text-align:center">
|
||||||
|
<div class="logo"> ███████╗ █████╗ ███████╗██╗ ██╗ ██████╗ ███████╗███████╗
|
||||||
|
██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝ ██╔══██╗██╔════╝██╔════╝
|
||||||
|
█████╗ ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗ █████╗
|
||||||
|
██╔══╝ ██╔══██║╚════██║ ╚██╔╝ ╚════╝██╔══██╗██╔══╝ ██╔══╝
|
||||||
|
███████╗██║ ██║███████║ ██║ ██████╔╝███████╗███████╗
|
||||||
|
╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝</div>
|
||||||
|
<div class="spinner"></div>
|
||||||
|
<div class="status" id="s">Starting up...</div>
|
||||||
|
</div>
|
||||||
|
<script>
|
||||||
|
function probe(){
|
||||||
|
fetch('/api/ready',{cache:'no-store'})
|
||||||
|
.then(function(r){
|
||||||
|
if(r.ok){window.location.replace('/');}
|
||||||
|
else{setTimeout(probe,1000);}
|
||||||
|
})
|
||||||
|
.catch(function(){setTimeout(probe,1000);});
|
||||||
|
}
|
||||||
|
probe();
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>`
|
||||||
|
|
||||||
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
|
||||||
page := strings.TrimPrefix(r.URL.Path, "/")
|
page := strings.TrimPrefix(r.URL.Path, "/")
|
||||||
if page == "" {
|
if page == "" {
|
||||||
|
// Serve loading page until audit snapshot exists
|
||||||
|
if _, err := os.Stat(h.opts.AuditPath); err != nil {
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
|
_, _ = w.Write([]byte(loadingPageHTML))
|
||||||
|
return
|
||||||
|
}
|
||||||
page = "dashboard"
|
page = "dashboard"
|
||||||
}
|
}
|
||||||
// Redirect old routes to new names
|
// Redirect old routes to new names
|
||||||
|
|||||||
@@ -9,6 +9,28 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestChartLegendNumber(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
in float64
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{in: 0.4, want: "0"},
|
||||||
|
{in: 61.5, want: "62"},
|
||||||
|
{in: 999.4, want: "999"},
|
||||||
|
{in: 1200, want: "1,2k"},
|
||||||
|
{in: 1250, want: "1,25k"},
|
||||||
|
{in: 1310, want: "1,31k"},
|
||||||
|
{in: 1500, want: "1,5k"},
|
||||||
|
{in: 2600, want: "2,6k"},
|
||||||
|
{in: 10200, want: "10k"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := chartLegendNumber(tc.in); got != tc.want {
|
||||||
|
t.Fatalf("chartLegendNumber(%v)=%q want %q", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRootRendersDashboard(t *testing.T) {
|
func TestRootRendersDashboard(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
@@ -31,9 +53,9 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
if first.Code != http.StatusOK {
|
if first.Code != http.StatusOK {
|
||||||
t.Fatalf("first status=%d", first.Code)
|
t.Fatalf("first status=%d", first.Code)
|
||||||
}
|
}
|
||||||
// Dashboard should contain the audit modal (with viewer link) and hardware summary
|
// Dashboard should contain the audit nav link and hardware summary
|
||||||
if !strings.Contains(first.Body.String(), `openAuditModal`) {
|
if !strings.Contains(first.Body.String(), `href="/audit"`) {
|
||||||
t.Fatalf("first body missing audit modal trigger: %s", first.Body.String())
|
t.Fatalf("first body missing audit nav link: %s", first.Body.String())
|
||||||
}
|
}
|
||||||
if !strings.Contains(first.Body.String(), `/viewer`) {
|
if !strings.Contains(first.Body.String(), `/viewer`) {
|
||||||
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
||||||
@@ -56,6 +78,28 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
handler := NewHandler(HandlerOptions{AuditPath: path})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/audit", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `iframe class="viewer-frame" src="/viewer"`) {
|
||||||
|
t.Fatalf("audit page missing viewer frame: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `openAuditModal()`) {
|
||||||
|
t.Fatalf("audit page missing action modal trigger: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
|
|||||||
@@ -5,9 +5,13 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Task statuses.
|
// Task statuses.
|
||||||
@@ -45,6 +49,7 @@ type Task struct {
|
|||||||
StartedAt *time.Time `json:"started_at,omitempty"`
|
StartedAt *time.Time `json:"started_at,omitempty"`
|
||||||
DoneAt *time.Time `json:"done_at,omitempty"`
|
DoneAt *time.Time `json:"done_at,omitempty"`
|
||||||
ErrMsg string `json:"error,omitempty"`
|
ErrMsg string `json:"error,omitempty"`
|
||||||
|
LogPath string `json:"log_path,omitempty"`
|
||||||
|
|
||||||
// runtime fields (not serialised)
|
// runtime fields (not serialised)
|
||||||
job *jobState
|
job *jobState
|
||||||
@@ -53,10 +58,42 @@ type Task struct {
|
|||||||
|
|
||||||
// taskParams holds optional parameters parsed from the run request.
|
// taskParams holds optional parameters parsed from the run request.
|
||||||
type taskParams struct {
|
type taskParams struct {
|
||||||
Duration int
|
Duration int `json:"duration,omitempty"`
|
||||||
DiagLevel int
|
DiagLevel int `json:"diag_level,omitempty"`
|
||||||
GPUIndices []int
|
GPUIndices []int `json:"gpu_indices,omitempty"`
|
||||||
Device string // for install
|
BurnProfile string `json:"burn_profile,omitempty"`
|
||||||
|
DisplayName string `json:"display_name,omitempty"`
|
||||||
|
Device string `json:"device,omitempty"` // for install
|
||||||
|
}
|
||||||
|
|
||||||
|
// persistedTask is the JSON shape of one queue entry in the task state file.
// persistLocked writes a slice of these and loadLocked reads them back into
// Task values; runtime-only Task fields (such as the job state) are not
// represented here.
type persistedTask struct {
	ID        string     `json:"id"`
	Name      string     `json:"name"`
	Target    string     `json:"target"`
	Priority  int        `json:"priority"`
	Status    string     `json:"status"`
	CreatedAt time.Time  `json:"created_at"`
	StartedAt *time.Time `json:"started_at,omitempty"`
	DoneAt    *time.Time `json:"done_at,omitempty"`
	ErrMsg    string     `json:"error,omitempty"`
	LogPath   string     `json:"log_path,omitempty"`
	// NOTE: encoding/json never treats a struct value as empty, so omitempty
	// has no effect here and "params" is always emitted.
	Params taskParams `json:"params,omitempty"`
}
|
||||||
|
|
||||||
|
// burnPreset is the pair of defaults that a named burn profile resolves to.
type burnPreset struct {
	// NvidiaDiag is the diag level used for the NVIDIA pack when the task
	// carries a burn profile but no explicit diag level.
	NvidiaDiag int
	// DurationSec is the run duration in seconds used when the task carries a
	// burn profile but no explicit duration.
	DurationSec int
}
|
||||||
|
|
||||||
|
func resolveBurnPreset(profile string) burnPreset {
|
||||||
|
switch profile {
|
||||||
|
case "overnight":
|
||||||
|
return burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}
|
||||||
|
case "acceptance":
|
||||||
|
return burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}
|
||||||
|
default:
|
||||||
|
return burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// taskQueue manages a priority-ordered list of tasks and runs them one at a time.
|
// taskQueue manages a priority-ordered list of tasks and runs them one at a time.
|
||||||
@@ -65,17 +102,46 @@ type taskQueue struct {
|
|||||||
tasks []*Task
|
tasks []*Task
|
||||||
trigger chan struct{}
|
trigger chan struct{}
|
||||||
opts *HandlerOptions // set by startWorker
|
opts *HandlerOptions // set by startWorker
|
||||||
|
statePath string
|
||||||
|
logsDir string
|
||||||
|
started bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
|
var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
|
||||||
|
|
||||||
const maxTaskHistory = 50
|
const maxTaskHistory = 50
|
||||||
|
|
||||||
|
var (
|
||||||
|
runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunStorageAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runCPUAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunCPUAcceptancePackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runMemoryStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunMemoryStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
// enqueue adds a task to the queue and notifies the worker.
|
// enqueue adds a task to the queue and notifies the worker.
|
||||||
func (q *taskQueue) enqueue(t *Task) {
|
func (q *taskQueue) enqueue(t *Task) {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
|
q.assignTaskLogPathLocked(t)
|
||||||
q.tasks = append(q.tasks, t)
|
q.tasks = append(q.tasks, t)
|
||||||
q.prune()
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
select {
|
select {
|
||||||
case q.trigger <- struct{}{}:
|
case q.trigger <- struct{}{}:
|
||||||
@@ -137,6 +203,20 @@ func (q *taskQueue) findJob(id string) (*jobState, bool) {
|
|||||||
return t.job, true
|
return t.job, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) hasActiveTarget(target string) bool {
|
||||||
|
q.mu.Lock()
|
||||||
|
defer q.mu.Unlock()
|
||||||
|
for _, t := range q.tasks {
|
||||||
|
if t.Target != target {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if t.Status == TaskPending || t.Status == TaskRunning {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc).
|
// snapshot returns a copy of all tasks sorted for display (running first, then pending by priority, then done by doneAt desc).
|
||||||
func (q *taskQueue) snapshot() []Task {
|
func (q *taskQueue) snapshot() []Task {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
@@ -172,13 +252,30 @@ func statusOrder(s string) int {
|
|||||||
|
|
||||||
// startWorker launches the queue runner goroutine.
|
// startWorker launches the queue runner goroutine.
|
||||||
func (q *taskQueue) startWorker(opts *HandlerOptions) {
|
func (q *taskQueue) startWorker(opts *HandlerOptions) {
|
||||||
|
q.mu.Lock()
|
||||||
q.opts = opts
|
q.opts = opts
|
||||||
|
q.statePath = filepath.Join(opts.ExportDir, "tasks-state.json")
|
||||||
|
q.logsDir = filepath.Join(opts.ExportDir, "tasks")
|
||||||
|
_ = os.MkdirAll(q.logsDir, 0755)
|
||||||
|
if !q.started {
|
||||||
|
q.loadLocked()
|
||||||
|
q.started = true
|
||||||
go q.worker()
|
go q.worker()
|
||||||
}
|
}
|
||||||
|
hasPending := q.nextPending() != nil
|
||||||
|
q.mu.Unlock()
|
||||||
|
if hasPending {
|
||||||
|
select {
|
||||||
|
case q.trigger <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (q *taskQueue) worker() {
|
func (q *taskQueue) worker() {
|
||||||
for {
|
for {
|
||||||
<-q.trigger
|
<-q.trigger
|
||||||
|
setCPUGovernor("performance")
|
||||||
for {
|
for {
|
||||||
q.mu.Lock()
|
q.mu.Lock()
|
||||||
t := q.nextPending()
|
t := q.nextPending()
|
||||||
@@ -189,10 +286,13 @@ func (q *taskQueue) worker() {
|
|||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.Status = TaskRunning
|
t.Status = TaskRunning
|
||||||
t.StartedAt = &now
|
t.StartedAt = &now
|
||||||
j := &jobState{}
|
t.DoneAt = nil
|
||||||
|
t.ErrMsg = ""
|
||||||
|
j := newTaskJobState(t.LogPath)
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
j.cancel = cancel
|
j.cancel = cancel
|
||||||
t.job = j
|
t.job = j
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
|
|
||||||
q.runTask(t, j, ctx)
|
q.runTask(t, j, ctx)
|
||||||
@@ -209,8 +309,22 @@ func (q *taskQueue) worker() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
q.prune()
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
q.mu.Unlock()
|
q.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
setCPUGovernor("powersave")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files.
|
||||||
|
// Silently ignores errors (e.g. when cpufreq is not available).
|
||||||
|
func setCPUGovernor(governor string) {
|
||||||
|
matches, err := filepath.Glob("/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor")
|
||||||
|
if err != nil || len(matches) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, path := range matches {
|
||||||
|
_ = os.WriteFile(path, []byte(governor), 0644)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -224,6 +338,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
a := q.opts.App
|
a := q.opts.App
|
||||||
|
|
||||||
j.append(fmt.Sprintf("Starting %s...", t.Name))
|
j.append(fmt.Sprintf("Starting %s...", t.Name))
|
||||||
|
if len(j.lines) > 0 {
|
||||||
|
j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
|
||||||
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
archive string
|
archive string
|
||||||
@@ -232,9 +349,13 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
|
|
||||||
switch t.Target {
|
switch t.Target {
|
||||||
case "nvidia":
|
case "nvidia":
|
||||||
if len(t.params.GPUIndices) > 0 || t.params.DiagLevel > 0 {
|
diagLevel := t.params.DiagLevel
|
||||||
|
if t.params.BurnProfile != "" && diagLevel <= 0 {
|
||||||
|
diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
|
||||||
|
}
|
||||||
|
if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
|
||||||
result, e := a.RunNvidiaAcceptancePackWithOptions(
|
result, e := a.RunNvidiaAcceptancePackWithOptions(
|
||||||
ctx, "", t.params.DiagLevel, t.params.GPUIndices, j.append,
|
ctx, "", diagLevel, t.params.GPUIndices, j.append,
|
||||||
)
|
)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
err = e
|
err = e
|
||||||
@@ -245,23 +366,38 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
||||||
}
|
}
|
||||||
case "memory":
|
case "memory":
|
||||||
archive, err = a.RunMemoryAcceptancePack("", j.append)
|
archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "storage":
|
case "storage":
|
||||||
archive, err = a.RunStorageAcceptancePack("", j.append)
|
archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "cpu":
|
case "cpu":
|
||||||
dur := t.params.Duration
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
if dur <= 0 {
|
if dur <= 0 {
|
||||||
dur = 60
|
dur = 60
|
||||||
}
|
}
|
||||||
archive, err = a.RunCPUAcceptancePack("", dur, j.append)
|
archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
|
||||||
case "amd":
|
case "amd":
|
||||||
archive, err = a.RunAMDAcceptancePack("", j.append)
|
archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
|
||||||
case "amd-stress":
|
case "amd-stress":
|
||||||
archive, err = a.RunAMDStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "memory-stress":
|
case "memory-stress":
|
||||||
archive, err = a.RunMemoryStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "sat-stress":
|
case "sat-stress":
|
||||||
archive, err = a.RunSATStressPack("", j.append)
|
dur := t.params.Duration
|
||||||
|
if t.params.BurnProfile != "" && dur <= 0 {
|
||||||
|
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
|
||||||
|
}
|
||||||
|
archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
|
||||||
case "audit":
|
case "audit":
|
||||||
result, e := a.RunAuditNow(q.opts.RuntimeMode)
|
result, e := a.RunAuditNow(q.opts.RuntimeMode)
|
||||||
if e != nil {
|
if e != nil {
|
||||||
@@ -272,7 +408,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case "install-to-ram":
|
case "install-to-ram":
|
||||||
err = a.RunInstallToRAM(j.append)
|
err = a.RunInstallToRAM(ctx, j.append)
|
||||||
default:
|
default:
|
||||||
j.append("ERROR: unknown target: " + t.Target)
|
j.append("ERROR: unknown target: " + t.Target)
|
||||||
j.finish("unknown target")
|
j.finish("unknown target")
|
||||||
@@ -339,6 +475,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
|||||||
t.Status = TaskCancelled
|
t.Status = TaskCancelled
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.DoneAt = &now
|
t.DoneAt = &now
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||||
case TaskRunning:
|
case TaskRunning:
|
||||||
if t.job != nil {
|
if t.job != nil {
|
||||||
@@ -347,6 +484,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
|
|||||||
t.Status = TaskCancelled
|
t.Status = TaskCancelled
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
t.DoneAt = &now
|
t.DoneAt = &now
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]string{"status": "cancelled"})
|
writeJSON(w, map[string]string{"status": "cancelled"})
|
||||||
default:
|
default:
|
||||||
writeError(w, http.StatusConflict, "task is not running or pending")
|
writeError(w, http.StatusConflict, "task is not running or pending")
|
||||||
@@ -374,6 +512,7 @@ func (h *handler) handleAPITasksPriority(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
t.Priority += req.Delta
|
t.Priority += req.Delta
|
||||||
|
globalQueue.persistLocked()
|
||||||
writeJSON(w, map[string]int{"priority": t.Priority})
|
writeJSON(w, map[string]int{"priority": t.Priority})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,6 +535,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
|
|||||||
n++
|
n++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
globalQueue.persistLocked()
|
||||||
globalQueue.mu.Unlock()
|
globalQueue.mu.Unlock()
|
||||||
writeJSON(w, map[string]int{"cancelled": n})
|
writeJSON(w, map[string]int{"cancelled": n})
|
||||||
}
|
}
|
||||||
@@ -418,3 +558,79 @@ func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
streamJob(w, r, j)
|
streamJob(w, r, j)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
|
||||||
|
if t.LogPath != "" || q.logsDir == "" || t.ID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.LogPath = filepath.Join(q.logsDir, t.ID+".log")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) loadLocked() {
|
||||||
|
if q.statePath == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(q.statePath)
|
||||||
|
if err != nil || len(data) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var persisted []persistedTask
|
||||||
|
if err := json.Unmarshal(data, &persisted); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, pt := range persisted {
|
||||||
|
t := &Task{
|
||||||
|
ID: pt.ID,
|
||||||
|
Name: pt.Name,
|
||||||
|
Target: pt.Target,
|
||||||
|
Priority: pt.Priority,
|
||||||
|
Status: pt.Status,
|
||||||
|
CreatedAt: pt.CreatedAt,
|
||||||
|
StartedAt: pt.StartedAt,
|
||||||
|
DoneAt: pt.DoneAt,
|
||||||
|
ErrMsg: pt.ErrMsg,
|
||||||
|
LogPath: pt.LogPath,
|
||||||
|
params: pt.Params,
|
||||||
|
}
|
||||||
|
q.assignTaskLogPathLocked(t)
|
||||||
|
if t.Status == TaskPending || t.Status == TaskRunning {
|
||||||
|
t.Status = TaskPending
|
||||||
|
t.DoneAt = nil
|
||||||
|
t.ErrMsg = ""
|
||||||
|
}
|
||||||
|
q.tasks = append(q.tasks, t)
|
||||||
|
}
|
||||||
|
q.prune()
|
||||||
|
q.persistLocked()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (q *taskQueue) persistLocked() {
|
||||||
|
if q.statePath == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
state := make([]persistedTask, 0, len(q.tasks))
|
||||||
|
for _, t := range q.tasks {
|
||||||
|
state = append(state, persistedTask{
|
||||||
|
ID: t.ID,
|
||||||
|
Name: t.Name,
|
||||||
|
Target: t.Target,
|
||||||
|
Priority: t.Priority,
|
||||||
|
Status: t.Status,
|
||||||
|
CreatedAt: t.CreatedAt,
|
||||||
|
StartedAt: t.StartedAt,
|
||||||
|
DoneAt: t.DoneAt,
|
||||||
|
ErrMsg: t.ErrMsg,
|
||||||
|
LogPath: t.LogPath,
|
||||||
|
Params: t.params,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
data, err := json.MarshalIndent(state, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
tmp := q.statePath + ".tmp"
|
||||||
|
if err := os.WriteFile(tmp, data, 0644); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = os.Rename(tmp, q.statePath)
|
||||||
|
}
|
||||||
|
|||||||
156
audit/internal/webui/tasks_test.go
Normal file
156
audit/internal/webui/tasks_test.go
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
q := &taskQueue{
|
||||||
|
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||||
|
logsDir: filepath.Join(dir, "tasks"),
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now().Add(-time.Minute)
|
||||||
|
task := &Task{
|
||||||
|
ID: "task-1",
|
||||||
|
Name: "Memory Burn-in",
|
||||||
|
Target: "memory-stress",
|
||||||
|
Priority: 2,
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now().Add(-2 * time.Minute),
|
||||||
|
StartedAt: &started,
|
||||||
|
params: taskParams{
|
||||||
|
Duration: 300,
|
||||||
|
BurnProfile: "smoke",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
q.tasks = append(q.tasks, task)
|
||||||
|
q.assignTaskLogPathLocked(task)
|
||||||
|
q.persistLocked()
|
||||||
|
|
||||||
|
recovered := &taskQueue{
|
||||||
|
statePath: q.statePath,
|
||||||
|
logsDir: q.logsDir,
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
recovered.loadLocked()
|
||||||
|
|
||||||
|
if len(recovered.tasks) != 1 {
|
||||||
|
t.Fatalf("tasks=%d want 1", len(recovered.tasks))
|
||||||
|
}
|
||||||
|
got := recovered.tasks[0]
|
||||||
|
if got.Status != TaskPending {
|
||||||
|
t.Fatalf("status=%q want %q", got.Status, TaskPending)
|
||||||
|
}
|
||||||
|
if got.params.Duration != 300 || got.params.BurnProfile != "smoke" {
|
||||||
|
t.Fatalf("params=%+v", got.params)
|
||||||
|
}
|
||||||
|
if got.LogPath == "" {
|
||||||
|
t.Fatal("expected log path")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
path := filepath.Join(dir, "task.log")
|
||||||
|
if err := os.WriteFile(path, []byte("line1\nline2\n"), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
j := newTaskJobState(path)
|
||||||
|
existing, ch := j.subscribe()
|
||||||
|
if ch == nil {
|
||||||
|
t.Fatal("expected live subscription channel")
|
||||||
|
}
|
||||||
|
if len(existing) != 2 || existing[0] != "line1" || existing[1] != "line2" {
|
||||||
|
t.Fatalf("existing=%v", existing)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveBurnPreset(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
profile string
|
||||||
|
want burnPreset
|
||||||
|
}{
|
||||||
|
{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
||||||
|
{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
|
||||||
|
{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
|
||||||
|
{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := resolveBurnPreset(tc.profile); got != tc.want {
|
||||||
|
t.Fatalf("resolveBurnPreset(%q)=%+v want %+v", tc.profile, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunTaskHonorsCancel(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
blocked := make(chan struct{})
|
||||||
|
released := make(chan struct{})
|
||||||
|
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
close(blocked)
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
close(released)
|
||||||
|
return "", ctx.Err()
|
||||||
|
case <-time.After(5 * time.Second):
|
||||||
|
close(released)
|
||||||
|
return "unexpected", nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{App: &app.App{}},
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "cpu-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{Duration: 60},
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
j.cancel = cancel
|
||||||
|
tk.job = j
|
||||||
|
|
||||||
|
orig := runCPUAcceptancePackCtx
|
||||||
|
runCPUAcceptancePackCtx = func(_ *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
|
return aRun(nil, ctx, baseDir, durationSec, logFunc)
|
||||||
|
}
|
||||||
|
defer func() { runCPUAcceptancePackCtx = orig }()
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
q.runTask(tk, j, ctx)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
<-blocked
|
||||||
|
j.abort()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-released:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("task did not observe cancel")
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Fatal("runTask did not return after cancel")
|
||||||
|
}
|
||||||
|
}
|
||||||
21
bible-local/docs/iso-build-rules.md
Normal file
21
bible-local/docs/iso-build-rules.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# ISO Build Rules
|
||||||
|
|
||||||
|
## Verify package names before use
|
||||||
|
|
||||||
|
ISO builds take 30–60 minutes. A wrong package name wastes an entire build cycle.
|
||||||
|
|
||||||
|
**Rule: before adding any Debian package name to the ISO config, verify it exists and check its file list.**
|
||||||
|
|
||||||
|
Use one of:
|
||||||
|
- `https://packages.debian.org/bookworm/<package-name>` — existence + description
|
||||||
|
- `https://packages.debian.org/bookworm/amd64/<package-name>/filelist` — exact files installed
|
||||||
|
- `apt-cache show <package>` inside a Debian bookworm container
|
||||||
|
|
||||||
|
This applies to:
|
||||||
|
- `iso/builder/config/package-lists/*.list.chroot`
|
||||||
|
- Any package-provided file referenced in `grub.cfg`, hooks, or overlay scripts (e.g. file paths like `/boot/memtest86+x64.bin`)
|
||||||
|
|
||||||
|
## Example of what goes wrong without this
|
||||||
|
|
||||||
|
`memtest86+` in Debian bookworm installs `/boot/memtest86+x64.bin`, not `/boot/memtest86+.bin`.
|
||||||
|
Guessing the filename caused a broken GRUB entry that only surfaced at boot time, after a full rebuild.
|
||||||
35
bible-local/docs/validate-vs-burn.md
Normal file
35
bible-local/docs/validate-vs-burn.md
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Validate vs Burn: Hardware Impact Policy
|
||||||
|
|
||||||
|
## Validate Tests (non-destructive)
|
||||||
|
|
||||||
|
Tests on the **Validate** page are purely diagnostic. They:
|
||||||
|
|
||||||
|
- **Do not write to disks** — no data is written to storage devices, so the tests themselves do not advance SMART wear attributes (load cycle count, reallocated sectors); power-on hours still accrue with uptime, as they do on any powered-on drive.
|
||||||
|
- **Do not run sustained high load** — commands complete quickly (seconds to minutes) and do not push hardware to thermal or electrical limits.
|
||||||
|
- **Do not increment hardware wear counters** — GPU memory ECC counters, NVMe wear leveling counters, and similar endurance metrics are unaffected.
|
||||||
|
- **Are safe to run repeatedly** — on new, production-bound, or already-deployed hardware without concern for reducing lifespan.
|
||||||
|
|
||||||
|
### What Validate tests actually do
|
||||||
|
|
||||||
|
| Test | What it runs |
|
||||||
|
|---|---|
|
||||||
|
| NVIDIA GPU | `nvidia-smi`, `dcgmi diag` (levels 1–4 read-only diagnostics) |
|
||||||
|
| Memory | `memtester` on a limited allocation; reads/writes to RAM only |
|
||||||
|
| Storage | `smartctl -a`, `nvme smart-log` — reads SMART data only |
|
||||||
|
| CPU | `stress-ng` for a bounded duration; CPU-only, no I/O |
|
||||||
|
| AMD GPU | `rocm-smi --showallinfo`, `dmidecode` — read-only queries |
|
||||||
|
|
||||||
|
## Burn Tests (hardware wear)
|
||||||
|
|
||||||
|
Tests on the **Burn** page run hardware at maximum or near-maximum load for extended durations. They:
|
||||||
|
|
||||||
|
- **Wear storage**: write-intensive patterns can reduce SSD endurance (P/E cycles).
|
||||||
|
- **Stress GPU memory**: extended ECC stress tests may surface latent defects but also exercise memory cells.
|
||||||
|
- **Accelerate thermal cycling**: repeated heat/cool cycles degrade solder joints and capacitors over time.
|
||||||
|
- **May increment wear counters**: GPU power-on hours, NVMe media wear indicator, and similar metrics will advance.
|
||||||
|
|
||||||
|
### Rule
|
||||||
|
|
||||||
|
> Run **Validate** freely on any server, at any time, before or after deployment.
|
||||||
|
> Run **Burn** only when explicitly required (e.g., initial acceptance after repair, or per customer SLA).
|
||||||
|
> Document when and why Burn tests were run.
|
||||||
@@ -11,5 +11,12 @@ CUDA_USERSPACE_VERSION=13.0.96-1
|
|||||||
DCGM_VERSION=3.3.9
|
DCGM_VERSION=3.3.9
|
||||||
ROCM_VERSION=6.3.4
|
ROCM_VERSION=6.3.4
|
||||||
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
||||||
|
ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
|
||||||
|
ROCM_VALIDATION_SUITE_VERSION=1.1.0.60304-76~22.04
|
||||||
|
ROCBLAS_VERSION=4.3.0.60304-76~22.04
|
||||||
|
ROCRAND_VERSION=3.2.0.60304-76~22.04
|
||||||
|
HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04
|
||||||
|
HIPBLASLT_VERSION=0.10.0.60304-76~22.04
|
||||||
|
COMGR_VERSION=2.8.0.60304-76~22.04
|
||||||
GO_VERSION=1.24.0
|
GO_VERSION=1.24.0
|
||||||
AUDIT_VERSION=1.0.0
|
AUDIT_VERSION=1.0.0
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ lb config noauto \
|
|||||||
--memtest none \
|
--memtest none \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY-BEE" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE" \
|
||||||
--bootappend-live "boot=live components nomodeset video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
--chroot-squashfs-compression-type zstd \
|
--chroot-squashfs-compression-type zstd \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
@@ -10,28 +10,34 @@ echo " ╚══════╝╚═╝ ╚═╝╚══════╝
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
menuentry "EASY-BEE" {
|
menuentry "EASY-BEE" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (load to RAM)" {
|
menuentry "EASY-BEE (load to RAM)" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.nvidia.mode=normal
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (fail-safe)" {
|
menuentry "EASY-BEE (fail-safe)" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if [ "${grub_platform}" = "efi" ]; then
|
||||||
menuentry "Memory Test (memtest86+)" {
|
menuentry "Memory Test (memtest86+)" {
|
||||||
linux16 /boot/memtest86+.bin
|
chainloader /boot/memtest86+x64.efi
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
menuentry "Memory Test (memtest86+)" {
|
||||||
|
linux16 /boot/memtest86+x64.bin
|
||||||
|
}
|
||||||
|
fi
|
||||||
|
|
||||||
if [ "${grub_platform}" = "efi" ]; then
|
if [ "${grub_platform}" = "efi" ]; then
|
||||||
menuentry "UEFI Firmware Settings" {
|
menuentry "UEFI Firmware Settings" {
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ label live-@FLAVOUR@-gsp-off
|
|||||||
menu label EASY-BEE (^NVIDIA GSP=off)
|
menu label EASY-BEE (^NVIDIA GSP=off)
|
||||||
linux @LINUX@
|
linux @LINUX@
|
||||||
initrd @INITRD@
|
initrd @INITRD@
|
||||||
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off
|
append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
|
||||||
|
|
||||||
label live-@FLAVOUR@-failsafe
|
label live-@FLAVOUR@-failsafe
|
||||||
menu label EASY-BEE (^fail-safe)
|
menu label EASY-BEE (^fail-safe)
|
||||||
|
|||||||
@@ -7,15 +7,16 @@ echo "=== bee chroot setup ==="
|
|||||||
|
|
||||||
ensure_bee_console_user() {
|
ensure_bee_console_user() {
|
||||||
if id bee >/dev/null 2>&1; then
|
if id bee >/dev/null 2>&1; then
|
||||||
usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true
|
usermod -d /home/bee -s /bin/bash bee 2>/dev/null || true
|
||||||
else
|
else
|
||||||
useradd -d /home/bee -m -s /bin/sh -U bee
|
useradd -d /home/bee -m -s /bin/bash -U bee
|
||||||
fi
|
fi
|
||||||
|
|
||||||
mkdir -p /home/bee
|
mkdir -p /home/bee
|
||||||
chown -R bee:bee /home/bee
|
chown -R bee:bee /home/bee
|
||||||
echo "bee:eeb" | chpasswd
|
echo "bee:eeb" | chpasswd
|
||||||
usermod -aG sudo,video,input bee 2>/dev/null || true
|
groupadd -f ipmi 2>/dev/null || true
|
||||||
|
usermod -aG sudo,video,input,render,ipmi bee 2>/dev/null || true
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_bee_console_user
|
ensure_bee_console_user
|
||||||
@@ -46,11 +47,13 @@ chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
|||||||
# Reload udev rules
|
# Reload udev rules
|
||||||
udevadm control --reload-rules 2>/dev/null || true
|
udevadm control --reload-rules 2>/dev/null || true
|
||||||
|
|
||||||
# rocm-smi symlink (package installs to /opt/rocm-*/bin/rocm-smi)
|
# rocm symlinks (packages install to /opt/rocm-*/bin/)
|
||||||
if [ ! -e /usr/local/bin/rocm-smi ]; then
|
for tool in rocm-smi rocm-bandwidth-test rvs; do
|
||||||
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
if [ ! -e /usr/local/bin/${tool} ]; then
|
||||||
[ -n "${smi_path}" ] && ln -sf "${smi_path}" /usr/local/bin/rocm-smi
|
bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
|
||||||
|
[ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
|
||||||
fi
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
# Create export directory
|
# Create export directory
|
||||||
mkdir -p /appdata/bee/export
|
mkdir -p /appdata/bee/export
|
||||||
|
|||||||
13
iso/builder/config/hooks/normal/9100-memtest.hook.binary
Executable file
13
iso/builder/config/hooks/normal/9100-memtest.hook.binary
Executable file
@@ -0,0 +1,13 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# Copy memtest86+ binaries from chroot /boot into the ISO boot directory
|
||||||
|
# so GRUB can chainload them directly (they must be on the ISO filesystem,
|
||||||
|
# not inside the squashfs).
|
||||||
|
set -e
|
||||||
|
|
||||||
|
for f in memtest86+x64.bin memtest86+x64.efi memtest86+ia32.bin memtest86+ia32.efi; do
|
||||||
|
src="chroot/boot/${f}"
|
||||||
|
if [ -f "${src}" ]; then
|
||||||
|
cp "${src}" "binary/boot/${f}"
|
||||||
|
echo "memtest: copied ${f} to binary/boot/"
|
||||||
|
fi
|
||||||
|
done
|
||||||
@@ -75,8 +75,15 @@ firmware-qlogic
|
|||||||
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
|
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
|
||||||
datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
||||||
|
|
||||||
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
|
# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
|
||||||
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
||||||
|
rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
|
||||||
|
rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
|
||||||
|
rocblas=%%ROCBLAS_VERSION%%
|
||||||
|
rocrand=%%ROCRAND_VERSION%%
|
||||||
|
hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
|
||||||
|
hipblaslt=%%HIPBLASLT_VERSION%%
|
||||||
|
comgr=%%COMGR_VERSION%%
|
||||||
|
|
||||||
# glibc compat helpers (for any external binaries that need it)
|
# glibc compat helpers (for any external binaries that need it)
|
||||||
libc6
|
libc6
|
||||||
|
|||||||
3
iso/overlay/etc/modules-load.d/bee-ipmi.conf
Normal file
3
iso/overlay/etc/modules-load.d/bee-ipmi.conf
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# Load IPMI modules for fan/sensor/power monitoring via ipmitool
|
||||||
|
ipmi_si
|
||||||
|
ipmi_devintf
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bee: hardware audit web viewer
|
Description=Bee: hardware audit web viewer
|
||||||
After=bee-network.service bee-audit.service
|
After=bee-network.service
|
||||||
Wants=bee-audit.service
|
Wants=bee-audit.service
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
|
|||||||
2
iso/overlay/etc/udev/rules.d/99-ipmi.rules
Normal file
2
iso/overlay/etc/udev/rules.d/99-ipmi.rules
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Allow ipmi group to access IPMI device without root
|
||||||
|
KERNEL=="ipmi[0-9]*", GROUP="ipmi", MODE="0660"
|
||||||
@@ -2,22 +2,19 @@
|
|||||||
# openbox session: launch tint2 taskbar + chromium, then openbox as WM.
|
# openbox session: launch tint2 taskbar + chromium, then openbox as WM.
|
||||||
# This file is used as an xinitrc by bee-desktop.
|
# This file is used as an xinitrc by bee-desktop.
|
||||||
|
|
||||||
# Wait for bee-web to be accepting connections (up to 15 seconds)
|
|
||||||
i=0
|
|
||||||
while [ $i -lt 15 ]; do
|
|
||||||
if curl -sf http://localhost/healthz >/dev/null 2>&1; then
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
sleep 1
|
|
||||||
i=$((i+1))
|
|
||||||
done
|
|
||||||
|
|
||||||
# Disable screensaver and DPMS
|
# Disable screensaver and DPMS
|
||||||
xset s off
|
xset s off
|
||||||
xset -dpms
|
xset -dpms
|
||||||
xset s noblank
|
xset s noblank
|
||||||
|
|
||||||
tint2 &
|
tint2 &
|
||||||
|
# Wait for bee-web to bind (Go starts fast, usually <2s)
|
||||||
|
i=0
|
||||||
|
while [ $i -lt 30 ]; do
|
||||||
|
if curl -sf http://localhost/healthz >/dev/null 2>&1; then break; fi
|
||||||
|
sleep 1
|
||||||
|
i=$((i+1))
|
||||||
|
done
|
||||||
chromium \
|
chromium \
|
||||||
--disable-infobars \
|
--disable-infobars \
|
||||||
--disable-translate \
|
--disable-translate \
|
||||||
|
|||||||
Reference in New Issue
Block a user