Compare commits


181 Commits
v2.4 ... v7.0

Author SHA1 Message Date
8db40b098a Update bible submodule
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 07:14:31 +03:00
16e7ae00e7 Add HPL (LINPACK) benchmark as validate/stress task
HPL 2.3 from netlib compiled against OpenBLAS with a minimal
single-process MPI stub — no MPI package required in the ISO.
Matrix size is auto-sized to 80% of total RAM at runtime.

Build:
- VERSIONS: HPL_VERSION=2.3, HPL_SHA256=32c5c17d…
- build-hpl.sh: downloads HPL from netlib and OpenBLAS from the Debian 12 repo,
  compiles xhpl with a self-contained mpi_stub.c
- build.sh: step 80-hpl, injects xhpl + libopenblas into overlay

Runtime:
- bee-hpl: generates HPL.dat (N auto from /proc/meminfo, NB=256,
  P=1 Q=1), runs xhpl, prints standard WR... Gflops output
- platform/hpl.go: RunHPL(), parses WR line → GFlops + PASSED/FAILED
- tasks.go: target "hpl"
- pages.go: LINPACK (HPL) card in validate/stress grid (stress-only)
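A minimal sketch of the N auto-sizing rule described above (matrix of N×N doubles sized to 80% of RAM, rounded down to a multiple of NB); the function name is illustrative, not the actual bee-hpl code:

```go
package main

import (
	"fmt"
	"math"
)

// hplN picks an HPL problem size: the N x N matrix of float64s (8 bytes each)
// should occupy ~80% of total RAM, and N is rounded down to a multiple of the
// block size NB so every panel is full.
func hplN(totalRAMBytes int64, nb int) int {
	n := int(math.Sqrt(0.80 * float64(totalRAMBytes) / 8.0))
	return n - n%nb
}

func main() {
	// e.g. a 64 GiB machine with NB=256
	fmt.Println(hplN(64<<30, 256))
}
```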

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 07:08:18 +03:00
b2f8626fee Refactor validate modes, fix benchmark report and IPMI power
- Replace diag level 1-4 dropdown with Validate/Stress radio buttons
- Validate: dcgmi L2, 60s CPU, 256MB/1p memtester, SMART short
- Stress: dcgmi L3 + targeted_stress in Run All, 30min CPU, 1GB/3p memtester, SMART long/NVMe extended
- Parallel GPU mode: spawn single task for all GPUs instead of splitting per model
- Benchmark table: per-GPU columns for sequential runs, server-wide column for parallel
- Benchmark report converted to Markdown with server model, GPU model, version in header; only steady-state charts
- Fix IPMI power parsing in benchmark (was looking for 'Current Power', correct field is 'Instantaneous power reading')
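The corrected parsing can be sketched as follows (a hypothetical helper, not the actual benchmark code; it matches the field name the fix switches to):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseIPMIPower extracts the wattage from `ipmitool dcmi power reading`
// output. The fix above: match "Instantaneous power reading", which is the
// field ipmitool actually prints, not "Current Power".
func parseIPMIPower(out string) (int, bool) {
	for _, line := range strings.Split(out, "\n") {
		if !strings.Contains(line, "Instantaneous power reading") {
			continue
		}
		// line shape: "Instantaneous power reading:  <N> Watts"
		fields := strings.Fields(line)
		for i, f := range fields {
			if f == "reading:" && i+1 < len(fields) {
				w, err := strconv.Atoi(fields[i+1])
				return w, err == nil
			}
		}
	}
	return 0, false
}

func main() {
	out := "    Instantaneous power reading:                   742 Watts\n" +
		"    Minimum during sampling period:                 489 Watts\n"
	fmt.Println(parseIPMIPower(out))
}
```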

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:42:12 +03:00
dd26e03b2d Add multi-GPU selector option for system-level tests
Adds a "Multi-GPU tests — use all GPUs" checkbox to the NVIDIA GPU
selector (checked by default). When enabled, PSU Pulse, NCCL, and
NVBandwidth tests run on every GPU in the system regardless of the
per-GPU selection above — which is required for correct PSU stress
testing (synchronous pulses across all GPUs create worst-case
transients). When unchecked, only the manually selected GPUs are used.

The same logic applies both to Run All (expandSATTarget) and to the
individual Run button on each multi-GPU test card.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:25:12 +03:00
6937a4c6ec Fix pulse_test: run all GPUs simultaneously, not per-GPU
pulse_test is a PSU/power-delivery test, not a per-GPU compute test.
Its purpose is to synchronously pulse all GPUs between idle and full
load to create worst-case transient spikes on the power supply.
Running it one GPU at a time would produce a fraction of the PSU load
and miss any PSU-level failures.

- Move nvidia-pulse from nvidiaPerGPUTargets to nvidiaAllGPUTargets
  (same dispatch path as NCCL and NVBandwidth)
- Change card onclick to runNvidiaFabricValidate (all selected GPUs at once)
- Update card title to "NVIDIA PSU Pulse Test" and description to
  explain why synchronous multi-GPU execution is required

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:19:11 +03:00
b9be93c213 Move NCCL interconnect and NVBandwidth tests to validate/stress
nvidia-interconnect (NCCL all_reduce_perf) and nvidia-bandwidth
(NVBandwidth) verify fabric connectivity and bandwidth — they are
not sustained burn loads. Move both from the Burn section to the
Validate section under the stress-mode toggle, alongside the other
DCGM diagnostic tests moved in the previous commit.

- Add sat-card-nvidia-interconnect and sat-card-nvidia-bandwidth
  validate cards (stress-only, all selected GPUs at once)
- Add runNvidiaFabricValidate() for all-GPU-at-once dispatch
- Add nvidiaAllGPUTargets handling in expandSATTarget/runAllSAT
- Remove Interconnect / Bandwidth card from Burn section
- Remove nvidia-interconnect and nvidia-bandwidth from runAllBurnTasks
  and the gpu/tools availability map

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:16:42 +03:00
d1a22d782d Move power diag tests to validate/stress; fix GPU burn power saturation
- bee-gpu-stress.c: remove per-wave cuCtxSynchronize barrier in both
  cuBLASLt and PTX hot loops; sync at most once/sec so the GPU queue
  stays continuously full — eliminates the CPU↔GPU ping-pong that
  prevented reaching full TDP
- sat_fan_stress.go: default SizeMB 0 (auto = 95% VRAM) instead of
  hardcoded 64 MB; tiny matrices caused <0.1 ms kernels where CPU
  re-queue overhead dominated
- pages.go: move nvidia-targeted-power and nvidia-pulse from Burn →
  Validate stress section alongside nvidia-targeted-stress; these are
  DCGM pass/fail diagnostics, not sustained burn loads; remove the
  Power Delivery / Power Budget card from Burn entirely

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:13:52 +03:00
Mikhail Chusavitin
0a4bb596f6 Improve install-to-RAM verification for ISO boots 2026-04-07 20:21:06 +03:00
Mikhail Chusavitin
531d1ca366 Add NVIDIA self-heal tools and per-GPU SAT status 2026-04-07 20:20:05 +03:00
Mikhail Chusavitin
93cfa78e8c Benchmark: parallel GPU mode, resilient inventory query, server model in results
- Add parallel GPU mode (checkbox, off by default): runs all selected GPUs
  simultaneously via a single bee-gpu-burn invocation instead of sequentially;
  per-GPU telemetry, throttle counters, TOPS, and scoring are preserved
- Make queryBenchmarkGPUInfo resilient: falls back to a base field set when
  extended fields (attribute.multiprocessor_count, power.default_limit) cause
  exit status 2, preventing lgc normalization from being silently skipped
- Log explicit "graphics clock lock skipped" note when inventory is unavailable
- Collect server model from DMI (/sys/class/dmi/id/product_name) and store in
  result JSON; benchmark history columns now show "Server Model (N× GPU Model)"
  grouped by server+GPU type rather than individual GPU index
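A sketch of the DMI read and the new history column label (helper names are illustrative, not the actual code):

```go
package main

import (
	"fmt"
	"os"
	"strings"
)

// serverModel reads the DMI product name from sysfs; returns "unknown"
// when the node is unavailable (e.g. inside a container).
func serverModel() string {
	b, err := os.ReadFile("/sys/class/dmi/id/product_name")
	if err != nil {
		return "unknown"
	}
	return strings.TrimSpace(string(b))
}

// historyColumn builds the benchmark-history header described above:
// "Server Model (N× GPU Model)", one column per server+GPU group.
func historyColumn(server, gpu string, n int) string {
	return fmt.Sprintf("%s (%d× %s)", server, n, gpu)
}

func main() {
	fmt.Println(historyColumn(serverModel(), "NVIDIA H100 80GB HBM3", 8))
}
```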

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-07 18:32:15 +03:00
Mikhail Chusavitin
1358485f2b fix logo wallpaper 2026-04-07 10:15:38 +03:00
8fe20ba678 Fix benchmark scoring: PowerSustain uses default power limit
PowerSustainScore now uses DefaultPowerLimitW as reference so a
manually reduced power limit does not inflate the score. Falls back
to enforced limit if default is unavailable.
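The reference-selection logic can be sketched as (hypothetical function names; the commit only specifies the preference order):

```go
package main

import "fmt"

// powerSustainRef picks the reference wattage for PowerSustainScore: prefer
// the factory default power limit so a manually lowered limit cannot inflate
// the score; fall back to the enforced limit when the default is unknown.
func powerSustainRef(defaultLimitW, enforcedLimitW float64) float64 {
	if defaultLimitW > 0 {
		return defaultLimitW
	}
	return enforcedLimitW
}

// powerSustainScore: average steady-state draw as a fraction of the reference.
func powerSustainScore(avgPowerW, defaultLimitW, enforcedLimitW float64) float64 {
	return avgPowerW / powerSustainRef(defaultLimitW, enforcedLimitW)
}

func main() {
	// GPU capped at 400 W but with a 700 W default TDP: the score is judged
	// against 700 W, so the cap no longer inflates it
	fmt.Printf("%.2f\n", powerSustainScore(390, 700, 400))
}
```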

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 22:30:59 +03:00
d973231f37 Enhance benchmark: server power via IPMI, efficiency metrics, FP64, power limit check
- Sample server power (IPMI dcmi) during baseline+steady phases in parallel;
  compute delta vs GPU-reported sum; flag ratio < 0.75 as unreliable reporting
- Collect base_graphics_clock_mhz, multiprocessor_count, default_power_limit_w
  from nvidia-smi alongside existing GPU info
- Add tops_per_sm_per_ghz efficiency metric (model-agnostic silicon quality signal)
- Flag when enforced power limit is below default TDP by >5%
- Add fp64 profile to bee-gpu-burn worker (CUDA_R_64F, CUBLAS_COMPUTE_64F, min cc 8.0)
- Improve Executive Summary: overall pass count, FAILED GPU finding
- Throttle counters now shown as % of steady window instead of raw microseconds
- bible-local: clock calibration research, H100/H200 spec, real-world GEMM baselines
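The efficiency metric reduces to a simple normalization; a sketch with illustrative numbers (not measured values):

```go
package main

import "fmt"

// topsPerSMPerGHz normalizes measured throughput by SM count and sustained
// clock: two healthy GPUs of the same architecture should score alike even
// when running at different clocks or with different SM counts, which is
// what makes it a model-agnostic silicon quality signal.
func topsPerSMPerGHz(tops float64, smCount int, clockMHz float64) float64 {
	return tops / float64(smCount) / (clockMHz / 1000.0)
}

func main() {
	// illustrative: 990 TOPS on 132 SMs sustained at 1650 MHz
	fmt.Printf("%.3f\n", topsPerSMPerGHz(990, 132, 1650))
}
```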

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 22:26:52 +03:00
f5d175f488 Fix toram: patch live-boot to not use O_DIRECT when replacing loop to tmpfs
losetup --replace --direct-io=on fails with EINVAL when the target file
is on tmpfs (/dev/shm), because tmpfs does not support O_DIRECT.
Strip the --direct-io flag from the replace call and downgrade the
verification failure to a warning so boot continues.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 21:06:21 +03:00
fa00667750 Refactor NVIDIA GPU Selection into standalone card on validate page
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 21:06:16 +03:00
Mikhail Chusavitin
c7d2816a7f Limit NVIDIA legacy boot hooks to proprietary ISO 2026-04-06 16:33:16 +03:00
Mikhail Chusavitin
d2eadedff2 Default NVIDIA ISO to open modules and add nvidia-legacy 2026-04-06 16:27:13 +03:00
Mikhail Chusavitin
a98c4d7461 Include terminal charts in benchmark report 2026-04-06 12:34:57 +03:00
Mikhail Chusavitin
2354ae367d Normalize task IDs and artifact folder prefixes 2026-04-06 12:26:47 +03:00
Mikhail Chusavitin
0d0e1f55a7 Avoid misleading SAT summaries after task cancellation 2026-04-06 12:24:19 +03:00
Mikhail Chusavitin
35f4c53887 Stabilize NVIDIA GPU device mapping across loaders 2026-04-06 12:22:04 +03:00
Mikhail Chusavitin
981315e6fd Split NVIDIA tasks by homogeneous GPU groups 2026-04-06 11:58:13 +03:00
Mikhail Chusavitin
fc5c100a29 Fix NVIDIA persistence mode and add benchmark results table 2026-04-06 10:47:07 +03:00
6e94216f3b Hide task charts while pending 2026-04-05 22:34:34 +03:00
53455063b9 Stabilize live task detail page 2026-04-05 22:14:52 +03:00
4602f97836 Enforce sequential task orchestration 2026-04-05 22:10:42 +03:00
c65d3ae3b1 Add nomodeset to default GRUB entry — fix black screen on headless servers
Servers with NVIDIA compute GPUs (H100 etc.) have no display output,
so KMS blanks the console. nomodeset disables kernel modesetting and
lets the NVIDIA proprietary driver handle display via Xorg.

KMS variant moved to advanced submenu for cases where it is needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 21:40:47 +03:00
7a21c370e4 Handle NVIDIA GSP firmware init hang with timeout fallback
- bee-nvidia-load: run insmod in background, poll /proc/devices for
  nvidiactl; if GSP init doesn't complete in 90s, kill insmod and retry
  with NVreg_EnableGpuFirmware=0. Handles EBUSY case with clear error.
- Write /run/bee-nvidia-mode (gsp-on/gsp-off/gsp-stuck) for audit layer
- Show GSP mode badge in sidebar: yellow for gsp-off, red for gsp-stuck
- Report NvidiaGSPMode in RuntimeHealth with issue entries
- Simplify GRUB menu: default (KMS+GSP), advanced submenu (GSP=off,
  nomodeset, fail-safe), remove load-to-RAM entry
- Add pcmanfm, ristretto, mupdf, mousepad to desktop packages

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 21:00:43 +03:00
a493e3ab5b Fix service control buttons: sudo, real error output, UX feedback
- services.go: use sudo systemctl so bee user can control system services
- api.go: always return 200 with output field even on error, so the
  frontend shows the actual systemctl message instead of "exit status 1"
- pages.go: button shows "..." while pending then restores label;
  output panel is full-width under the table with ✓/✗ status indicator;
  output auto-scrolls to bottom

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:25:41 +03:00
19b4803ec7 Pass exact cycle duration to GPU stress instead of 86400s sentinel
bee-gpu-burn now receives --seconds <LoadSec> so it exits naturally
when the cycle ends, rather than relying solely on context cancellation
to kill it. Process group kill (Setpgid+Cancel) is kept as a safety net
for early cancellation (user stop, context timeout). The same fix applies
to AMD RVS, which now gets duration_ms = LoadSec * 1000.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:22:43 +03:00
1bdfb1e9ca Fix nvidia-targeted-stress failing with DCGM_ST_IN_USE (-34)
nvvs (DCGM validation suite) survives when dcgmi is killed mid-run,
leaving the GPU occupied. The next dcgmi diag invocation then fails
with "affected resource is in use".

Two-part fix:
- Add nvvs and dcgmi to KillTestWorkers patterns so they are cleaned
  up by the global cancel handler
- Call KillTestWorkers at the start of RunNvidiaTargetedStressValidatePack
  to clear any stale processes before dcgmi diag runs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:21:36 +03:00
c5d6b30177 Fix platform thermal cycling leaving GPU load running after test ends
bee-gpu-burn is a shell script that spawns bee-gpu-burn-worker children.
exec.CommandContext default cancel only kills the shell parent; the worker
processes survive and keep loading the GPU indefinitely.

Fix: set Setpgid=true and a custom Cancel that sends SIGKILL to the
entire process group (-pid), same pattern already used in runSATCommandCtx.
Applied to Nvidia, AMD, and CPU stress commands for consistency.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 20:19:20 +03:00
5b9015451e Add live task charts and fix USB export actions 2026-04-05 20:14:23 +03:00
d1a6863ceb Use amber fallback wallpaper color (#f6c90e) instead of black
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:30:41 +03:00
f9aa05de8e Add wallpaper: black background with amber EASY-BEE ASCII art logo
- Add feh and python3-pil to package list
- Add chroot hook that generates /usr/share/bee/wallpaper.png using PIL:
  black background, EASY-BEE box-drawing logo in amber (#f6c90e),
  "Hardware Audit LiveCD" subtitle in dim amber — matches motd exactly
- bee-openbox-session: set wallpaper with feh --bg-fill, fall back to
  xsetroot -solid black if wallpaper not found

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:29:42 +03:00
a9ccea8cca Fix black desktop and Chromium blank page on startup
- Set xsetroot solid background (#12100a, dark amber) so openbox
  doesn't show bare black before Chromium opens
- Re-add healthz wait loop before launching Chromium: without it
  Chromium opens localhost/loading before bee-web is up and gets
  connection-refused which renders as a blank white page

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:25:32 +03:00
fc5c985fb5 Reset tty1 properly when bee-boot-status exits
Add TTYReset=yes and TTYVHangup=yes so systemd restores the terminal
to a clean state before handing tty1 to getty. Without this the screen
went black with no cursor after the status display finished.

Also remove DefaultDependencies=no which was too aggressive.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:22:01 +03:00
5eb3baddb4 Fix bee-boot-status blank screen caused by variable buffering
Command substitution in sh strips trailing newlines, so accumulating
output in a variable via $(...) lost all line breaks. Reverted to
direct printf calls, which work correctly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:21:10 +03:00
a6ac13b5d3 Improve bee-boot-status: slower refresh, more detail
- Refresh every 3s instead of 1s to reduce flicker
- Show ssh, bee-sshsetup in service list
- Show failure reason for failed services
- Show last journal line for activating services
- Show IP addresses and web UI URL when network is up
- Render frame to variable before printing to reduce flicker

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:20:07 +03:00
4003cb7676 Lower kernel console loglevel to 3 to reduce boot noise
loglevel=6 floods the screen with mpt3sas/scsi/sd informational
messages, hiding systemd service status and bee-boot-status display.
loglevel=3 shows only kernel errors; all messages still go to serial.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:19:09 +03:00
2875313ba0 Improve boot UX: status display, faster GUI, loading spinner
- Add bee-boot-status service: shows live service status on tty1 with
  ASCII logo before getty, exits when all bee services settle
- Remove lightdm dependency on bee-preflight so GUI starts immediately
  without waiting for NVIDIA driver load
- Replace the Chromium blank page with a /loading spinner page that
  polls /api/services and auto-redirects when services are ready; add
  "Open app now" override button; use fresh --user-data-dir=/tmp/bee-chrome
- Unify branding: add "Hardware Audit LiveCD" subtitle to GRUB menu,
  bee-boot-status (with yellow ASCII logo), and web spinner

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 18:58:24 +03:00
f1621efee4 Mirror task lifecycle to serial console 2026-04-05 18:34:06 +03:00
4461249cc3 Make memory stress size follow available RAM 2026-04-05 18:33:26 +03:00
e609fbbc26 Add task reports and streamline GPU charts 2026-04-05 18:13:58 +03:00
cc2b49ea41 Improve validate GPU runs and web UI feedback 2026-04-05 17:50:13 +03:00
33e0a5bef2 Refine validate UI and runtime health table 2026-04-05 16:24:45 +03:00
38e79143eb Refine burn UI and NVIDIA stress flows 2026-04-05 13:43:43 +03:00
25af2df23a Unify metrics charts on custom SVG renderer 2026-04-05 12:17:50 +03:00
20abff7f90 WIP: checkpoint current tree 2026-04-05 12:05:00 +03:00
a14ec8631c Persist GPU chart mode and expand GPU charts 2026-04-05 11:52:32 +03:00
f58c7e58d3 Fix webui streaming recovery regressions 2026-04-05 10:39:09 +03:00
bf47c8dbd2 Add NVIDIA benchmark reporting flow 2026-04-05 10:30:56 +03:00
143b7dca5d Add stability hardening and self-heal recovery 2026-04-05 10:29:37 +03:00
9826d437a5 Add GPU clock charts and grouped GPU metrics view 2026-04-05 09:57:38 +03:00
Mikhail Chusavitin
f3c14cd893 Harden NIC probing for empty SFP ports 2026-04-04 15:23:15 +03:00
Mikhail Chusavitin
728270dc8e Unblock bee-web startup and expand support bundle diagnostics 2026-04-04 15:18:43 +03:00
Mikhail Chusavitin
8692f825bc Use plain repo tags for build version 2026-04-03 10:48:51 +03:00
Mikhail Chusavitin
11f52ac710 Fix task log modal scrolling 2026-04-03 10:36:11 +03:00
Mikhail Chusavitin
1cb398fe83 Show tag version at top of sidebar 2026-04-03 10:08:00 +03:00
Mikhail Chusavitin
7a843be6b0 Stabilize DCGM GPU discovery 2026-04-03 09:50:33 +03:00
Mikhail Chusavitin
7f6386dccc Restore USB support bundle export on tools page 2026-04-03 09:48:22 +03:00
Mikhail Chusavitin
eea2591bcc Fix John GPU stress duration semantics 2026-04-03 09:46:16 +03:00
Mikhail Chusavitin
295a19b93a feat(tasks): run all queued tasks in parallel
Tasks are now started simultaneously when multiple are enqueued (e.g.
Run All). The worker drains all pending tasks at once and launches each
in its own goroutine, waiting via WaitGroup. kmsg watcher updated to
use a shared event window with a reference counter across concurrent tasks.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 09:15:06 +03:00
Mikhail Chusavitin
444a7d16cc fix(iso): increase boot verbosity for service startup visibility
Raise loglevel from 3 to 6 (INFO) and add systemd.show_status=1 so
kernel driver messages and systemd [ OK ]/[ FAILED ] lines are visible
during boot instead of showing only a blank cursor.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 19:33:27 +03:00
Mikhail Chusavitin
fd722692a4 feat(watchdog): hardware error monitor + unified component status store
- Add platform/error_patterns.go: pluggable table of kernel log patterns
  (NVIDIA/GPU, PCIe AER, storage I/O, MCE, EDAC) — extend by adding one struct
- Add app/component_status_db.go: persistent JSON store (component-status.json)
  keyed by "pcie:BDF", "storage:dev", "cpu:all", "memory:all"; OK never
  downgrades Warning or Critical
- Add webui/kmsg_watcher.go: goroutine reads /dev/kmsg during SAT tasks,
  writes Warning to DB for matched hardware errors
- Fix task status: overall_status=FAILED in summary.txt now marks task failed
- Audit routine overlays component DB statuses into bee-audit.json on every read
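The "extend by adding one struct" design can be sketched like this (patterns, component keys, and names are illustrative, not the actual error_patterns.go):

```go
package main

import (
	"fmt"
	"regexp"
)

// errorPattern maps a kernel-log regexp to a component key in the status
// store; extending coverage means appending one entry to the table.
type errorPattern struct {
	Component string         // e.g. "pcie", "memory", "cpu"
	Re        *regexp.Regexp // matched against /dev/kmsg lines
}

var patterns = []errorPattern{
	{"pcie", regexp.MustCompile(`pcieport .*AER.*(Corrected|Uncorrected) error`)},
	{"memory", regexp.MustCompile(`EDAC .*(CE|UE) `)},
	{"cpu", regexp.MustCompile(`Machine Check Exception|mce: `)},
}

// classify returns the component of the first matching pattern, or "".
func classify(kmsgLine string) string {
	for _, p := range patterns {
		if p.Re.MatchString(kmsgLine) {
			return p.Component
		}
	}
	return ""
}

func main() {
	fmt.Println(classify("pcieport 0000:16:01.0: AER: Corrected error received"))
}
```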

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 19:20:59 +03:00
Mikhail Chusavitin
99cece524c feat(support-bundle): add PCIe link diagnostics and system logs
- Add full dmesg (was tail -200), kern.log, syslog
- Add /proc/cmdline, lspci -vvv, nvidia-smi -q
- Add per-GPU PCIe link speed/width from sysfs (NVIDIA devices only)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 15:42:28 +03:00
Mikhail Chusavitin
c27449c60e feat(webui): show current boot source 2026-04-02 15:36:32 +03:00
Mikhail Chusavitin
5ef879e307 feat(webui): add gpu driver restart action 2026-04-02 15:30:23 +03:00
Mikhail Chusavitin
e7df63bae1 fix(app): include extra system logs in support bundle 2026-04-02 13:44:58 +03:00
Mikhail Chusavitin
17ff3811f8 fix(webui): improve tasks logs and ordering 2026-04-02 13:43:59 +03:00
Mikhail Chusavitin
fc7fe0b08e fix(webui): build support bundle synchronously on download, bypass task queue
Support bundle is now built on-the-fly when the user clicks the button,
regardless of whether other tasks are running:

- GET /export/support.tar.gz builds the bundle synchronously and streams it
  directly to the client; the temp archive is removed after serving
- Remove POST /api/export/bundle and handleAPIExportBundle — the task-queue
  approach meant the bundle could only be downloaded after navigating away
  and back, and was blocked entirely while a long SAT test was running
- UI: single "Download Support Bundle" button; fetch+blob gives a loading
  state ("Building...") while the server collects logs, then triggers the
  browser download with the correct filename from Content-Disposition

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:58:00 +03:00
Mikhail Chusavitin
3cf75a541a build: collect ISO and logs under versioned dist/easy-bee-v{VERSION}/ dir
All final artefacts for a given version now land in one place:
  dist/easy-bee-v4.1/
    easy-bee-nvidia-v4.1-amd64.iso
    easy-bee-nvidia-v4.1-amd64.logs.tar.gz   ← log archive
                                               (logs dir deleted after archiving)

- Introduce OUT_DIR="${DIST_DIR}/easy-bee-v${ISO_VERSION_EFFECTIVE}"
- Move LOG_DIR, LOG_ARCHIVE, and ISO_OUT into OUT_DIR
- cleanup_build_log: use dirname(LOG_DIR) as tar -C base so the path is
  correct regardless of where OUT_DIR lives; delete LOG_DIR after archiving

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 10:19:11 +03:00
Mikhail Chusavitin
1f750d3edd fix(webui): prevent orphaned workers on restart, reduce metrics polling, add Kill Workers button
- tasks: mark TaskRunning tasks as TaskFailed on bee-web restart instead of
  re-queueing them — prevents duplicate gpu-burn-worker spawns when bee-web
  crashes mid-test (each restart was launching a new set of 8 workers on top
  of still-alive orphans from the previous crash)
- server: reduce metrics collector interval 1s→5s, grow ring buffer to 360
  samples (30 min); cuts nvidia-smi/ipmitool/sensors subprocess rate by 5×
- platform: add KillTestWorkers() — scans /proc and SIGKILLs bee-gpu-burn,
  stress-ng, stressapptest, memtester without relying on pkill/killall
- webui: add "Kill Workers" button next to Cancel All; calls
  POST /api/tasks/kill-workers which cancels the task queue then kills
  orphaned OS-level processes; shows toast with killed count
- metricsdb: sort GPU indices and fan/temp names after map iteration to fix
  non-deterministic sample reconstruction order (flaky test)
- server: fix chartYAxisNumber to use one decimal place for 1000–9999
  (e.g. "1,7к" instead of "2к") so Y-axis ticks are distinguishable
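A sketch of the /proc-scanning approach (illustrative, not the real platform.KillTestWorkers; comm names are truncated to 15 characters by the kernel, hence the prefix match):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"
)

// killTestWorkers walks /proc, matches each process's comm name against
// known stress-worker names, and SIGKILLs matches directly, with no
// dependency on pkill/killall being present in the image.
func killTestWorkers(names ...string) int {
	killed := 0
	procs, _ := filepath.Glob("/proc/[0-9]*")
	for _, p := range procs {
		pid, err := strconv.Atoi(filepath.Base(p))
		if err != nil {
			continue
		}
		comm, err := os.ReadFile(p + "/comm")
		if err != nil {
			continue // process already exited
		}
		name := strings.TrimSpace(string(comm))
		for _, want := range names {
			// comm is truncated to 15 chars, so compare by prefix
			if strings.HasPrefix(name, want) || strings.HasPrefix(want, name) && name != "" {
				if syscall.Kill(pid, syscall.SIGKILL) == nil {
					killed++
				}
				break
			}
		}
	}
	return killed
}

func main() {
	fmt.Println(killTestWorkers("bee-gpu-burn", "stress-ng", "stressapptest", "memtester"))
}
```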

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 10:13:43 +03:00
Mikhail Chusavitin
b2b0444131 audit: ignore virtual hdisk and coprocessor noise 2026-04-02 09:56:17 +03:00
dbab43db90 Fix full-history metrics range loading 2026-04-01 23:55:28 +03:00
bcb7fe5fe9 Render charts from full SQLite history 2026-04-01 23:52:54 +03:00
d21d9d191b fix(build): bump DCGM to 4.5.3-1 — core package updated in CUDA repo
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 23:49:57 +03:00
ef45246ea0 fix(sat): kill entire process group on task cancel
exec.CommandContext only kills the direct child (the shell script), leaving
grandchildren (john, gpu-burn, etc.) as orphans. Set Setpgid so each SAT
job runs in its own process group, then send SIGKILL to the whole group
(-pgid) in the Cancel hook.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 23:46:33 +03:00
348db35119 fix(stress): stagger john GPU launches to prevent GWS tuning contention
When 8 john processes start simultaneously they race for GPU memory during
OpenCL GWS auto-tuning. Slower devices settle on a smaller work size (~594MiB
vs 762MiB) and run at 40% instead of 100% load. Add 3s sleep between launches
so each instance finishes memory allocation before the next one starts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 23:44:00 +03:00
1dd7f243f5 Keep chart series colors stable 2026-04-01 23:37:57 +03:00
938e499ac2 Serve charts from SQLite history only 2026-04-01 23:33:13 +03:00
964ab39656 fix: run john stress in parallel per GPU, fix chromium fullscreen, filter BMC virtual disks
- bee-john-gpu-stress: spawn one john process per OpenCL device in parallel
  so all GPUs are stressed simultaneously instead of only device 1
- bee-openbox-session: --start-fullscreen → --start-maximized to fix blank
  white page on first render in fbdev environment
- storage collector: skip Virtual HDisk* devices reported by BMC/iDRAC

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 23:14:21 +03:00
c2aecc6ce9 Fix fan chart gaps and task durations 2026-04-01 22:36:11 +03:00
439b86ce59 Unify live metrics chart rendering 2026-04-01 22:19:33 +03:00
eb60100297 fix: pcie gen, nccl binary, netconf sudo, boot noise, firmware cleanup
- nvidia collector: read pcie.link.gen.current/max from nvidia-smi instead
  of sysfs to avoid false Gen1 readings when GPU is in ASPM idle state
- build: remove bee-nccl-gpu-stress from rm -f list so shell script from
  overlay is not silently dropped from the ISO
- smoketest: add explicit checks for bee-gpu-burn, bee-john-gpu-stress,
  bee-nccl-gpu-stress, all_reduce_perf
- netconf: re-exec via sudo when not root to fix RTNETLINK/resolv.conf errors
- auto/config: reduce loglevel 7→3 to show clean systemd output on boot
- auto/config: blacklist snd_hda_intel and related audio modules (unused on servers)
- package-lists: remove firmware-intel-sound and firmware-amd-graphics from
  base list; move firmware-amd-graphics to bee-amd variant only
- bible-local: mark memtest ADR resolved, document working solution

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 21:25:23 +03:00
Mikhail Chusavitin
2baf3be640 Handle memtest recovery probe under set -e 2026-04-01 17:42:13 +03:00
Mikhail Chusavitin
d92f8f41d0 Fix memtest ISO validation false negatives 2026-04-01 12:22:17 +03:00
Mikhail Chusavitin
76a9100779 fix(iso): rebuild image after memtest recovery 2026-04-01 10:01:14 +03:00
Mikhail Chusavitin
1b6d592bf3 feat(iso): add optional kms display boot path 2026-04-01 09:42:59 +03:00
Mikhail Chusavitin
c95bbff23b fix(metrics): stabilize cpu and power sampling 2026-04-01 09:40:42 +03:00
Mikhail Chusavitin
4e4debd4da refactor(webui): redesign Burn tab and fix gpu-burn memory defaults
- Burn tab: replace 6 flat cards with 3 grouped cards (GPU Stress,
  Compute Stress, Platform Thermal Cycling) + global Burn Profile
- Run All button at top enqueues all enabled tests across all cards
- GPU Stress: tool checkboxes enabled/disabled via new /api/gpu/tools
  endpoint based on driver status (/dev/nvidia0, /dev/kfd)
- Compute Stress: checkboxes for cpu/memory-stress/stressapptest
- Platform Thermal Cycling: component checkboxes (cpu/nvidia/amd)
  with platform_components param wired through to PlatformStressOptions
- bee-gpu-burn: default size-mb changed from 64 to 0 (auto); script
  now queries nvidia-smi memory.total per GPU and uses 95% of it
- platform_stress: removed hardcoded --size-mb 64; respects Components
  field to selectively run CPU and/or GPU load goroutines

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 09:39:07 +03:00
Mikhail Chusavitin
5839f870b7 fix(iso): include full nvidia opencl runtime 2026-04-01 09:16:06 +03:00
Mikhail Chusavitin
b447717a5a fix(iso): harden boot network bring-up - v3.20 2026-04-01 09:10:55 +03:00
Mikhail Chusavitin
f6f4923ac9 fix(iso): recover memtest after live-build 2026-04-01 08:55:57 +03:00
Mikhail Chusavitin
c394845b34 refactor(webui): queue install and bundle tasks - v3.18 2026-04-01 08:46:46 +03:00
Mikhail Chusavitin
3472afea32 fix(iso): make memtest non-blocking by default 2026-04-01 08:33:36 +03:00
Mikhail Chusavitin
942f11937f chore(submodule): update bible - v3.16 2026-04-01 08:23:39 +03:00
Mikhail Chusavitin
b5b34983f1 fix(webui): repair audit actions and CPU burn flow - v3.15 2026-04-01 08:19:11 +03:00
45221d1e9a fix(stress): label loaders and improve john opencl diagnostics 2026-04-01 07:31:52 +03:00
3869788bac fix(iso): validate memtest with xorriso fallback 2026-04-01 07:24:05 +03:00
3dbc2184ef fix(iso): archive build logs and memtest diagnostics 2026-04-01 07:14:53 +03:00
60cb8f889a fix(iso): restore memtest menu entries and validate ISO 2026-04-01 07:04:48 +03:00
c9ee078622 fix(stress): keep platform burn responsive under load 2026-03-31 22:28:26 +03:00
ea660500c9 chore: commit pending repo changes 2026-03-31 22:17:36 +03:00
d43a9aeec7 fix(iso): restore live-build memtest integration 2026-03-31 22:10:28 +03:00
Mikhail Chusavitin
f5622e351e Fix staged John cleanup for repeated ISO builds 2026-03-31 11:40:52 +03:00
Mikhail Chusavitin
a20806afc8 Fix ISO grub package conflict 2026-03-31 11:38:30 +03:00
Mikhail Chusavitin
4f9b6b3bcd Harden NVIDIA boot logging on live ISO 2026-03-31 11:37:21 +03:00
Mikhail Chusavitin
c850b39b01 feat: v3.10 GPU stress and NCCL burn updates 2026-03-31 11:22:27 +03:00
Mikhail Chusavitin
6dee8f3509 Add NVIDIA stress loader selection and DCGM 4 support 2026-03-31 11:15:15 +03:00
Mikhail Chusavitin
20f834aa96 feat: v3.4 — boot reliability, log readability, USB export, screen resolution, GRUB UEFI fix, memtest, KVM console stability
Web UI / logs:
- Strip ANSI escape codes and handle \r (progress bars) in task log output
- Add USB export API + UI card on Export page (list removable devices, write audit JSON or support bundle)
- Add Display Resolution card in Tools (xrandr-based, per-output mode selector)
- Dashboard: audit status banner with auto-reload when audit task completes

Boot & install:
- bee-web starts immediately with no dependencies (was blocked by audit + network)
- bee-audit.service redesigned: waits for bee-web healthz, sleeps 60s, enqueues audit via /api/audit/run (task system)
- bee-install: fix GRUB UEFI — grub-install exit code was silently ignored (|| true); add --no-nvram fallback; always copy EFI/BOOT/BOOTX64.EFI fallback path
- Add grub-efi-amd64, grub-pc, grub-efi-amd64-signed, shim-signed to package list (grub-install requires these, not just -bin variants)
- memtest hook: fix binary/boot/ not created before cp; handle both Debian (no extension) and upstream (x64.efi) naming
- bee-openbox-session: increase healthz wait from 30s to 120s

KVM console stability:
- runCmdJob: syscall.Setpriority(PRIO_PROCESS, pid, 10) on all stress subprocesses
- lightdm.service.d: Nice=-5 so X server preempts stress processes

Packages: add btop

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-31 10:16:15 +03:00
105d92df8b fix(iso): use underscore in volume label to comply with ISO 9660
ISO 9660 volume labels allow only A-Z, 0-9, and underscore.
Dashes cause xorriso WARNING on every build.
EASY-BEE-NVIDIA → EASY_BEE_NVIDIA (iso-application keeps dashes, it's UDF).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 23:38:02 +03:00
f96b149875 fix(memtest): extract EFI binary from .deb cache if chroot/boot/ is empty
memtest86+ postinst does not place files in /boot in a live-build chroot
without grub triggers. Added fallback: extract directly from the cached
.deb via dpkg-deb -x, with verbose logging throughout.

Also remove "NVIDIA no MSI-X" from boot menu (premature — root cause unknown).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 23:30:52 +03:00
5ee120158e fix(build): remove unused variant package lists before lb build
live-build picks up ALL .list.chroot files in config/package-lists/.
After rsync, bee-nvidia.list.chroot, bee-amd.list.chroot, and
bee-nogpu.list.chroot all end up in BUILD_WORK_DIR — causing lb to
try installing packages from every variant (and leaving version
placeholders unsubstituted in the unused lists).

Fix: after copying bee-${BEE_GPU_VENDOR}.list.chroot → bee-gpu.list.chroot,
delete all other bee-{nvidia,amd,nogpu}.list.chroot from BUILD_WORK_DIR.

Also includes nomsi boot mode changes (bee-nvidia-load + grub.cfg).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 23:03:42 +03:00
09fe0e2e9e feat(iso): add nogpu variant (no NVIDIA, no AMD/ROCm)
- build.sh: accept --variant nogpu; skips all GPU build steps, removes
  both nvidia-cuda and rocm archives, strips bee-nvidia-load and
  bee-nvidia.service from overlay
- build-in-container.sh: add nogpu to the --variant flag; the 'all' variant
  now includes nogpu; --clean-build wipes live-build-work-nogpu
- 9000-bee-setup hook: nogpu path enables no GPU services
- bee-nogpu.list.chroot: empty GPU package list

Output: easy-bee-nogpu-vX.iso

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 22:49:25 +03:00
ace1a9dba6 feat(iso): split into nvidia and amd variants, fix KVM graphics and PATH
- build.sh: add --variant nvidia|amd; separate work dirs per variant
  (live-build-work-nvidia / live-build-work-amd); GPU-specific steps
  (modules, NCCL, cuBLAS, nccl-tests) run only for nvidia; deb package
  cache synced back to shared location after each lb build so second
  variant reuses downloaded packages; ISO output named
  easy-bee-{variant}-v{ver}-amd64.iso
- build-in-container.sh: add --variant nvidia|amd|all (default: all);
  runs build.sh twice in one container for 'all'; --clean-build wipes
  both variant work dirs
- package-lists: remove GPU packages from bee.list.chroot; add
  bee-nvidia.list.chroot (DCGM) and bee-amd.list.chroot (ROCm)
- 9000-bee-setup hook: read /etc/bee-gpu-vendor; enable bee-nvidia.service
  and DCGM only for nvidia; set up ROCm symlinks only for amd
- auto/config: --iso-volume uses BEE_GPU_VENDOR_UPPER env var
- grub.cfg: add nomodeset to EASY-BEE and EASY-BEE (load to RAM) entries
  — fixes X/lightdm on BMC KVM (ASPEED AST chip requires nomodeset for
  fbdev to work; NVIDIA H100 compute does not need KMS)
- bee.sh / smoketest.sh: add /usr/sbin to PATH so dmidecode, smartctl,
  nvme are found
- 9100-memtest hook: add diagnostic listing of chroot/boot/memtest* files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 22:24:37 +03:00
905c581ece fix(iso): substitute all ROCm package version placeholders in build.sh
ROCM_BANDWIDTH_TEST_VERSION, ROCM_VALIDATION_SUITE_VERSION, ROCBLAS,
ROCRAND, HIP_RUNTIME_AMD, HIPBLASLT, COMGR were defined in VERSIONS and
in bee.list.chroot but the sed substitution block only covered 3 of them.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 22:00:05 +03:00
7c2a0135d2 feat(audit): add platform thermal cycling stress test
Runs CPU (stressapptest) + GPU stress simultaneously across multiple
load/idle cycles with varying idle durations (120s/60s/30s) to detect
cooling systems that fail to recover under repeated load.

Presets: smoke (~5 min), acceptance (~25 min), overnight (~100 min).
Outputs metrics.csv + summary.txt with per-cycle throttle and fan
spindown analysis, packed as tar.gz.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 21:57:33 +03:00
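The cycle schedule can be sketched like this. A minimal illustration only: the load durations below are invented for the example, while the shrinking 120s/60s/30s idle windows follow the commit message:

```go
package main

import (
	"fmt"
	"time"
)

type cycle struct {
	Load, Idle time.Duration
}

// cyclesFor builds a load/idle schedule with progressively shorter idle
// windows (120s/60s/30s), which exposes cooling that recovers once but
// not under repeated load. Load durations here are placeholders, not
// the real preset values.
func cyclesFor(preset string) []cycle {
	idles := []time.Duration{120 * time.Second, 60 * time.Second, 30 * time.Second}
	var load time.Duration
	switch preset {
	case "smoke":
		load = 60 * time.Second
	case "acceptance":
		load = 5 * time.Minute
	case "overnight":
		load = 30 * time.Minute
	default:
		return nil
	}
	out := make([]cycle, len(idles))
	for i, idle := range idles {
		out[i] = cycle{Load: load, Idle: idle}
	}
	return out
}

func main() {
	for _, c := range cyclesFor("smoke") {
		fmt.Println("load", c.Load, "idle", c.Idle)
	}
}
```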
407c1cd1c4 fix(charts): unify timeline labels across graphs 2026-03-29 21:24:06 +03:00
e15bcc91c5 feat(metrics): persist history in sqlite and add AMD memory validate tests 2026-03-29 12:28:06 +03:00
98f0cf0d52 fix(amd-stress): include VRAM load in GST burn 2026-03-29 12:03:50 +03:00
4db89e9773 fix(metrics): correct chart padding order — right=80 not top=80
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:38:45 +03:00
3fda18f708 feat(metrics): SQLite persistence + chart fixes (no dots, peak label, min/avg/max in title)
- Add modernc.org/sqlite dependency; write every sample to
  /appdata/bee/metrics.db (WAL mode, prune to 24h on startup)
- Pre-fill ring buffers from last 120 DB rows on startup so charts
  survive service restarts
- Ticker changed 3s→1s; chart JS refresh will be set to 2s (lag ≤3s)
- Add GET /api/metrics/export.csv for full history download
- Chart rendering: SymbolNone (no dots), right padding=80px so peak
  mark line label is not clipped, min/avg/max appended to chart title

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:37:59 +03:00
ea518abf30 feat(metrics): add global peak mark line to all live metric charts
Finds the series with the highest value across all datasets and adds
a SeriesMarkTypeMax dashed mark line to it. Since all series share the
same Y axis this effectively shows a single "global peak" line for the
whole chart with a label on the right.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:24:50 +03:00
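The "series with the highest value" selection is simple to sketch. Illustrative Go, with `peakSeries` as a hypothetical helper name:

```go
package main

import "fmt"

// peakSeries returns the index of the series containing the global
// maximum across all datasets, plus that maximum. The dashed mark line
// is then attached only to that one series; since all series share a
// Y axis, it reads as a single global-peak line.
func peakSeries(series [][]float64) (idx int, peak float64) {
	idx = -1
	for i, s := range series {
		for _, v := range s {
			if idx == -1 || v > peak {
				idx, peak = i, v
			}
		}
	}
	return idx, peak
}

func main() {
	gpus := [][]float64{
		{55, 61, 60}, // GPU 0 temps
		{58, 72, 64}, // GPU 1 temps, contains the global peak
	}
	i, p := peakSeries(gpus)
	fmt.Printf("peak %.0f on series %d\n", p, i) // peak 72 on series 1
}
```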
744de588bb fix(burn): resolve rvs binary via /opt/rocm-*/bin glob like rocm-smi; add terminal copy button
rvs was not in PATH so the stress job exited immediately (UNSUPPORTED).
Now resolveRVSCommand searches /opt/rocm-*/bin/rvs before failing.
Also add a Copy button overlay on all .terminal elements and set
user-select:text so logs can be copied from the web UI.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:20:46 +03:00
a3ed9473a3 fix(metrics): strip units from GPU legend names; fix fan SDR parsing for new IPMI format
Legend names were "GPU 0 %" — remove unit suffix since chart title already
conveys it. Fan parsing now handles the 5-field IPMI SDR format where the
value+unit ("4340 RPM") are combined in the last column rather than split
across separate fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:14:27 +03:00
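Parsing the combined value+unit column might look like the sketch below. The sample SDR line format is an assumption for illustration; the real parser lives in the metrics code:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseFanRPM handles the 5-field IPMI SDR format where value and unit
// are combined in the last column, e.g.:
//   FAN1 | 30h | ok | 29.1 | 4340 RPM
func parseFanRPM(line string) (name string, rpm int, ok bool) {
	fields := strings.Split(line, "|")
	if len(fields) < 5 {
		return "", 0, false
	}
	name = strings.TrimSpace(fields[0])
	last := strings.Fields(strings.TrimSpace(fields[4]))
	if len(last) != 2 || last[1] != "RPM" {
		return "", 0, false
	}
	v, err := strconv.Atoi(last[0])
	if err != nil {
		return "", 0, false
	}
	return name, v, true
}

func main() {
	name, rpm, ok := parseFanRPM("FAN1 | 30h | ok  | 29.1 | 4340 RPM")
	fmt.Println(name, rpm, ok) // FAN1 4340 true
}
```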
a714c45f10 fix(metrics): parse rocm-smi CSV by header keywords, not column position
MI250X outputs 7 temperature columns before power/use%; positional parsing
read junction temp (~40°C) as GPU utilisation. Switch to header-based
colIdx() lookup so the correct fields are read regardless of column order
or rocm-smi version.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:10:13 +03:00
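A header-keyword lookup of this kind can be sketched as below. The header string is illustrative only; actual rocm-smi CSV columns vary by version, which is exactly why positional parsing broke:

```go
package main

import (
	"fmt"
	"strings"
)

// colIdx returns the index of the first header column whose name
// contains keyword (case-insensitive), or -1 if absent. Looking
// columns up by keyword keeps parsing correct regardless of column
// order or rocm-smi version.
func colIdx(header []string, keyword string) int {
	kw := strings.ToLower(keyword)
	for i, h := range header {
		if strings.Contains(strings.ToLower(h), kw) {
			return i
		}
	}
	return -1
}

func main() {
	// Illustrative header; MI250X emits several temperature columns
	// before power and utilisation.
	header := strings.Split("device,Temperature (Sensor junction) (C),Temperature (Sensor edge) (C),Average Graphics Package Power (W),GPU use (%)", ",")
	fmt.Println(colIdx(header, "power"), colIdx(header, "use (%)")) // 3 4
}
```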
349e026cfa fix(webui): restore chart legend, remove GPU numeric table
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:04:51 +03:00
889fe1dc2f fix: IPMI access for bee user + remove chart legend
- Add udev rule: /dev/ipmi0 readable by 'ipmi' group (no sudo needed)
- Add 'ipmi' group creation and bee user membership in chroot hook
- Remove legend from all charts (data shown in GPU table below)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 11:03:35 +03:00
befdbf3768 fix(iso): autoload ipmi_si/ipmi_devintf for fan/sensor monitoring
Without these modules /dev/ipmi0 doesn't exist and ipmitool can't
read fan RPM, PSU fans, or IPMI temperature sensors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:59:15 +03:00
ec6a0b292d fix(webui): fix sensor grouping and fan card visibility
- Tccd1-8 (AMD CCD die temps) now classified as 'cpu' group,
  appear on CPU Temperature chart instead of ambient
- Fan RPM card hidden when no fans detected
- Remove CPU Load/Mem Load/Power from fan table (have dedicated charts)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:58:01 +03:00
a03312c286 feat: AMD GPU compute stress via rocm-validation-suite GST (GEMM)
- Add rocm-validation-suite, rocblas, rocrand, hip-runtime-amd,
  hipblaslt, comgr to ISO (~700MB, needed for HIP compute)
- RunAMDStressPack: run RVS GST (SGEMM ~31 TFLOPS/GPU) + bandwidth test
- Add rvs symlink in chroot setup hook
- Pin all new package versions in VERSIONS

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:56:32 +03:00
e69e9109da fix(iso): set bash as default shell for bee user
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:49:18 +03:00
413869809d feat(iso): add rocm-bandwidth-test for AMD GPU burn-in
- Add rocm-bandwidth-test package to ISO
- Add bee user to 'render' group (/dev/kfd, /dev/dri/renderD* access)
- Add rocm-bandwidth-test symlink alongside rocm-smi

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:48:29 +03:00
f9bd38572a fix(network): strip linkdown/dead/onlink flags when restoring routes
ip route show includes state flags like 'linkdown' that ip route add
does not accept, causing restore to fail.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:39:16 +03:00
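The flag stripping amounts to dropping a few tokens from each saved route line before replaying it. A minimal Go sketch of the idea (the real fix is in the network restore script):

```go
package main

import (
	"fmt"
	"strings"
)

// stripRouteFlags removes state flags that `ip route show` prints but
// `ip route add` rejects, so saved routes can be replayed verbatim.
func stripRouteFlags(route string) string {
	drop := map[string]bool{"linkdown": true, "dead": true, "onlink": true}
	var kept []string
	for _, tok := range strings.Fields(route) {
		if !drop[tok] {
			kept = append(kept, tok)
		}
	}
	return strings.Join(kept, " ")
}

func main() {
	r := "default via 192.168.1.1 dev eth0 linkdown"
	fmt.Println(stripRouteFlags(r)) // default via 192.168.1.1 dev eth0
}
```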
662e3d2cdd feat(webui): combined GPU charts (load/memload/power/temp all GPUs per chart)
Replace per-GPU cards with 4 combined charts showing all GPUs as
separate series. Add gpu-all-load/memload/power/temp endpoints.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:37:33 +03:00
126af96780 fix(webui): slow metrics chart refresh to 3s interval
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:32:35 +03:00
ada15ac777 fix: loading screen via Go handler instead of file:// HTML
- bee-web.service: remove After=bee-audit so Go starts immediately
- Go serves loading page from / when audit JSON not yet present;
  JS polls /api/ready (503 until file exists, 200 when ready)
  then redirects to dashboard
- bee-openbox-session: wait for /healthz (Go binds fast <2s),
  open http://localhost/ directly — no file:// cross-origin issues
- Remove loading.html static file

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 10:31:46 +03:00
dfb94f9ca6 feat(iso): loading screen while bee-web starts
Replace 15s blocking wait with instant Chromium launch showing a
dark loading page that polls /healthz every 500ms and auto-redirects
to the app when ready.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 09:33:04 +03:00
5857805518 fix(iso): copy memtest86+ to ISO root via binary hook
memtest files live in chroot /boot (inside squashfs) but GRUB needs
them on the ISO filesystem. Binary hook copies them out at build time.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 09:02:40 +03:00
59a1d4b209 release: v3.1 2026-03-28 22:51:36 +03:00
0dbfaf6121 feat: dynamic CPU governor (performance during tasks, powersave at idle)
Switch to performance governor when task queue starts processing,
back to powersave when queue drains. Removes bee-cpuperf.service.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:47:11 +03:00
5d72d48714 feat(iso): set CPU governor to performance on boot
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:45:37 +03:00
096b4a09ca feat(iso): add bare-metal performance kernel params
mitigations=off, transparent_hugepage=always, numa_balancing=disable,
nowatchdog, nosoftlockup — safe on single-user bare-metal LiveCD,
improves SAT/burn test throughput. fail-safe entry unchanged.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:44:21 +03:00
5d42a92e4c feat(iso): use legacy network names (eth0/eth1) via net.ifnames=0
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:43:00 +03:00
3e54763367 docs: add iso-build-rules (verify package names before use)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:38:54 +03:00
f91bce8661 fix(iso): fix memtest86+ path (bookworm uses memtest86+x64.bin/.efi)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:38:15 +03:00
585e6d7311 docs: add validate-vs-burn hardware impact policy
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:15:33 +03:00
0a98ed8ae9 feat: task queue, UI overhaul, burn tests, install-to-RAM
- Task queue: all SAT/audit jobs enqueue and run one-at-a-time;
  tasks persist past page navigation; new Tasks page with cancel/priority/log stream
- UI: consolidate nav (Validate, Burn, Tasks, Tools); Audit becomes modal;
  Dashboard hardware summary badges + split metrics charts (load/temp/power);
  Tools page consolidates network, services, install, support bundle
- AMD GPU: acceptance test and stress burn cards; GPU presence API greys
  out irrelevant SAT cards automatically
- Burn tests: Memory Stress (stress-ng --vm), SAT Stress (stressapptest)
- Install to RAM: copies squashfs to /dev/shm, re-associates loop devices
  via LOOP_CHANGE_FD ioctl so live media can be ejected
- Charts: relative time axis (0 = now, negative left)
- memtester: LimitMEMLOCK=infinity in bee-web.service; empty output → UNSUPPORTED
- SAT overlay applied dynamically on every /audit.json serve
- MIME panic guard for LiveCD ramdisk I/O errors
- ISO: add memtest86+, stressapptest packages; memtest86+ GRUB entry;
  disable screensaver/DPMS in bee-openbox-session
- Unknown SAT status severity = 1 (does not override OK)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 21:15:11 +03:00
911745e4da refactor(iso): replace chroot hooks for DCGM/ROCm with live-build apt sources
Move datacenter-gpu-manager and rocm-smi-lib from dynamic chroot hooks
into live-build's config/archives mechanism so lb caches the .deb files
in cache/packages.chroot/ between builds, eliminating repeated 900+ MB
downloads. Versions pinned via VERSIONS and substituted into package
lists at build time.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 13:01:10 +03:00
acfd2010d7 fix(iso): remove firmware-chelsio-t4 (not in Debian bookworm)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 12:43:29 +03:00
e904c13790 fix(iso): remove --no-sandbox from chromium (runs as bee user, not root)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 12:40:42 +03:00
24c5c72cee feat(iso): add NIC firmware packages for broad hardware support
Adds firmware-misc-nonfree (Intel ice/i40e/igc), firmware-bnx2/bnx2x
(Broadcom), firmware-cavium (Marvell/QLogic), firmware-qlogic,
firmware-chelsio-t4, firmware-realtek to fix missing network on
physical servers with modern NICs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 12:38:22 +03:00
6ff0bcad56 feat(iso): show kernel logs on graphical console (remove quiet, loglevel=7)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 11:23:57 +03:00
4fef26000c fix(iso): replace invalid --compression with --chroot-squashfs-compression-type
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:23:00 +03:00
a393dcb731 feat(webui): add POST /api/sat/abort + update bible-local runtime-flows
- jobState now has optional cancel func; abort() calls it if job is running
- handleAPISATRun passes cancellable context to RunNvidiaAcceptancePackWithOptions
- POST /api/sat/abort?job_id=... cancels the running SAT job
- bible-local/runtime-flows.md: replace TUI SAT flow with Web UI flow

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:23:00 +03:00
9e55728053 feat(iso): replace --clean-cache with --clean-build (cleans + rebuilds)
--clean-build clears all caches (Go, NVIDIA, lb packages, work dir)
and rebuilds the Docker image, then proceeds with a full clean build.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:12:21 +03:00
4b8023c1cb feat(iso): add --clean-cache option to build-in-container.sh
Removes all cached build artifacts: Go cache, NVIDIA/NCCL/cuBLAS
downloads, lb package cache, and live-build work dir. Use before
a clean rebuild or when switching Debian/kernel versions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:11:31 +03:00
4c8417d20a feat(webui): add Install to Disk page
Expose the existing bee-install script through the web UI:
- platform/install.go: remove USB exclusion, add SizeBytes/MountedParts
  fields, add MinInstallBytes()/DiskWarnings() safety checks (size,
  mounted partitions, toram+low-RAM warning)
- webui: add GET /api/install/disks, POST /api/install/run,
  GET /api/install/stream endpoints
- webui: add Install to Disk page with disk table, warning badges,
  device-name confirmation gate, SSE progress terminal, reboot button

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:11:16 +03:00
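The safety checks can be sketched like this. Field and function names follow the commit; the structure and the threshold value are assumptions for illustration, not the real platform/install.go code:

```go
package main

import "fmt"

type disk struct {
	Name         string
	SizeBytes    uint64
	MountedParts []string
}

// diskWarnings mirrors the DiskWarnings() idea: flag targets that are
// too small for the install and disks that still have mounted
// partitions. minInstallBytes is an invented threshold here.
func diskWarnings(d disk, minInstallBytes uint64) []string {
	var warns []string
	if d.SizeBytes < minInstallBytes {
		warns = append(warns, fmt.Sprintf("%s: smaller than required %d bytes", d.Name, minInstallBytes))
	}
	if len(d.MountedParts) > 0 {
		warns = append(warns, fmt.Sprintf("%s: has mounted partitions %v", d.Name, d.MountedParts))
	}
	return warns
}

func main() {
	d := disk{Name: "/dev/sdb", SizeBytes: 8 << 30, MountedParts: []string{"/dev/sdb1"}}
	for _, w := range diskWarnings(d, 16<<30) {
		fmt.Println(w)
	}
}
```

Warnings are advisory; the device-name confirmation gate in the UI remains the hard stop before writing.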
0755374dd2 perf(iso): speed up builds — zstd squashfs + preserve lb chroot cache
- Switch squashfs compression from xz to zstd (3-5x faster compression,
  ~10-15% larger but decompresses faster at boot)
- Stop rm -rf BUILD_WORK_DIR on each build; rsync only config changes
  so lb can reuse its chroot across builds (skips apt install step)
- Keep lb-packages cache in CACHE_ROOT as fallback if work dir is wiped

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:10:29 +03:00
c70ae274fa revert(iso): remove apt-cacher-ng support, use lb package cache instead
apt-cacher-ng requires a separate container; lb's own package cache
persisted in --cache-dir is simpler and sufficient.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 10:02:34 +03:00
23ad7ff534 feat(iso): persist lb package cache across builds in cache dir
Saves cache/packages.chroot before wiping BUILD_WORK_DIR and
restores it after, so apt packages are not re-downloaded on every
build. Cache lives in --cache-dir (same place as Go/NVIDIA cache).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 09:59:55 +03:00
de130966f7 feat(iso): add APT_PROXY support to speed up builds via apt-cacher-ng
Pass APT_PROXY=http://host:3142 to build-in-container.sh to route
all apt traffic through a local cache. Also supports --apt-proxy flag.
Mirrors in auto/config are set from BEE_APT_PROXY env when present.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 09:57:54 +03:00
c6fbfc8306 fix(boot): restore toram as menu option only, not default boot param
toram was incorrectly added to the default bootappend-live causing
every boot to copy the full ISO to RAM (slow on BMC virtual media).
Default boot reads squashfs from media; toram is available as a
separate menu entry.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 09:52:25 +03:00
35ad1c74d9 feat(iso): add slim hook to strip locales/man pages/apt cache from squashfs
Removes ~100-300MB from the squashfs: man pages, non-en locales,
python cache, apt lists and package cache, temp files and logs.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 08:44:02 +03:00
4a02e74b17 fix(iso): add git safe.directory so git describe sees v* tags inside container
Without this, git refuses to read the bind-mounted repo (UID mismatch)
and describe returns empty, causing the version to fall back to iso/v1.0.20.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 08:23:37 +03:00
cd2853ad99 fix(webui): fix viewer static path so Reanimator Chart CSS loads correctly
Mount chart submodule static assets at /static/ (matching the template's
hardcoded href), fix nav to include Audit Snapshot tab, remove dead
renderViewerPage code and iframe from Dashboard.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 08:19:17 +03:00
6caf771d6e fix(boot): restore toram kernel parameter
Without toram the squashfs is read from the physical medium at runtime.
Disconnecting the USB/CD after boot causes SQUASHFS I/O errors on any
uncached block, making all X11 apps crash with SIGBUS.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 08:04:37 +03:00
14fa87b7d7 feat(netconf): add input validation, 'b' to go back, 'a' to abort
- All prompts accept 'a' = abort, 'b' = back to previous step
- Interface input: validate numeric range and name existence, re-prompt on bad input
- IP address: regex check x.x.x.x/prefix format
- Gateway: regex check x.x.x.x format
- Main loop: 'b' at mode selection goes back to interface list

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 07:31:23 +03:00
600ece911b fix(desktop): remove forced 1920x1080 modeline, limit LightDM restarts
On real server hardware (IPMI/BMC AST chip + nomodeset) the VESA
framebuffer is set by BIOS at whatever resolution it chooses (often
1024x768 or 1280x1024). The hardcoded 1920x1080 Modeline caused X to
fail → LightDM crash-loop → SOL console flooded with systemd messages.

- Remove Monitor section / Modeline from xorg.conf — fbdev now uses
  whatever framebuffer resolution the kernel provides
- Add lightdm.service.d/bee-limits.conf: RestartSec=10,
  max 3 restarts per 60s so headless hardware doesn't spam the console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 07:30:51 +03:00
2d424c63cb fix(netconf): accept interface number as input, not just name
User sees a numbered list but could only type the name.
Now numeric input is resolved to the interface name via awk NR==N.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 07:27:49 +03:00
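The number-or-name resolution is a small lookup; here is a Go rendition of the shell's awk NR==N logic (illustrative, the actual script is shell):

```go
package main

import (
	"fmt"
	"strconv"
)

// resolveIface accepts either a 1-based list number or an interface
// name, matching what the user sees in the numbered interface list.
func resolveIface(input string, names []string) (string, bool) {
	if n, err := strconv.Atoi(input); err == nil {
		if n >= 1 && n <= len(names) {
			return names[n-1], true
		}
		return "", false // number out of range: re-prompt
	}
	for _, name := range names {
		if name == input {
			return name, true
		}
	}
	return "", false // unknown name: re-prompt
}

func main() {
	ifaces := []string{"eth0", "eth1"}
	name, ok := resolveIface("2", ifaces)
	fmt.Println(name, ok) // eth1 true
}
```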
50f28d1ee6 chore: drop legacy TUI/dead code
- Delete audit/internal/app/panel.go (388 lines, zero callers — TUI panel remnant)
- Delete RenderGPULiveChart() from platform/gpu_metrics.go (~155 lines, never called)
- Move formatSATDetail/cleanSummaryKey helpers to app.go (still used)
- Update motd: replace bee-tui with Web UI hint
- Update journald.conf.d comment: remove bee-tui reference

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 07:27:30 +03:00
3579747ae3 fix(iso): prioritise v[0-9]* tags over iso/v* for ISO filename
Plain v2.x tags are now the active tagging scheme; iso/v1.0.x tags
are legacy. Swap priority in resolve_iso_version so the ISO is named
bee-debian12-v2.x-amd64.iso instead of v1.0.x-N-gHASH.
Also tighten the v* pattern to v[0-9]* to avoid accidentally matching
other prefixed tags in both resolve functions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:34:09 +03:00
09dc7d2613 feat(webui): apply light theme from chart submodule CSS
Replace dark #0f1117 theme with clean white/Semantic-UI-inspired
design matching the updated internal/chart submodule: white surface,
dark sidebar (#1b1c1d), Lato font, blue accent (#2185d0), subtle
borders. Also update submodule pointer to latest commit.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:31:29 +03:00
ec0b7f7ff9 feat(metrics): single chart engine + full-width stacked layout
- One engine: go-analyze/charts (grafana theme) for all live metrics
- Server chart: CPU temp, CPU load%, mem load%, power W, fan RPMs
- GPU charts: temp, load%, mem%, power W — one card per GPU, added dynamically
- Charts 1400x280px SVG, rendered at width:100% in single-column layout
- Add CPU load (from /proc/stat) and mem load (from /proc/meminfo) to LiveMetricSample
- Add GPU mem utilization to GPUMetricRow (nvidia-smi utilization.memory)
- Document charting architecture in bible-local/architecture/charting.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:26:13 +03:00
e7a7ff54b9 chore: add Makefile with run/build/test targets
make run                          — starts web UI on :8080
make run LISTEN=:9090             — custom port
make run AUDIT_PATH=/tmp/bee.json — with audit data
make build / make test

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:14:53 +03:00
b4371e291e fix(build): resolve ISO version from plain v* tags (e.g. v2.6)
resolve_iso_version only matched iso/v* pattern; GUI release tags
(v2, v2.1 ... v2.6) were ignored, falling back to the old v1.0.20
annotated tag via resolve_audit_version.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:11:33 +03:00
c22b53a406 feat(boot): set 1920x1080 resolution for framebuffer and GRUB
- Add video=1920x1080 to kernel cmdline (sets fbdev to Full HD)
- Update GRUB gfxmode to 1920x1080 (fallback to 1280x1024,auto)
- Add Xorg Monitor section with 1920x1080 Modeline and preferred mode

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:10:18 +03:00
ff0acc3698 feat(webui): server-side SVG charts + reanimator-chart viewer
Metrics:
- Replace canvas JS charts with server-side SVG via go-analyze/charts
- Add ring buffers (120 samples) for CPU temp and power
- /api/metrics/chart/{name}.svg endpoint serves live SVG, polled every 2s

Dashboard:
- Replace custom renderViewerPage with viewer.RenderHTML() from reanimator/chart submodule
- Mount chart static assets at /chart/static/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 23:07:47 +03:00
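A fixed-capacity ring buffer like the 120-sample one above can be sketched in a few lines. Illustrative only; names are invented:

```go
package main

import "fmt"

// ring is a fixed-capacity sample buffer: pushing past capacity
// overwrites the oldest sample, so a chart always shows the last N
// points without unbounded memory growth.
type ring struct {
	buf  []float64
	next int
	full bool
}

func newRing(n int) *ring { return &ring{buf: make([]float64, n)} }

func (r *ring) Push(v float64) {
	r.buf[r.next] = v
	r.next = (r.next + 1) % len(r.buf)
	if r.next == 0 {
		r.full = true
	}
}

// Snapshot returns the samples oldest-first, ready to plot.
func (r *ring) Snapshot() []float64 {
	if !r.full {
		return append([]float64(nil), r.buf[:r.next]...)
	}
	return append(append([]float64(nil), r.buf[r.next:]...), r.buf[:r.next]...)
}

func main() {
	r := newRing(3)
	for _, v := range []float64{1, 2, 3, 4} {
		r.Push(v)
	}
	fmt.Println(r.Snapshot()) // [2 3 4]
}
```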
d50760e7c6 fix(webui): remove emojis from nav, fix metrics chart sizing
- Remove all emojis from sidebar nav and logo (broken on server console fonts)
- Fix canvas chart: use parentElement.getBoundingClientRect() for width,
  set explicit H=120px — fixes empty charts when offsetWidth/Height is 0

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 22:49:09 +03:00
ed4f8be019 fix(webui): services table — show state badge, full status on click
Replace raw systemctl output in table cell with:
- state badge (active/failed/inactive) — click to expand
- full systemctl status in collapsible pre block (max 200px scroll)
Fixes layout explosion from multi-line status text in table.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 22:47:59 +03:00
141 changed files with 23673 additions and 2065 deletions


@@ -343,9 +343,9 @@ Planned code shape:
 - `bee tui` can rerun the audit manually
 - `bee tui` can export the latest audit JSON to removable media
 - `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
-- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
+- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-burn`
 - SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
-- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
+- Memory SAT runtime defaults can be overridden via `BEE_MEMTESTER_*`
 - removable export requires explicit target selection, mount, confirmation, copy, and cleanup
 ### 2.6 — Vendor utilities and optional assets

audit/Makefile (new file, +22 lines)

@@ -0,0 +1,22 @@
LISTEN ?= :8080
AUDIT_PATH ?=
EXPORT_DIR ?= $(CURDIR)/.tmp/export
VERSION ?= $(shell sh ./scripts/resolve-version.sh)
GO_LDFLAGS := -X main.Version=$(VERSION)
RUN_ARGS := web --listen $(LISTEN) --export-dir $(EXPORT_DIR)
ifneq ($(AUDIT_PATH),)
RUN_ARGS += --audit-path $(AUDIT_PATH)
endif
.PHONY: run build test
run:
	mkdir -p $(EXPORT_DIR)
	go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
build:
	go build -ldflags "$(GO_LDFLAGS)" -o bee ./cmd/bee
test:
	go test ./...

audit/bee (new executable; binary file not shown)


@@ -1,11 +1,14 @@
package main package main
import ( import (
"context"
"flag" "flag"
"fmt" "fmt"
"io" "io"
"log/slog" "log/slog"
"os" "os"
"runtime/debug"
"strconv"
"strings" "strings"
"bee/audit/internal/app" "bee/audit/internal/app"
@@ -16,14 +19,31 @@ import (
var Version = "dev" var Version = "dev"
func buildLabel() string {
label := strings.TrimSpace(Version)
if label == "" {
return "dev"
}
return label
}
func main() { func main() {
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
} }
func run(args []string, stdout, stderr io.Writer) int { func run(args []string, stdout, stderr io.Writer) (exitCode int) {
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
Level: slog.LevelInfo, Level: slog.LevelInfo,
}))) })))
defer func() {
if rec := recover(); rec != nil {
slog.Error("fatal panic",
"panic", fmt.Sprint(rec),
"stack", string(debug.Stack()),
)
exitCode = 1
}
}()
if len(args) == 0 { if len(args) == 0 {
printRootUsage(stderr) printRootUsage(stderr)
@@ -49,6 +69,8 @@ func run(args []string, stdout, stderr io.Writer) int {
return runWeb(args[1:], stdout, stderr) return runWeb(args[1:], stdout, stderr)
case "sat": case "sat":
return runSAT(args[1:], stdout, stderr) return runSAT(args[1:], stdout, stderr)
case "benchmark":
return runBenchmark(args[1:], stdout, stderr)
case "version", "--version", "-version": case "version", "--version", "-version":
fmt.Fprintln(stdout, Version) fmt.Fprintln(stdout, Version)
return 0 return 0
@@ -65,8 +87,9 @@ func printRootUsage(w io.Writer) {
bee preflight --output stdout|file:<path> bee preflight --output stdout|file:<path>
bee export --target <device> bee export --target <device>
bee support-bundle --output stdout|file:<path> bee support-bundle --output stdout|file:<path>
bee web --listen :80 --audit-path `+app.DefaultAuditJSONPath+` bee web --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
bee sat nvidia|memory|storage|cpu [--duration <seconds>] bee sat nvidia|memory|storage|cpu [--duration <seconds>]
bee benchmark nvidia [--profile standard|stability|overnight]
bee version bee version
bee help [command]`) bee help [command]`)
} }
@@ -85,6 +108,8 @@ func runHelp(args []string, stdout, stderr io.Writer) int {
return runWeb([]string{"--help"}, stdout, stdout) return runWeb([]string{"--help"}, stdout, stdout)
case "sat": case "sat":
return runSAT([]string{"--help"}, stdout, stderr) return runSAT([]string{"--help"}, stdout, stderr)
case "benchmark":
return runBenchmark([]string{"--help"}, stdout, stderr)
case "version": case "version":
fmt.Fprintln(stdout, "usage: bee version") fmt.Fprintln(stdout, "usage: bee version")
return 0 return 0
@@ -139,7 +164,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
return 0 return 0
} }
func runExport(args []string, stdout, stderr io.Writer) int { func runExport(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("export", flag.ContinueOnError) fs := flag.NewFlagSet("export", flag.ContinueOnError)
fs.SetOutput(stderr) fs.SetOutput(stderr)
@@ -272,7 +296,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("web", flag.ContinueOnError) fs := flag.NewFlagSet("web", flag.ContinueOnError)
fs.SetOutput(stderr) fs.SetOutput(stderr)
listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80") listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
auditPath := fs.String("audit-path", app.DefaultAuditJSONPath, "path to the latest audit JSON snapshot") auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot")
exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles") exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
title := fs.String("title", "Bee Hardware Audit", "page title") title := fs.String("title", "Bee Hardware Audit", "page title")
fs.Usage = func() { fs.Usage = func() {
@@ -299,6 +323,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{ if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
Title: *title, Title: *title,
BuildLabel: buildLabel(),
AuditPath: *auditPath, AuditPath: *auditPath,
ExportDir: *exportDir, ExportDir: *exportDir,
App: app.New(platform.New()), App: app.New(platform.New()),
@@ -323,6 +348,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("sat", flag.ContinueOnError) fs := flag.NewFlagSet("sat", flag.ContinueOnError)
fs.SetOutput(stderr) fs.SetOutput(stderr)
duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)") duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
if err := fs.Parse(args[1:]); err != nil {
if err == flag.ErrHelp {
return 0
@@ -337,7 +363,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
target := args[0]
if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
-fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>]")
+fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]")
return 2
}
@@ -346,19 +372,25 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
archive string
err error
)
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
switch target {
case "nvidia":
-archive, err = application.RunNvidiaAcceptancePack("")
+level := *diagLevel
if level > 0 {
_, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine)
} else {
archive, err = application.RunNvidiaAcceptancePack("", logLine)
}
case "memory":
-archive, err = application.RunMemoryAcceptancePack("")
+archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", 256, 1, logLine)
case "storage":
-archive, err = application.RunStorageAcceptancePack("")
+archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", false, logLine)
case "cpu":
dur := *duration
if dur <= 0 {
dur = 60
}
-archive, err = application.RunCPUAcceptancePack("", dur)
+archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine)
}
if err != nil {
slog.Error("run sat", "target", target, "err", err)
@@ -367,3 +399,85 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
slog.Info("sat archive written", "target", target, "path", archive)
return 0
}
func runBenchmark(args []string, stdout, stderr io.Writer) int {
if len(args) == 0 {
fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
return 2
}
if args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
fmt.Fprintln(stdout, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
return 0
}
target := args[0]
if target != "nvidia" {
fmt.Fprintf(stderr, "bee benchmark: unknown target %q\n", target)
fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
return 2
}
fs := flag.NewFlagSet("benchmark", flag.ContinueOnError)
fs.SetOutput(stderr)
profile := fs.String("profile", platform.NvidiaBenchmarkProfileStandard, "benchmark profile: standard, stability, overnight")
devices := fs.String("devices", "", "comma-separated GPU indices to include")
exclude := fs.String("exclude", "", "comma-separated GPU indices to exclude")
sizeMB := fs.Int("size-mb", 0, "per-GPU benchmark buffer size in MB (0 = auto)")
skipNCCL := fs.Bool("skip-nccl", false, "skip multi-GPU NCCL interconnect benchmark")
if err := fs.Parse(args[1:]); err != nil {
if err == flag.ErrHelp {
return 0
}
return 2
}
if fs.NArg() != 0 {
fmt.Fprintf(stderr, "bee benchmark: unexpected arguments\n")
return 2
}
includeIndices, err := parseBenchmarkIndexCSV(*devices)
if err != nil {
fmt.Fprintf(stderr, "bee benchmark: invalid --devices: %v\n", err)
return 2
}
excludeIndices, err := parseBenchmarkIndexCSV(*exclude)
if err != nil {
fmt.Fprintf(stderr, "bee benchmark: invalid --exclude: %v\n", err)
return 2
}
application := app.New(platform.New())
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
archive, err := application.RunNvidiaBenchmark("", platform.NvidiaBenchmarkOptions{
Profile: *profile,
SizeMB: *sizeMB,
GPUIndices: includeIndices,
ExcludeGPUIndices: excludeIndices,
RunNCCL: !*skipNCCL,
}, logLine)
if err != nil {
slog.Error("run benchmark", "target", target, "err", err)
return 1
}
slog.Info("benchmark archive written", "target", target, "path", archive)
return 0
}
func parseBenchmarkIndexCSV(raw string) ([]int, error) {
raw = strings.TrimSpace(raw)
if raw == "" {
return nil, nil
}
var indices []int
for _, part := range strings.Split(raw, ",") {
part = strings.TrimSpace(part)
if part == "" {
continue
}
value, err := strconv.Atoi(part)
if err != nil || value < 0 {
return nil, fmt.Errorf("bad gpu index %q", part)
}
indices = append(indices, value)
}
return indices, nil
}
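As a standalone illustration, the index parsing above can be exercised in isolation. This is a self-contained sketch mirroring `parseBenchmarkIndexCSV` (the function name here is local to the example, not imported from the tree):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseIndexCSV mirrors parseBenchmarkIndexCSV above: it accepts a
// comma-separated list of non-negative GPU indices, tolerates blanks and
// surrounding spaces, and rejects anything non-numeric or negative.
func parseIndexCSV(raw string) ([]int, error) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return nil, nil // flag not given: nil slice means "no filter"
	}
	var indices []int
	for _, part := range strings.Split(raw, ",") {
		part = strings.TrimSpace(part)
		if part == "" {
			continue // tolerate "0,,1" and trailing commas
		}
		value, err := strconv.Atoi(part)
		if err != nil || value < 0 {
			return nil, fmt.Errorf("bad gpu index %q", part)
		}
		indices = append(indices, value)
	}
	return indices, nil
}

func main() {
	got, _ := parseIndexCSV(" 0, 2,7, ")
	fmt.Println(got) // [0 2 7]
	if _, err := parseIndexCSV("0,-1"); err != nil {
		fmt.Println("rejected:", err)
	}
}
```

Note the empty-string case returns a nil slice rather than an error, which is what lets `--devices`/`--exclude` default to "all GPUs" when omitted.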


@@ -46,8 +46,6 @@ func TestRunUnknownCommand(t *testing.T) {
}
func TestRunVersion(t *testing.T) {
-t.Parallel()
old := Version
Version = "test-version"
t.Cleanup(func() { Version = old })
@@ -62,6 +60,16 @@ func TestRunVersion(t *testing.T) {
}
}
func TestBuildLabelUsesVersionAsIs(t *testing.T) {
old := Version
Version = "1.2.3"
t.Cleanup(func() { Version = old })
if got := buildLabel(); got != "1.2.3" {
t.Fatalf("buildLabel=%q want %q", got, "1.2.3")
}
}
func TestRunExportRequiresTarget(t *testing.T) {
t.Parallel()


@@ -1,3 +1,26 @@
module bee/audit
-go 1.24.0
+go 1.25.0
replace reanimator/chart => ../internal/chart
require (
github.com/go-analyze/charts v0.5.26
reanimator/chart v0.0.0-00010101000000-000000000000
)
require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-analyze/bulk v0.1.3 // indirect
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/image v0.24.0 // indirect
golang.org/x/sys v0.42.0 // indirect
modernc.org/libc v1.70.0 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
modernc.org/sqlite v1.48.0 // indirect
)

audit/go.sum Normal file

@@ -0,0 +1,37 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/go-analyze/bulk v0.1.3 h1:pzRdBqzHDAT9PyROt0SlWE0YqPtdmTcEpIJY0C3vF0c=
github.com/go-analyze/bulk v0.1.3/go.mod h1:afon/KtFJYnekIyN20H/+XUvcLFjE8sKR1CfpqfClgM=
github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00ZxY=
github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=


@@ -19,17 +19,18 @@ import (
)
var (
DefaultExportDir = "/appdata/bee/export"
DefaultAuditJSONPath = DefaultExportDir + "/bee-audit.json"
DefaultAuditLogPath = DefaultExportDir + "/bee-audit.log"
DefaultWebLogPath = DefaultExportDir + "/bee-web.log"
DefaultNetworkLogPath = DefaultExportDir + "/bee-network.log"
DefaultNvidiaLogPath = DefaultExportDir + "/bee-nvidia.log"
DefaultSSHLogPath = DefaultExportDir + "/bee-sshsetup.log"
DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json"
DefaultRuntimeLogPath = DefaultExportDir + "/runtime-health.log"
DefaultTechDumpDir = DefaultExportDir + "/techdump"
DefaultSATBaseDir = DefaultExportDir + "/bee-sat"
DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
)
type App struct {
@@ -40,6 +41,8 @@ type App struct {
sat satRunner
runtime runtimeChecker
installer installer
// StatusDB is the unified component health store (nil if unavailable).
StatusDB *ComponentStatusDB
}
type ActionResult struct {
@@ -53,10 +56,15 @@ type networkManager interface {
DHCPOne(iface string) (string, error)
DHCPAll() (string, error)
SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
SetInterfaceState(iface string, up bool) error
GetInterfaceState(iface string) (bool, error)
CaptureNetworkSnapshot() (platform.NetworkSnapshot, error)
RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error
}
type serviceManager interface {
ListBeeServices() ([]string, error)
ServiceState(name string) string
ServiceStatus(name string) (string, error)
ServiceDo(name string, action platform.ServiceAction) (string, error)
}
@@ -74,20 +82,64 @@ type toolManager interface {
type installer interface {
ListInstallDisks() ([]platform.InstallDisk, error)
InstallToDisk(ctx context.Context, device string, logFile string) error
IsLiveMediaInRAM() bool
LiveBootSource() platform.LiveBootSource
RunInstallToRAM(ctx context.Context, logFunc func(string)) error
}
type GPUPresenceResult struct {
Nvidia bool
AMD bool
}
func (a *App) DetectGPUPresence() GPUPresenceResult {
vendor := a.sat.DetectGPUVendor()
return GPUPresenceResult{
Nvidia: vendor == "nvidia",
AMD: vendor == "amd",
}
}
func (a *App) IsLiveMediaInRAM() bool {
return a.installer.IsLiveMediaInRAM()
}
func (a *App) LiveBootSource() platform.LiveBootSource {
return a.installer.LiveBootSource()
}
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
return a.installer.RunInstallToRAM(ctx, logFunc)
}
type satRunner interface {
-RunNvidiaAcceptancePack(baseDir string) (string, error)
+RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
-RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int) (string, error)
+RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
-RunMemoryAcceptancePack(baseDir string) (string, error)
+RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
-RunStorageAcceptancePack(baseDir string) (string, error)
+RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
-RunCPUAcceptancePack(baseDir string, durationSec int) (string, error)
+RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error)
ResetNvidiaGPU(index int) (string, error)
RunMemoryAcceptancePack(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error)
RunStorageAcceptancePack(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error)
RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
DetectGPUVendor() string
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
-RunAMDAcceptancePack(baseDir string) (string, error)
+RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
-RunNCCLTests(ctx context.Context, baseDir string) (string, error)
+RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error)
}
type runtimeChecker interface {
@@ -96,7 +148,7 @@ type runtimeChecker interface {
}
func New(platform *platform.System) *App {
-return &App{
+a := &App{
network: platform,
services: platform,
exports: platform,
@@ -105,6 +157,30 @@ func New(platform *platform.System) *App {
runtime: platform,
installer: platform,
}
if db, err := OpenComponentStatusDB(DefaultExportDir + "/component-status.json"); err == nil {
a.StatusDB = db
}
return a
}
// ApplySATOverlay parses a raw audit JSON, overlays the latest SAT results,
// and returns the updated JSON. Used by the web UI to serve always-fresh status.
func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
snap, err := readAuditSnapshot(auditJSON)
if err != nil {
return nil, err
}
applyLatestSATStatuses(&snap.Hardware, DefaultSATBaseDir, nil)
return json.MarshalIndent(snap, "", " ")
}
func readAuditSnapshot(auditJSON []byte) (schema.HardwareIngestRequest, error) {
var snap schema.HardwareIngestRequest
if err := json.Unmarshal(auditJSON, &snap); err != nil {
return schema.HardwareIngestRequest{}, err
}
collector.NormalizeSnapshot(&snap.Hardware, snap.CollectedAt)
return snap, nil
}
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
@@ -114,7 +190,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
}
}
result := collector.Run(runtimeMode)
-applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir)
+applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB)
if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
result.Runtime = &health
}
@@ -129,10 +205,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
return "stdout", err
case strings.HasPrefix(output, "file:"):
path := strings.TrimPrefix(output, "file:")
-if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
-return "", err
-}
-if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
+if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err
}
return path, nil
@@ -157,10 +230,7 @@ func (a *App) RunRuntimePreflight(output string) (string, error) {
return "stdout", err
case strings.HasPrefix(output, "file:"):
path := strings.TrimPrefix(output, "file:")
-if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
-return "", err
-}
-if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
+if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err
}
return path, nil
@@ -230,6 +300,9 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
if err != nil {
return "", err
}
if normalized, normErr := ApplySATOverlay(data); normErr == nil {
data = normalized
}
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
return "", err
}
@@ -300,6 +373,22 @@ func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
return a.network.SetStaticIPv4(cfg)
}
func (a *App) SetInterfaceState(iface string, up bool) error {
return a.network.SetInterfaceState(iface, up)
}
func (a *App) GetInterfaceState(iface string) (bool, error) {
return a.network.GetInterfaceState(iface)
}
func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
return a.network.CaptureNetworkSnapshot()
}
func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
return a.network.RestoreNetworkSnapshot(snapshot)
}
func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
body, err := a.network.SetStaticIPv4(cfg)
return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
@@ -356,6 +445,10 @@ func (a *App) ListBeeServices() ([]string, error) {
return a.services.ListBeeServices()
}
func (a *App) ServiceState(name string) string {
return a.services.ServiceState(name)
}
func (a *App) ServiceStatus(name string) (string, error) {
return a.services.ServiceStatus(name)
}
@@ -411,15 +504,15 @@ func (a *App) AuditLogTailResult() ActionResult {
return ActionResult{Title: "Audit log tail", Body: body}
}
-func (a *App) RunNvidiaAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-return a.sat.RunNvidiaAcceptancePack(baseDir)
+return a.sat.RunNvidiaAcceptancePack(baseDir, logFunc)
}
func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
-path, err := a.RunNvidiaAcceptancePack(baseDir)
+path, err := a.RunNvidiaAcceptancePack(baseDir, nil)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
@@ -431,11 +524,20 @@ func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
return a.sat.ListNvidiaGPUs()
}
-func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int) (ActionResult, error) {
+func (a *App) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
return a.sat.ListNvidiaGPUStatuses()
}
func (a *App) ResetNvidiaGPU(index int) (ActionResult, error) {
out, err := a.sat.ResetNvidiaGPU(index)
return ActionResult{Title: fmt.Sprintf("Reset NVIDIA GPU %d", index), Body: strings.TrimSpace(out)}, err
}
func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (ActionResult, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, diagLevel, gpuIndices)
+path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, diagLevel, gpuIndices, logFunc)
body := "Archive written."
if path != "" {
body = "Archive written to " + path
@@ -443,39 +545,108 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
}
-func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-return a.sat.RunMemoryAcceptancePack(baseDir)
+return a.sat.RunNvidiaTargetedStressValidatePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
}
func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
return a.RunNvidiaBenchmarkCtx(context.Background(), baseDir, opts, logFunc)
}
func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultBenchmarkBaseDir
}
return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
}
func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaTargetedPowerPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaPulseTestPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaBandwidthPack(ctx, baseDir, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunNvidiaStressPack(ctx, baseDir, opts, logFunc)
}
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, 256, 1, logFunc)
}
func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunMemoryAcceptancePack(ctx, baseDir, sizeMB, passes, logFunc)
}
func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
-path, err := a.RunMemoryAcceptancePack(baseDir)
+path, err := a.RunMemoryAcceptancePack(baseDir, nil)
return ActionResult{Title: "Memory SAT", Body: satResultBody(path)}, err
}
-func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
+func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-return a.sat.RunCPUAcceptancePack(baseDir, durationSec)
+return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
-path, err := a.RunCPUAcceptancePack(baseDir, durationSec)
+path, err := a.RunCPUAcceptancePack(baseDir, durationSec, nil)
return ActionResult{Title: "CPU SAT", Body: satResultBody(path)}, err
}
-func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, false, logFunc)
}
func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-return a.sat.RunStorageAcceptancePack(baseDir)
+return a.sat.RunStorageAcceptancePack(ctx, baseDir, extended, logFunc)
}
func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
-path, err := a.RunStorageAcceptancePack(baseDir)
+path, err := a.RunStorageAcceptancePack(baseDir, nil)
return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err
}
@@ -487,18 +658,63 @@ func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
return a.sat.ListAMDGPUs()
}
-func (a *App) RunAMDAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
}
func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
-return a.sat.RunAMDAcceptancePack(baseDir)
+return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
}
func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
-path, err := a.RunAMDAcceptancePack(baseDir)
+path, err := a.RunAMDAcceptancePack(baseDir, nil)
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
}
func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
}
func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
}
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
}
func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
}
func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
@@ -506,8 +722,15 @@ func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platfor
return a.sat.RunFanStressTest(ctx, baseDir, opts)
}
func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
}
func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
body := "Results: " + path
if err != nil && err != context.Canceled {
body += "\nERROR: " + err.Error()
@@ -515,6 +738,13 @@ func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
return ActionResult{Title: "NCCL bandwidth test", Body: body}, err
}
func (a *App) RunHPL(ctx context.Context, baseDir string, opts platform.HPLOptions, logFunc func(string)) (string, *platform.HPLResult, error) {
if a == nil {
return "", nil, fmt.Errorf("app not configured")
}
return a.sat.RunHPL(ctx, baseDir, opts, logFunc)
}
func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) {
path, err := a.RunFanStressTest(ctx, "", opts)
body := formatFanStressResult(path)
@@ -592,6 +822,7 @@ func (a *App) HealthSummaryResult() ActionResult {
if err := json.Unmarshal(raw, &snapshot); err != nil {
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
}
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
summary := collector.BuildHealthSummary(snapshot.Hardware)
var body strings.Builder
@@ -626,6 +857,7 @@ func (a *App) MainBanner() string {
if err := json.Unmarshal(raw, &snapshot); err != nil {
return ""
}
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
var lines []string
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
@@ -720,6 +952,12 @@ func latestSATSummaries() []string {
prefix string
}{
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
{label: "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)", prefix: "gpu-nvidia-targeted-stress-"},
{label: "NVIDIA Max Compute Load (dcgmproftester)", prefix: "gpu-nvidia-compute-"},
{label: "NVIDIA Targeted Power (dcgmi diag targeted_power)", prefix: "gpu-nvidia-targeted-power-"},
{label: "NVIDIA Pulse Test (dcgmi diag pulse_test)", prefix: "gpu-nvidia-pulse-"},
{label: "NVIDIA Interconnect Test (NCCL all_reduce_perf)", prefix: "gpu-nvidia-nccl-"},
{label: "NVIDIA Bandwidth Test (NVBandwidth)", prefix: "gpu-nvidia-bandwidth-"},
{label: "Memory SAT", prefix: "memory-"},
{label: "Storage SAT", prefix: "storage-"},
{label: "CPU SAT", prefix: "cpu-"},
@@ -1018,3 +1256,62 @@ func (a *App) ListInstallDisks() ([]platform.InstallDisk, error) {
func (a *App) InstallToDisk(ctx context.Context, device string, logFile string) error {
return a.installer.InstallToDisk(ctx, device, logFile)
}
func formatSATDetail(raw string) string {
var b strings.Builder
kv := parseKeyValueSummary(raw)
if t, ok := kv["run_at_utc"]; ok {
fmt.Fprintf(&b, "Run: %s\n\n", t)
}
lines := strings.Split(raw, "\n")
var stepKeys []string
seenStep := map[string]bool{}
for _, line := range lines {
if idx := strings.Index(line, "_status="); idx >= 0 {
key := line[:idx]
if !seenStep[key] && key != "overall" {
seenStep[key] = true
stepKeys = append(stepKeys, key)
}
}
}
for _, key := range stepKeys {
status := kv[key+"_status"]
display := cleanSummaryKey(key)
switch status {
case "OK":
fmt.Fprintf(&b, "PASS %s\n", display)
case "FAILED":
fmt.Fprintf(&b, "FAIL %s\n", display)
case "UNSUPPORTED":
fmt.Fprintf(&b, "SKIP %s\n", display)
default:
fmt.Fprintf(&b, "? %s\n", display)
}
}
if overall, ok := kv["overall_status"]; ok {
ok2 := kv["job_ok"]
failed := kv["job_failed"]
fmt.Fprintf(&b, "\nOverall: %s (ok=%s failed=%s)", overall, ok2, failed)
}
return strings.TrimSpace(b.String())
}
func cleanSummaryKey(key string) string {
idx := strings.Index(key, "-")
if idx <= 0 {
return key
}
prefix := key[:idx]
for _, c := range prefix {
if c < '0' || c > '9' {
return key
}
}
return key[idx+1:]
}


@@ -43,6 +43,13 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
return f.setStaticIPv4Fn(cfg)
}
func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
return platform.NetworkSnapshot{}, nil
}
func (f fakeNetwork) RestoreNetworkSnapshot(platform.NetworkSnapshot) error { return nil }
type fakeServices struct {
serviceStatusFn func(string) (string, error)
serviceDoFn func(string, platform.ServiceAction) (string, error)
@@ -52,6 +59,10 @@ func (f fakeServices) ListBeeServices() ([]string, error) {
return nil, nil
}
func (f fakeServices) ServiceState(name string) string {
return "active"
}
func (f fakeServices) ServiceStatus(name string) (string, error) {
return f.serviceStatusFn(name)
}
@@ -109,21 +120,79 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
}
type fakeSAT struct {
runNvidiaFn func(string) (string, error)
runNvidiaBenchmarkFn func(string, platform.NvidiaBenchmarkOptions) (string, error)
runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error)
runNvidiaComputeFn func(string, int, []int) (string, error)
runNvidiaPowerFn func(string, int, []int) (string, error)
runNvidiaPulseFn func(string, int, []int) (string, error)
runNvidiaBandwidthFn func(string, []int) (string, error)
runNvidiaTargetedStressFn func(string, int, []int) (string, error)
runMemoryFn func(string) (string, error)
runStorageFn func(string) (string, error)
runCPUFn func(string, int) (string, error)
detectVendorFn func() string
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error)
runAMDPackFn func(string) (string, error)
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
listNvidiaGPUStatusesFn func() ([]platform.NvidiaGPUStatus, error)
resetNvidiaGPUFn func(int) (string, error)
}
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir string, _ int, _ []int, _ func(string)) (string, error) {
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
if f.runNvidiaBenchmarkFn != nil {
return f.runNvidiaBenchmarkFn(baseDir, opts)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
if f.runNvidiaTargetedStressFn != nil {
return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
if f.runNvidiaComputeFn != nil {
return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaTargetedPowerPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
if f.runNvidiaPowerFn != nil {
return f.runNvidiaPowerFn(baseDir, durationSec, gpuIndices)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaPulseTestPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
if f.runNvidiaPulseFn != nil {
return f.runNvidiaPulseFn(baseDir, durationSec, gpuIndices)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaBandwidthPack(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
if f.runNvidiaBandwidthFn != nil {
return f.runNvidiaBandwidthFn(baseDir, gpuIndices)
}
return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
if f.runNvidiaStressFn != nil {
return f.runNvidiaStressFn(baseDir, opts)
}
return f.runNvidiaFn(baseDir)
}
@@ -134,15 +203,29 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
return nil, nil
}
func (f fakeSAT) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
if f.listNvidiaGPUStatusesFn != nil {
return f.listNvidiaGPUStatusesFn()
}
return nil, nil
}
func (f fakeSAT) ResetNvidiaGPU(index int) (string, error) {
if f.resetNvidiaGPUFn != nil {
return f.resetNvidiaGPUFn(index)
}
return "", nil
}
func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _, _ int, _ func(string)) (string, error) {
return f.runMemoryFn(baseDir)
}
func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ bool, _ func(string)) (string, error) {
return f.runStorageFn(baseDir)
}
func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
if f.runCPUFn != nil {
return f.runCPUFn(baseDir, durationSec)
}
@@ -163,21 +246,46 @@ func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
return nil, nil
}
func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
if f.runAMDPackFn != nil {
return f.runAMDPackFn(baseDir)
}
return "", nil
}
func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunMemoryStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunSATStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
return "", nil
}
func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.PlatformStressOptions, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
return "", nil
}
func (f fakeSAT) RunHPL(_ context.Context, _ string, _ platform.HPLOptions, _ func(string)) (string, *platform.HPLResult, error) {
return "", nil, nil
}
func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
t.Parallel()
@@ -570,13 +678,13 @@ func TestRunSATDefaultsToExportDir(t *testing.T) {
},
}
if _, err := a.RunNvidiaAcceptancePack("", nil); err != nil {
t.Fatal(err)
}
if _, err := a.RunMemoryAcceptancePack("", nil); err != nil {
t.Fatal(err)
}
if _, err := a.RunStorageAcceptancePack("", nil); err != nil {
t.Fatal(err)
}
}
@@ -619,13 +727,50 @@ func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
}
}
func TestApplySATOverlayFiltersIgnoredLegacyDevices(t *testing.T) {
tmp := t.TempDir()
oldSATBaseDir := DefaultSATBaseDir
DefaultSATBaseDir = filepath.Join(tmp, "sat")
t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
raw := `{
"collected_at": "2026-03-15T10:00:00Z",
"hardware": {
"board": {"serial_number": "SRV123"},
"storage": [
{"model": "Virtual HDisk0", "serial_number": "AAAABBBBCCCC3"},
{"model": "PASCARI", "serial_number": "DISK1", "status": "OK"}
],
"pcie_devices": [
{"device_class": "Co-processor", "model": "402xx Series QAT", "status": "OK"},
{"device_class": "VideoController", "model": "NVIDIA H100", "status": "OK"}
]
}
}`
got, err := ApplySATOverlay([]byte(raw))
if err != nil {
t.Fatalf("ApplySATOverlay error: %v", err)
}
text := string(got)
if contains(text, "Virtual HDisk0") {
t.Fatalf("overlaid audit should drop virtual hdisk:\n%s", text)
}
if contains(text, "\"device_class\": \"Co-processor\"") {
t.Fatalf("overlaid audit should drop co-processors:\n%s", text)
}
if !contains(text, "PASCARI") || !contains(text, "NVIDIA H100") {
t.Fatalf("overlaid audit should keep real devices:\n%s", text)
}
}
func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
tmp := t.TempDir()
exportDir := filepath.Join(tmp, "export")
if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"collected_at":"2026-03-15T10:00:00Z","hardware":{"board":{"serial_number":"SRV123"},"storage":[{"model":"Virtual HDisk0","serial_number":"AAAABBBBCCCC3"},{"model":"PASCARI","serial_number":"DISK1"}],"pcie_devices":[{"device_class":"Co-processor","model":"402xx Series QAT"},{"device_class":"VideoController","model":"NVIDIA H100"}]}}`), 0644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
@@ -657,6 +802,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
tr := tar.NewReader(gzr)
var names []string
var auditJSON string
for {
hdr, err := tr.Next()
if errors.Is(err, io.EOF) {
@@ -666,6 +812,36 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
t.Fatalf("read tar entry: %v", err)
}
names = append(names, hdr.Name)
if contains(hdr.Name, "/export/bee-audit.json") {
body, err := io.ReadAll(tr)
if err != nil {
t.Fatalf("read audit entry: %v", err)
}
auditJSON = string(body)
}
}
for _, want := range []string{
"/system/ip-link.txt",
"/system/ip-link-stats.txt",
"/system/kernel-aer-nvidia.txt",
"/system/lspci-nvidia-bridges-vv.txt",
"/system/pcie-aer-sysfs.txt",
"/system/ethtool-info.txt",
"/system/ethtool-link.txt",
"/system/ethtool-module.txt",
"/system/mstflint-query.txt",
} {
var found bool
for _, name := range names {
if contains(name, want) {
found = true
break
}
}
if !found {
t.Fatalf("support bundle missing %s, names=%v", want, names)
}
}
var foundRaw bool
@@ -680,6 +856,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
if !foundRaw {
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
}
if contains(auditJSON, "Virtual HDisk0") || contains(auditJSON, "\"device_class\": \"Co-processor\"") {
t.Fatalf("support bundle should normalize ignored devices:\n%s", auditJSON)
}
if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
}
}
func TestMainBanner(t *testing.T) {
@@ -693,6 +875,10 @@ func TestMainBanner(t *testing.T) {
product := "PowerEdge R760"
cpuModel := "Intel Xeon Gold 6430"
memoryType := "DDR5"
memorySerialA := "DIMM-A"
memorySerialB := "DIMM-B"
storageSerialA := "DISK-A"
storageSerialB := "DISK-B"
gpuClass := "VideoController"
gpuModel := "NVIDIA H100"
@@ -708,12 +894,12 @@ func TestMainBanner(t *testing.T) {
{Model: &cpuModel},
},
Memory: []schema.HardwareMemory{
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialA},
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialB},
},
Storage: []schema.HardwareStorage{
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialA},
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialB},
},
PCIeDevices: []schema.HardwarePCIeDevice{
{DeviceClass: &gpuClass, Model: &gpuModel},


@@ -0,0 +1,48 @@
package app
import (
"fmt"
"os"
"path/filepath"
)
func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
}
tmpPath := path + ".tmp"
f, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
if err != nil {
return fmt.Errorf("open temp %s: %w", tmpPath, err)
}
success := false
defer func() {
_ = f.Close()
if !success {
_ = os.Remove(tmpPath)
}
}()
if _, err := f.Write(data); err != nil {
return fmt.Errorf("write temp %s: %w", tmpPath, err)
}
if err := f.Sync(); err != nil {
return fmt.Errorf("sync temp %s: %w", tmpPath, err)
}
if err := f.Close(); err != nil {
return fmt.Errorf("close temp %s: %w", tmpPath, err)
}
if err := os.Rename(tmpPath, path); err != nil {
return fmt.Errorf("rename %s -> %s: %w", tmpPath, path, err)
}
if dir, err := os.Open(filepath.Dir(path)); err == nil {
_ = dir.Sync()
_ = dir.Close()
}
success = true
return nil
}


@@ -0,0 +1,71 @@
package app
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"bee/audit/internal/schema"
)
func TestAtomicWriteFileReplacesTargetWithoutLeavingTmp(t *testing.T) {
path := filepath.Join(t.TempDir(), "bee-audit.json")
if err := os.WriteFile(path, []byte("old\n"), 0644); err != nil {
t.Fatalf("seed file: %v", err)
}
if err := atomicWriteFile(path, []byte("new\n"), 0644); err != nil {
t.Fatalf("atomicWriteFile: %v", err)
}
raw, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read final: %v", err)
}
if string(raw) != "new\n" {
t.Fatalf("final content=%q want %q", string(raw), "new\n")
}
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
t.Fatalf("tmp file should be absent after success, err=%v", err)
}
}
func TestRunRuntimePreflightWritesAtomically(t *testing.T) {
path := filepath.Join(t.TempDir(), "runtime-health.json")
a := &App{
runtime: fakeRuntime{
collectFn: func(exportDir string) (schema.RuntimeHealth, error) {
return schema.RuntimeHealth{
Status: "OK",
ExportDir: exportDir,
DriverReady: true,
CUDAReady: true,
}, nil
},
},
}
got, err := a.RunRuntimePreflight("file:" + path)
if err != nil {
t.Fatalf("RunRuntimePreflight: %v", err)
}
if got != path {
t.Fatalf("path=%q want %q", got, path)
}
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
t.Fatalf("tmp file should be absent after success, err=%v", err)
}
raw, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read runtime file: %v", err)
}
var health schema.RuntimeHealth
if err := json.Unmarshal(raw, &health); err != nil {
t.Fatalf("json unmarshal: %v", err)
}
if health.Status != "OK" {
t.Fatalf("status=%q want OK", health.Status)
}
}


@@ -0,0 +1,268 @@
package app
import (
"encoding/json"
"os"
"path/filepath"
"strings"
"sync"
"time"
)
// ComponentStatusDB is a persistent, append-only store of hardware component health records.
// Records are keyed by component identity strings (e.g. "pcie:0000:c8:00.0", "storage:nvme0n1").
// Once a component is marked Warning or Critical, subsequent OK entries do not downgrade it —
// the component stays at the highest observed severity until explicitly reset.
type ComponentStatusDB struct {
path string
mu sync.Mutex
records map[string]*ComponentStatusRecord
}
// ComponentStatusRecord holds the current and historical health of one hardware component.
type ComponentStatusRecord struct {
ComponentKey string `json:"component_key"`
Status string `json:"status"` // "OK", "Warning", "Critical", "Unknown"
LastCheckedAt time.Time `json:"last_checked_at"`
LastChangedAt time.Time `json:"last_changed_at"`
ErrorSummary string `json:"error_summary,omitempty"`
History []ComponentStatusEntry `json:"history"`
}
// ComponentStatusEntry is one observation written to a component's history.
type ComponentStatusEntry struct {
At time.Time `json:"at"`
Status string `json:"status"`
Source string `json:"source"` // e.g. "sat:nvidia", "sat:memory", "watchdog:kmsg"
Detail string `json:"detail,omitempty"`
}
// OpenComponentStatusDB opens (or creates) the JSON status DB at path.
func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) {
db := &ComponentStatusDB{
path: path,
records: make(map[string]*ComponentStatusRecord),
}
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return nil, err
}
data, err := os.ReadFile(path)
if err != nil && !os.IsNotExist(err) {
return nil, err
}
if len(data) > 0 {
var records []ComponentStatusRecord
if err := json.Unmarshal(data, &records); err == nil {
for i := range records {
db.records[records[i].ComponentKey] = &records[i]
}
}
}
return db, nil
}
// Record writes one observation for the given component key.
// source is a short label like "sat:nvidia" or "watchdog:kmsg".
// status is "OK", "Warning", "Critical", or "Unknown".
// OK never downgrades an existing Warning or Critical status.
func (db *ComponentStatusDB) Record(key, source, status, detail string) {
if db == nil || strings.TrimSpace(key) == "" {
return
}
db.mu.Lock()
defer db.mu.Unlock()
now := time.Now().UTC()
rec, exists := db.records[key]
if !exists {
rec = &ComponentStatusRecord{ComponentKey: key}
db.records[key] = rec
}
rec.LastCheckedAt = now
entry := ComponentStatusEntry{At: now, Status: status, Source: source, Detail: detail}
rec.History = append(rec.History, entry)
// Status merge: OK never downgrades Warning/Critical.
newSev := componentSeverity(status)
curSev := componentSeverity(rec.Status)
if newSev > curSev {
rec.Status = status
rec.LastChangedAt = now
rec.ErrorSummary = detail
} else if rec.Status == "" {
rec.Status = status
rec.LastChangedAt = now
}
_ = db.saveLocked()
}
// Get returns the current record for a component key.
func (db *ComponentStatusDB) Get(key string) (ComponentStatusRecord, bool) {
if db == nil {
return ComponentStatusRecord{}, false
}
db.mu.Lock()
defer db.mu.Unlock()
r, ok := db.records[key]
if !ok {
return ComponentStatusRecord{}, false
}
return *r, true
}
// All returns a snapshot of all records.
func (db *ComponentStatusDB) All() []ComponentStatusRecord {
if db == nil {
return nil
}
db.mu.Lock()
defer db.mu.Unlock()
out := make([]ComponentStatusRecord, 0, len(db.records))
for _, r := range db.records {
out = append(out, *r)
}
return out
}
func (db *ComponentStatusDB) saveLocked() error {
records := make([]ComponentStatusRecord, 0, len(db.records))
for _, r := range db.records {
records = append(records, *r)
}
data, err := json.MarshalIndent(records, "", " ")
if err != nil {
return err
}
return os.WriteFile(db.path, data, 0644)
}
// componentSeverity returns a numeric severity so higher values win.
func componentSeverity(status string) int {
switch strings.TrimSpace(status) {
case "Critical":
return 3
case "Warning":
return 2
case "OK":
return 1
default:
return 0
}
}
// ApplySATResultToDB reads a SAT summary.txt from the run directory next to archivePath
// and writes component status records to db for the given SAT target.
// archivePath may be either a bare .tar.gz path or "Archive written to /path/foo.tar.gz".
func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) {
if db == nil || strings.TrimSpace(archivePath) == "" {
return
}
archivePath = extractArchivePath(archivePath)
if archivePath == "" {
return
}
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
if err != nil {
return
}
kv := parseSATKV(string(data))
overall := strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
if overall == "" {
return
}
source := "sat:" + target
dbStatus := satStatusToDBStatus(overall)
// Map SAT target to component keys.
switch target {
case "nvidia", "nvidia-targeted-stress", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
"nvidia-interconnect", "nvidia-bandwidth", "amd", "nvidia-stress",
"amd-stress", "amd-mem", "amd-bandwidth":
db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall)
case "memory", "memory-stress", "sat-stress":
db.Record("memory:all", source, dbStatus, target+" SAT: "+overall)
case "cpu", "platform-stress":
db.Record("cpu:all", source, dbStatus, target+" SAT: "+overall)
case "storage":
// Try to record per-device if available in summary.
recordedAny := false
for key, val := range kv {
if !strings.HasSuffix(key, "_status") || key == "overall_status" {
continue
}
base := strings.TrimSuffix(key, "_status")
idx := strings.Index(base, "_")
if idx <= 0 {
continue
}
devName := base[:idx]
devStatus := satStatusToDBStatus(strings.ToUpper(strings.TrimSpace(val)))
db.Record("storage:"+devName, source, devStatus, "storage SAT: "+val)
recordedAny = true
}
if !recordedAny {
db.Record("storage:all", source, dbStatus, "storage SAT: "+overall)
}
}
}
func satStatusToDBStatus(overall string) string {
switch overall {
case "OK":
return "OK"
case "FAILED":
return "Warning"
case "PARTIAL", "UNSUPPORTED":
return "Unknown"
default:
return "Unknown"
}
}
// ExtractArchivePath extracts a bare .tar.gz path from a string that may be
// "Archive written to /path/foo.tar.gz" or already a bare path.
func ExtractArchivePath(s string) string {
return extractArchivePath(s)
}
// ReadSATOverallStatus reads the overall_status value from the summary.txt
// file located in the run directory alongside archivePath.
// Returns "" if the file cannot be read.
func ReadSATOverallStatus(archivePath string) string {
if strings.TrimSpace(archivePath) == "" {
return ""
}
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
if err != nil {
return ""
}
kv := parseSATKV(string(data))
return strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
}
func extractArchivePath(s string) string {
s = strings.TrimSpace(s)
if strings.HasSuffix(s, ".tar.gz") {
parts := strings.Fields(s)
if len(parts) > 0 {
return parts[len(parts)-1]
}
}
return s
}
func parseSATKV(raw string) map[string]string {
kv := make(map[string]string)
for _, line := range strings.Split(raw, "\n") {
k, v, ok := strings.Cut(strings.TrimSpace(line), "=")
if ok {
kv[strings.TrimSpace(k)] = strings.TrimSpace(v)
}
}
return kv
}


@@ -1,387 +0,0 @@
package app
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"bee/audit/internal/schema"
)
// ComponentRow is one line in the hardware panel.
type ComponentRow struct {
Key string // "CPU", "MEM", "GPU", "DISK", "PSU"
Status string // "PASS", "FAIL", "CANCEL", "N/A"
Detail string // compact one-liner
}
// HardwarePanelData holds everything the TUI right panel needs.
type HardwarePanelData struct {
Header []string
Rows []ComponentRow
}
// LoadHardwarePanel reads the latest audit JSON and SAT summaries.
// Returns empty panel if no audit data exists yet.
func (a *App) LoadHardwarePanel() HardwarePanelData {
raw, err := os.ReadFile(DefaultAuditJSONPath)
if err != nil {
return HardwarePanelData{Header: []string{"No audit data — run audit first."}}
}
var snap schema.HardwareIngestRequest
if err := json.Unmarshal(raw, &snap); err != nil {
return HardwarePanelData{Header: []string{"Audit data unreadable."}}
}
statuses := satStatuses()
var header []string
if sys := formatSystemLine(snap.Hardware.Board); sys != "" {
header = append(header, sys)
}
for _, fw := range snap.Hardware.Firmware {
if fw.DeviceName == "BIOS" && fw.Version != "" {
header = append(header, "BIOS: "+fw.Version)
}
if fw.DeviceName == "BMC" && fw.Version != "" {
header = append(header, "BMC: "+fw.Version)
}
}
if ip := formatIPLine(a.network.ListInterfaces); ip != "" {
header = append(header, ip)
}
var rows []ComponentRow
if cpu := formatCPULine(snap.Hardware.CPUs); cpu != "" {
rows = append(rows, ComponentRow{
Key: "CPU",
Status: statuses["cpu"],
Detail: strings.TrimPrefix(cpu, "CPU: "),
})
}
if mem := formatMemoryLine(snap.Hardware.Memory); mem != "" {
rows = append(rows, ComponentRow{
Key: "MEM",
Status: statuses["memory"],
Detail: strings.TrimPrefix(mem, "Memory: "),
})
}
if gpu := formatGPULine(snap.Hardware.PCIeDevices); gpu != "" {
rows = append(rows, ComponentRow{
Key: "GPU",
Status: statuses["gpu"],
Detail: strings.TrimPrefix(gpu, "GPU: "),
})
}
if disk := formatStorageLine(snap.Hardware.Storage); disk != "" {
rows = append(rows, ComponentRow{
Key: "DISK",
Status: statuses["storage"],
Detail: strings.TrimPrefix(disk, "Storage: "),
})
}
if psu := formatPSULine(snap.Hardware.PowerSupplies); psu != "" {
rows = append(rows, ComponentRow{
Key: "PSU",
Status: "N/A",
Detail: psu,
})
}
return HardwarePanelData{Header: header, Rows: rows}
}
// ComponentDetailResult returns detail text for a component shown in the panel.
func (a *App) ComponentDetailResult(key string) ActionResult {
switch key {
case "CPU":
return a.cpuDetailResult(false)
case "MEM":
return a.satDetailResult("memory", "memory-", "MEM detail")
case "GPU":
// Prefer whichever GPU SAT was run most recently.
nv, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-nvidia-*/summary.txt"))
am, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-amd-*/summary.txt"))
sort.Strings(nv)
sort.Strings(am)
latestNV := ""
if len(nv) > 0 {
latestNV = nv[len(nv)-1]
}
latestAM := ""
if len(am) > 0 {
latestAM = am[len(am)-1]
}
if latestAM > latestNV {
return a.satDetailResult("gpu", "gpu-amd-", "GPU detail")
}
return a.satDetailResult("gpu", "gpu-nvidia-", "GPU detail")
case "DISK":
return a.satDetailResult("storage", "storage-", "DISK detail")
case "PSU":
return a.psuDetailResult()
default:
return ActionResult{Title: key, Body: "No detail available."}
}
}
func (a *App) cpuDetailResult(satOnly bool) ActionResult {
var b strings.Builder
// Show latest SAT summary if available.
satResult := a.satDetailResult("cpu", "cpu-", "CPU SAT")
if satResult.Body != "No test results found. Run a test first." {
fmt.Fprintln(&b, "=== Last SAT ===")
fmt.Fprintln(&b, satResult.Body)
fmt.Fprintln(&b)
}
if satOnly {
body := strings.TrimSpace(b.String())
if body == "" {
body = "No CPU SAT results found. Run a test first."
}
return ActionResult{Title: "CPU SAT", Body: body}
}
raw, err := os.ReadFile(DefaultAuditJSONPath)
if err != nil {
return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
}
var snap schema.HardwareIngestRequest
if err := json.Unmarshal(raw, &snap); err != nil {
return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
}
if len(snap.Hardware.CPUs) == 0 {
return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
}
fmt.Fprintln(&b, "=== Audit ===")
for i, cpu := range snap.Hardware.CPUs {
fmt.Fprintf(&b, "CPU %d\n", i)
if cpu.Model != nil {
fmt.Fprintf(&b, " Model: %s\n", *cpu.Model)
}
if cpu.Manufacturer != nil {
fmt.Fprintf(&b, " Vendor: %s\n", *cpu.Manufacturer)
}
if cpu.Cores != nil {
fmt.Fprintf(&b, " Cores: %d\n", *cpu.Cores)
}
if cpu.Threads != nil {
fmt.Fprintf(&b, " Threads: %d\n", *cpu.Threads)
}
if cpu.MaxFrequencyMHz != nil {
fmt.Fprintf(&b, " Max freq: %d MHz\n", *cpu.MaxFrequencyMHz)
}
if cpu.TemperatureC != nil {
fmt.Fprintf(&b, " Temp: %.1f°C\n", *cpu.TemperatureC)
}
if cpu.Throttled != nil {
fmt.Fprintf(&b, " Throttled: %v\n", *cpu.Throttled)
}
if cpu.CorrectableErrorCount != nil && *cpu.CorrectableErrorCount > 0 {
fmt.Fprintf(&b, " ECC correctable: %d\n", *cpu.CorrectableErrorCount)
}
if cpu.UncorrectableErrorCount != nil && *cpu.UncorrectableErrorCount > 0 {
fmt.Fprintf(&b, " ECC uncorrectable: %d\n", *cpu.UncorrectableErrorCount)
}
if i < len(snap.Hardware.CPUs)-1 {
fmt.Fprintln(&b)
}
}
return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
}
func (a *App) satDetailResult(statusKey, prefix, title string) ActionResult {
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, prefix+"*/summary.txt"))
if err != nil || len(matches) == 0 {
return ActionResult{Title: title, Body: "No test results found. Run a test first."}
}
sort.Strings(matches)
raw, err := os.ReadFile(matches[len(matches)-1])
if err != nil {
return ActionResult{Title: title, Body: "Could not read test results."}
}
return ActionResult{Title: title, Body: formatSATDetail(strings.TrimSpace(string(raw)))}
}
// formatSATDetail converts raw summary.txt key=value content to a human-readable per-step display.
func formatSATDetail(raw string) string {
var b strings.Builder
kv := parseKeyValueSummary(raw)
if t, ok := kv["run_at_utc"]; ok {
fmt.Fprintf(&b, "Run: %s\n\n", t)
}
// Collect step names in order they appear in the file
lines := strings.Split(raw, "\n")
var stepKeys []string
seenStep := map[string]bool{}
for _, line := range lines {
if idx := strings.Index(line, "_status="); idx >= 0 {
key := line[:idx]
if !seenStep[key] && key != "overall" {
seenStep[key] = true
stepKeys = append(stepKeys, key)
}
}
}
for _, key := range stepKeys {
status := kv[key+"_status"]
display := cleanSummaryKey(key)
switch status {
case "OK":
fmt.Fprintf(&b, "PASS %s\n", display)
case "FAILED":
fmt.Fprintf(&b, "FAIL %s\n", display)
case "UNSUPPORTED":
fmt.Fprintf(&b, "SKIP %s\n", display)
default:
fmt.Fprintf(&b, "? %s\n", display)
}
}
if overall, ok := kv["overall_status"]; ok {
ok2 := kv["job_ok"]
failed := kv["job_failed"]
fmt.Fprintf(&b, "\nOverall: %s (ok=%s failed=%s)", overall, ok2, failed)
}
return strings.TrimSpace(b.String())
}
// cleanSummaryKey strips the leading numeric prefix from a SAT step key.
// "1-lscpu" → "lscpu", "3-stress-ng" → "stress-ng"
func cleanSummaryKey(key string) string {
idx := strings.Index(key, "-")
if idx <= 0 {
return key
}
prefix := key[:idx]
for _, c := range prefix {
if c < '0' || c > '9' {
return key
}
}
return key[idx+1:]
}
func (a *App) psuDetailResult() ActionResult {
raw, err := os.ReadFile(DefaultAuditJSONPath)
if err != nil {
return ActionResult{Title: "PSU", Body: "No audit data."}
}
var snap schema.HardwareIngestRequest
if err := json.Unmarshal(raw, &snap); err != nil {
return ActionResult{Title: "PSU", Body: "Audit data unreadable."}
}
if len(snap.Hardware.PowerSupplies) == 0 {
return ActionResult{Title: "PSU", Body: "No PSU data in last audit."}
}
var b strings.Builder
for i, psu := range snap.Hardware.PowerSupplies {
fmt.Fprintf(&b, "PSU %d\n", i)
if psu.Model != nil {
fmt.Fprintf(&b, " Model: %s\n", *psu.Model)
}
if psu.Vendor != nil {
fmt.Fprintf(&b, " Vendor: %s\n", *psu.Vendor)
}
if psu.WattageW != nil {
fmt.Fprintf(&b, " Rated: %d W\n", *psu.WattageW)
}
if psu.InputPowerW != nil {
fmt.Fprintf(&b, " Input: %.1f W\n", *psu.InputPowerW)
}
if psu.OutputPowerW != nil {
fmt.Fprintf(&b, " Output: %.1f W\n", *psu.OutputPowerW)
}
if psu.TemperatureC != nil {
fmt.Fprintf(&b, " Temp: %.1f°C\n", *psu.TemperatureC)
}
if i < len(snap.Hardware.PowerSupplies)-1 {
fmt.Fprintln(&b)
}
}
return ActionResult{Title: "PSU", Body: strings.TrimSpace(b.String())}
}
// satStatuses reads the latest summary.txt for each SAT type and returns
// a map of component key ("gpu","memory","storage") → status ("PASS","FAIL","CANCEL","N/A").
func satStatuses() map[string]string {
result := map[string]string{
"gpu": "N/A",
"memory": "N/A",
"storage": "N/A",
"cpu": "N/A",
}
patterns := []struct {
key string
prefix string
}{
{"gpu", "gpu-nvidia-"},
{"gpu", "gpu-amd-"},
{"memory", "memory-"},
{"storage", "storage-"},
{"cpu", "cpu-"},
}
for _, item := range patterns {
matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
if err != nil || len(matches) == 0 {
continue
}
sort.Strings(matches)
raw, err := os.ReadFile(matches[len(matches)-1])
if err != nil {
continue
}
values := parseKeyValueSummary(string(raw))
switch strings.ToUpper(strings.TrimSpace(values["overall_status"])) {
case "OK":
result[item.key] = "PASS"
case "FAILED":
result[item.key] = "FAIL"
case "CANCELED", "CANCELLED":
result[item.key] = "CANCEL"
}
}
return result
}
func formatPSULine(psus []schema.HardwarePowerSupply) string {
var present []schema.HardwarePowerSupply
for _, psu := range psus {
if psu.Present != nil && !*psu.Present {
continue
}
present = append(present, psu)
}
if len(present) == 0 {
return ""
}
firstW := 0
if present[0].WattageW != nil {
firstW = *present[0].WattageW
}
allSame := firstW > 0
for _, p := range present[1:] {
w := 0
if p.WattageW != nil {
w = *p.WattageW
}
if w != firstW {
allSame = false
break
}
}
if allSame && firstW > 0 {
return fmt.Sprintf("%dx %dW", len(present), firstW)
}
return fmt.Sprintf("%d PSU", len(present))
}

View File

@@ -3,13 +3,14 @@ package app
import (
"os"
"path/filepath"
"strconv"
"sort"
"strings"
"bee/audit/internal/schema"
)
func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string, db *ComponentStatusDB) {
if snap == nil || strings.TrimSpace(baseDir) == "" {
return
}
@@ -18,6 +19,7 @@ func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string) {
}
if summary, ok := loadLatestSATSummary(baseDir, "gpu-nvidia-"); ok {
applyGPUVendorSAT(snap.PCIeDevices, "nvidia", summary)
applyNvidiaPerGPUStatus(snap.PCIeDevices, baseDir)
}
if summary, ok := loadLatestSATSummary(baseDir, "memory-"); ok {
applyMemorySAT(snap.Memory, summary)
@@ -28,6 +30,102 @@ func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string) {
if summary, ok := loadLatestSATSummary(baseDir, "storage-"); ok {
applyStorageSAT(snap.Storage, summary)
}
// Apply unified component status DB — overlaid last so it can only upgrade severity.
applyComponentStatusDB(snap, db)
}
type nvidiaPerGPUStatus struct {
runStatus string
reason string
}
func applyNvidiaPerGPUStatus(devs []schema.HardwarePCIeDevice, baseDir string) {
statusByIndex, ts, ok := loadLatestNvidiaPerGPUStatus(baseDir)
if !ok {
return
}
for i := range devs {
if devs[i].Telemetry == nil {
continue
}
rawIdx, ok := devs[i].Telemetry["nvidia_gpu_index"]
if !ok {
continue
}
idx, ok := telemetryInt(rawIdx)
if !ok {
continue
}
st, ok := statusByIndex[idx]
if !ok {
continue
}
status, description, ok := satKeyStatus(st.runStatus, firstNonEmpty(strings.TrimSpace(st.reason), "nvidia GPU SAT"))
if !ok {
continue
}
mergeComponentStatusPreferDetail(&devs[i].HardwareComponentStatus, ts, status, description)
}
}
func loadLatestNvidiaPerGPUStatus(baseDir string) (map[int]nvidiaPerGPUStatus, string, bool) {
matches, err := filepath.Glob(filepath.Join(baseDir, "gpu-nvidia-*"))
if err != nil || len(matches) == 0 {
return nil, "", false
}
sort.Strings(matches)
runDir := matches[len(matches)-1]
summaryRaw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
if err != nil {
return nil, "", false
}
summaryKV := parseKeyValueSummary(string(summaryRaw))
runAtUTC := strings.TrimSpace(summaryKV["run_at_utc"])
files, err := filepath.Glob(filepath.Join(runDir, "gpu-*-status.txt"))
if err != nil || len(files) == 0 {
return nil, "", false
}
out := make(map[int]nvidiaPerGPUStatus, len(files))
for _, file := range files {
raw, err := os.ReadFile(file)
if err != nil {
continue
}
kv := parseKeyValueSummary(string(raw))
idx, err := strconv.Atoi(strings.TrimSpace(kv["gpu_index"]))
if err != nil {
continue
}
out[idx] = nvidiaPerGPUStatus{
runStatus: strings.ToUpper(strings.TrimSpace(kv["run_status"])),
reason: strings.TrimSpace(kv["reason"]),
}
}
if len(out) == 0 {
return nil, "", false
}
return out, runAtUTC, true
}
func telemetryInt(v any) (int, bool) {
switch value := v.(type) {
case int:
return value, true
case int32:
return int(value), true
case int64:
return int(value), true
case float64:
return int(value), true
case string:
n, err := strconv.Atoi(strings.TrimSpace(value))
if err != nil {
return 0, false
}
return n, true
default:
return 0, false
}
}
type satSummary struct {
@@ -141,9 +239,11 @@ func satSummaryStatus(summary satSummary, label string) (string, string, bool) {
func satKeyStatus(rawStatus, label string) (string, string, bool) {
switch strings.ToUpper(strings.TrimSpace(rawStatus)) {
case "OK":
// No error description on success — error_description is for problems only.
return "OK", "", true
case "PARTIAL", "UNSUPPORTED", "CANCELED", "CANCELLED":
// Tool couldn't run or test was incomplete — we can't assert hardware health.
return "Unknown", "", true
case "FAILED":
return "Critical", label + " failed", true
default:
@@ -172,6 +272,31 @@ func mergeComponentStatus(component *schema.HardwareComponentStatus, changedAt,
}
}
func mergeComponentStatusPreferDetail(component *schema.HardwareComponentStatus, changedAt, satStatus, description string) {
if component == nil || satStatus == "" {
return
}
current := strings.TrimSpace(ptrString(component.Status))
newSeverity := statusSeverity(satStatus)
currentSeverity := statusSeverity(current)
if current == "" || current == "Unknown" || newSeverity > currentSeverity {
mergeComponentStatus(component, changedAt, satStatus, description)
return
}
if newSeverity == currentSeverity && strings.TrimSpace(description) != "" {
component.Status = appStringPtr(satStatus)
component.ErrorDescription = appStringPtr(description)
if strings.TrimSpace(changedAt) != "" {
component.StatusChangedAt = appStringPtr(changedAt)
component.StatusHistory = append(component.StatusHistory, schema.HardwareStatusHistory{
Status: satStatus,
ChangedAt: changedAt,
Details: appStringPtr(description),
})
}
}
}
func statusSeverity(status string) int {
switch strings.TrimSpace(status) {
case "Critical":
@@ -180,6 +305,8 @@ func statusSeverity(status string) int {
return 2
case "OK":
return 1
case "Unknown":
return 1 // same as OK — does not override OK from another source
default:
return 0
}
@@ -202,6 +329,86 @@ func matchesGPUVendor(dev schema.HardwarePCIeDevice, vendor string) bool {
}
}
func applyComponentStatusDB(snap *schema.HardwareSnapshot, db *ComponentStatusDB) {
if snap == nil || db == nil {
return
}
for _, rec := range db.All() {
key := rec.ComponentKey
status := dbStatusToSATStatus(rec.Status)
if status == "" {
continue
}
detail := rec.ErrorSummary
ts := rec.LastChangedAt.UTC().Format("2006-01-02T15:04:05Z")
switch {
case strings.HasPrefix(key, "pcie:"):
bdf := strings.TrimPrefix(key, "pcie:")
bdf = strings.TrimPrefix(bdf, "gpu:") // strip sub-type if present
// bdf may be empty (e.g. "pcie:gpu:nvidia") — skip BDF matching
if sanitizeBDFForLookup(bdf) == "" {
break
}
normalized := sanitizeBDFForLookup(bdf)
for i := range snap.PCIeDevices {
if snap.PCIeDevices[i].BDF == nil {
continue
}
if sanitizeBDFForLookup(*snap.PCIeDevices[i].BDF) == normalized {
mergeComponentStatus(&snap.PCIeDevices[i].HardwareComponentStatus, ts, status, detail)
}
}
case strings.HasPrefix(key, "storage:"):
devName := strings.TrimPrefix(key, "storage:")
if devName == "all" {
for i := range snap.Storage {
mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
}
} else {
for i := range snap.Storage {
linuxDev, _ := snap.Storage[i].Telemetry["linux_device"].(string)
if filepath.Base(strings.TrimSpace(linuxDev)) == devName {
mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
}
}
}
case strings.HasPrefix(key, "memory:"):
for i := range snap.Memory {
mergeComponentStatus(&snap.Memory[i].HardwareComponentStatus, ts, status, detail)
}
case strings.HasPrefix(key, "cpu:"):
for i := range snap.CPUs {
mergeComponentStatus(&snap.CPUs[i].HardwareComponentStatus, ts, status, detail)
}
}
}
}
// dbStatusToSATStatus converts ComponentStatusDB status strings to the format
// expected by mergeComponentStatus (which uses "OK", "Warning", "Critical", "Unknown").
func dbStatusToSATStatus(s string) string {
switch strings.TrimSpace(s) {
case "OK", "Warning", "Critical", "Unknown":
return s
default:
return ""
}
}
// sanitizeBDFForLookup normalises a PCIe BDF address to a canonical lower-case form
// suitable for comparison. "c8:00.0" → "0000:c8:00.0"; already-full BDFs are left as-is.
func sanitizeBDFForLookup(bdf string) string {
bdf = strings.ToLower(strings.TrimSpace(bdf))
if bdf == "" || bdf == "gpu" || strings.ContainsAny(bdf, " \t") {
return ""
}
if strings.Count(bdf, ":") == 1 {
bdf = "0000:" + bdf
}
return bdf
}
func ptrString(v *string) string {
if v == nil {
return ""

View File

@@ -23,7 +23,7 @@ func TestApplyLatestSATStatusesMarksStorageByDevice(t *testing.T) {
usb := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/sda"}}
snap := schema.HardwareSnapshot{Storage: []schema.HardwareStorage{nvme, usb}}
applyLatestSATStatuses(&snap, baseDir, nil)
if snap.Storage[0].Status == nil || *snap.Storage[0].Status != "OK" {
t.Fatalf("nvme status=%v want OK", snap.Storage[0].Status)
@@ -53,9 +53,57 @@ func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
}},
}
applyLatestSATStatuses(&snap, baseDir, nil)
if snap.PCIeDevices[0].Status == nil || *snap.PCIeDevices[0].Status != "Critical" {
t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
}
}
func TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile(t *testing.T) {
baseDir := t.TempDir()
runDir := filepath.Join(baseDir, "gpu-nvidia-20260407-162123")
if err := os.MkdirAll(runDir, 0755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte("run_at_utc=2026-04-07T16:21:23Z\noverall_status=FAILED\n"), 0644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(runDir, "gpu-1-status.txt"), []byte("gpu_index=1\ngpu_name=NVIDIA H100 PCIe\nrun_status=FAILED\nreason=GPU requires reset\n"), 0644); err != nil {
t.Fatal(err)
}
class := "VideoController"
manufacturer := "NVIDIA Corporation"
bdf0 := "0000:4b:00.0"
bdf1 := "0000:4f:00.0"
snap := schema.HardwareSnapshot{
PCIeDevices: []schema.HardwarePCIeDevice{
{
DeviceClass: &class,
Manufacturer: &manufacturer,
BDF: &bdf0,
Telemetry: map[string]any{"nvidia_gpu_index": 0},
},
{
DeviceClass: &class,
Manufacturer: &manufacturer,
BDF: &bdf1,
Telemetry: map[string]any{"nvidia_gpu_index": 1},
},
},
}
applyLatestSATStatuses(&snap, baseDir, nil)
if snap.PCIeDevices[1].Status == nil || *snap.PCIeDevices[1].Status != "Critical" {
t.Fatalf("gpu1 status=%v want Critical", snap.PCIeDevices[1].Status)
}
if snap.PCIeDevices[1].ErrorDescription == nil || *snap.PCIeDevices[1].ErrorDescription != "GPU requires reset failed" {
got := "<nil>"
if snap.PCIeDevices[1].ErrorDescription != nil {
got = *snap.PCIeDevices[1].ErrorDescription
}
t.Fatalf("gpu1 error=%q want per-gpu reason", got)
}
}

View File

@@ -19,6 +19,8 @@ var supportBundleServices = []string{
"bee-network.service", "bee-network.service",
"bee-nvidia.service", "bee-nvidia.service",
"bee-preflight.service", "bee-preflight.service",
"bee-selfheal.service",
"bee-selfheal.timer",
"bee-sshsetup.service", "bee-sshsetup.service",
} }
@@ -27,15 +29,171 @@ var supportBundleCommands = []struct {
cmd []string
}{
{name: "system/uname.txt", cmd: []string{"uname", "-a"}},
{name: "system/cmdline.txt", cmd: []string{"cat", "/proc/cmdline"}},
{name: "system/lsmod.txt", cmd: []string{"lsmod"}}, {name: "system/lsmod.txt", cmd: []string{"lsmod"}},
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}}, {name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}}, {name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}}, {name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
{name: "system/mount.txt", cmd: []string{"mount"}}, {name: "system/mount.txt", cmd: []string{"mount"}},
{name: "system/df-h.txt", cmd: []string{"df", "-h"}}, {name: "system/df-h.txt", cmd: []string{"df", "-h"}},
{name: "system/dmesg-tail.txt", cmd: []string{"sh", "-c", "dmesg | tail -n 200"}}, {name: "system/dmesg.txt", cmd: []string{"dmesg"}},
{name: "system/kernel-aer-nvidia.txt", cmd: []string{"sh", "-c", `
if command -v dmesg >/dev/null 2>&1; then
dmesg | grep -iE 'AER|NVRM|Xid|pcieport|nvidia' || echo "no AER/NVRM/Xid kernel messages found"
else
echo "dmesg not found"
fi
`}},
{name: "system/nvidia-smi-q.txt", cmd: []string{"nvidia-smi", "-q"}},
{name: "system/lspci-nvidia-bridges-vv.txt", cmd: []string{"sh", "-c", `
if ! command -v lspci >/dev/null 2>&1; then
echo "lspci not found"
exit 0
fi
found=0
for gpu in $(lspci -Dn | awk '$3 ~ /^10de:/ {print $1}'); do
found=1
echo "=== GPU $gpu ==="
lspci -s "$gpu" -vv 2>&1 || true
bridge=$(basename "$(readlink -f "/sys/bus/pci/devices/$gpu/.." 2>/dev/null)" 2>/dev/null)
if [ -n "$bridge" ] && [ "$bridge" != "$gpu" ]; then
echo
echo "=== UPSTREAM $bridge for $gpu ==="
lspci -s "$bridge" -vv 2>&1 || true
fi
echo
done
if [ "$found" -eq 0 ]; then
echo "no NVIDIA PCI devices found"
fi
`}},
{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
for d in /sys/bus/pci/devices/*/; do
vendor=$(cat "$d/vendor" 2>/dev/null)
[ "$vendor" = "0x10de" ] || continue
dev=$(basename "$d")
echo "=== $dev ==="
for f in current_link_speed current_link_width max_link_speed max_link_width; do
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
done
done
`}},
{name: "system/pcie-aer-sysfs.txt", cmd: []string{"sh", "-c", `
found=0
for dev in /sys/bus/pci/devices/*; do
[ -e "$dev" ] || continue
bdf=$(basename "$dev")
block=""
for f in aer_dev_correctable aer_dev_fatal aer_dev_nonfatal aer_rootport_total_err_cor aer_rootport_total_err_fatal aer_rootport_total_err_nonfatal; do
if [ -r "$dev/$f" ]; then
if [ -z "$block" ]; then
block=1
found=1
echo "=== $bdf ==="
fi
printf " %-30s %s\n" "$f" "$(cat "$dev/$f" 2>/dev/null)"
fi
done
if [ -n "$block" ]; then
echo
fi
done
if [ "$found" -eq 0 ]; then
echo "no PCIe AER sysfs counters found"
fi
`}},
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -i "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -m "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
if ! command -v mstflint >/dev/null 2>&1; then
echo "mstflint not found"
exit 0
fi
found=0
for path in /sys/bus/pci/devices/*; do
[ -e "$path/vendor" ] || continue
vendor=$(cat "$path/vendor" 2>/dev/null)
[ "$vendor" = "0x15b3" ] || continue
bdf=$(basename "$path")
found=1
echo "=== $bdf ==="
mstflint -d "$bdf" q 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no Mellanox/NVIDIA networking devices found"
fi
`}},
} }
var supportBundleOptionalFiles = []struct {
name string
src string
}{
{name: "system/kern.log", src: "/var/log/kern.log"},
{name: "system/syslog.txt", src: "/var/log/syslog"},
}
const supportBundleGlob = "bee-support-*.tar.gz"
func BuildSupportBundle(exportDir string) (string, error) {
exportDir = strings.TrimSpace(exportDir)
if exportDir == "" {
@@ -75,6 +233,9 @@ func BuildSupportBundle(exportDir string) (string, error) {
return "", err return "", err
} }
} }
for _, item := range supportBundleOptionalFiles {
_ = copyOptionalFile(item.src, filepath.Join(stageRoot, item.name))
}
if err := writeManifest(filepath.Join(stageRoot, "manifest.txt"), exportDir, stageRoot); err != nil {
return "", err
}
@@ -86,34 +247,64 @@ func BuildSupportBundle(exportDir string) (string, error) {
return archivePath, nil
}
func LatestSupportBundlePath() (string, error) {
return latestSupportBundlePath(os.TempDir())
}
func cleanupOldSupportBundles(dir string) error {
matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
if err != nil {
return err
}
entries := supportBundleEntries(matches)
for path, mod := range entries {
if time.Since(mod) > 24*time.Hour {
_ = os.Remove(path)
delete(entries, path)
}
}
ordered := orderSupportBundles(entries)
if len(ordered) > 3 {
for _, old := range ordered[3:] {
_ = os.Remove(old)
}
}
return nil
}
func latestSupportBundlePath(dir string) (string, error) {
matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
if err != nil {
return "", err
}
ordered := orderSupportBundles(supportBundleEntries(matches))
if len(ordered) == 0 {
return "", os.ErrNotExist
}
return ordered[0], nil
}
func supportBundleEntries(matches []string) map[string]time.Time {
entries := make(map[string]time.Time, len(matches))
for _, match := range matches {
info, err := os.Stat(match)
if err != nil {
continue
}
entries[match] = info.ModTime()
}
return entries
}
func orderSupportBundles(entries map[string]time.Time) []string {
ordered := make([]string, 0, len(entries))
for path := range entries {
ordered = append(ordered, path)
}
sort.Slice(ordered, func(i, j int) bool {
return entries[ordered[i]].After(entries[ordered[j]])
})
return ordered
}
func writeJournalDump(dst string) error {
@@ -152,6 +343,24 @@ func writeCommandOutput(dst string, cmd []string) error {
return os.WriteFile(dst, raw, 0644)
}
func copyOptionalFile(src, dst string) error {
in, err := os.Open(src)
if err != nil {
return err
}
defer in.Close()
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
return err
}
out, err := os.Create(dst)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, in)
return err
}
func writeManifest(dst, exportDir, stageRoot string) error {
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
return err
@@ -215,7 +424,7 @@ func copyDirContents(srcDir, dstDir string) error {
}
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
if err := copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
if cleanRel == "" {
return true
@@ -227,7 +436,25 @@ func copyExportDirForSupportBundle(srcDir, dstDir string) error {
return false
}
return true
}); err != nil {
return err
}
return normalizeSupportBundleAuditJSON(filepath.Join(dstDir, "bee-audit.json"))
}
func normalizeSupportBundleAuditJSON(path string) error {
data, err := os.ReadFile(path)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
normalized, err := ApplySATOverlay(data)
if err != nil {
return nil
}
return os.WriteFile(path, normalized, 0644)
}
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {

View File

@@ -1,10 +1,18 @@
package collector
import (
"bee/audit/internal/schema"
"strings"
)
func NormalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
finalizeSnapshot(snap, collectedAt)
}
func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
snap.Memory = filterMemory(snap.Memory)
snap.Storage = filterStorage(snap.Storage)
snap.PCIeDevices = filterPCIe(snap.PCIeDevices)
snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
setComponentStatusMetadata(snap, collectedAt)
@@ -33,11 +41,25 @@ func filterStorage(disks []schema.HardwareStorage) []schema.HardwareStorage {
if disk.SerialNumber == nil || *disk.SerialNumber == "" {
continue
}
if disk.Model != nil && isVirtualHDiskModel(*disk.Model) {
continue
}
out = append(out, disk)
}
return out
}
func filterPCIe(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
out := make([]schema.HardwarePCIeDevice, 0, len(devs))
for _, dev := range devs {
if dev.DeviceClass != nil && strings.Contains(strings.ToLower(strings.TrimSpace(*dev.DeviceClass)), "co-processor") {
continue
}
out = append(out, dev)
}
return out
}
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
out := make([]schema.HardwarePowerSupply, 0, len(psus))
for _, psu := range psus {
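The filterPCIe change above drops devices by a case-insensitive substring match on the device class. A minimal standalone sketch of that filtering pattern, with types simplified and names illustrative rather than taken from the repo:

```go
package main

import (
	"fmt"
	"strings"
)

// device is a simplified stand-in for schema.HardwarePCIeDevice.
type device struct {
	Class string
	Model string
}

// dropCoProcessors keeps every device whose class does not contain
// "co-processor" (case-insensitive), mirroring the filterPCIe approach.
func dropCoProcessors(devs []device) []device {
	out := make([]device, 0, len(devs))
	for _, d := range devs {
		if strings.Contains(strings.ToLower(strings.TrimSpace(d.Class)), "co-processor") {
			continue
		}
		out = append(out, d)
	}
	return out
}

func main() {
	devs := []device{
		{Class: "Co-processor", Model: "QAT"},
		{Class: "VGA compatible controller", Model: "H100"},
	}
	fmt.Println(len(dropCoProcessors(devs))) // prints 1: only the GPU survives
}
```

Substring matching (rather than exact equality) tolerates lspci class-name variants such as "Co-processor [0b40]".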


@@ -10,6 +10,10 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
present := true
status := statusOK
serial := "SN-1"
virtualModel := "Virtual HDisk1"
realModel := "PASCARI"
coProcessorClass := "Co-processor"
gpuClass := "VideoController"
snap := schema.HardwareSnapshot{
Memory: []schema.HardwareMemory{
@@ -17,9 +21,15 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
},
Storage: []schema.HardwareStorage{
{Model: &virtualModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{Model: &realModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
},
PCIeDevices: []schema.HardwarePCIeDevice{
{DeviceClass: &coProcessorClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{DeviceClass: &gpuClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
},
PowerSupplies: []schema.HardwarePowerSupply{
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
@@ -31,9 +41,12 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
}
if len(snap.Storage) != 2 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
}
if len(snap.PCIeDevices) != 1 || snap.PCIeDevices[0].DeviceClass == nil || *snap.PCIeDevices[0].DeviceClass != gpuClass {
t.Fatalf("pcie finalize mismatch: %+v", snap.PCIeDevices)
}
if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
}


@@ -2,18 +2,21 @@ package collector
import (
"bee/audit/internal/schema"
"context"
"log/slog" "log/slog"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
)
const mellanoxVendorID = 0x15b3
const nicProbeTimeout = 2 * time.Second
var (
mstflintQuery = func(bdf string) (string, error) {
out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
if err != nil {
return "", err
}
@@ -21,7 +24,7 @@ var (
}
ethtoolInfoQuery = func(iface string) (string, error) {
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
if err != nil {
return "", err
}
@@ -29,6 +32,14 @@ var (
}
netIfacesByBDF = listNetIfacesByBDF
readNetCarrierFile = func(iface string) (string, error) {
path := filepath.Join("/sys/class/net", iface, "carrier")
raw, err := os.ReadFile(path)
if err != nil {
return "", err
}
return strings.TrimSpace(string(raw)), nil
}
)
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
}
return ifaces
}
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
return exec.CommandContext(ctx, name, args...).Output()
}
func interfaceHasCarrier(iface string) bool {
raw, err := readNetCarrierFile(iface)
if err != nil {
return false
}
return strings.TrimSpace(raw) == "1"
}


@@ -12,7 +12,7 @@ import (
var (
ethtoolModuleQuery = func(iface string) (string, error) {
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
if err != nil {
return "", err
}
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
}
}
if interfaceHasCarrier(iface) {
if out, err := ethtoolModuleQuery(iface); err == nil {
if injectSFPDOMTelemetry(&devs[i], out) {
enriched++
continue
}
}
}
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {


@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() {
queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
})
queryPCILSPCIDetail = func(bdf string) (string, error) {
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
return "aa:bb:cc:dd:ee:ff", nil
}
readNetCarrierFile = func(string) (string, error) { return "1", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
}
}
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
origIfaces := netIfacesByBDF
origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() {
netIfacesByBDF = origIfaces
readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
})
netIfacesByBDF = func(string) []string { return []string{"eth0"} }
readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
readNetCarrierFile = func(string) (string, error) { return "0", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) {
t.Fatal("ethtool -m should not be called without carrier")
return "", nil
}
class := "EthernetController"
bdf := "0000:18:00.0"
devs := []schema.HardwarePCIeDevice{{
DeviceClass: &class,
BDF: &bdf,
}}
out := enrichPCIeWithNICTelemetry(devs)
if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
}
}
func TestDBMValue(t *testing.T) {
tests := []struct {
in string


@@ -13,14 +13,19 @@ import (
const nvidiaVendorID = 0x10de
type nvidiaGPUInfo struct {
Index int
BDF string
Serial string
VBIOS string
TemperatureC *float64
PowerW *float64
ECCUncorrected *int64
ECCCorrected *int64
HWSlowdown *bool
PCIeLinkGenCurrent *int
PCIeLinkGenMax *int
PCIeLinkWidthCur *int
PCIeLinkWidthMax *int
}
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
@@ -94,7 +99,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
out, err := exec.Command(
"nvidia-smi",
"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
"--format=csv,noheader,nounits",
).Output()
if err != nil {
@@ -118,8 +123,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
if len(rec) == 0 {
continue
}
if len(rec) < 13 {
return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 13", len(rec))
}
bdf := normalizePCIeBDF(rec[1])
@@ -128,14 +133,19 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
}
info := nvidiaGPUInfo{
Index: parseRequiredInt(rec[0]),
BDF: bdf,
Serial: strings.TrimSpace(rec[2]),
VBIOS: strings.TrimSpace(rec[3]),
TemperatureC: parseMaybeFloat(rec[4]),
PowerW: parseMaybeFloat(rec[5]),
ECCUncorrected: parseMaybeInt64(rec[6]),
ECCCorrected: parseMaybeInt64(rec[7]),
HWSlowdown: parseMaybeBool(rec[8]),
PCIeLinkGenCurrent: parseMaybeInt(rec[9]),
PCIeLinkGenMax: parseMaybeInt(rec[10]),
PCIeLinkWidthCur: parseMaybeInt(rec[11]),
PCIeLinkWidthMax: parseMaybeInt(rec[12]),
}
result[bdf] = info
}
@@ -167,6 +177,30 @@ func parseMaybeInt64(v string) *int64 {
return &n
}
func parseMaybeInt(v string) *int {
v = strings.TrimSpace(v)
if v == "" || strings.EqualFold(v, "n/a") || strings.EqualFold(v, "not supported") || strings.EqualFold(v, "[not supported]") {
return nil
}
n, err := strconv.Atoi(v)
if err != nil {
return nil
}
return &n
}
func parseRequiredInt(v string) int {
n, err := strconv.Atoi(strings.TrimSpace(v))
if err != nil {
return 0
}
return n
}
func pcieLinkGenLabel(gen int) string {
return fmt.Sprintf("Gen%d", gen)
}
func parseMaybeBool(v string) *bool {
v = strings.TrimSpace(strings.ToLower(v))
switch v {
@@ -216,6 +250,10 @@ func setPCIeFallback(dev *schema.HardwarePCIeDevice) {
}
func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
if dev.Telemetry == nil {
dev.Telemetry = map[string]any{}
}
dev.Telemetry["nvidia_gpu_index"] = info.Index
if info.TemperatureC != nil {
dev.TemperatureC = info.TemperatureC
}
@@ -231,4 +269,22 @@ func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
if info.HWSlowdown != nil {
dev.HWSlowdown = info.HWSlowdown
}
// Override PCIe link speed/width with nvidia-smi driver values.
// sysfs current_link_speed reflects the instantaneous physical link state and
// can show Gen1 when the GPU is idle due to ASPM power management. The driver
// knows the negotiated speed regardless of the current power state.
if info.PCIeLinkGenCurrent != nil {
s := pcieLinkGenLabel(*info.PCIeLinkGenCurrent)
dev.LinkSpeed = &s
}
if info.PCIeLinkGenMax != nil {
s := pcieLinkGenLabel(*info.PCIeLinkGenMax)
dev.MaxLinkSpeed = &s
}
if info.PCIeLinkWidthCur != nil {
dev.LinkWidth = info.PCIeLinkWidthCur
}
if info.PCIeLinkWidthMax != nil {
dev.MaxLinkWidth = info.PCIeLinkWidthMax
}
}


@@ -6,7 +6,7 @@ import (
)
func TestParseNVIDIASMIQuery(t *testing.T) {
raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
byBDF, err := parseNVIDIASMIQuery(raw)
if err != nil {
t.Fatalf("parse failed: %v", err)
@@ -28,6 +28,12 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
if gpu.HWSlowdown == nil || *gpu.HWSlowdown {
t.Fatalf("hw slowdown: got %v, want false", gpu.HWSlowdown)
}
if gpu.PCIeLinkGenCurrent == nil || *gpu.PCIeLinkGenCurrent != 4 {
t.Fatalf("pcie link gen current: got %v, want 4", gpu.PCIeLinkGenCurrent)
}
if gpu.PCIeLinkGenMax == nil || *gpu.PCIeLinkGenMax != 4 {
t.Fatalf("pcie link gen max: got %v, want 4", gpu.PCIeLinkGenMax)
}
}
func TestNormalizePCIeBDF(t *testing.T) {
@@ -80,6 +86,9 @@ func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
if out[0].Firmware == nil || *out[0].Firmware != "96.00.1F.00.02" {
t.Fatalf("firmware: got %v", out[0].Firmware)
}
if out[0].Telemetry == nil || out[0].Telemetry["nvidia_gpu_index"] != 0 {
t.Fatalf("telemetry nvidia_gpu_index: got %#v", out[0].Telemetry)
}
if out[0].Status == nil || *out[0].Status != statusWarning {
t.Fatalf("status: got %v", out[0].Status)
}
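The parser above treats nvidia-smi placeholder values as absent rather than failing the whole record. A standalone sketch of that maybe-int parsing pattern; the helper name is illustrative and this handles only the placeholder spellings shown in the diff:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// maybeInt parses a CSV field from nvidia-smi, returning nil when the GPU
// reports a placeholder like "N/A" or "[Not Supported]" instead of a number.
// Mirrors the diff's parseMaybeInt.
func maybeInt(v string) *int {
	v = strings.TrimSpace(v)
	if v == "" || strings.EqualFold(v, "n/a") || strings.EqualFold(v, "[not supported]") {
		return nil
	}
	n, err := strconv.Atoi(v)
	if err != nil {
		return nil
	}
	return &n
}

func main() {
	for _, v := range []string{"4", " 16 ", "N/A", "[Not Supported]"} {
		if n := maybeInt(v); n != nil {
			fmt.Println(*n)
		} else {
			fmt.Println("nil")
		}
	}
}
```

Returning a nil pointer instead of an error lets one missing field (e.g. ECC counters on consumer GPUs) leave the rest of the record usable.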


@@ -59,6 +59,7 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
"host bridge", "host bridge",
"isa bridge", "isa bridge",
"pci bridge", "pci bridge",
"co-processor",
"performance counter", "performance counter",
"performance counters", "performance counters",
"ram memory", "ram memory",


@@ -19,6 +19,7 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
{name: "audio", class: "Audio device", want: false},
{name: "host bridge", class: "Host bridge", want: false},
{name: "pci bridge", class: "PCI bridge", want: false},
{name: "co-processor", class: "Co-processor", want: false},
{name: "smbus", class: "SMBus", want: false},
{name: "perf", class: "Performance counters", want: false},
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
@@ -76,6 +77,20 @@ func TestParseLspci_filtersAMDChipsetNoise(t *testing.T) {
}
}
func TestParseLspci_filtersCoProcessors(t *testing.T) {
input := "" +
"Slot:\t0000:01:00.0\nClass:\tCo-processor\nVendor:\tIntel Corporation\nDevice:\t402xx Series QAT\n\n" +
"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
devs := parseLspci(input)
if len(devs) != 1 {
t.Fatalf("expected 1 remaining device, got %d", len(devs))
}
if devs[0].Model == nil || *devs[0].Model != "H100" {
t.Fatalf("unexpected remaining device: %+v", devs[0])
}
}
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"


@@ -77,11 +77,28 @@ func discoverStorageDevices() []lsblkDevice {
if dev.Type != "disk" {
continue
}
if isVirtualBMCDisk(dev) {
slog.Debug("storage: skipping BMC virtual disk", "name", dev.Name, "model", dev.Model)
continue
}
disks = append(disks, dev)
}
return disks
}
// isVirtualBMCDisk returns true for BMC/IPMI virtual USB mass storage devices
// that appear as disks but are not real hardware (e.g. iDRAC Virtual HDisk*).
// These have zero reported size, a generic fake serial, and a model name that
// starts with "Virtual HDisk".
func isVirtualBMCDisk(dev lsblkDevice) bool {
return isVirtualHDiskModel(dev.Model)
}
func isVirtualHDiskModel(model string) bool {
model = strings.ToLower(strings.TrimSpace(model))
return strings.HasPrefix(model, "virtual hdisk")
}
func lsblkDevices() []lsblkDevice {
out, err := exec.Command("lsblk", "-J", "-d",
"-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL").Output()

File diff suppressed because it is too large


@@ -0,0 +1,344 @@
package platform
import (
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
return renderBenchmarkReportWithCharts(result, nil)
}
type benchmarkReportChart struct {
Title string
Content string
}
var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
var b strings.Builder
// ── Header ────────────────────────────────────────────────────────────────
b.WriteString("# Bee NVIDIA Benchmark Report\n\n")
// System identity block
if result.ServerModel != "" {
fmt.Fprintf(&b, "**Server:** %s \n", result.ServerModel)
}
if result.Hostname != "" {
fmt.Fprintf(&b, "**Host:** %s \n", result.Hostname)
}
// GPU models summary
if len(result.GPUs) > 0 {
modelCount := make(map[string]int)
var modelOrder []string
for _, g := range result.GPUs {
m := strings.TrimSpace(g.Name)
if m == "" {
m = "Unknown GPU"
}
if modelCount[m] == 0 {
modelOrder = append(modelOrder, m)
}
modelCount[m]++
}
var parts []string
for _, m := range modelOrder {
if modelCount[m] == 1 {
parts = append(parts, m)
} else {
parts = append(parts, fmt.Sprintf("%d× %s", modelCount[m], m))
}
}
fmt.Fprintf(&b, "**GPU(s):** %s \n", strings.Join(parts, ", "))
}
fmt.Fprintf(&b, "**Profile:** %s \n", result.BenchmarkProfile)
fmt.Fprintf(&b, "**App version:** %s \n", result.BenchmarkVersion)
fmt.Fprintf(&b, "**Generated:** %s \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
if result.ParallelGPUs {
fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously) \n")
}
fmt.Fprintf(&b, "**Overall status:** %s \n", result.OverallStatus)
b.WriteString("\n")
// ── Executive Summary ─────────────────────────────────────────────────────
if len(result.Findings) > 0 {
b.WriteString("## Executive Summary\n\n")
for _, finding := range result.Findings {
fmt.Fprintf(&b, "- %s\n", finding)
}
b.WriteString("\n")
}
if len(result.Warnings) > 0 {
b.WriteString("## Warnings\n\n")
for _, warning := range result.Warnings {
fmt.Fprintf(&b, "- %s\n", warning)
}
b.WriteString("\n")
}
// ── Scorecard table ───────────────────────────────────────────────────────
b.WriteString("## Scorecard\n\n")
b.WriteString("| GPU | Status | Composite | Compute | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
b.WriteString("|-----|--------|-----------|---------|-------------|---------------|-----------------|-----------|-------------|\n")
for _, gpu := range result.GPUs {
name := strings.TrimSpace(gpu.Name)
if name == "" {
name = "Unknown"
}
interconnect := "-"
if gpu.Scores.InterconnectScore > 0 {
interconnect = fmt.Sprintf("%.1f", gpu.Scores.InterconnectScore)
}
topsPerSM := "-"
if gpu.Scores.TOPSPerSMPerGHz > 0 {
topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
}
fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %.1f | %.1f | %.1f | %s |\n",
gpu.Index, name,
gpu.Status,
gpu.Scores.CompositeScore,
gpu.Scores.ComputeScore,
topsPerSM,
gpu.Scores.PowerSustainScore,
gpu.Scores.ThermalSustainScore,
gpu.Scores.StabilityScore,
interconnect,
)
}
b.WriteString("\n")
// ── Per GPU detail ────────────────────────────────────────────────────────
b.WriteString("## Per-GPU Details\n\n")
for _, gpu := range result.GPUs {
name := strings.TrimSpace(gpu.Name)
if name == "" {
name = "Unknown GPU"
}
fmt.Fprintf(&b, "### GPU %d — %s\n\n", gpu.Index, name)
// Identity
if gpu.BusID != "" {
fmt.Fprintf(&b, "- **Bus ID:** %s\n", gpu.BusID)
}
if gpu.VBIOS != "" {
fmt.Fprintf(&b, "- **vBIOS:** %s\n", gpu.VBIOS)
}
if gpu.ComputeCapability != "" {
fmt.Fprintf(&b, "- **Compute capability:** %s\n", gpu.ComputeCapability)
}
if gpu.MultiprocessorCount > 0 {
fmt.Fprintf(&b, "- **SMs:** %d\n", gpu.MultiprocessorCount)
}
if gpu.PowerLimitW > 0 {
fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
}
if gpu.LockedGraphicsClockMHz > 0 {
fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
}
b.WriteString("\n")
// Steady-state telemetry
fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
b.WriteString("| | Avg | P95 |\n|---|---|---|\n")
fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW)
fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC)
fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz)
fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz)
fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
b.WriteString("\n")
// Throttle
throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
if throttle != "none" {
fmt.Fprintf(&b, "**Throttle:** %s\n\n", throttle)
}
// Precision results
if len(gpu.PrecisionResults) > 0 {
b.WriteString("**Precision results:**\n\n")
b.WriteString("| Precision | TOPS | Lanes | Iterations |\n|-----------|------|-------|------------|\n")
for _, p := range gpu.PrecisionResults {
if p.Supported {
fmt.Fprintf(&b, "| %s | %.2f | %d | %d |\n", p.Name, p.TeraOpsPerSec, p.Lanes, p.Iterations)
} else {
fmt.Fprintf(&b, "| %s | — (unsupported) | — | — |\n", p.Name)
}
}
b.WriteString("\n")
}
// Degradation / Notes
if len(gpu.DegradationReasons) > 0 {
fmt.Fprintf(&b, "**Degradation reasons:** %s\n\n", strings.Join(gpu.DegradationReasons, ", "))
}
if len(gpu.Notes) > 0 {
b.WriteString("**Notes:**\n\n")
for _, note := range gpu.Notes {
fmt.Fprintf(&b, "- %s\n", note)
}
b.WriteString("\n")
}
}
// ── Interconnect ──────────────────────────────────────────────────────────
if result.Interconnect != nil {
b.WriteString("## Interconnect (NCCL)\n\n")
fmt.Fprintf(&b, "**Status:** %s\n\n", result.Interconnect.Status)
if result.Interconnect.Supported {
b.WriteString("| Metric | Avg | Max |\n|--------|-----|-----|\n")
fmt.Fprintf(&b, "| Alg BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.MaxAlgBWGBps)
fmt.Fprintf(&b, "| Bus BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgBusBWGBps, result.Interconnect.MaxBusBWGBps)
b.WriteString("\n")
}
for _, note := range result.Interconnect.Notes {
fmt.Fprintf(&b, "- %s\n", note)
}
if len(result.Interconnect.Notes) > 0 {
b.WriteString("\n")
}
}
// ── Server Power (IPMI) ───────────────────────────────────────────────────
if sp := result.ServerPower; sp != nil {
b.WriteString("## Server Power (IPMI)\n\n")
if !sp.Available {
b.WriteString("IPMI power measurement unavailable.\n\n")
} else {
b.WriteString("| | Value |\n|---|---|\n")
fmt.Fprintf(&b, "| Server idle | %.0f W |\n", sp.IdleW)
fmt.Fprintf(&b, "| Server under load | %.0f W |\n", sp.LoadedW)
fmt.Fprintf(&b, "| Server delta (load - idle) | %.0f W |\n", sp.DeltaW)
fmt.Fprintf(&b, "| GPU-reported sum | %.0f W |\n", sp.GPUReportedSumW)
if sp.ReportingRatio > 0 {
fmt.Fprintf(&b, "| Reporting ratio | %.2f (1.0 = accurate, <0.75 = GPU over-reports) |\n", sp.ReportingRatio)
}
b.WriteString("\n")
}
for _, note := range sp.Notes {
fmt.Fprintf(&b, "- %s\n", note)
}
if len(sp.Notes) > 0 {
b.WriteString("\n")
}
}
// ── Terminal charts (steady-state only) ───────────────────────────────────
if len(charts) > 0 {
b.WriteString("## Steady-State Charts\n\n")
for _, chart := range charts {
content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
if content == "" {
continue
}
fmt.Fprintf(&b, "### %s\n\n```\n%s\n```\n\n", chart.Title, content)
}
}
// ── Methodology ───────────────────────────────────────────────────────────
b.WriteString("## Methodology\n\n")
fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile)
b.WriteString("- Single-GPU compute score from bee-gpu-burn cuBLASLt when available.\n")
b.WriteString("- Thermal and power limitations inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
b.WriteString("- `result.json` is the canonical machine-readable source for this benchmark run.\n\n")
// ── Raw files ─────────────────────────────────────────────────────────────
b.WriteString("## Raw Files\n\n")
b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
b.WriteString("- `gpu-*-baseline-metrics.csv/html/term.txt`\n")
b.WriteString("- `gpu-*-warmup.log`\n")
b.WriteString("- `gpu-*-steady.log`\n")
b.WriteString("- `gpu-*-steady-metrics.csv/html/term.txt`\n")
b.WriteString("- `gpu-*-cooldown-metrics.csv/html/term.txt`\n")
if result.Interconnect != nil {
b.WriteString("- `nccl-all-reduce.log`\n")
}
return b.String()
}
// loadBenchmarkReportCharts loads only steady-state terminal charts (baseline and
// cooldown charts are not useful for human review).
func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
var charts []benchmarkReportChart
for _, idx := range gpuIndices {
path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-steady-metrics-term.txt", idx))
raw, err := os.ReadFile(path)
if err != nil || len(raw) == 0 {
continue
}
charts = append(charts, benchmarkReportChart{
Title: fmt.Sprintf("GPU %d — Steady State", idx),
Content: string(raw),
})
}
return charts
}
func stripANSIEscapeSequences(raw string) string {
return ansiEscapePattern.ReplaceAllString(raw, "")
}
// formatThrottleLine renders throttle counters as human-readable percentages of
// the steady-state window. Only non-zero counters are shown. When the steady
// duration is unknown (0), raw seconds are shown instead.
func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64) string {
type counter struct {
label string
us uint64
}
counters := []counter{
{"sw_power", t.SWPowerCapUS},
{"sw_thermal", t.SWThermalSlowdownUS},
{"sync_boost", t.SyncBoostUS},
{"hw_thermal", t.HWThermalSlowdownUS},
{"hw_power_brake", t.HWPowerBrakeSlowdownUS},
}
var parts []string
for _, c := range counters {
if c.us == 0 {
continue
}
sec := float64(c.us) / 1e6
if steadyDurationSec > 0 {
pct := sec / steadyDurationSec * 100
parts = append(parts, fmt.Sprintf("%s=%.1f%% (%.0fs)", c.label, pct, sec))
} else if sec < 1 {
parts = append(parts, fmt.Sprintf("%s=%.0fms", c.label, sec*1000))
} else {
parts = append(parts, fmt.Sprintf("%s=%.1fs", c.label, sec))
}
}
if len(parts) == 0 {
return "none"
}
return strings.Join(parts, " ")
}
func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
var b strings.Builder
fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
fmt.Fprintf(&b, "normalization_status=%s\n", result.Normalization.Status)
var best float64
for i, gpu := range result.GPUs {
fmt.Fprintf(&b, "gpu_%d_status=%s\n", gpu.Index, gpu.Status)
fmt.Fprintf(&b, "gpu_%d_composite_score=%.2f\n", gpu.Index, gpu.Scores.CompositeScore)
if i == 0 || gpu.Scores.CompositeScore > best {
best = gpu.Scores.CompositeScore
}
}
fmt.Fprintf(&b, "best_composite_score=%.2f\n", best)
if result.Interconnect != nil {
fmt.Fprintf(&b, "interconnect_status=%s\n", result.Interconnect.Status)
fmt.Fprintf(&b, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps)
}
return b.String()
}



@@ -0,0 +1,180 @@
package platform
import (
"strings"
"testing"
)
func TestResolveBenchmarkProfile(t *testing.T) {
t.Parallel()
cases := []struct {
name string
profile string
want benchmarkProfileSpec
}{
{
name: "default",
profile: "",
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
},
{
name: "stability",
profile: "stability",
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
},
{
name: "overnight",
profile: "overnight",
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
got := resolveBenchmarkProfile(tc.profile)
if got != tc.want {
t.Fatalf("profile=%q got %+v want %+v", tc.profile, got, tc.want)
}
})
}
}
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
t.Parallel()
opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
Profile: "stability",
RunNCCL: false,
})
if opts.Profile != NvidiaBenchmarkProfileStability {
t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
}
if opts.RunNCCL {
t.Fatalf("RunNCCL should stay false when explicitly disabled")
}
}
func TestParseBenchmarkBurnLog(t *testing.T) {
t.Parallel()
raw := strings.Join([]string{
"loader=bee-gpu-burn",
"[gpu 0] device=NVIDIA H100",
"[gpu 0] compute_capability=9.0",
"[gpu 0] backend=cublasLt",
"[gpu 0] duration_s=10",
"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
"[gpu 0] fp16_tensor_iterations=200",
"[gpu 0] fp8_e4m3_iterations=50",
"[gpu 0] status=OK",
}, "\n")
got := parseBenchmarkBurnLog(raw)
if got.Backend != "cublasLt" {
t.Fatalf("backend=%q want cublasLt", got.Backend)
}
if got.ComputeCapability != "9.0" {
t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
}
if len(got.Profiles) != 2 {
t.Fatalf("profiles=%d want 2", len(got.Profiles))
}
if got.Profiles[0].TeraOpsPerSec <= 0 {
t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
}
if got.Profiles[1].Category != "fp8" {
t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
}
}
func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
t.Parallel()
result := NvidiaBenchmarkResult{
BenchmarkVersion: benchmarkVersion,
BenchmarkProfile: NvidiaBenchmarkProfileStandard,
OverallStatus: "PARTIAL",
SelectedGPUIndices: []int{0},
Normalization: BenchmarkNormalization{
Status: "partial",
},
Findings: []string{"GPU 0 spent measurable time under SW power cap."},
GPUs: []BenchmarkGPUResult{
{
Index: 0,
Name: "NVIDIA H100",
Status: "OK",
Steady: BenchmarkTelemetrySummary{
AvgPowerW: 680,
AvgTempC: 79,
AvgGraphicsClockMHz: 1725,
P95PowerW: 700,
P95TempC: 82,
P95GraphicsClockMHz: 1800,
},
Scores: BenchmarkScorecard{
ComputeScore: 1200,
PowerSustainScore: 96,
ThermalSustainScore: 88,
StabilityScore: 92,
CompositeScore: 1176,
},
PrecisionResults: []BenchmarkPrecisionResult{
{Name: "fp16_tensor", Supported: true, TeraOpsPerSec: 700},
},
Throttle: BenchmarkThrottleCounters{
SWPowerCapUS: 1000000,
},
DegradationReasons: []string{"power_capped"},
},
},
}
report := renderBenchmarkReport(result)
for _, needle := range []string{
"Executive Summary",
"GPU 0 spent measurable time under SW power cap.",
"1176.00",
"fp16_tensor",
"700.00",
} {
if !strings.Contains(report, needle) {
t.Fatalf("report missing %q\n%s", needle, report)
}
}
}
func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
t.Parallel()
report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
BenchmarkProfile: NvidiaBenchmarkProfileStandard,
OverallStatus: "OK",
SelectedGPUIndices: []int{0},
Normalization: BenchmarkNormalization{
Status: "full",
},
}, []benchmarkReportChart{
{
Title: "GPU 0 Steady State",
Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
},
})
for _, needle := range []string{
"Steady-State Charts",
"GPU 0 Steady State",
"GPU 0 chart",
"42┤───",
} {
if !strings.Contains(report, needle) {
t.Fatalf("report missing %q\n%s", needle, report)
}
}
if strings.Contains(report, "\x1b[31m") {
t.Fatalf("report should not contain ANSI escapes\n%s", report)
}
}


@@ -0,0 +1,158 @@
package platform
import "time"
const (
NvidiaBenchmarkProfileStandard = "standard"
NvidiaBenchmarkProfileStability = "stability"
NvidiaBenchmarkProfileOvernight = "overnight"
)
type NvidiaBenchmarkOptions struct {
Profile string
SizeMB int
GPUIndices []int
ExcludeGPUIndices []int
RunNCCL bool
ParallelGPUs bool // run all selected GPUs simultaneously instead of sequentially
}
type NvidiaBenchmarkResult struct {
BenchmarkVersion string `json:"benchmark_version"`
GeneratedAt time.Time `json:"generated_at"`
Hostname string `json:"hostname,omitempty"`
ServerModel string `json:"server_model,omitempty"`
BenchmarkProfile string `json:"benchmark_profile"`
ParallelGPUs bool `json:"parallel_gpus,omitempty"`
OverallStatus string `json:"overall_status"`
SelectedGPUIndices []int `json:"selected_gpu_indices"`
Findings []string `json:"findings,omitempty"`
Warnings []string `json:"warnings,omitempty"`
Normalization BenchmarkNormalization `json:"normalization"`
GPUs []BenchmarkGPUResult `json:"gpus"`
Interconnect *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
ServerPower *BenchmarkServerPower `json:"server_power,omitempty"`
}
type BenchmarkNormalization struct {
Status string `json:"status"`
Notes []string `json:"notes,omitempty"`
GPUs []BenchmarkNormalizationGPU `json:"gpus,omitempty"`
}
type BenchmarkNormalizationGPU struct {
Index int `json:"index"`
PersistenceMode string `json:"persistence_mode,omitempty"`
GPUClockLockMHz float64 `json:"gpu_clock_lock_mhz,omitempty"`
GPUClockLockStatus string `json:"gpu_clock_lock_status,omitempty"`
MemoryClockLockMHz float64 `json:"memory_clock_lock_mhz,omitempty"`
MemoryClockLockStatus string `json:"memory_clock_lock_status,omitempty"`
Notes []string `json:"notes,omitempty"`
}
type BenchmarkGPUResult struct {
Index int `json:"index"`
UUID string `json:"uuid,omitempty"`
Name string `json:"name,omitempty"`
BusID string `json:"bus_id,omitempty"`
VBIOS string `json:"vbios,omitempty"`
ComputeCapability string `json:"compute_capability,omitempty"`
Backend string `json:"backend,omitempty"`
Status string `json:"status"`
PowerLimitW float64 `json:"power_limit_w,omitempty"`
MultiprocessorCount int `json:"multiprocessor_count,omitempty"`
DefaultPowerLimitW float64 `json:"default_power_limit_w,omitempty"`
MaxGraphicsClockMHz float64 `json:"max_graphics_clock_mhz,omitempty"`
BaseGraphicsClockMHz float64 `json:"base_graphics_clock_mhz,omitempty"`
MaxMemoryClockMHz float64 `json:"max_memory_clock_mhz,omitempty"`
LockedGraphicsClockMHz float64 `json:"locked_graphics_clock_mhz,omitempty"`
LockedMemoryClockMHz float64 `json:"locked_memory_clock_mhz,omitempty"`
Baseline BenchmarkTelemetrySummary `json:"baseline"`
Steady BenchmarkTelemetrySummary `json:"steady"`
Cooldown BenchmarkTelemetrySummary `json:"cooldown"`
Throttle BenchmarkThrottleCounters `json:"throttle_counters"`
PrecisionResults []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
Scores BenchmarkScorecard `json:"scores"`
DegradationReasons []string `json:"degradation_reasons,omitempty"`
Notes []string `json:"notes,omitempty"`
}
type BenchmarkTelemetrySummary struct {
DurationSec float64 `json:"duration_sec"`
Samples int `json:"samples"`
AvgTempC float64 `json:"avg_temp_c"`
P95TempC float64 `json:"p95_temp_c"`
AvgPowerW float64 `json:"avg_power_w"`
P95PowerW float64 `json:"p95_power_w"`
AvgGraphicsClockMHz float64 `json:"avg_graphics_clock_mhz"`
P95GraphicsClockMHz float64 `json:"p95_graphics_clock_mhz"`
AvgMemoryClockMHz float64 `json:"avg_memory_clock_mhz"`
P95MemoryClockMHz float64 `json:"p95_memory_clock_mhz"`
AvgUsagePct float64 `json:"avg_usage_pct"`
AvgMemUsagePct float64 `json:"avg_mem_usage_pct"`
ClockCVPct float64 `json:"clock_cv_pct"`
PowerCVPct float64 `json:"power_cv_pct"`
TempCVPct float64 `json:"temp_cv_pct"`
ClockDriftPct float64 `json:"clock_drift_pct"`
}
type BenchmarkThrottleCounters struct {
SWPowerCapUS uint64 `json:"sw_power_cap_us"`
SWThermalSlowdownUS uint64 `json:"sw_thermal_slowdown_us"`
SyncBoostUS uint64 `json:"sync_boost_us"`
HWThermalSlowdownUS uint64 `json:"hw_thermal_slowdown_us"`
HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
}
type BenchmarkPrecisionResult struct {
Name string `json:"name"`
Category string `json:"category"`
Supported bool `json:"supported"`
Lanes int `json:"lanes,omitempty"`
M uint64 `json:"m,omitempty"`
N uint64 `json:"n,omitempty"`
K uint64 `json:"k,omitempty"`
Iterations uint64 `json:"iterations,omitempty"`
TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
Notes string `json:"notes,omitempty"`
}
type BenchmarkScorecard struct {
ComputeScore float64 `json:"compute_score"`
PowerSustainScore float64 `json:"power_sustain_score"`
ThermalSustainScore float64 `json:"thermal_sustain_score"`
StabilityScore float64 `json:"stability_score"`
InterconnectScore float64 `json:"interconnect_score"`
CompositeScore float64 `json:"composite_score"`
// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
// Comparable across throttle levels and GPU generations. Low value at normal
// clocks indicates silicon degradation.
TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
}
// BenchmarkServerPower captures server-side power via IPMI alongside GPU-reported
// power. A reporting_ratio (delta / gpu_reported_sum) near 1.0 means GPU power
// telemetry is accurate; a ratio well below 1.0 (e.g. 0.5) means the GPUs are
// over-reporting their power consumption.
type BenchmarkServerPower struct {
Available bool `json:"available"`
IdleW float64 `json:"idle_w,omitempty"`
LoadedW float64 `json:"loaded_w,omitempty"`
DeltaW float64 `json:"delta_w,omitempty"`
GPUReportedSumW float64 `json:"gpu_reported_sum_w,omitempty"`
ReportingRatio float64 `json:"reporting_ratio,omitempty"`
Notes []string `json:"notes,omitempty"`
}
type BenchmarkInterconnectResult struct {
Status string `json:"status"`
Attempted bool `json:"attempted"`
Supported bool `json:"supported"`
SelectedGPUIndices []int `json:"selected_gpu_indices,omitempty"`
AvgAlgBWGBps float64 `json:"avg_algbw_gbps,omitempty"`
MaxAlgBWGBps float64 `json:"max_algbw_gbps,omitempty"`
AvgBusBWGBps float64 `json:"avg_busbw_gbps,omitempty"`
MaxBusBWGBps float64 `json:"max_busbw_gbps,omitempty"`
Notes []string `json:"notes,omitempty"`
}


@@ -0,0 +1,139 @@
package platform
import "regexp"
// ErrorPattern describes a kernel log pattern that indicates a hardware error.
// Add new patterns by appending to HardwareErrorPatterns — no other code changes needed.
type ErrorPattern struct {
// Name is a short machine-readable label for logging and deduplication.
Name string
// Re is the compiled regular expression matched against a single kmsg line.
Re *regexp.Regexp
// Category groups related errors: "gpu", "pcie", "storage", "mce", "memory", "cpu".
Category string
// Severity is "warning" for recoverable/uncertain faults, "critical" for definitive failures.
Severity string
// BDFGroup is the capture group index (1-based) that contains a PCIe BDF address
// (e.g. "0000:c8:00.0"). 0 means no BDF is captured by this pattern.
BDFGroup int
// DevGroup is the capture group index (1-based) that contains a device name
// (e.g. "sda", "nvme0"). 0 means no device name is captured by this pattern.
DevGroup int
}
// HardwareErrorPatterns is the global list of kernel log patterns that indicate hardware faults.
// To add a new pattern: append a new ErrorPattern struct to this slice.
var HardwareErrorPatterns = []ErrorPattern{
// ── GPU / NVIDIA ────────────────────────────────────────────────────────────
{
Name: "nvidia-rminitadapter",
Re: mustPat(`(?i)NVRM:.*GPU\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d)`),
Category: "gpu",
Severity: "warning",
BDFGroup: 1,
},
{
Name: "nvidia-msi-fail",
Re: mustPat(`(?i)NVRM:.*Failed to enable MSI`),
Category: "gpu",
Severity: "warning",
},
{
Name: "nvidia-aer",
Re: mustPat(`(?i)nvidia\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
Category: "gpu",
Severity: "warning",
BDFGroup: 1,
},
{
Name: "nvidia-xid",
Re: mustPat(`(?i)NVRM:.*Xid.*\b([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d)`),
Category: "gpu",
Severity: "warning",
BDFGroup: 1,
},
// ── PCIe AER (generic) ──────────────────────────────────────────────────────
{
Name: "pcie-aer",
Re: mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
Category: "pcie",
Severity: "warning",
BDFGroup: 1,
},
{
Name: "pcie-uncorrectable",
Re: mustPat(`(?i)([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*[Uu]ncorrectable`),
Category: "pcie",
Severity: "warning",
BDFGroup: 1,
},
{
Name: "pcie-link-down",
Re: mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*[Ll]ink.*[Dd]own`),
Category: "pcie",
Severity: "warning",
BDFGroup: 1,
},
// ── Storage ─────────────────────────────────────────────────────────────────
{
Name: "blk-io-error",
Re: mustPat(`(?i)blk_update_request.*I/O error.*dev\s+(\w+)`),
Category: "storage",
Severity: "warning",
DevGroup: 1,
},
{
Name: "nvme-timeout",
Re: mustPat(`(?i)nvme\s+(\w+):.*timeout`),
Category: "storage",
Severity: "warning",
DevGroup: 1,
},
{
Name: "scsi-failed",
Re: mustPat(`(?i)sd\s+[\da-f:]+:.*FAILED`),
Category: "storage",
Severity: "warning",
},
{
Name: "nvme-reset",
Re: mustPat(`(?i)nvme\s+(\w+):.*reset`),
Category: "storage",
Severity: "warning",
DevGroup: 1,
},
// ── Machine Check Exceptions ────────────────────────────────────────────────
{
Name: "mce-hardware-error",
Re: mustPat(`(?i)mce:.*[Hh]ardware [Ee]rror`),
Category: "mce",
Severity: "warning",
},
{
Name: "mce-corrected",
Re: mustPat(`(?i)mce:.*[Cc]orrected`),
Category: "mce",
Severity: "warning",
},
// ── Memory ─────────────────────────────────────────────────────────────────
{
Name: "edac-ue",
Re: mustPat(`(?i)EDAC.*[Uu]ncorrectable`),
Category: "memory",
Severity: "warning",
},
{
Name: "edac-ce",
Re: mustPat(`(?i)EDAC.*[Cc]orrectable`),
Category: "memory",
Severity: "warning",
},
}
func mustPat(s string) *regexp.Regexp {
return regexp.MustCompile(s)
}


@@ -13,18 +13,20 @@ import (
// GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
type GPUMetricRow struct {
	ElapsedSec  float64 `json:"elapsed_sec"`
	GPUIndex    int     `json:"index"`
	TempC       float64 `json:"temp_c"`
	UsagePct    float64 `json:"usage_pct"`
	MemUsagePct float64 `json:"mem_usage_pct"`
	PowerW      float64 `json:"power_w"`
	ClockMHz    float64 `json:"clock_mhz"`
	MemClockMHz float64 `json:"mem_clock_mhz"`
}

// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
	args := []string{
		"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics,clocks.current.memory",
		"--format=csv,noheader,nounits",
	}
	if len(gpuIndices) > 0 {
@@ -45,16 +47,18 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
			continue
		}
		parts := strings.Split(line, ", ")
		if len(parts) < 7 {
			continue
		}
		idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
		rows = append(rows, GPUMetricRow{
			GPUIndex:    idx,
			TempC:       parseGPUFloat(parts[1]),
			UsagePct:    parseGPUFloat(parts[2]),
			MemUsagePct: parseGPUFloat(parts[3]),
			PowerW:      parseGPUFloat(parts[4]),
			ClockMHz:    parseGPUFloat(parts[5]),
			MemClockMHz: parseGPUFloat(parts[6]),
		})
	}
	return rows, nil
@@ -74,13 +78,73 @@ func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
	return sampleGPUMetrics(gpuIndices)
}
// sampleAMDGPUMetrics queries rocm-smi for live GPU metrics.
func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
if err != nil {
return nil, err
}
lines := strings.Split(strings.TrimSpace(string(out)), "\n")
if len(lines) < 2 {
return nil, fmt.Errorf("rocm-smi: insufficient output")
}
// Parse header to find column indices by name.
headers := strings.Split(lines[0], ",")
colIdx := func(keywords ...string) int {
for i, h := range headers {
hl := strings.ToLower(strings.TrimSpace(h))
for _, kw := range keywords {
if strings.Contains(hl, kw) {
return i
}
}
}
return -1
}
idxTemp := colIdx("sensor edge", "temperature (c)", "temp")
idxUse := colIdx("gpu use (%)")
idxMem := colIdx("vram%", "memory allocated")
idxPow := colIdx("average graphics package power", "power (w)")
var rows []GPUMetricRow
for _, line := range lines[1:] {
line = strings.TrimSpace(line)
if line == "" {
continue
}
parts := strings.Split(line, ",")
idx := len(rows)
row := GPUMetricRow{GPUIndex: idx}
get := func(i int) float64 {
if i < 0 || i >= len(parts) {
return 0
}
v := strings.TrimSpace(parts[i])
if strings.EqualFold(v, "n/a") {
return 0
}
return parseGPUFloat(v)
}
row.TempC = get(idxTemp)
row.UsagePct = get(idxUse)
row.MemUsagePct = get(idxMem)
row.PowerW = get(idxPow)
rows = append(rows, row)
}
if len(rows) == 0 {
return nil, fmt.Errorf("rocm-smi: no GPU rows parsed")
}
return rows, nil
}
// WriteGPUMetricsCSV writes collected rows as a CSV file.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
	var b bytes.Buffer
	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
	for _, r := range rows {
		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
			r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
	}
	return os.WriteFile(path, b.Bytes(), 0644)
}
@@ -135,7 +199,7 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
	const PW = plotX2 - plotX1
	const PH = plotY2 - plotY1
	// Outer axes
	const tempAxisX = 60   // temp axis line
	const clockAxisX = 900 // clock axis line
	colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
@@ -332,7 +396,7 @@ const (
)

// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
// Used in SAT stress-test logs.
func RenderGPUTerminalChart(rows []GPUMetricRow) string {
	seen := make(map[int]bool)
	var order []int
@@ -375,162 +439,6 @@ func RenderGPUTerminalChart(rows []GPUMetricRow) string {
	return strings.TrimRight(b.String(), "\n")
}
// RenderGPULiveChart renders all GPU metrics on a single combined chart per GPU.
// Each series is normalised to its own minmax and drawn in a different colour.
// chartWidth controls the width of the plot area (Y-axis label uses 5 extra chars).
func RenderGPULiveChart(rows []GPUMetricRow, chartWidth int) string {
if chartWidth < 20 {
chartWidth = 70
}
const chartHeight = 14
seen := make(map[int]bool)
var order []int
gpuMap := make(map[int][]GPUMetricRow)
for _, r := range rows {
if !seen[r.GPUIndex] {
seen[r.GPUIndex] = true
order = append(order, r.GPUIndex)
}
gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
}
type seriesDef struct {
label string
color string
unit string
fn func(GPUMetricRow) float64
}
defs := []seriesDef{
{"Usage", ansiBlue, "%", func(r GPUMetricRow) float64 { return r.UsagePct }},
{"Temp", ansiRed, "°C", func(r GPUMetricRow) float64 { return r.TempC }},
{"Power", ansiGreen, "W", func(r GPUMetricRow) float64 { return r.PowerW }},
}
var b strings.Builder
for _, gpuIdx := range order {
gr := gpuMap[gpuIdx]
if len(gr) == 0 {
continue
}
elapsed := gr[len(gr)-1].ElapsedSec
// Build value slices for each series.
type seriesData struct {
seriesDef
vals []float64
mn float64
mx float64
}
var series []seriesData
for _, d := range defs {
vals := extractGPUField(gr, d.fn)
mn, mx := gpuMinMax(vals)
if mn == mx {
mx = mn + 1
}
series = append(series, seriesData{d, vals, mn, mx})
}
// Shared character grid: row 0 = top (max), row chartHeight = bottom (min).
type cell struct {
ch rune
color string
}
grid := make([][]cell, chartHeight+1)
for r := range grid {
grid[r] = make([]cell, chartWidth)
for c := range grid[r] {
grid[r][c] = cell{' ', ""}
}
}
// Plot each series onto the shared grid.
for _, s := range series {
w := chartWidth
if len(s.vals) < w {
w = len(s.vals)
}
data := gpuDownsample(s.vals, w)
prevRow := -1
for x, v := range data {
row := chartHeight - int(math.Round((v-s.mn)/(s.mx-s.mn)*float64(chartHeight)))
if row < 0 {
row = 0
}
if row > chartHeight {
row = chartHeight
}
if prevRow < 0 || prevRow == row {
grid[row][x] = cell{'─', s.color}
} else {
lo, hi := prevRow, row
if lo > hi {
lo, hi = hi, lo
}
for y := lo + 1; y < hi; y++ {
grid[y][x] = cell{'│', s.color}
}
if prevRow < row {
grid[prevRow][x] = cell{'╮', s.color}
grid[row][x] = cell{'╰', s.color}
} else {
grid[prevRow][x] = cell{'╯', s.color}
grid[row][x] = cell{'╭', s.color}
}
}
prevRow = row
}
}
// Render: Y axis + data rows.
fmt.Fprintf(&b, "GPU %d (%.0fs) each series normalised to its range\n", gpuIdx, elapsed)
for r := 0; r <= chartHeight; r++ {
// Y axis label: 100% at top, 50% in middle, 0% at bottom.
switch r {
case 0:
fmt.Fprintf(&b, "%4s┤", "100%")
case chartHeight / 2:
fmt.Fprintf(&b, "%4s┤", "50%")
case chartHeight:
fmt.Fprintf(&b, "%4s┤", "0%")
default:
fmt.Fprintf(&b, "%4s│", "")
}
for c := 0; c < chartWidth; c++ {
cl := grid[r][c]
if cl.color != "" {
b.WriteString(cl.color)
b.WriteRune(cl.ch)
b.WriteString(ansiReset)
} else {
b.WriteRune(' ')
}
}
b.WriteRune('\n')
}
// Bottom axis.
b.WriteString(" └")
b.WriteString(strings.Repeat("─", chartWidth))
b.WriteRune('\n')
// Legend with current (last) values.
b.WriteString(" ")
for i, s := range series {
last := s.vals[len(s.vals)-1]
b.WriteString(s.color)
fmt.Fprintf(&b, "▐ %s: %.0f%s", s.label, last, s.unit)
b.WriteString(ansiReset)
if i < len(series)-1 {
b.WriteString(" ")
}
}
b.WriteRune('\n')
}
return strings.TrimRight(b.String(), "\n")
}
// renderLineChart draws a single time-series line chart using box-drawing characters.
// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
func renderLineChart(vals []float64, color, caption string, height, width int) string {


@@ -0,0 +1,142 @@
package platform
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"time"
)
// HPLOptions configures the HPL (LINPACK) benchmark run.
type HPLOptions struct {
MemFraction float64 // fraction of RAM to use (default 0.80)
NB int // block size (default 256)
}
// HPLResult holds the parsed result of an HPL run.
type HPLResult struct {
N int // matrix dimension
NB int // block size
P int // process grid rows
Q int // process grid cols
TimeSec float64 // wall time in seconds
GFlops float64 // achieved performance
Residual float64 // backward error residual (from HPL verification line)
Status string // "PASSED" or "FAILED"
RawOutput string // full xhpl output
}
func applyHPLDefaults(opts *HPLOptions) {
if opts.MemFraction <= 0 || opts.MemFraction > 1 {
opts.MemFraction = 0.80
}
if opts.NB <= 0 {
opts.NB = 256
}
}
// RunHPL runs bee-hpl and returns parsed results plus a tar.gz artifact path.
func (s *System) RunHPL(ctx context.Context, baseDir string, opts HPLOptions, logFunc func(string)) (string, *HPLResult, error) {
applyHPLDefaults(&opts)
if baseDir == "" {
baseDir = "/var/log/bee-sat"
}
ts := time.Now().UTC().Format("20060102-150405")
runDir := filepath.Join(baseDir, "hpl-"+ts)
if err := os.MkdirAll(runDir, 0755); err != nil {
return "", nil, fmt.Errorf("mkdir %s: %w", runDir, err)
}
logPath := filepath.Join(runDir, "hpl.log")
cmd := []string{
"bee-hpl",
"--mem-fraction", strconv.FormatFloat(opts.MemFraction, 'f', 2, 64),
"--nb", strconv.Itoa(opts.NB),
}
if logFunc != nil {
logFunc(fmt.Sprintf("HPL: N will be auto-sized to %.0f%% of RAM, NB=%d", opts.MemFraction*100, opts.NB))
}
out, err := runSATCommandCtx(ctx, "", "hpl", cmd, nil, logFunc)
_ = os.WriteFile(logPath, out, 0644)
result := parseHPLOutput(string(out))
result.RawOutput = string(out)
if err != nil && err != context.Canceled {
return "", result, fmt.Errorf("bee-hpl failed: %w", err)
}
if err == nil && result.GFlops <= 0 {
return "", result, fmt.Errorf("HPL completed but no Gflops result found in output")
}
// Write summary
summary := fmt.Sprintf("N=%d NB=%d time=%.2fs gflops=%.3f status=%s\n",
result.N, result.NB, result.TimeSec, result.GFlops, result.Status)
_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
if logFunc != nil {
logFunc(fmt.Sprintf("HPL result: N=%d NB=%d %.2fs %.3f Gflops %s",
result.N, result.NB, result.TimeSec, result.GFlops, result.Status))
}
ts2 := time.Now().UTC().Format("20060102-150405")
archive := filepath.Join(baseDir, "hpl-"+ts2+".tar.gz")
if archErr := createTarGz(archive, runDir); archErr != nil {
return runDir, result, err
}
return archive, result, err
}
// parseHPLOutput extracts N, NB, time, and Gflops from standard HPL output.
//
// HPL prints a result line of the form:
//
// WR00L2L2 45312 256 1 1 1234.56 5.678e+01
// T/V N NB P Q Time Gflops
func parseHPLOutput(output string) *HPLResult {
result := &HPLResult{Status: "FAILED"}
for _, line := range strings.Split(output, "\n") {
line = strings.TrimSpace(line)
// Result line starts with WR
if strings.HasPrefix(line, "WR") {
fields := strings.Fields(line)
// WR00L2L2 N NB P Q Time Gflops
if len(fields) >= 7 {
result.N, _ = strconv.Atoi(fields[1])
result.NB, _ = strconv.Atoi(fields[2])
result.P, _ = strconv.Atoi(fields[3])
result.Q, _ = strconv.Atoi(fields[4])
result.TimeSec, _ = strconv.ParseFloat(fields[5], 64)
result.GFlops, _ = strconv.ParseFloat(fields[6], 64)
}
}
// Verification line: "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ... PASSED"
if strings.Contains(line, "PASSED") {
result.Status = "PASSED"
fields := strings.Fields(line)
for i, f := range fields {
if f == "PASSED" && i > 0 {
result.Residual, _ = strconv.ParseFloat(fields[i-1], 64)
}
}
}
}
return result
}
// hplAvailable returns true if bee-hpl and xhpl are present and executable.
func hplAvailable() bool {
if _, err := exec.LookPath("bee-hpl"); err != nil {
return false
}
_, err := os.Stat("/usr/local/lib/bee/xhpl")
return err == nil
}


@@ -3,6 +3,7 @@ package platform
import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"
@@ -10,13 +11,17 @@ import (
// InstallDisk describes a candidate disk for installation.
type InstallDisk struct {
	Device       string   // e.g. /dev/sda
	Model        string
	Size         string   // human-readable, e.g. "500G"
	SizeBytes    int64    // raw byte count from lsblk
	MountedParts []string // partition mount points currently active
}

const squashfsPath = "/run/live/medium/live/filesystem.squashfs"

// ListInstallDisks returns block devices suitable for installation.
// Excludes the current live boot medium but includes USB drives.
func (s *System) ListInstallDisks() ([]InstallDisk, error) {
	out, err := exec.Command("lsblk", "-dn", "-o", "NAME,MODEL,SIZE,TYPE,TRAN").Output()
	if err != nil {
@@ -33,7 +38,6 @@ func (s *System) ListInstallDisks() ([]InstallDisk, error) {
			continue
		}
		// Last field: TRAN, second-to-last: TYPE, third-to-last: SIZE
tran := fields[len(fields)-1]
		typ := fields[len(fields)-2]
		size := fields[len(fields)-3]
		name := fields[0]
@@ -42,24 +46,58 @@ func (s *System) ListInstallDisks() ([]InstallDisk, error) {
		if typ != "disk" {
			continue
		}
if strings.EqualFold(tran, "usb") {
continue
}
		device := "/dev/" + name
		if device == bootDev {
			continue
		}
		sizeBytes := diskSizeBytes(device)
		mounted := mountedParts(device)
		disks = append(disks, InstallDisk{
			Device:       device,
			Model:        strings.TrimSpace(model),
			Size:         size,
			SizeBytes:    sizeBytes,
			MountedParts: mounted,
		})
	}
	return disks, nil
}
// diskSizeBytes returns the byte size of a block device using lsblk.
func diskSizeBytes(device string) int64 {
out, err := exec.Command("lsblk", "-bdn", "-o", "SIZE", device).Output()
if err != nil {
return 0
}
n, _ := strconv.ParseInt(strings.TrimSpace(string(out)), 10, 64)
return n
}
// mountedParts returns a list of "<part> at <mountpoint>" strings for any
// mounted partitions on the given device.
func mountedParts(device string) []string {
out, err := exec.Command("lsblk", "-n", "-o", "NAME,MOUNTPOINT", device).Output()
if err != nil {
return nil
}
var result []string
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
fields := strings.Fields(line)
if len(fields) < 2 {
continue
}
mp := fields[1]
if mp == "" || mp == "[SWAP]" {
continue
}
result = append(result, "/dev/"+strings.TrimLeft(fields[0], "└─├─")+" at "+mp)
}
return result
}
// findLiveBootDevice returns the block device backing /run/live/medium (if any).
func findLiveBootDevice() string {
out, err := exec.Command("findmnt", "-n", "-o", "SOURCE", "/run/live/medium").Output()
@@ -79,6 +117,135 @@ func findLiveBootDevice() string {
return "/dev/" + strings.TrimSpace(string(out2))
}
func mountSource(target string) string {
out, err := exec.Command("findmnt", "-n", "-o", "SOURCE", target).Output()
if err != nil {
return ""
}
return strings.TrimSpace(string(out))
}
func mountFSType(target string) string {
out, err := exec.Command("findmnt", "-n", "-o", "FSTYPE", target).Output()
if err != nil {
return ""
}
return strings.TrimSpace(string(out))
}
func blockDeviceType(device string) string {
if strings.TrimSpace(device) == "" {
return ""
}
out, err := exec.Command("lsblk", "-dn", "-o", "TYPE", device).Output()
if err != nil {
return ""
}
return strings.TrimSpace(string(out))
}
func blockDeviceTransport(device string) string {
if strings.TrimSpace(device) == "" {
return ""
}
out, err := exec.Command("lsblk", "-dn", "-o", "TRAN", device).Output()
if err != nil {
return ""
}
return strings.TrimSpace(string(out))
}
func inferLiveBootKind(fsType, source, deviceType, transport string) string {
switch {
case strings.EqualFold(strings.TrimSpace(fsType), "tmpfs"):
return "ram"
case strings.EqualFold(strings.TrimSpace(deviceType), "rom"):
return "cdrom"
case strings.EqualFold(strings.TrimSpace(transport), "usb"):
return "usb"
case strings.HasPrefix(strings.TrimSpace(source), "/dev/sr"):
return "cdrom"
case strings.HasPrefix(strings.TrimSpace(source), "/dev/"):
return "disk"
default:
return "unknown"
}
}
// MinInstallBytes returns the minimum recommended disk size for installation:
// squashfs size × 1.5 to allow for extracted filesystem and bootloader.
// Returns 0 if the squashfs is not available (non-live environment).
func MinInstallBytes() int64 {
fi, err := os.Stat(squashfsPath)
if err != nil {
return 0
}
return fi.Size() * 3 / 2
}
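The ×1.5 sizing rule above is implemented with integer math (`size * 3 / 2`) to avoid floating point. A tiny illustrative sketch of the same rule (the real function stats the squashfs on disk rather than taking a size argument):

```go
package main

import "fmt"

// minInstallBytes applies the same ×1.5 headroom rule as MinInstallBytes,
// expressed as size*3/2 so the arithmetic stays in int64.
func minInstallBytes(squashfsSize int64) int64 {
	return squashfsSize * 3 / 2
}

func main() {
	// A 2 GiB squashfs needs at least a 3 GiB target disk.
	fmt.Println(minInstallBytes(2 << 30))
}
```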
// toramActive returns true when the live system was booted with toram.
func toramActive() bool {
data, err := os.ReadFile("/proc/cmdline")
if err != nil {
return false
}
return strings.Contains(string(data), "toram")
}
// freeMemBytes returns MemAvailable from /proc/meminfo.
func freeMemBytes() int64 {
data, err := os.ReadFile("/proc/meminfo")
if err != nil {
return 0
}
for _, line := range strings.Split(string(data), "\n") {
if strings.HasPrefix(line, "MemAvailable:") {
fields := strings.Fields(line)
if len(fields) >= 2 {
n, _ := strconv.ParseInt(fields[1], 10, 64)
return n * 1024 // kB → bytes
}
}
}
return 0
}
// DiskWarnings returns advisory warning strings for a disk candidate.
func DiskWarnings(d InstallDisk) []string {
var w []string
if len(d.MountedParts) > 0 {
w = append(w, "has mounted partitions: "+strings.Join(d.MountedParts, ", "))
}
min := MinInstallBytes()
if min > 0 && d.SizeBytes > 0 && d.SizeBytes < min {
w = append(w, fmt.Sprintf("disk may be too small (need ≥ %s, have %s)",
humanBytes(min), humanBytes(d.SizeBytes)))
}
if toramActive() {
sqFi, err := os.Stat(squashfsPath)
if err == nil {
free := freeMemBytes()
if free > 0 && free < sqFi.Size()*2 {
w = append(w, "toram mode — low RAM, extraction may be slow or fail")
}
}
}
return w
}
func humanBytes(b int64) string {
const unit = 1024
if b < unit {
return fmt.Sprintf("%d B", b)
}
div, exp := int64(unit), 0
for n := b / unit; n >= unit; n /= unit {
div *= unit
exp++
}
return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
}
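A few spot checks of the `humanBytes` formatting above (the function body is reproduced verbatim from the patch so the example is self-contained):

```go
package main

import "fmt"

// humanBytes reproduced from the patch: binary units, one decimal place.
func humanBytes(b int64) string {
	const unit = 1024
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}
	div, exp := int64(unit), 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}
	return fmt.Sprintf("%.1f %cB", float64(b)/float64(div), "KMGTPE"[exp])
}

func main() {
	fmt.Println(humanBytes(512))        // 512 B
	fmt.Println(humanBytes(1536))       // 1.5 KB
	fmt.Println(humanBytes(3221225472)) // 3.0 GB
}
```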
// InstallToDisk runs bee-install <device> <logfile> and streams output to logFile.
// The context can be used to cancel.
func (s *System) InstallToDisk(ctx context.Context, device string, logFile string) error {
@@ -92,14 +259,11 @@ func InstallLogPath(device string) string {
return "/tmp/bee-install" + safe + ".log"
}
// Label returns a display label for a disk.
func (d InstallDisk) Label() string {
model := d.Model
if model == "" {
model = "Unknown"
}
return fmt.Sprintf("%s %s %s", d.Device, d.Size, model)
}


@@ -0,0 +1,301 @@
package platform
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
)
func (s *System) IsLiveMediaInRAM() bool {
fsType := mountFSType("/run/live/medium")
if fsType == "" {
return toramActive()
}
return strings.EqualFold(fsType, "tmpfs")
}
func (s *System) LiveBootSource() LiveBootSource {
fsType := mountFSType("/run/live/medium")
source := mountSource("/run/live/medium")
device := findLiveBootDevice()
status := LiveBootSource{
InRAM: strings.EqualFold(fsType, "tmpfs"),
Source: source,
Device: device,
}
if fsType == "" && source == "" && device == "" {
if toramActive() {
status.InRAM = true
status.Kind = "ram"
status.Source = "tmpfs"
return status
}
status.Kind = "unknown"
return status
}
status.Kind = inferLiveBootKind(fsType, source, blockDeviceType(device), blockDeviceTransport(device))
if status.Kind == "" {
status.Kind = "unknown"
}
if status.InRAM && strings.TrimSpace(status.Source) == "" {
status.Source = "tmpfs"
}
return status
}
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
log := func(msg string) {
if logFunc != nil {
logFunc(msg)
}
}
if s.IsLiveMediaInRAM() {
log("Already running from RAM — installation media can be safely disconnected.")
return nil
}
squashfsFiles, err := filepath.Glob("/run/live/medium/live/*.squashfs")
if err != nil || len(squashfsFiles) == 0 {
return fmt.Errorf("no squashfs files found in /run/live/medium/live/")
}
free := freeMemBytes()
var needed int64
for _, sf := range squashfsFiles {
fi, err2 := os.Stat(sf)
if err2 != nil {
return fmt.Errorf("stat %s: %v", sf, err2)
}
needed += fi.Size()
}
const headroom = 256 * 1024 * 1024
if free > 0 && needed+headroom > free {
return fmt.Errorf("insufficient RAM: need %s, available %s",
humanBytes(needed+headroom), humanBytes(free))
}
dstDir := "/dev/shm/bee-live"
if err := os.MkdirAll(dstDir, 0755); err != nil {
return fmt.Errorf("create tmpfs dir: %v", err)
}
for _, sf := range squashfsFiles {
if err := ctx.Err(); err != nil {
return err
}
base := filepath.Base(sf)
dst := filepath.Join(dstDir, base)
log(fmt.Sprintf("Copying %s to RAM...", base))
if err := copyFileLarge(ctx, sf, dst, log); err != nil {
return fmt.Errorf("copy %s: %v", base, err)
}
log(fmt.Sprintf("Copied %s.", base))
loopDev, err := findLoopForFile(sf)
if err != nil {
log(fmt.Sprintf("Loop device for %s not found (%v) — skipping re-association.", base, err))
continue
}
if err := reassociateLoopDevice(loopDev, dst); err != nil {
log(fmt.Sprintf("Warning: could not re-associate %s → %s: %v", loopDev, dst, err))
} else {
log(fmt.Sprintf("Loop device %s now backed by RAM copy.", loopDev))
}
}
log("Copying remaining medium files...")
if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
log(fmt.Sprintf("Warning: partial copy: %v", err))
}
if err := ctx.Err(); err != nil {
return err
}
mediumRebound := false
if err := bindMount(dstDir, "/run/live/medium"); err != nil {
log(fmt.Sprintf("Warning: rebind /run/live/medium → %s failed: %v", dstDir, err))
} else {
mediumRebound = true
}
log("Verifying live medium now served from RAM...")
status := s.LiveBootSource()
if err := verifyInstallToRAMStatus(status, dstDir, mediumRebound, log); err != nil {
return err
}
if status.InRAM {
log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
}
log("Done. Squashfs files are in RAM. Installation media can be safely disconnected.")
return nil
}
func verifyInstallToRAMStatus(status LiveBootSource, dstDir string, mediumRebound bool, log func(string)) error {
if status.InRAM {
return nil
}
// The live medium mount was not redirected to RAM. This is expected when
// booting from an ISO/CD-ROM: the squashfs loop device has a non-zero
// offset and LOOP_CHANGE_FD cannot be used; the bind mount also fails
// because the CD-ROM mount is in use. Check whether files were at least
// copied to the tmpfs directory — that is sufficient for safe disconnection
// once the kernel has paged in all actively-used data.
files, _ := filepath.Glob(filepath.Join(dstDir, "*.squashfs"))
if len(files) > 0 {
if !mediumRebound {
log(fmt.Sprintf("Note: squashfs copied to RAM (%s) but /run/live/medium still shows the original source.", dstDir))
log("This is normal for CD-ROM boots. For a fully transparent RAM boot, add 'toram' to the kernel parameters.")
}
return nil
}
return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s and no squashfs found in %s", describeLiveBootSource(status), dstDir)
}
func describeLiveBootSource(status LiveBootSource) string {
source := strings.TrimSpace(status.Device)
if source == "" {
source = strings.TrimSpace(status.Source)
}
if source == "" {
source = "unknown source"
}
switch strings.TrimSpace(status.Kind) {
case "ram":
return "RAM"
case "usb":
return "USB (" + source + ")"
case "cdrom":
return "CD-ROM (" + source + ")"
case "disk":
return "disk (" + source + ")"
default:
return source
}
}
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
in, err := os.Open(src)
if err != nil {
return err
}
defer in.Close()
fi, err := in.Stat()
if err != nil {
return err
}
out, err := os.Create(dst)
if err != nil {
return err
}
defer out.Close()
total := fi.Size()
var copied int64
buf := make([]byte, 4*1024*1024)
for {
if err := ctx.Err(); err != nil {
return err
}
n, err := in.Read(buf)
if n > 0 {
if _, werr := out.Write(buf[:n]); werr != nil {
return werr
}
copied += int64(n)
if logFunc != nil && total > 0 {
pct := int(float64(copied) / float64(total) * 100)
logFunc(fmt.Sprintf(" %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
}
}
if err == io.EOF {
break
}
if err != nil {
return err
}
}
return out.Sync()
}
func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
if ctx.Err() != nil {
return ctx.Err()
}
if err != nil {
return nil
}
rel, _ := filepath.Rel(src, path)
target := filepath.Join(dst, rel)
if fi.IsDir() {
return os.MkdirAll(target, fi.Mode())
}
if strings.HasSuffix(path, ".squashfs") {
return nil
}
if _, err := os.Stat(target); err == nil {
return nil
}
return copyFileLarge(ctx, path, target, nil)
})
}
func findLoopForFile(backingFile string) (string, error) {
out, err := exec.Command("losetup", "--list", "--json").Output()
if err != nil {
return "", err
}
var result struct {
Loopdevices []struct {
Name string `json:"name"`
BackFile string `json:"back-file"`
} `json:"loopdevices"`
}
if err := json.Unmarshal(out, &result); err != nil {
return "", err
}
for _, dev := range result.Loopdevices {
if dev.BackFile == backingFile {
return dev.Name, nil
}
}
return "", fmt.Errorf("no loop device found for %s", backingFile)
}
// loopDeviceOffset returns the byte offset configured for the loop device,
// or -1 if it cannot be determined.
func loopDeviceOffset(loopDev string) int64 {
out, err := exec.Command("losetup", "--json", loopDev).Output()
if err != nil {
return -1
}
var result struct {
Loopdevices []struct {
Offset int64 `json:"offset"`
} `json:"loopdevices"`
}
if err := json.Unmarshal(out, &result); err != nil || len(result.Loopdevices) == 0 {
return -1
}
return result.Loopdevices[0].Offset
}
func reassociateLoopDevice(loopDev, newFile string) error {
// LOOP_CHANGE_FD requires lo_offset == 0. ISO/CD-ROM loop devices are
// typically set up with a non-zero offset (squashfs lives inside the ISO),
// so the ioctl returns EINVAL. Detect this early for a clear error message.
if off := loopDeviceOffset(loopDev); off > 0 {
return fmt.Errorf("loop device has non-zero offset (%d bytes, typical for ISO/CD-ROM) — LOOP_CHANGE_FD not supported; use 'toram' kernel parameter for RAM boot", off)
}
if err := exec.Command("losetup", "--replace", loopDev, newFile).Run(); err == nil {
return nil
}
return loopChangeFD(loopDev, newFile)
}


@@ -0,0 +1,33 @@
//go:build linux
package platform
import (
"os"
"syscall"
)
const ioctlLoopChangeFD = 0x4C08
func loopChangeFD(loopDev, newFile string) error {
lf, err := os.OpenFile(loopDev, os.O_RDWR, 0)
if err != nil {
return err
}
defer lf.Close()
nf, err := os.OpenFile(newFile, os.O_RDONLY, 0)
if err != nil {
return err
}
defer nf.Close()
_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, lf.Fd(), ioctlLoopChangeFD, nf.Fd())
if errno != 0 {
return errno
}
return nil
}
// bindMount binds src over dst using the syscall directly (avoids exec PATH issues).
func bindMount(src, dst string) error {
return syscall.Mount(src, dst, "", syscall.MS_BIND, "")
}


@@ -0,0 +1,13 @@
//go:build !linux
package platform
import "errors"
func loopChangeFD(loopDev, newFile string) error {
return errors.New("LOOP_CHANGE_FD not available on this platform")
}
func bindMount(src, dst string) error {
return errors.New("bind mount not available on this platform")
}


@@ -0,0 +1,60 @@
package platform
import "testing"
func TestInferLiveBootKind(t *testing.T) {
t.Parallel()
tests := []struct {
name string
fsType string
source string
deviceType string
transport string
want string
}{
{name: "ram tmpfs", fsType: "tmpfs", source: "/dev/shm/bee-live", want: "ram"},
{name: "usb disk", source: "/dev/sdb1", deviceType: "disk", transport: "usb", want: "usb"},
{name: "cdrom rom", source: "/dev/sr0", deviceType: "rom", want: "cdrom"},
{name: "disk sata", source: "/dev/nvme0n1p1", deviceType: "disk", transport: "nvme", want: "disk"},
{name: "unknown", source: "overlay", want: "unknown"},
}
for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) {
got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
if got != tc.want {
t.Fatalf("inferLiveBootKind(%q,%q,%q,%q)=%q want %q", tc.fsType, tc.source, tc.deviceType, tc.transport, got, tc.want)
}
})
}
}
func TestVerifyInstallToRAMStatus(t *testing.T) {
t.Parallel()
dstDir := t.TempDir()
if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}, dstDir, false, nil); err != nil {
t.Fatalf("expected success for RAM-backed status, got %v", err)
}
err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}, dstDir, false, nil)
if err == nil {
t.Fatal("expected verification failure when media is still on USB")
}
if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1) and no squashfs found in "+dstDir {
t.Fatalf("error=%q", got)
}
}
func TestDescribeLiveBootSource(t *testing.T) {
t.Parallel()
if got := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"}); got != "RAM" {
t.Fatalf("got %q want RAM", got)
}
if got := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}); got != "/run/live/medium" {
t.Fatalf("got %q want /run/live/medium", got)
}
}


@@ -0,0 +1,68 @@
package platform
import (
"fmt"
"os"
"strconv"
"strings"
"syscall"
)
// workerPatterns are substrings matched against /proc/<pid>/cmdline to identify
// bee test worker processes that should be killed by KillTestWorkers.
var workerPatterns = []string{
"bee-gpu-burn",
"stress-ng",
"stressapptest",
"memtester",
// DCGM diagnostic workers — nvvs is spawned by dcgmi diag and survives
// if dcgmi is killed mid-run, leaving the GPU occupied (DCGM_ST_IN_USE).
"nvvs",
"dcgmi",
}
// KilledProcess describes a process that was sent SIGKILL.
type KilledProcess struct {
PID int `json:"pid"`
Name string `json:"name"`
}
// KillTestWorkers scans /proc for running test worker processes and sends
// SIGKILL to each one found. It returns a list of killed processes.
// Errors for individual processes (e.g. already exited) are silently ignored.
func KillTestWorkers() []KilledProcess {
entries, err := os.ReadDir("/proc")
if err != nil {
return nil
}
var killed []KilledProcess
for _, e := range entries {
if !e.IsDir() {
continue
}
pid, err := strconv.Atoi(e.Name())
if err != nil {
continue
}
cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
if err != nil {
continue
}
// /proc/*/cmdline uses NUL bytes as argument separators.
args := strings.SplitN(strings.ReplaceAll(string(cmdline), "\x00", " "), " ", 2)
exe := strings.TrimSpace(args[0])
base := exe
if idx := strings.LastIndexByte(exe, '/'); idx >= 0 {
base = exe[idx+1:]
}
for _, pat := range workerPatterns {
if strings.Contains(base, pat) || strings.Contains(exe, pat) {
_ = syscall.Kill(pid, syscall.SIGKILL)
killed = append(killed, KilledProcess{PID: pid, Name: base})
break
}
}
}
return killed
}
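As the comment in KillTestWorkers notes, `/proc/<pid>/cmdline` separates arguments with NUL bytes. A self-contained sketch of extracting the basename of argv[0] from such a buffer (the `firstArgBase` helper name is illustrative; the patch inlines this logic):

```go
package main

import (
	"fmt"
	"strings"
)

// firstArgBase splits a raw /proc/<pid>/cmdline buffer on NUL bytes and
// returns the basename of argv[0], e.g. "/usr/bin/stress-ng" -> "stress-ng".
func firstArgBase(cmdline []byte) string {
	args := strings.Split(string(cmdline), "\x00")
	exe := strings.TrimSpace(args[0])
	if idx := strings.LastIndexByte(exe, '/'); idx >= 0 {
		return exe[idx+1:]
	}
	return exe
}

func main() {
	raw := []byte("/usr/bin/stress-ng\x00--cpu\x008\x00")
	fmt.Println(firstArgBase(raw)) // stress-ng
}
```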


@@ -1,20 +1,32 @@
package platform
import (
"bufio"
"encoding/json"
"os"
"os/exec"
"sort"
"strconv"
"strings"
"time"
)
// LiveMetricSample is a single point-in-time snapshot of server metrics
// collected for the web UI metrics page.
type LiveMetricSample struct {
Timestamp time.Time `json:"ts"`
Fans []FanReading `json:"fans"`
Temps []TempReading `json:"temps"`
PowerW float64 `json:"power_w"`
CPULoadPct float64 `json:"cpu_load_pct"`
MemLoadPct float64 `json:"mem_load_pct"`
GPUs []GPUMetricRow `json:"gpus"`
}
// TempReading is a named temperature sensor value.
type TempReading struct {
Name string `json:"name"`
Group string `json:"group,omitempty"`
Celsius float64 `json:"celsius"`
}
@@ -24,22 +36,293 @@ type TempReading struct {
func SampleLiveMetrics() LiveMetricSample {
s := LiveMetricSample{Timestamp: time.Now().UTC()}
// GPU metrics — try NVIDIA first, fall back to AMD
if gpus, err := SampleGPUMetrics(nil); err == nil && len(gpus) > 0 {
s.GPUs = gpus
} else if amdGPUs, err := sampleAMDGPUMetrics(); err == nil && len(amdGPUs) > 0 {
s.GPUs = amdGPUs
}
// Fan speeds — skipped silently if ipmitool unavailable
fans, _ := sampleFanSpeeds()
s.Fans = fans
s.Temps = append(s.Temps, sampleLiveTemperatureReadings()...)
if !hasTempGroup(s.Temps, "cpu") {
if cpuTemp := sampleCPUMaxTemp(); cpuTemp > 0 {
s.Temps = append(s.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: cpuTemp})
}
}
// System power — returns 0 if unavailable
s.PowerW = sampleSystemPower()
// CPU load — from /proc/stat
s.CPULoadPct = sampleCPULoadPct()
// Memory load — from /proc/meminfo
s.MemLoadPct = sampleMemLoadPct()
return s
}
// sampleCPULoadPct reads two /proc/stat snapshots 200ms apart and returns
// the overall CPU utilisation percentage.
func sampleCPULoadPct() float64 {
total0, idle0 := readCPUStat()
if total0 == 0 {
return 0
}
time.Sleep(200 * time.Millisecond)
total1, idle1 := readCPUStat()
if total1 == 0 {
return 0
}
return cpuLoadPctBetween(total0, idle0, total1, idle1)
}
func cpuLoadPctBetween(prevTotal, prevIdle, total, idle uint64) float64 {
dt := float64(total - prevTotal)
di := float64(idle - prevIdle)
if dt <= 0 {
return 0
}
pct := (1 - di/dt) * 100
if pct < 0 {
return 0
}
if pct > 100 {
return 100
}
return pct
}
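The utilisation formula above is (1 − Δidle/Δtotal) × 100, clamped to [0, 100]. A worked example with the function reproduced from the patch: 100 jiffies elapse, 25 of them idle, so the CPU was 75% busy.

```go
package main

import "fmt"

// cpuLoadPctBetween reproduced from the patch: overall CPU utilisation
// between two /proc/stat snapshots, clamped to [0, 100].
func cpuLoadPctBetween(prevTotal, prevIdle, total, idle uint64) float64 {
	dt := float64(total - prevTotal)
	di := float64(idle - prevIdle)
	if dt <= 0 {
		return 0
	}
	pct := (1 - di/dt) * 100
	if pct < 0 {
		return 0
	}
	if pct > 100 {
		return 100
	}
	return pct
}

func main() {
	// Δtotal = 100 jiffies, Δidle = 25 jiffies → 75% busy.
	fmt.Println(cpuLoadPctBetween(1000, 400, 1100, 425))
}
```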
func readCPUStat() (total, idle uint64) {
f, err := os.Open("/proc/stat")
if err != nil {
return 0, 0
}
defer f.Close()
sc := bufio.NewScanner(f)
for sc.Scan() {
line := sc.Text()
if !strings.HasPrefix(line, "cpu ") {
continue
}
fields := strings.Fields(line)[1:] // skip "cpu"
var vals [10]uint64
for i := 0; i < len(fields) && i < 10; i++ {
vals[i], _ = strconv.ParseUint(fields[i], 10, 64)
}
// idle = idle + iowait
idle = vals[3] + vals[4]
for _, v := range vals {
total += v
}
return total, idle
}
return 0, 0
}
func sampleMemLoadPct() float64 {
f, err := os.Open("/proc/meminfo")
if err != nil {
return 0
}
defer f.Close()
vals := map[string]uint64{}
sc := bufio.NewScanner(f)
for sc.Scan() {
fields := strings.Fields(sc.Text())
if len(fields) >= 2 {
v, _ := strconv.ParseUint(fields[1], 10, 64)
vals[strings.TrimSuffix(fields[0], ":")] = v
}
}
total := vals["MemTotal"]
avail := vals["MemAvailable"]
if total == 0 {
return 0
}
used := total - avail
return float64(used) / float64(total) * 100
}
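Memory load above is (MemTotal − MemAvailable) / MemTotal × 100. A self-contained sketch of the same computation over a literal meminfo snippet (`memLoadPctFrom` and the snippet are illustrative; the patch reads `/proc/meminfo` directly):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// memLoadPctFrom computes used-memory percentage from /proc/meminfo text,
// mirroring sampleMemLoadPct: used = MemTotal - MemAvailable.
func memLoadPctFrom(meminfo string) float64 {
	vals := map[string]uint64{}
	for _, line := range strings.Split(meminfo, "\n") {
		fields := strings.Fields(line)
		if len(fields) >= 2 {
			v, _ := strconv.ParseUint(fields[1], 10, 64)
			vals[strings.TrimSuffix(fields[0], ":")] = v
		}
	}
	total := vals["MemTotal"]
	if total == 0 {
		return 0
	}
	used := total - vals["MemAvailable"]
	return float64(used) / float64(total) * 100
}

func main() {
	sample := "MemTotal: 16384 kB\nMemAvailable: 4096 kB\n"
	fmt.Println(memLoadPctFrom(sample)) // 75
}
```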
func hasTempGroup(temps []TempReading, group string) bool {
for _, t := range temps {
if t.Group == group {
return true
}
}
return false
}
func sampleLiveTemperatureReadings() []TempReading {
if temps := sampleLiveTempsViaSensorsJSON(); len(temps) > 0 {
return temps
}
return sampleLiveTempsViaIPMI()
}
func sampleLiveTempsViaSensorsJSON() []TempReading {
out, err := exec.Command("sensors", "-j").Output()
if err != nil || len(out) == 0 {
return nil
}
var doc map[string]map[string]any
if err := json.Unmarshal(out, &doc); err != nil {
return nil
}
chips := make([]string, 0, len(doc))
for chip := range doc {
chips = append(chips, chip)
}
sort.Strings(chips)
temps := make([]TempReading, 0, len(chips))
seen := map[string]struct{}{}
for _, chip := range chips {
features := doc[chip]
featureNames := make([]string, 0, len(features))
for name := range features {
featureNames = append(featureNames, name)
}
sort.Strings(featureNames)
for _, name := range featureNames {
if strings.EqualFold(name, "Adapter") {
continue
}
feature, ok := features[name].(map[string]any)
if !ok {
continue
}
value, ok := firstTempInputValue(feature)
if !ok || value <= 0 || value > 150 {
continue
}
group := classifyLiveTempGroup(chip, name)
if group == "gpu" {
continue
}
label := strings.TrimSpace(name)
if label == "" {
continue
}
if group == "ambient" {
label = compactAmbientTempName(chip, label)
}
key := group + "\x00" + label
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
}
}
return temps
}
func sampleLiveTempsViaIPMI() []TempReading {
out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
if err != nil || len(out) == 0 {
return nil
}
var temps []TempReading
seen := map[string]struct{}{}
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
parts := strings.Split(line, "|")
if len(parts) < 3 {
continue
}
name := strings.TrimSpace(parts[0])
if name == "" {
continue
}
unit := strings.ToLower(strings.TrimSpace(parts[2]))
if !strings.Contains(unit, "degrees") {
continue
}
raw := strings.TrimSpace(parts[1])
if raw == "" || strings.EqualFold(raw, "na") {
continue
}
value, err := strconv.ParseFloat(raw, 64)
if err != nil || value <= 0 || value > 150 {
continue
}
group := classifyLiveTempGroup("", name)
if group == "gpu" {
continue
}
label := name
if group == "ambient" {
label = compactAmbientTempName("", label)
}
key := group + "\x00" + label
if _, ok := seen[key]; ok {
continue
}
seen[key] = struct{}{}
temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
}
return temps
}
func firstTempInputValue(feature map[string]any) (float64, bool) {
keys := make([]string, 0, len(feature))
for key := range feature {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
lower := strings.ToLower(key)
if !strings.Contains(lower, "temp") || !strings.HasSuffix(lower, "_input") {
continue
}
switch value := feature[key].(type) {
case float64:
return value, true
case string:
f, err := strconv.ParseFloat(value, 64)
if err == nil {
return f, true
}
}
}
return 0, false
}
func classifyLiveTempGroup(chip, name string) string {
text := strings.ToLower(strings.TrimSpace(chip + " " + name))
switch {
case strings.Contains(text, "gpu"), strings.Contains(text, "amdgpu"), strings.Contains(text, "nvidia"), strings.Contains(text, "adeon"):
return "gpu"
case strings.Contains(text, "coretemp"),
strings.Contains(text, "k10temp"),
strings.Contains(text, "zenpower"),
strings.Contains(text, "package id"),
strings.Contains(text, "x86_pkg_temp"),
strings.Contains(text, "tctl"),
strings.Contains(text, "tdie"),
strings.Contains(text, "tccd"),
strings.Contains(text, "cpu"),
strings.Contains(text, "peci"):
return "cpu"
default:
return "ambient"
}
}
func compactAmbientTempName(chip, name string) string {
chip = strings.TrimSpace(chip)
name = strings.TrimSpace(name)
if chip == "" || strings.EqualFold(chip, name) {
return name
}
if strings.Contains(strings.ToLower(name), strings.ToLower(chip)) {
return name
}
return chip + " / " + name
}


@@ -0,0 +1,94 @@
package platform
import "testing"
func TestFirstTempInputValue(t *testing.T) {
feature := map[string]any{
"temp1_input": 61.5,
"temp1_max": 80.0,
}
got, ok := firstTempInputValue(feature)
if !ok {
t.Fatal("expected value")
}
if got != 61.5 {
t.Fatalf("got %v want 61.5", got)
}
}
func TestClassifyLiveTempGroup(t *testing.T) {
tests := []struct {
chip string
name string
want string
}{
{chip: "coretemp-isa-0000", name: "Package id 0", want: "cpu"},
{chip: "amdgpu-pci-4300", name: "edge", want: "gpu"},
{chip: "nvme-pci-0100", name: "Composite", want: "ambient"},
{chip: "acpitz-acpi-0", name: "temp1", want: "ambient"},
}
for _, tc := range tests {
if got := classifyLiveTempGroup(tc.chip, tc.name); got != tc.want {
t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", tc.chip, tc.name, got, tc.want)
}
}
}
func TestCompactAmbientTempName(t *testing.T) {
if got := compactAmbientTempName("nvme-pci-0100", "Composite"); got != "nvme-pci-0100 / Composite" {
t.Fatalf("got %q", got)
}
if got := compactAmbientTempName("", "Inlet Temp"); got != "Inlet Temp" {
t.Fatalf("got %q", got)
}
}
func TestCPULoadPctBetween(t *testing.T) {
tests := []struct {
name string
prevTotal uint64
prevIdle uint64
total uint64
idle uint64
want float64
}{
{
name: "busy half",
prevTotal: 100,
prevIdle: 40,
total: 200,
idle: 90,
want: 50,
},
{
name: "fully busy",
prevTotal: 100,
prevIdle: 40,
total: 200,
idle: 40,
want: 100,
},
{
name: "no progress",
prevTotal: 100,
prevIdle: 40,
total: 100,
idle: 40,
want: 0,
},
{
name: "idle delta larger than total clamps to zero",
prevTotal: 100,
prevIdle: 40,
total: 200,
idle: 150,
want: 0,
},
}
for _, tc := range tests {
if got := cpuLoadPctBetween(tc.prevTotal, tc.prevIdle, tc.total, tc.idle); got != tc.want {
t.Fatalf("%s: cpuLoadPctBetween(...)=%v want %v", tc.name, got, tc.want)
}
}
}


@@ -2,6 +2,7 @@ package platform
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
out := make([]InterfaceInfo, 0, len(names))
for _, name := range names {
state := "unknown"
if up, err := interfaceAdminState(name); err == nil {
if up {
state = "up"
} else {
state = "down"
}
}
ipv4, err := interfaceIPv4Addrs(name)
if err != nil {
ipv4 = nil
}
out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
@@ -55,6 +52,119 @@ func (s *System) DefaultRoute() string {
return ""
}
func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
names, err := listInterfaceNames()
if err != nil {
return NetworkSnapshot{}, err
}
snapshot := NetworkSnapshot{
Interfaces: make([]NetworkInterfaceSnapshot, 0, len(names)),
}
for _, name := range names {
up, err := interfaceAdminState(name)
if err != nil {
return NetworkSnapshot{}, err
}
ipv4, err := interfaceIPv4Addrs(name)
if err != nil {
return NetworkSnapshot{}, err
}
snapshot.Interfaces = append(snapshot.Interfaces, NetworkInterfaceSnapshot{
Name: name,
Up: up,
IPv4: ipv4,
})
}
if raw, err := exec.Command("ip", "route", "show", "default").Output(); err == nil {
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
line = strings.TrimSpace(line)
if line != "" {
snapshot.DefaultRoutes = append(snapshot.DefaultRoutes, line)
}
}
}
if raw, err := os.ReadFile("/etc/resolv.conf"); err == nil {
snapshot.ResolvConf = string(raw)
}
return snapshot, nil
}
func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
var errs []string
for _, iface := range snapshot.Interfaces {
if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
continue
}
if err := exec.Command("ip", "addr", "flush", "dev", iface.Name).Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
}
for _, cidr := range iface.IPv4 {
if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
detail := strings.TrimSpace(string(raw))
if detail != "" {
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
} else {
errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
}
}
}
state := "down"
if iface.Up {
state = "up"
}
if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
}
}
if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
var exitErr *exec.ExitError
if !errors.As(err, &exitErr) {
errs = append(errs, fmt.Sprintf("clear default route: %v", err))
}
}
for _, route := range snapshot.DefaultRoutes {
fields := strings.Fields(route)
if len(fields) == 0 {
continue
}
// Strip state flags that ip-route(8) does not accept as add arguments.
filtered := fields[:0]
for _, f := range fields {
switch f {
case "linkdown", "dead", "onlink", "pervasive":
// skip
default:
filtered = append(filtered, f)
}
}
args := append([]string{"route", "add"}, filtered...)
if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
detail := strings.TrimSpace(string(raw))
if detail != "" {
errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
} else {
errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
}
}
}
if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
}
if len(errs) > 0 {
return errors.New(strings.Join(errs, "; "))
}
return nil
}
func (s *System) DHCPOne(iface string) (string, error) {
var out bytes.Buffer
if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
@@ -131,6 +241,65 @@ func (s *System) SetStaticIPv4(cfg StaticIPv4Config) (string, error) {
return out.String(), nil
}
// SetInterfaceState brings a network interface up or down.
func (s *System) SetInterfaceState(iface string, up bool) error {
state := "down"
if up {
state = "up"
}
return exec.Command("ip", "link", "set", "dev", iface, state).Run()
}
// GetInterfaceState returns true if the interface is UP.
func (s *System) GetInterfaceState(iface string) (bool, error) {
return interfaceAdminState(iface)
}
func interfaceAdminState(iface string) (bool, error) {
raw, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
if err != nil {
return false, err
}
return parseInterfaceAdminState(string(raw))
}
func parseInterfaceAdminState(raw string) (bool, error) {
start := strings.IndexByte(raw, '<')
if start == -1 {
return false, fmt.Errorf("ip link output missing flags")
}
end := strings.IndexByte(raw[start+1:], '>')
if end == -1 {
return false, fmt.Errorf("ip link output missing flag terminator")
}
flags := strings.Split(raw[start+1:start+1+end], ",")
for _, flag := range flags {
if strings.TrimSpace(flag) == "UP" {
return true, nil
}
}
return false, nil
}
func interfaceIPv4Addrs(iface string) ([]string, error) {
raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return nil, nil
}
return nil, err
}
var ipv4 []string
for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
fields := strings.Fields(line)
if len(fields) >= 4 {
ipv4 = append(ipv4, fields[3])
}
}
return ipv4, nil
}
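`interfaceIPv4Addrs` relies on the CIDR sitting in the fourth whitespace-separated field of each `ip -o -4 addr show` line. A standalone sketch of that extraction (the sample line is a hypothetical `ip` output, not captured from a real host):

```go
package main

import (
	"fmt"
	"strings"
)

// extractIPv4CIDRs mirrors the fields[3] extraction above: for each
// one-line (-o) record, the address/prefix is the fourth field.
func extractIPv4CIDRs(raw string) []string {
	var out []string
	for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
		fields := strings.Fields(line)
		if len(fields) >= 4 {
			out = append(out, fields[3])
		}
	}
	return out
}

func main() {
	sample := "2: enp1s0    inet 192.168.1.10/24 brd 192.168.1.255 scope global enp1s0"
	fmt.Println(extractIPv4CIDRs(sample)) // [192.168.1.10/24]
}
```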
func listInterfaceNames() ([]string, error) {
raw, err := exec.Command("ip", "-o", "link", "show").Output()
if err != nil {


@@ -0,0 +1,46 @@
package platform
import "testing"
func TestParseInterfaceAdminState(t *testing.T) {
tests := []struct {
name string
raw string
want bool
wantErr bool
}{
{
name: "admin up with no carrier",
raw: "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
want: true,
},
{
name: "admin down",
raw: "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
want: false,
},
{
name: "malformed output",
raw: "2: enp1s0: mtu 1500 state DOWN\n",
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := parseInterfaceAdminState(tt.raw)
if tt.wantErr {
if err == nil {
t.Fatal("expected error")
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if got != tt.want {
t.Fatalf("got %v want %v", got, tt.want)
}
})
}
}


@@ -0,0 +1,203 @@
package platform
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
)
func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts NvidiaStressOptions, logFunc func(string)) (string, error) {
normalizeNvidiaStressOptions(&opts)
job, err := buildNvidiaStressJob(opts)
if err != nil {
return "", err
}
return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), withNvidiaPersistenceMode(
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
satJob{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
job,
satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
), logFunc)
}
func nvidiaStressArchivePrefix(loader string) string {
switch strings.TrimSpace(strings.ToLower(loader)) {
case NvidiaStressLoaderJohn:
return "gpu-nvidia-john"
case NvidiaStressLoaderNCCL:
return "gpu-nvidia-nccl"
default:
return "gpu-nvidia-burn"
}
}
func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
if err != nil {
return satJob{}, err
}
loader := strings.TrimSpace(strings.ToLower(opts.Loader))
switch loader {
case "", NvidiaStressLoaderBuiltin:
cmd := []string{
"bee-gpu-burn",
"--seconds", strconv.Itoa(opts.DurationSec),
"--size-mb", strconv.Itoa(opts.SizeMB),
}
if len(selected) > 0 {
cmd = append(cmd, "--devices", joinIndexList(selected))
}
return satJob{
name: "03-bee-gpu-burn.log",
cmd: cmd,
collectGPU: true,
gpuIndices: selected,
}, nil
case NvidiaStressLoaderJohn:
cmd := []string{
"bee-john-gpu-stress",
"--seconds", strconv.Itoa(opts.DurationSec),
}
if len(selected) > 0 {
cmd = append(cmd, "--devices", joinIndexList(selected))
}
return satJob{
name: "03-john-gpu-stress.log",
cmd: cmd,
collectGPU: true,
gpuIndices: selected,
}, nil
case NvidiaStressLoaderNCCL:
cmd := []string{
"bee-nccl-gpu-stress",
"--seconds", strconv.Itoa(opts.DurationSec),
}
if len(selected) > 0 {
cmd = append(cmd, "--devices", joinIndexList(selected))
}
return satJob{
name: "03-bee-nccl-gpu-stress.log",
cmd: cmd,
collectGPU: true,
gpuIndices: selected,
}, nil
default:
return satJob{}, fmt.Errorf("unknown NVIDIA stress loader %q", opts.Loader)
}
}
func normalizeNvidiaStressOptions(opts *NvidiaStressOptions) {
if opts.DurationSec <= 0 {
opts.DurationSec = 300
}
// SizeMB=0 means "auto" — bee-gpu-burn will query per-GPU memory at runtime.
switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
case "", NvidiaStressLoaderBuiltin:
opts.Loader = NvidiaStressLoaderBuiltin
case NvidiaStressLoaderJohn:
opts.Loader = NvidiaStressLoaderJohn
case NvidiaStressLoaderNCCL:
opts.Loader = NvidiaStressLoaderNCCL
default:
opts.Loader = NvidiaStressLoaderBuiltin
}
opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
}
func resolveNvidiaGPUSelection(include, exclude []int) ([]int, error) {
all, err := listNvidiaGPUIndices()
if err != nil {
return nil, err
}
if len(all) == 0 {
return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
}
selected := all
if len(include) > 0 {
want := make(map[int]struct{}, len(include))
for _, idx := range include {
want[idx] = struct{}{}
}
selected = selected[:0]
for _, idx := range all {
if _, ok := want[idx]; ok {
selected = append(selected, idx)
}
}
}
if len(exclude) > 0 {
skip := make(map[int]struct{}, len(exclude))
for _, idx := range exclude {
skip[idx] = struct{}{}
}
filtered := selected[:0]
for _, idx := range selected {
if _, ok := skip[idx]; ok {
continue
}
filtered = append(filtered, idx)
}
selected = filtered
}
if len(selected) == 0 {
return nil, fmt.Errorf("no NVIDIA GPUs selected after applying filters")
}
out := append([]int(nil), selected...)
sort.Ints(out)
return out, nil
}
func listNvidiaGPUIndices() ([]int, error) {
out, err := satExecCommand("nvidia-smi", "--query-gpu=index", "--format=csv,noheader,nounits").Output()
if err != nil {
return nil, fmt.Errorf("nvidia-smi: %w", err)
}
var indices []int
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
idx, err := strconv.Atoi(line)
if err != nil {
continue
}
indices = append(indices, idx)
}
return dedupeSortedIndices(indices), nil
}
func dedupeSortedIndices(values []int) []int {
if len(values) == 0 {
return nil
}
seen := make(map[int]struct{}, len(values))
out := make([]int, 0, len(values))
for _, value := range values {
if value < 0 {
continue
}
if _, ok := seen[value]; ok {
continue
}
seen[value] = struct{}{}
out = append(out, value)
}
sort.Ints(out)
return out
}
func joinIndexList(values []int) string {
parts := make([]string, 0, len(values))
for _, value := range values {
parts = append(parts, strconv.Itoa(value))
}
return strings.Join(parts, ",")
}


@@ -0,0 +1,569 @@
package platform
import (
"archive/tar"
"bytes"
"compress/gzip"
"context"
"encoding/csv"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
)
// PlatformStressCycle defines one load+idle cycle.
type PlatformStressCycle struct {
LoadSec int // seconds of simultaneous CPU+GPU stress
IdleSec int // seconds of idle monitoring after load cut
}
// PlatformStressOptions controls the thermal cycling test.
type PlatformStressOptions struct {
Cycles []PlatformStressCycle
Components []string // if empty: run all; values: "cpu", "gpu"
}
// platformStressRow is one second of telemetry.
type platformStressRow struct {
ElapsedSec float64
Cycle int
Phase string // "load" | "idle"
CPULoadPct float64
MaxCPUTempC float64
MaxGPUTempC float64
SysPowerW float64
FanMinRPM float64
FanMaxRPM float64
GPUThrottled bool
}
// RunPlatformStress runs repeated load+idle thermal cycling.
// Each cycle starts CPU (stressapptest) and GPU stress simultaneously,
// runs for LoadSec, then cuts load abruptly and monitors for IdleSec.
func (s *System) RunPlatformStress(
ctx context.Context,
baseDir string,
opts PlatformStressOptions,
logFunc func(string),
) (string, error) {
if logFunc == nil {
logFunc = func(string) {}
}
if len(opts.Cycles) == 0 {
return "", fmt.Errorf("no cycles defined")
}
if err := os.MkdirAll(baseDir, 0755); err != nil {
return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
}
stamp := time.Now().UTC().Format("20060102-150405")
runDir := filepath.Join(baseDir, "platform-stress-"+stamp)
if err := os.MkdirAll(runDir, 0755); err != nil {
return "", fmt.Errorf("mkdir run dir: %w", err)
}
hasCPU := len(opts.Components) == 0 || containsComponent(opts.Components, "cpu")
hasGPU := len(opts.Components) == 0 || containsComponent(opts.Components, "gpu")
vendor := s.DetectGPUVendor()
logFunc(fmt.Sprintf("Platform Thermal Cycling — %d cycle(s), GPU vendor: %s, cpu=%v gpu=%v", len(opts.Cycles), vendor, hasCPU, hasGPU))
var rows []platformStressRow
start := time.Now()
var analyses []cycleAnalysis
for i, cycle := range opts.Cycles {
if ctx.Err() != nil {
break
}
cycleNum := i + 1
logFunc(fmt.Sprintf("--- Cycle %d/%d: load=%ds, idle=%ds ---", cycleNum, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec))
// ── LOAD PHASE ───────────────────────────────────────────────────────
loadCtx, loadCancel := context.WithTimeout(ctx, time.Duration(cycle.LoadSec)*time.Second)
var wg sync.WaitGroup
// CPU stress
if hasCPU {
wg.Add(1)
go func() {
defer wg.Done()
cpuCmd, err := buildCPUStressCmd(loadCtx)
if err != nil {
logFunc("CPU stress: " + err.Error())
return
}
_ = cpuCmd.Wait() // exits when loadCtx times out (SIGKILL)
}()
}
// GPU stress
if hasGPU {
wg.Add(1)
go func() {
defer wg.Done()
gpuCmd := buildGPUStressCmd(loadCtx, vendor, cycle.LoadSec)
if gpuCmd == nil {
return
}
_ = gpuCmd.Wait()
}()
}
// Monitoring goroutine for load phase
loadRows := collectPhase(loadCtx, cycleNum, "load", start)
for _, r := range loadRows {
logFunc(formatPlatformRow(r))
}
rows = append(rows, loadRows...)
loadCancel()
wg.Wait()
if len(loadRows) > 0 {
logFunc(fmt.Sprintf("Cycle %d load ended (%.0fs)", cycleNum, loadRows[len(loadRows)-1].ElapsedSec))
}
// ── IDLE PHASE ───────────────────────────────────────────────────────
idleCtx, idleCancel := context.WithTimeout(ctx, time.Duration(cycle.IdleSec)*time.Second)
idleRows := collectPhase(idleCtx, cycleNum, "idle", start)
for _, r := range idleRows {
logFunc(formatPlatformRow(r))
}
rows = append(rows, idleRows...)
idleCancel()
// Per-cycle analysis
an := analyzePlatformCycle(loadRows, idleRows)
analyses = append(analyses, an)
logFunc(fmt.Sprintf("Cycle %d: maxCPU=%.1f°C maxGPU=%.1f°C power=%.0fW throttled=%v fanDrop=%.0f%%",
cycleNum, an.maxCPUTemp, an.maxGPUTemp, an.maxPower, an.throttled, an.fanDropPct))
}
// Write CSV
csvData := writePlatformCSV(rows)
_ = os.WriteFile(filepath.Join(runDir, "metrics.csv"), csvData, 0644)
// Write summary
summary := writePlatformSummary(opts, analyses)
logFunc("--- Summary ---")
for _, line := range strings.Split(summary, "\n") {
if line != "" {
logFunc(line)
}
}
_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
// Pack tar.gz
archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
if err := packPlatformDir(runDir, archivePath); err != nil {
return "", fmt.Errorf("pack archive: %w", err)
}
_ = os.RemoveAll(runDir)
return archivePath, nil
}
// collectPhase samples live metrics every second until ctx is done.
func collectPhase(ctx context.Context, cycle int, phase string, testStart time.Time) []platformStressRow {
var rows []platformStressRow
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return rows
case <-ticker.C:
sample := SampleLiveMetrics()
rows = append(rows, sampleToPlatformRow(sample, cycle, phase, testStart))
}
}
}
func sampleToPlatformRow(s LiveMetricSample, cycle int, phase string, testStart time.Time) platformStressRow {
r := platformStressRow{
ElapsedSec: time.Since(testStart).Seconds(),
Cycle: cycle,
Phase: phase,
CPULoadPct: s.CPULoadPct,
SysPowerW: s.PowerW,
}
for _, t := range s.Temps {
switch t.Group {
case "cpu":
if t.Celsius > r.MaxCPUTempC {
r.MaxCPUTempC = t.Celsius
}
case "gpu":
if t.Celsius > r.MaxGPUTempC {
r.MaxGPUTempC = t.Celsius
}
}
}
for _, g := range s.GPUs {
if g.TempC > r.MaxGPUTempC {
r.MaxGPUTempC = g.TempC
}
}
if len(s.Fans) > 0 {
r.FanMinRPM = s.Fans[0].RPM
r.FanMaxRPM = s.Fans[0].RPM
for _, f := range s.Fans[1:] {
if f.RPM < r.FanMinRPM {
r.FanMinRPM = f.RPM
}
if f.RPM > r.FanMaxRPM {
r.FanMaxRPM = f.RPM
}
}
}
return r
}
func formatPlatformRow(r platformStressRow) string {
throttle := ""
if r.GPUThrottled {
throttle = " THROTTLE"
}
fans := ""
if r.FanMinRPM > 0 {
fans = fmt.Sprintf(" fans=%.0f-%.0fRPM", r.FanMinRPM, r.FanMaxRPM)
}
return fmt.Sprintf("[%5.0fs] cycle=%d phase=%-4s cpu=%.0f%% cpuT=%.1f°C gpuT=%.1f°C pwr=%.0fW%s%s",
r.ElapsedSec, r.Cycle, r.Phase, r.CPULoadPct, r.MaxCPUTempC, r.MaxGPUTempC, r.SysPowerW, fans, throttle)
}
func analyzePlatformCycle(loadRows, idleRows []platformStressRow) cycleAnalysis {
var an cycleAnalysis
for _, r := range loadRows {
if r.MaxCPUTempC > an.maxCPUTemp {
an.maxCPUTemp = r.MaxCPUTempC
}
if r.MaxGPUTempC > an.maxGPUTemp {
an.maxGPUTemp = r.MaxGPUTempC
}
if r.SysPowerW > an.maxPower {
an.maxPower = r.SysPowerW
}
if r.GPUThrottled {
an.throttled = true
}
}
// Fan RPM at cut = avg of last 5 load rows
if n := len(loadRows); n > 0 {
window := loadRows
if n > 5 {
window = loadRows[n-5:]
}
var sum float64
var cnt int
for _, r := range window {
if r.FanMinRPM > 0 {
sum += (r.FanMinRPM + r.FanMaxRPM) / 2
cnt++
}
}
if cnt > 0 {
an.fanAtCutAvg = sum / float64(cnt)
}
}
// Fan RPM min in first 15s of idle
an.fanMin15s = an.fanAtCutAvg
var cutElapsed float64
if len(loadRows) > 0 {
cutElapsed = loadRows[len(loadRows)-1].ElapsedSec
}
for _, r := range idleRows {
if r.ElapsedSec > cutElapsed+15 {
break
}
avg := (r.FanMinRPM + r.FanMaxRPM) / 2
if avg > 0 && (an.fanMin15s == 0 || avg < an.fanMin15s) {
an.fanMin15s = avg
}
}
if an.fanAtCutAvg > 0 {
an.fanDropPct = (an.fanAtCutAvg - an.fanMin15s) / an.fanAtCutAvg * 100
}
return an
}
type cycleAnalysis struct {
maxCPUTemp float64
maxGPUTemp float64
maxPower float64
throttled bool
fanAtCutAvg float64
fanMin15s float64
fanDropPct float64
}
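The spindown check in `analyzePlatformCycle` reduces to one ratio: average RPM at load cut versus the minimum RPM seen in the first 15 s of idle. A worked sketch of that formula (inputs are made-up sample readings):

```go
package main

import "fmt"

// fanDropPct mirrors the drop computation above:
// (atCut - min15s) / atCut * 100, guarded against a zero baseline.
func fanDropPct(atCut, min15s float64) float64 {
	if atCut <= 0 {
		return 0
	}
	return (atCut - min15s) / atCut * 100
}

func main() {
	// 8000 RPM at cut falling to 6000 RPM within 15 s is a 25% drop,
	// which would trip the >20% fast-spindown WARN in the summary.
	fmt.Println(fanDropPct(8000, 6000)) // 25
}
```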
func writePlatformSummary(opts PlatformStressOptions, analyses []cycleAnalysis) string {
var b strings.Builder
fmt.Fprintf(&b, "Platform Thermal Cycling — %d cycle(s)\n", len(opts.Cycles))
fmt.Fprintf(&b, "%s\n\n", strings.Repeat("=", 48))
totalThrottle := 0
totalFanWarn := 0
for i, an := range analyses {
cycle := opts.Cycles[i]
fmt.Fprintf(&b, "Cycle %d/%d (load=%ds, idle=%ds)\n", i+1, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec)
fmt.Fprintf(&b, " Max CPU temp: %.1f°C\n", an.maxCPUTemp)
fmt.Fprintf(&b, " Max GPU temp: %.1f°C\n", an.maxGPUTemp)
fmt.Fprintf(&b, " Max sys power: %.0f W\n", an.maxPower)
if an.throttled {
fmt.Fprintf(&b, " Throttle: DETECTED\n")
totalThrottle++
} else {
fmt.Fprintf(&b, " Throttle: none\n")
}
if an.fanAtCutAvg > 0 {
fmt.Fprintf(&b, " Fan at load cut: %.0f RPM avg\n", an.fanAtCutAvg)
fmt.Fprintf(&b, " Fan min (first 15s idle): %.0f RPM (drop %.0f%%)\n", an.fanMin15s, an.fanDropPct)
if an.fanDropPct > 20 {
fmt.Fprintf(&b, " Fan response: WARN — fast spindown (>20%% drop in 15s)\n")
totalFanWarn++
} else {
fmt.Fprintf(&b, " Fan response: OK\n")
}
}
b.WriteString("\n")
}
fmt.Fprintf(&b, "%s\n", strings.Repeat("=", 48))
if totalThrottle > 0 {
fmt.Fprintf(&b, "Overall: FAIL — throttle detected in %d/%d cycles\n", totalThrottle, len(analyses))
} else if totalFanWarn > 0 {
fmt.Fprintf(&b, "Overall: WARN — fast fan spindown in %d/%d cycles (cooling recovery risk)\n", totalFanWarn, len(analyses))
} else {
fmt.Fprintf(&b, "Overall: PASS\n")
}
return b.String()
}
func writePlatformCSV(rows []platformStressRow) []byte {
var buf bytes.Buffer
w := csv.NewWriter(&buf)
_ = w.Write([]string{
"elapsed_sec", "cycle", "phase",
"cpu_load_pct", "max_cpu_temp_c", "max_gpu_temp_c",
"sys_power_w", "fan_min_rpm", "fan_max_rpm", "gpu_throttled",
})
for _, r := range rows {
throttled := "0"
if r.GPUThrottled {
throttled = "1"
}
_ = w.Write([]string{
strconv.FormatFloat(r.ElapsedSec, 'f', 1, 64),
strconv.Itoa(r.Cycle),
r.Phase,
strconv.FormatFloat(r.CPULoadPct, 'f', 1, 64),
strconv.FormatFloat(r.MaxCPUTempC, 'f', 1, 64),
strconv.FormatFloat(r.MaxGPUTempC, 'f', 1, 64),
strconv.FormatFloat(r.SysPowerW, 'f', 1, 64),
strconv.FormatFloat(r.FanMinRPM, 'f', 0, 64),
strconv.FormatFloat(r.FanMaxRPM, 'f', 0, 64),
throttled,
})
}
w.Flush()
return buf.Bytes()
}
// buildCPUStressCmd creates a stressapptest command that runs until ctx is cancelled.
func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
path, err := satLookPath("stressapptest")
if err != nil {
return nil, fmt.Errorf("stressapptest not found: %w", err)
}
// Use a very long duration; the context timeout will kill it at the right time.
cmdArgs := []string{"-s", "86400", "-W", "--cc_test"}
if threads := platformStressCPUThreads(); threads > 0 {
cmdArgs = append(cmdArgs, "-m", strconv.Itoa(threads))
}
if mb := platformStressMemoryMB(); mb > 0 {
cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb))
}
cmd := exec.CommandContext(ctx, path, cmdArgs...)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
if err := startLowPriorityCmd(cmd, 15); err != nil {
return nil, fmt.Errorf("stressapptest start: %w", err)
}
return cmd, nil
}
// buildGPUStressCmd creates a GPU stress command appropriate for the detected vendor.
// Returns nil if no GPU stress tool is available (CPU-only cycling still useful).
func buildGPUStressCmd(ctx context.Context, vendor string, durSec int) *exec.Cmd {
switch strings.ToLower(vendor) {
case "amd":
return buildAMDGPUStressCmd(ctx, durSec)
case "nvidia":
return buildNvidiaGPUStressCmd(ctx, durSec)
}
return nil
}
func buildAMDGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
rvsArgs, err := resolveRVSCommand()
if err != nil {
return nil
}
rvsPath := rvsArgs[0]
cfg := fmt.Sprintf(`actions:
- name: gst_platform
device: all
module: gst
parallel: true
duration: %d`, durSec*1000) + `
copy_matrix: false
target_stress: 90
matrix_size_a: 8640
matrix_size_b: 8640
matrix_size_c: 8640
`
cfgFile := "/tmp/bee-platform-gst.conf"
_ = os.WriteFile(cfgFile, []byte(cfg), 0644)
cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
_ = startLowPriorityCmd(cmd, 10)
return cmd
}
func buildNvidiaGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
path, err := satLookPath("bee-gpu-burn")
if err != nil {
path, err = satLookPath("bee-gpu-stress")
}
if err != nil {
return nil
}
// Pass exact duration so bee-gpu-burn exits on its own when the cycle ends.
// Process group kill via Setpgid+Cancel is kept as a safety net for cases
// where the context is cancelled early (user stop, parent timeout).
cmd := exec.CommandContext(ctx, path, "--seconds", strconv.Itoa(durSec))
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
_ = startLowPriorityCmd(cmd, 10)
return cmd
}
func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
if err := cmd.Start(); err != nil {
return err
}
if cmd.Process != nil {
_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
}
return nil
}
func platformStressCPUThreads() int {
if n := envInt("BEE_PLATFORM_STRESS_THREADS", 0); n > 0 {
return n
}
cpus := runtime.NumCPU()
switch {
case cpus <= 2:
return 1
case cpus <= 8:
return cpus - 1
default:
return cpus - 2
}
}
func platformStressMemoryMB() int {
if mb := envInt("BEE_PLATFORM_STRESS_MB", 0); mb > 0 {
return mb
}
free := freeMemBytes()
if free <= 0 {
return 0
}
mb := int((free * 60) / 100 / (1024 * 1024))
if mb < 1024 {
return 1024
}
return mb
}
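The memory sizing above takes 60% of free RAM with a 1024 MB floor. A standalone sketch of the arithmetic (the `freeBytes` parameter stands in for the `freeMemBytes()` reading):

```go
package main

import "fmt"

// stressMemMB mirrors platformStressMemoryMB: 60% of free bytes,
// converted to MB, floored at 1024 MB so the run is never trivially small.
func stressMemMB(freeBytes int64) int {
	if freeBytes <= 0 {
		return 0
	}
	mb := int((freeBytes * 60) / 100 / (1024 * 1024))
	if mb < 1024 {
		return 1024
	}
	return mb
}

func main() {
	fmt.Println(stressMemMB(64 << 30)) // 64 GiB free → 39321 MB
	fmt.Println(stressMemMB(1 << 30))  // 1 GiB free → floor of 1024 MB
}
```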
func containsComponent(components []string, name string) bool {
for _, c := range components {
if c == name {
return true
}
}
return false
}
func packPlatformDir(dir, dest string) error {
f, err := os.Create(dest)
if err != nil {
return err
}
defer f.Close()
gz := gzip.NewWriter(f)
defer gz.Close()
tw := tar.NewWriter(gz)
defer tw.Close()
entries, err := os.ReadDir(dir)
if err != nil {
return err
}
base := filepath.Base(dir)
for _, e := range entries {
if e.IsDir() {
continue
}
fpath := filepath.Join(dir, e.Name())
data, err := os.ReadFile(fpath)
if err != nil {
continue
}
hdr := &tar.Header{
Name: filepath.Join(base, e.Name()),
Size: int64(len(data)),
Mode: 0644,
ModTime: time.Now(),
}
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if _, err := tw.Write(data); err != nil {
return err
}
}
return nil
}


@@ -0,0 +1,34 @@
package platform
import (
"runtime"
"testing"
)
func TestPlatformStressCPUThreadsOverride(t *testing.T) {
t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
if got := platformStressCPUThreads(); got != 7 {
t.Fatalf("platformStressCPUThreads=%d want 7", got)
}
}
func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
got := platformStressCPUThreads()
if got < 1 {
t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
}
if got > runtime.NumCPU() {
t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, runtime.NumCPU())
}
if runtime.NumCPU() > 2 && got >= runtime.NumCPU() {
t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, runtime.NumCPU())
}
}
func TestPlatformStressMemoryMBOverride(t *testing.T) {
t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
if got := platformStressMemoryMB(); got != 8192 {
t.Fatalf("platformStressMemoryMB=%d want 8192", got)
}
}


@@ -135,9 +135,15 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
case "nvidia":
tools = append(tools, s.CheckTools([]string{
"nvidia-smi",
+"dcgmi",
+"nv-hostengine",
"nvidia-bug-report.sh",
-"bee-gpu-stress",
+"bee-gpu-burn",
+"bee-john-gpu-stress",
+"bee-nccl-gpu-stress",
+"all_reduce_perf",
})...)
+tools = append(tools, resolvedToolStatus("dcgmproftester", dcgmProfTesterCandidates...))
case "amd":
tool := ToolStatus{Name: "rocm-smi"}
if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
@@ -152,11 +158,37 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
return tools
}
func resolvedToolStatus(display string, candidates ...string) ToolStatus {
for _, candidate := range candidates {
path, err := exec.LookPath(candidate)
if err == nil {
return ToolStatus{Name: display, Path: path, OK: true}
}
}
return ToolStatus{Name: display}
}
func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
lsmodText := commandText("lsmod")
switch vendor {
case "nvidia":
if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
health.NvidiaGSPMode = strings.TrimSpace(string(raw))
if health.NvidiaGSPMode == "gsp-stuck" {
health.Issues = append(health.Issues, schema.RuntimeIssue{
Code: "nvidia_gsp_stuck",
Severity: "critical",
Description: "NVIDIA GSP firmware init timed out and the kernel module is stuck. Reboot and select 'GSP=off' in the boot menu.",
})
} else if health.NvidiaGSPMode == "gsp-off" {
health.Issues = append(health.Issues, schema.RuntimeIssue{
Code: "nvidia_gsp_disabled",
Severity: "warning",
Description: "NVIDIA GSP firmware disabled (fallback). Power management runs via CPU path — power draw readings may differ from reference hardware.",
})
}
}
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
if !health.DriverReady {
health.Issues = append(health.Issues, schema.RuntimeIssue{
@@ -176,8 +208,8 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe
health.DriverReady = true
}
-if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
-out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
+if _, lookErr := exec.LookPath("bee-gpu-burn"); lookErr == nil {
+out, err := exec.Command("bee-gpu-burn", "--seconds", "1", "--size-mb", "1").CombinedOutput()
if err == nil {
health.CUDAReady = true
} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {

File diff suppressed because it is too large


@@ -2,10 +2,12 @@ package platform
import (
"context"
+"encoding/json"
"fmt"
"os"
"os/exec"
"path/filepath"
+"sort"
"strconv"
"strings"
"sync"
@@ -18,7 +20,7 @@ type FanStressOptions struct {
Phase1DurSec int // first load phase duration in seconds (default 300)
PauseSec int // pause between the two load phases (default 60)
Phase2DurSec int // second load phase duration in seconds (default 300)
-SizeMB int // GPU memory to allocate per GPU during stress (default 64)
+SizeMB int // GPU memory to allocate per GPU during stress (0 = auto: 95% of VRAM)
GPUIndices []int // which GPU indices to stress (empty = all detected)
}
@@ -49,6 +51,18 @@ type FanStressRow struct {
SysPowerW float64 // DCMI system power reading
}
type cachedPowerReading struct {
Value float64
UpdatedAt time.Time
}
var (
systemPowerCacheMu sync.Mutex
systemPowerCache cachedPowerReading
)
const systemPowerHoldTTL = 15 * time.Second
// RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
// temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
// Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
@@ -128,26 +142,21 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
stats.OK++
}
-// loadPhase runs bee-gpu-stress for durSec; sampler stamps phaseName on each row.
+// loadPhase runs bee-gpu-burn for durSec; sampler stamps phaseName on each row.
loadPhase := func(phaseName, stepName string, durSec int) {
if ctx.Err() != nil {
return
}
setPhase(phaseName)
-var env []string
-if len(opts.GPUIndices) > 0 {
-ids := make([]string, len(opts.GPUIndices))
-for i, idx := range opts.GPUIndices {
-ids[i] = strconv.Itoa(idx)
-}
-env = []string{"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ",")}
-}
cmd := []string{
-"bee-gpu-stress",
+"bee-gpu-burn",
"--seconds", strconv.Itoa(durSec),
"--size-mb", strconv.Itoa(opts.SizeMB),
}
-out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, env)
+if len(opts.GPUIndices) > 0 {
+cmd = append(cmd, "--devices", joinIndexList(dedupeSortedIndices(opts.GPUIndices)))
+}
+out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, nil, nil)
_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
@@ -234,9 +243,8 @@ func applyFanStressDefaults(opts *FanStressOptions) {
if opts.Phase2DurSec <= 0 {
opts.Phase2DurSec = 300
}
-if opts.SizeMB <= 0 {
-opts.SizeMB = 64
-}
+// SizeMB == 0 means "auto" (worker picks 95% of GPU VRAM for maximum power draw).
+// Leave at 0 to avoid passing a too-small size that starves the tensor-core path.
}
// sampleFanStressRow collects all metrics for one telemetry sample.
@@ -304,41 +312,148 @@ func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
// sampleFanSpeeds reads fan RPM values from ipmitool sdr.
func sampleFanSpeeds() ([]FanReading, error) {
out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
+if err == nil {
+if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
+return fans, nil
+}
+}
+fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
+if len(fans) > 0 {
+return fans, nil
+}
if err != nil {
return nil, err
}
-return parseFanSpeeds(string(out)), nil
+return nil, sensorsErr
}
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
-// Line format: "FAN1 | 2400.000 | RPM | ok"
+// Handles two formats:
+//
+// Old: "FAN1 | 2400.000 | RPM | ok" (value in col[1], unit in col[2])
+// New: "FAN1 | 41h | ok | 29.1 | 4340 RPM" (value+unit combined in last col)
func parseFanSpeeds(raw string) []FanReading {
var fans []FanReading
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
parts := strings.Split(line, "|")
-if len(parts) < 3 {
+if len(parts) < 2 {
continue
}
-unit := strings.TrimSpace(parts[2])
-if !strings.EqualFold(unit, "RPM") {
+name := strings.TrimSpace(parts[0])
+// Find the first field that contains "RPM" (either as a standalone unit or inline)
+rpmVal := 0.0
+found := false
+for _, p := range parts[1:] {
+p = strings.TrimSpace(p)
+if !strings.Contains(strings.ToUpper(p), "RPM") {
+continue
+}
+if strings.EqualFold(p, "RPM") {
+continue // unit-only column in old format; value is in previous field
+}
+val, err := parseFanRPMValue(p)
+if err == nil {
+rpmVal = val
+found = true
+break
+}
+}
+// Old format: unit "RPM" is in col[2], value is in col[1]
+if !found && len(parts) >= 3 && strings.EqualFold(strings.TrimSpace(parts[2]), "RPM") {
+valStr := strings.TrimSpace(parts[1])
+if !strings.EqualFold(valStr, "na") && !strings.EqualFold(valStr, "disabled") && valStr != "" {
+if val, err := parseFanRPMValue(valStr); err == nil {
+rpmVal = val
+found = true
+}
+}
+}
+if !found {
continue
}
-valStr := strings.TrimSpace(parts[1])
-if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
-continue
-}
-val, err := strconv.ParseFloat(valStr, 64)
-if err != nil {
-continue
-}
-fans = append(fans, FanReading{
-Name: strings.TrimSpace(parts[0]),
-RPM: val,
-})
+fans = append(fans, FanReading{Name: name, RPM: rpmVal})
}
return fans
}
func parseFanRPMValue(raw string) (float64, error) {
fields := strings.Fields(strings.TrimSpace(strings.ReplaceAll(raw, ",", "")))
if len(fields) == 0 {
return 0, strconv.ErrSyntax
}
return strconv.ParseFloat(fields[0], 64)
}
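As a standalone illustration of the two `sdr` layouts the parser above accepts, here is a stripped-down sketch; `parseRPM` is an illustrative re-implementation, not the real helper:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseRPM pulls the numeric RPM out of a pipe-separated "ipmitool sdr" line,
// accepting both "FAN1 | 2400.000 | RPM | ok" and "FAN1 | 41h | ok | 29.1 | 4340 RPM".
func parseRPM(line string) (float64, bool) {
	parts := strings.Split(line, "|")
	if len(parts) < 2 {
		return 0, false
	}
	for i, p := range parts[1:] {
		p = strings.TrimSpace(p)
		if strings.EqualFold(p, "RPM") && i >= 1 {
			// Old format: this is the unit column; the value sits in the previous field.
			if v, err := strconv.ParseFloat(strings.TrimSpace(parts[i]), 64); err == nil {
				return v, true
			}
			continue
		}
		if strings.HasSuffix(strings.ToUpper(p), "RPM") {
			// New format: value and unit combined in one field, e.g. "4340 RPM".
			if v, err := strconv.ParseFloat(strings.Fields(p)[0], 64); err == nil {
				return v, true
			}
		}
	}
	return 0, false
}

func main() {
	for _, l := range []string{
		"FAN1 | 2400.000 | RPM | ok",
		"FAN2 | 41h | ok | 29.1 | 4340 RPM",
	} {
		if v, ok := parseRPM(l); ok {
			fmt.Println(v) // 2400 then 4340
		}
	}
}
```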
func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
out, err := exec.Command("sensors", "-j").Output()
if err != nil || len(out) == 0 {
return nil, err
}
var doc map[string]map[string]any
if err := json.Unmarshal(out, &doc); err != nil {
return nil, err
}
chips := make([]string, 0, len(doc))
for chip := range doc {
chips = append(chips, chip)
}
sort.Strings(chips)
var fans []FanReading
seen := map[string]struct{}{}
for _, chip := range chips {
features := doc[chip]
names := make([]string, 0, len(features))
for name := range features {
names = append(names, name)
}
sort.Strings(names)
for _, name := range names {
feature, ok := features[name].(map[string]any)
if !ok {
continue
}
rpm, ok := firstFanInputValue(feature)
if !ok || rpm <= 0 {
continue
}
label := strings.TrimSpace(name)
if chip != "" && !strings.Contains(strings.ToLower(label), strings.ToLower(chip)) {
label = chip + " / " + label
}
if _, ok := seen[label]; ok {
continue
}
seen[label] = struct{}{}
fans = append(fans, FanReading{Name: label, RPM: rpm})
}
}
return fans, nil
}
func firstFanInputValue(feature map[string]any) (float64, bool) {
keys := make([]string, 0, len(feature))
for key := range feature {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
lower := strings.ToLower(key)
if !strings.Contains(lower, "fan") || !strings.HasSuffix(lower, "_input") {
continue
}
switch value := feature[key].(type) {
case float64:
return value, true
case string:
f, err := strconv.ParseFloat(value, 64)
if err == nil {
return f, true
}
}
}
return 0, false
}
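The `sensors -j` fallback above walks a nested chip → feature → subfeature JSON document. A minimal self-contained sketch of that traversal (the sample document and `fanRPMs` name are made up for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// fanRPMs extracts every positive "fan*_input" reading from lm-sensors JSON
// output, keyed as "chip/feature".
func fanRPMs(raw []byte) map[string]float64 {
	var doc map[string]map[string]any
	if err := json.Unmarshal(raw, &doc); err != nil {
		return nil
	}
	out := map[string]float64{}
	for chip, features := range doc {
		for name, f := range features {
			sub, ok := f.(map[string]any)
			if !ok {
				continue
			}
			for key, val := range sub {
				v, isNum := val.(float64)
				if isNum && strings.HasPrefix(key, "fan") && strings.HasSuffix(key, "_input") && v > 0 {
					out[chip+"/"+name] = v
				}
			}
		}
	}
	return out
}

func main() {
	sample := []byte(`{"nct6798-isa-0290":{"fan1":{"fan1_input":1843.0},"temp1":{"temp1_input":42.0}}}`)
	fmt.Println(fanRPMs(sample))
}
```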
// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
func sampleCPUMaxTemp() float64 {
	out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
@@ -404,11 +519,17 @@ func sampleCPUTempViaSensors() float64 {
// sampleSystemPower reads system power draw via DCMI.
func sampleSystemPower() float64 {
+	now := time.Now()
+	current := 0.0
	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
-	if err != nil {
-		return 0
+	if err == nil {
+		current = parseDCMIPowerReading(string(out))
	}
-	return parseDCMIPowerReading(string(out))
+	systemPowerCacheMu.Lock()
+	defer systemPowerCacheMu.Unlock()
+	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	systemPowerCache = updated
+	return value
}
// parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
@@ -431,6 +552,17 @@ func parseDCMIPowerReading(raw string) float64 {
	return 0
}
func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
if current > 0 {
cache = cachedPowerReading{Value: current, UpdatedAt: now}
return current, cache
}
if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
return cache.Value, cache
}
return 0, cache
}
// analyzeThrottling returns true if any GPU reported an active throttle reason
// during either load phase.
func analyzeThrottling(rows []FanStressRow) bool {


@@ -0,0 +1,67 @@
package platform
import (
"testing"
"time"
)
func TestParseFanSpeeds(t *testing.T) {
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
got := parseFanSpeeds(raw)
if len(got) != 2 {
t.Fatalf("fans=%d want 2 (%v)", len(got), got)
}
if got[0].Name != "FAN1" || got[0].RPM != 2400 {
t.Fatalf("fan0=%+v", got[0])
}
if got[1].Name != "FAN2" || got[1].RPM != 1800 {
t.Fatalf("fan1=%+v", got[1])
}
}
func TestFirstFanInputValue(t *testing.T) {
feature := map[string]any{
"fan1_input": 9200.0,
}
got, ok := firstFanInputValue(feature)
if !ok || got != 9200 {
t.Fatalf("got=%v ok=%v", got, ok)
}
}
func TestParseDCMIPowerReading(t *testing.T) {
raw := `
Instantaneous power reading: 512 Watts
Minimum during sampling period: 498 Watts
`
if got := parseDCMIPowerReading(raw); got != 512 {
t.Fatalf("parseDCMIPowerReading()=%v want 512", got)
}
}
func TestEffectiveSystemPowerReading(t *testing.T) {
now := time.Now()
cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}
got, updated := effectiveSystemPowerReading(cache, 0, now)
if got != 480 {
t.Fatalf("got=%v want cached 480", got)
}
if updated.Value != 480 {
t.Fatalf("updated=%+v", updated)
}
got, updated = effectiveSystemPowerReading(cache, 530, now)
if got != 530 {
t.Fatalf("got=%v want 530", got)
}
if updated.Value != 530 {
t.Fatalf("updated=%+v", updated)
}
expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
got, _ = effectiveSystemPowerReading(expired, 0, now)
if got != 0 {
t.Fatalf("expired cache returned %v want 0", got)
}
}

View File

@@ -1,22 +1,25 @@
package platform
import (
+	"context"
	"errors"
	"os"
	"os/exec"
	"path/filepath"
+	"strings"
	"testing"
+	"time"
)
func TestStorageSATCommands(t *testing.T) {
	t.Parallel()
-	nvme := storageSATCommands("/dev/nvme0n1")
+	nvme := storageSATCommands("/dev/nvme0n1", false)
	if len(nvme) != 3 || nvme[2].cmd[0] != "nvme" {
		t.Fatalf("unexpected nvme commands: %#v", nvme)
	}
-	sata := storageSATCommands("/dev/sda")
+	sata := storageSATCommands("/dev/sda", false)
	if len(sata) != 2 || sata[0].cmd[0] != "smartctl" {
		t.Fatalf("unexpected sata commands: %#v", sata)
	}
@@ -27,24 +30,68 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
	jobs := nvidiaSATJobs()
-	if len(jobs) != 5 {
-		t.Fatalf("jobs=%d want 5", len(jobs))
+	if len(jobs) != 6 {
+		t.Fatalf("jobs=%d want 6", len(jobs))
	}
-	if got := jobs[4].cmd[0]; got != "bee-gpu-stress" {
-		t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
+	if got := jobs[0].cmd[0]; got != "nvidia-smi" {
+		t.Fatalf("preflight command=%q want nvidia-smi", got)
	}
-	if got := jobs[3].cmd[1]; got != "--output-file" {
+	if got := strings.Join(jobs[0].cmd, " "); got != "nvidia-smi -pm 1" {
+		t.Fatalf("preflight=%q want %q", got, "nvidia-smi -pm 1")
+	}
+	if got := jobs[5].cmd[0]; got != "bee-gpu-burn" {
+		t.Fatalf("gpu stress command=%q want bee-gpu-burn", got)
+	}
+	if got := jobs[4].cmd[1]; got != "--output-file" {
		t.Fatalf("bug report flag=%q want --output-file", got)
	}
}
-func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
-	t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
-	t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
+	t.Parallel()
cfg := amdStressRVSConfig(123)
if !strings.Contains(cfg, "module: gst") {
t.Fatalf("config missing gst module:\n%s", cfg)
}
if strings.Contains(cfg, "module: mem") {
t.Fatalf("config should not include mem module:\n%s", cfg)
}
if !strings.Contains(cfg, "copy_matrix: false") {
t.Fatalf("config should use copy_matrix=false:\n%s", cfg)
}
if strings.Count(cfg, "duration: 123000") != 1 {
t.Fatalf("config should apply duration once:\n%s", cfg)
}
for _, field := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
if !strings.Contains(cfg, field) {
t.Fatalf("config missing %s:\n%s", field, cfg)
}
}
}
func TestAMDStressJobsIncludeBandwidthAndGST(t *testing.T) {
t.Parallel()
jobs := amdStressJobs(300, "/tmp/test-amd-gst.conf")
if len(jobs) != 4 {
t.Fatalf("jobs=%d want 4", len(jobs))
}
if got := jobs[1].cmd[0]; got != "rocm-bandwidth-test" {
t.Fatalf("jobs[1]=%q want rocm-bandwidth-test", got)
}
if got := jobs[2].cmd[0]; got != "rvs" {
t.Fatalf("jobs[2]=%q want rvs", got)
}
if got := jobs[2].cmd[2]; got != "/tmp/test-amd-gst.conf" {
t.Fatalf("jobs[2] cfg=%q want /tmp/test-amd-gst.conf", got)
}
}
func TestNvidiaSATJobsUseBuiltinBurnDefaults(t *testing.T) {
	jobs := nvidiaSATJobs()
-	got := jobs[4].cmd
-	want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
+	got := jobs[5].cmd
+	want := []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}
	if len(got) != len(want) {
		t.Fatalf("cmd len=%d want %d", len(got), len(want))
	}
@@ -55,6 +102,257 @@ func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
	}
}
func TestNvidiaDCGMJobsEnablePersistenceModeBeforeDiag(t *testing.T) {
jobs := nvidiaDCGMJobs(3, []int{2, 0})
if len(jobs) != 5 {
t.Fatalf("jobs=%d want 5", len(jobs))
}
if got := strings.Join(jobs[0].cmd, " "); got != "nvidia-smi -pm 1" {
t.Fatalf("preflight=%q want %q", got, "nvidia-smi -pm 1")
}
if got := strings.Join(jobs[4].cmd, " "); got != "dcgmi diag -r 3 -i 2,0" {
t.Fatalf("diag=%q want %q", got, "dcgmi diag -r 3 -i 2,0")
}
}
func TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices(t *testing.T) {
t.Parallel()
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
job, err := buildNvidiaStressJob(NvidiaStressOptions{
DurationSec: 600,
Loader: NvidiaStressLoaderJohn,
ExcludeGPUIndices: []int{1},
})
if err != nil {
t.Fatalf("buildNvidiaStressJob error: %v", err)
}
wantCmd := []string{"bee-john-gpu-stress", "--seconds", "600", "--devices", "0,2"}
if len(job.cmd) != len(wantCmd) {
t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
}
for i := range wantCmd {
if job.cmd[i] != wantCmd[i] {
t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
}
}
if got := joinIndexList(job.gpuIndices); got != "0,2" {
t.Fatalf("gpuIndices=%q want 0,2", got)
}
}
func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
t.Parallel()
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
job, err := buildNvidiaStressJob(NvidiaStressOptions{
DurationSec: 120,
Loader: NvidiaStressLoaderNCCL,
GPUIndices: []int{2, 0},
})
if err != nil {
t.Fatalf("buildNvidiaStressJob error: %v", err)
}
wantCmd := []string{"bee-nccl-gpu-stress", "--seconds", "120", "--devices", "0,2"}
if len(job.cmd) != len(wantCmd) {
t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
}
for i := range wantCmd {
if job.cmd[i] != wantCmd[i] {
t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
}
}
if got := joinIndexList(job.gpuIndices); got != "0,2" {
t.Fatalf("gpuIndices=%q want 0,2", got)
}
}
func TestResolveDCGMGPUIndicesUsesDetectedGPUsWhenUnset(t *testing.T) {
t.Parallel()
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '2\n0\n1\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
got, err := resolveDCGMGPUIndices(nil)
if err != nil {
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
}
if want := "0,1,2"; joinIndexList(got) != want {
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
}
}
func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
t.Parallel()
got, err := resolveDCGMGPUIndices([]int{3, 1, 3})
if err != nil {
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
}
if want := "1,3"; joinIndexList(got) != want {
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
}
}
func TestParseNvidiaGPUHealthDetectsResetRequired(t *testing.T) {
t.Parallel()
got := parseNvidiaGPUHealth("0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n")
if len(got) != 2 {
t.Fatalf("len=%d want 2", len(got))
}
if got[0].NeedsReset {
t.Fatalf("gpu0 unexpectedly marked reset-required")
}
if !got[1].NeedsReset {
t.Fatalf("gpu1 should be marked reset-required: %#v", got[1])
}
}
func TestCheckNvidiaJobHealthReturnsErrorForSelectedResetRequiredGPU(t *testing.T) {
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
msg, err := checkNvidiaJobHealth([]int{1})
if err == nil {
t.Fatal("expected health check error")
}
if !strings.Contains(msg, "gpu 1") || !strings.Contains(strings.ToLower(msg), "requires reset") {
t.Fatalf("unexpected message: %q", msg)
}
}
func TestWriteNvidiaGPUStatusFilesCreatesPerGPUFiles(t *testing.T) {
dir := t.TempDir()
oldExecCommand := satExecCommand
satExecCommand = func(name string, args ...string) *exec.Cmd {
if name == "nvidia-smi" {
return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
}
return exec.Command(name, args...)
}
t.Cleanup(func() { satExecCommand = oldExecCommand })
perGPU := map[int]*nvidiaGPUStatusFile{
0: {Index: 0, RunStatus: "OK"},
1: {Index: 1, RunStatus: "FAILED", FailingJob: "02-dcgmi-targeted-stress.log", Reason: "NVIDIA GPU health check failed:"},
}
if err := writeNvidiaGPUStatusFiles(dir, "FAILED", perGPU, map[int]struct{}{0: {}, 1: {}}); err != nil {
t.Fatalf("writeNvidiaGPUStatusFiles error: %v", err)
}
raw, err := os.ReadFile(filepath.Join(dir, "gpu-1-status.txt"))
if err != nil {
t.Fatalf("ReadFile gpu-1-status.txt: %v", err)
}
text := string(raw)
if !strings.Contains(text, "run_status=FAILED") {
t.Fatalf("missing run status:\n%s", text)
}
if !strings.Contains(text, "health_status=RESET_REQUIRED") {
t.Fatalf("missing health status:\n%s", text)
}
if !strings.Contains(text, "failing_job=02-dcgmi-targeted-stress.log") {
t.Fatalf("missing failing job:\n%s", text)
}
}
func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
oldLookPath := satLookPath
satLookPath = func(file string) (string, error) {
switch file {
case "dcgmproftester13":
return "/usr/bin/dcgmproftester13", nil
default:
return "", exec.ErrNotFound
}
}
t.Cleanup(func() { satLookPath = oldLookPath })
cmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004")
if err != nil {
t.Fatalf("resolveDCGMProfTesterCommand error: %v", err)
}
if len(cmd) != 4 {
t.Fatalf("cmd len=%d want 4 (%v)", len(cmd), cmd)
}
if cmd[0] != "/usr/bin/dcgmproftester13" {
t.Fatalf("cmd[0]=%q want /usr/bin/dcgmproftester13", cmd[0])
}
}
func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
cmd := nvidiaDCGMNamedDiagCommand("targeted_power", 900, []int{3, 1})
want := []string{"dcgmi", "diag", "-r", "targeted_power", "-p", "targeted_power.test_duration=900", "-i", "3,1"}
if len(cmd) != len(want) {
t.Fatalf("cmd len=%d want %d (%v)", len(cmd), len(want), cmd)
}
for i := range want {
if cmd[i] != want[i] {
t.Fatalf("cmd[%d]=%q want %q", i, cmd[i], want[i])
}
}
}
func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
env := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
if len(env) != 2 {
t.Fatalf("env len=%d want 2 (%v)", len(env), env)
}
if env[0] != "CUDA_DEVICE_ORDER=PCI_BUS_ID" {
t.Fatalf("env[0]=%q want CUDA_DEVICE_ORDER=PCI_BUS_ID", env[0])
}
if env[1] != "CUDA_VISIBLE_DEVICES=0,2,4" {
t.Fatalf("env[1]=%q want CUDA_VISIBLE_DEVICES=0,2,4", env[1])
}
}
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
t.Parallel()
tests := []struct {
loader string
want string
}{
{loader: NvidiaStressLoaderBuiltin, want: "gpu-nvidia-burn"},
{loader: NvidiaStressLoaderJohn, want: "gpu-nvidia-john"},
{loader: NvidiaStressLoaderNCCL, want: "gpu-nvidia-nccl"},
{loader: "", want: "gpu-nvidia-burn"},
}
for _, tt := range tests {
if got := nvidiaStressArchivePrefix(tt.loader); got != tt.want {
t.Fatalf("loader=%q prefix=%q want %q", tt.loader, got, tt.want)
}
}
}
func TestEnvIntFallback(t *testing.T) {
	os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -70,6 +368,37 @@ func TestEnvIntFallback(t *testing.T) {
	}
}
func TestMemoryStressSizeArgUsesAvailableMemory(t *testing.T) {
oldFreeMemBytes := satFreeMemBytes
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
if got := memoryStressSizeArg(); got != "65536M" {
t.Fatalf("sizeArg=%q want 65536M", got)
}
}
func TestMemoryStressSizeArgRespectsOverride(t *testing.T) {
oldFreeMemBytes := satFreeMemBytes
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
t.Setenv("BEE_VM_STRESS_SIZE_MB", "4096")
if got := memoryStressSizeArg(); got != "4096M" {
t.Fatalf("sizeArg=%q want 4096M", got)
}
}
func TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown(t *testing.T) {
oldFreeMemBytes := satFreeMemBytes
satFreeMemBytes = func() int64 { return 0 }
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
if got := memoryStressSizeArg(); got != "80%" {
t.Fatalf("sizeArg=%q want 80%%", got)
}
}
func TestClassifySATResult(t *testing.T) {
	tests := []struct {
		name string
@@ -80,8 +409,9 @@ func TestClassifySATResult(t *testing.T) {
	}{
		{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
		{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
-		{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
-		{name: "cuda not ready", job: "bee-gpu-stress", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "nvme wait timeout without progress", job: "nvme-device-self-test", out: "Short Device self-test started\nWaiting for self test completion...\nno progress for 78 seconds, stop waiting", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
+		{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
	}
	for _, tt := range tests {
@@ -94,6 +424,38 @@ func TestClassifySATResult(t *testing.T) {
	}
}
func TestRunAcceptancePackCtxReturnsContextErrorWithoutArchive(t *testing.T) {
dir := t.TempDir()
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
done := make(chan struct{})
go func() {
time.Sleep(100 * time.Millisecond)
cancel()
close(done)
}()
archive, err := runAcceptancePackCtx(ctx, dir, "cancelled-pack", []satJob{
{name: "01-sleep.log", cmd: []string{"sh", "-c", "sleep 5"}},
}, nil)
<-done
if !errors.Is(err, context.Canceled) {
t.Fatalf("err=%v want context.Canceled", err)
}
if archive != "" {
t.Fatalf("archive=%q want empty", archive)
}
matches, globErr := filepath.Glob(filepath.Join(dir, "cancelled-pack-*.tar.gz"))
if globErr != nil {
t.Fatalf("Glob error: %v", globErr)
}
if len(matches) != 0 {
t.Fatalf("archives=%v want none", matches)
}
}
func TestParseStorageDevicesSkipsUSBDisks(t *testing.T) {
	t.Parallel()
@@ -130,6 +492,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
	}
}
func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
oldLookPath := satLookPath
satLookPath = func(file string) (string, error) {
if file == "stress-ng" {
return "/usr/bin/stress-ng", nil
}
return "", exec.ErrNotFound
}
t.Cleanup(func() { satLookPath = oldLookPath })
cmd, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
if err != nil {
t.Fatalf("resolveSATCommand error: %v", err)
}
if len(cmd) != 3 {
t.Fatalf("cmd len=%d want 3 (%v)", len(cmd), cmd)
}
if cmd[0] != "/usr/bin/stress-ng" {
t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", cmd[0])
}
}
func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
oldLookPath := satLookPath
satLookPath = func(file string) (string, error) {
return "", exec.ErrNotFound
}
t.Cleanup(func() { satLookPath = oldLookPath })
_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
if err == nil {
t.Fatal("expected error")
}
if !strings.Contains(err.Error(), "stress-ng not found in PATH") {
t.Fatalf("error=%q", err)
}
}
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
	tmp := t.TempDir()
	execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")


@@ -10,13 +10,30 @@ import (
func (s *System) ListBeeServices() ([]string, error) {
	seen := map[string]bool{}
	var out []string
-	for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
+	for _, pattern := range []string{
+		"/etc/systemd/system/bee-*.service",
+		"/lib/systemd/system/bee-*.service",
+		"/etc/systemd/system/bee-*.timer",
+		"/lib/systemd/system/bee-*.timer",
+	} {
		matches, err := filepath.Glob(pattern)
		if err != nil {
			return nil, err
		}
		for _, match := range matches {
-			name := strings.TrimSuffix(filepath.Base(match), ".service")
+			base := filepath.Base(match)
+			name := base
+			if strings.HasSuffix(base, ".service") {
+				name = strings.TrimSuffix(base, ".service")
+			}
+			// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
+			if strings.HasSuffix(name, "@") {
+				continue
+			}
+			// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
+			if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
+				continue
+			}
			if !seen[name] {
				seen[name] = true
				out = append(out, name)
@@ -44,7 +61,9 @@ func (s *System) ServiceState(name string) string {
}
func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
-	raw, err := exec.Command("systemctl", string(action), name).CombinedOutput()
+	// bee-web runs as the bee user; sudo is required to control system services.
+	// /etc/sudoers.d/bee grants bee NOPASSWD:ALL.
+	raw, err := exec.Command("sudo", "systemctl", string(action), name).CombinedOutput()
	return string(raw), err
}
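The unit-filtering rules in `ListBeeServices` (strip `.service`, drop `@` templates, hide the timer-managed oneshot) reduce to a small pure function. This sketch is illustrative only; `displayName` is not a helper in the codebase:

```go
package main

import (
	"fmt"
	"strings"
)

// displayName maps a systemd unit file basename to the name shown in the UI,
// returning ok=false for units that should be hidden from the service list.
func displayName(base string) (string, bool) {
	name := strings.TrimSuffix(base, ".service")
	if strings.HasSuffix(name, "@") {
		return "", false // template unit: no concrete instance to query
	}
	if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
		return "", false // timer-managed oneshot: showing it as "inactive" would mislead
	}
	return name, true
}

func main() {
	units := []string{
		"bee-web.service",
		"bee-journal-mirror@.service",
		"bee-selfheal.timer",
		"bee-selfheal.service",
	}
	for _, b := range units {
		if n, ok := displayName(b); ok {
			fmt.Println(n)
		}
	}
}
```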


@@ -2,12 +2,31 @@ package platform
type System struct{}
type LiveBootSource struct {
InRAM bool `json:"in_ram"`
Kind string `json:"kind"`
Source string `json:"source,omitempty"`
Device string `json:"device,omitempty"`
}
type InterfaceInfo struct {
	Name  string
	State string
	IPv4  []string
}
type NetworkInterfaceSnapshot struct {
Name string
Up bool
IPv4 []string
}
type NetworkSnapshot struct {
Interfaces []NetworkInterfaceSnapshot
DefaultRoutes []string
ResolvConf string
}
type ServiceAction string
const (
@@ -25,12 +44,12 @@ type StaticIPv4Config struct {
}
type RemovableTarget struct {
-	Device     string
-	FSType     string
-	Size       string
-	Label      string
-	Model      string
-	Mountpoint string
+	Device     string `json:"device"`
+	FSType     string `json:"fs_type"`
+	Size       string `json:"size"`
+	Label      string `json:"label"`
+	Model      string `json:"model"`
+	Mountpoint string `json:"mountpoint"`
}
type ToolStatus struct {
@@ -39,6 +58,20 @@ type ToolStatus struct {
	OK bool
}
const (
NvidiaStressLoaderBuiltin = "builtin"
NvidiaStressLoaderJohn = "john"
NvidiaStressLoaderNCCL = "nccl"
)
type NvidiaStressOptions struct {
DurationSec int
SizeMB int
Loader string
GPUIndices []int
ExcludeGPUIndices []int
}
func New() *System {
	return &System{}
}


@@ -0,0 +1,31 @@
package platform
import (
"encoding/json"
"strings"
"testing"
)
func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
t.Parallel()
data, err := json.Marshal(RemovableTarget{
Device: "/dev/sdb1",
FSType: "exfat",
Size: "1.8T",
Label: "USB",
Model: "Flash",
})
if err != nil {
t.Fatalf("marshal: %v", err)
}
raw := string(data)
for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
if !strings.Contains(raw, key) {
t.Fatalf("json missing key %s: %s", key, raw)
}
}
if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
t.Fatalf("json still contains Go field names: %s", raw)
}
}


@@ -20,6 +20,7 @@ type RuntimeHealth struct {
	ExportDir     string `json:"export_dir,omitempty"`
	DriverReady   bool   `json:"driver_ready,omitempty"`
	CUDAReady     bool   `json:"cuda_ready,omitempty"`
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
	NetworkStatus string              `json:"network_status,omitempty"`
	Issues        []RuntimeIssue      `json:"issues,omitempty"`
	Tools         []RuntimeToolStatus `json:"tools,omitempty"`

File diff suppressed because it is too large


@@ -0,0 +1,228 @@
package webui
import (
"encoding/json"
"net/http/httptest"
"strings"
"testing"
"bee/audit/internal/app"
"bee/audit/internal/platform"
)
func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
t.Setenv("DISPLAY", "")
t.Setenv("XAUTHORITY", "")
cmd := xrandrCommand("--query")
var hasDisplay bool
var hasXAuthority bool
for _, kv := range cmd.Env {
if kv == "DISPLAY=:0" {
hasDisplay = true
}
if kv == "XAUTHORITY=/home/bee/.Xauthority" {
hasXAuthority = true
}
}
if !hasDisplay {
t.Fatalf("DISPLAY not injected: %v", cmd.Env)
}
if !hasXAuthority {
t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
}
}
func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
globalQueue.mu.Lock()
originalTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = originalTasks
globalQueue.mu.Unlock()
})
h := &handler{opts: HandlerOptions{App: &app.App{}}}
req := httptest.NewRequest("POST", "/api/sat/cpu/run", strings.NewReader(`{"profile":"smoke"}`))
req.ContentLength = -1
rec := httptest.NewRecorder()
h.handleAPISATRun("cpu").ServeHTTP(rec, req)
if rec.Code != 200 {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
if len(globalQueue.tasks) != 1 {
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
}
if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
t.Fatalf("burn profile=%q want smoke", got)
}
}
func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
globalQueue.mu.Lock()
originalTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = originalTasks
globalQueue.mu.Unlock()
})
prevList := apiListNvidiaGPUs
apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
return []platform.NvidiaGPU{
{Index: 1, Name: "NVIDIA H100 PCIe"},
{Index: 3, Name: "NVIDIA H100 PCIe"},
}, nil
}
t.Cleanup(func() { apiListNvidiaGPUs = prevList })
h := &handler{opts: HandlerOptions{App: &app.App{}}}
req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
rec := httptest.NewRecorder()
h.handleAPIBenchmarkNvidiaRun(rec, req)
if rec.Code != 200 {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
if len(globalQueue.tasks) != 1 {
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
}
task := globalQueue.tasks[0]
if task.Target != "nvidia-benchmark" {
t.Fatalf("target=%q want nvidia-benchmark", task.Target)
}
if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
t.Fatalf("gpu indices=%v want [1 3]", got)
}
if task.params.RunNCCL {
t.Fatal("RunNCCL should reflect explicit false from request")
}
}
func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
globalQueue.mu.Lock()
originalTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = originalTasks
globalQueue.mu.Unlock()
})
prevList := apiListNvidiaGPUs
apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
return []platform.NvidiaGPU{
{Index: 0, Name: "NVIDIA H100 PCIe"},
{Index: 1, Name: "NVIDIA H100 PCIe"},
{Index: 2, Name: "NVIDIA H200 NVL"},
}, nil
}
t.Cleanup(func() { apiListNvidiaGPUs = prevList })
h := &handler{opts: HandlerOptions{App: &app.App{}}}
req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
rec := httptest.NewRecorder()
h.handleAPIBenchmarkNvidiaRun(rec, req)
if rec.Code != 200 {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
var resp taskRunResponse
if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
t.Fatalf("decode response: %v", err)
}
if len(resp.TaskIDs) != 2 {
t.Fatalf("task_ids=%v want 2 items", resp.TaskIDs)
}
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
if len(globalQueue.tasks) != 2 {
t.Fatalf("tasks=%d want 2", len(globalQueue.tasks))
}
if got := globalQueue.tasks[0].params.GPUIndices; len(got) != 2 || got[0] != 0 || got[1] != 1 {
t.Fatalf("task[0] gpu indices=%v want [0 1]", got)
}
if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
t.Fatalf("task[1] gpu indices=%v want [2]", got)
}
}
func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
globalQueue.mu.Lock()
originalTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = originalTasks
globalQueue.mu.Unlock()
})
prevList := apiListNvidiaGPUs
apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
return []platform.NvidiaGPU{
{Index: 0, Name: "NVIDIA H100 PCIe"},
{Index: 1, Name: "NVIDIA H100 PCIe"},
{Index: 2, Name: "NVIDIA H200 NVL"},
}, nil
}
t.Cleanup(func() { apiListNvidiaGPUs = prevList })
h := &handler{opts: HandlerOptions{App: &app.App{}}}
req := httptest.NewRequest("POST", "/api/sat/nvidia-targeted-power/run", strings.NewReader(`{"profile":"acceptance","gpu_indices":[0,1,2]}`))
rec := httptest.NewRecorder()
h.handleAPISATRun("nvidia-targeted-power").ServeHTTP(rec, req)
if rec.Code != 200 {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
if len(globalQueue.tasks) != 2 {
t.Fatalf("tasks=%d want 2", len(globalQueue.tasks))
}
if got := globalQueue.tasks[0].params.GPUIndices; len(got) != 2 || got[0] != 0 || got[1] != 1 {
t.Fatalf("task[0] gpu indices=%v want [0 1]", got)
}
if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
t.Fatalf("task[1] gpu indices=%v want [2]", got)
}
}
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
h := &handler{}
h.pushFanRings([]platform.FanReading{
{Name: "FAN_A", RPM: 4200},
{Name: "FAN_B", RPM: 5100},
})
h.pushFanRings([]platform.FanReading{
{Name: "FAN_B", RPM: 5200},
})
if len(h.fanNames) != 2 || h.fanNames[0] != "FAN_A" || h.fanNames[1] != "FAN_B" {
t.Fatalf("fanNames=%v", h.fanNames)
}
aVals, _ := h.ringFans[0].snapshot()
bVals, _ := h.ringFans[1].snapshot()
if len(aVals) != 2 || len(bVals) != 2 {
t.Fatalf("fan ring lengths: A=%d B=%d", len(aVals), len(bVals))
}
if aVals[1] != 4200 {
t.Fatalf("FAN_A should carry forward last value, got %v", aVals)
}
if bVals[1] != 5200 {
t.Fatalf("FAN_B should use latest sampled value, got %v", bVals)
}
}
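The handler tests above stub hardware discovery by swapping the package-level `apiListNvidiaGPUs` function variable and restoring the original in `t.Cleanup`. A minimal standalone sketch of that seam pattern (hypothetical `listGPUs` and `withStub` names, not from the codebase):

```go
package main

import "fmt"

// listGPUs is a package-level seam: production code assigns the real
// implementation, tests swap in a stub and restore it afterwards.
var listGPUs = func() []string { return []string{"real-gpu"} }

// withStub installs stub for the duration of body, then restores
// the previous implementation even if body panics.
func withStub(stub func() []string, body func()) {
	prev := listGPUs
	listGPUs = stub
	defer func() { listGPUs = prev }()
	body()
}

func main() {
	withStub(func() []string { return []string{"H100", "H200"} }, func() {
		fmt.Println(len(listGPUs())) // stubbed inside this scope
	})
	fmt.Println(listGPUs()[0]) // restored afterwards
}
```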


@@ -0,0 +1,773 @@
package webui
import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"sync"
"time"
"bee/audit/internal/platform"
)
type chartTimelineSegment struct {
Start time.Time
End time.Time
Active bool
}
type chartScale struct {
Min float64
Max float64
Ticks []float64
}
type chartLayout struct {
Width int
Height int
PlotLeft int
PlotRight int
PlotTop int
PlotBottom int
}
type metricChartSeries struct {
Name string
AxisTitle string
Color string
Values []float64
}
var metricChartPalette = []string{
"#5794f2",
"#73bf69",
"#f2cc0c",
"#ff9830",
"#f2495c",
"#b877d9",
"#56d2f7",
"#8ab8ff",
"#9adf8f",
"#ffbe5c",
}
var gpuLabelCache struct {
mu sync.Mutex
loadedAt time.Time
byIndex map[int]string
}
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
if pointCount == 0 {
pointCount = 1
labels = []string{""}
times = []time.Time{time.Time{}}
}
if len(labels) < pointCount {
padded := make([]string, pointCount)
copy(padded, labels)
labels = padded
}
if len(times) < pointCount {
times = synthesizeChartTimes(times, pointCount)
}
for i := range datasets {
if len(datasets[i]) == 0 {
datasets[i] = make([]float64, pointCount)
}
}
statsLabel := chartStatsLabel(datasets)
legendItems := []metricChartSeries{}
for i, name := range names {
color := metricChartPalette[i%len(metricChartPalette)]
values := make([]float64, pointCount)
if i < len(datasets) {
copy(values, coalesceDataset(datasets[i], pointCount))
}
legendItems = append(legendItems, metricChartSeries{
Name: name,
Color: color,
Values: values,
})
}
scale := singleAxisChartScale(datasets, yMin, yMax)
layout := singleAxisChartLayout(canvasHeight, len(legendItems))
start, end := chartTimeBounds(times)
var b strings.Builder
writeSVGOpen(&b, layout.Width, layout.Height)
writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
writeTimelineIdleSpans(&b, layout, start, end, timeline)
writeVerticalGrid(&b, layout, times, pointCount, 8)
writeHorizontalGrid(&b, layout, scale)
writeTimelineBoundaries(&b, layout, start, end, timeline)
writePlotBorder(&b, layout)
writeSingleAxisY(&b, layout, scale)
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
for _, item := range legendItems {
writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
}
writeLegend(&b, layout, legendItems)
writeSVGClose(&b)
return []byte(b.String()), nil
}
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) {
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
if temp == nil && power == nil && coreClock == nil {
return nil, false, nil
}
labels := sampleTimeLabels(samples)
times := sampleTimes(samples)
svg, err := drawGPUOverviewChartSVG(
gpuDisplayLabel(idx)+" Overview",
labels,
times,
[]metricChartSeries{
{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
},
timeline,
)
if err != nil {
return nil, false, err
}
return svg, true, nil
}
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
if len(series) != 3 {
return nil, fmt.Errorf("gpu overview requires 3 series, got %d", len(series))
}
const (
width = 1400
height = 840
plotLeft = 180
plotRight = 1220
plotTop = 96
plotBottom = 660
)
const (
leftOuterAxis = 72
leftInnerAxis = 132
rightInnerAxis = 1268
)
layout := chartLayout{
Width: width,
Height: height,
PlotLeft: plotLeft,
PlotRight: plotRight,
PlotTop: plotTop,
PlotBottom: plotBottom,
}
axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis}
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
if pointCount == 0 {
pointCount = 1
labels = []string{""}
times = []time.Time{time.Time{}}
}
if len(labels) < pointCount {
padded := make([]string, pointCount)
copy(padded, labels)
labels = padded
}
if len(times) < pointCount {
times = synthesizeChartTimes(times, pointCount)
}
for i := range series {
if len(series[i].Values) == 0 {
series[i].Values = make([]float64, pointCount)
}
}
scales := make([]chartScale, len(series))
for i := range series {
min, max := chartSeriesBounds(series[i].Values)
ticks := chartNiceTicks(min, max, 8)
scales[i] = chartScale{
Min: ticks[0],
Max: ticks[len(ticks)-1],
Ticks: ticks,
}
}
start, end := chartTimeBounds(times)
var b strings.Builder
writeSVGOpen(&b, width, height)
writeChartFrame(&b, title, "", width, height)
writeTimelineIdleSpans(&b, layout, start, end, timeline)
writeVerticalGrid(&b, layout, times, pointCount, 8)
writeHorizontalGrid(&b, layout, scales[0])
writeTimelineBoundaries(&b, layout, start, end, timeline)
writePlotBorder(&b, layout)
for i, axisLineX := range axisX {
fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
for _, tick := range scales[i].Ticks {
y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
label := sanitizeChartText(chartYAxisNumber(tick))
if i < 2 {
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX+6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX-8, y, series[i].Color, label)
continue
}
fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
axisLineX, y, axisLineX-6, y, series[i].Color)
fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
axisLineX+8, y, series[i].Color, label)
}
}
writeXAxisLabels(&b, layout, times, labels, start, end, 8)
for i := range series {
writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
}
writeLegend(&b, layout, series)
writeSVGClose(&b)
return []byte(b.String()), nil
}
func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
if len(samples) == 0 {
return nil
}
times := sampleTimes(samples)
start, end := chartTimeBounds(times)
if start.IsZero() || end.IsZero() {
return nil
}
return chartTimelineSegmentsForRange(start, end, now, snapshotTaskHistory())
}
func snapshotTaskHistory() []Task {
globalQueue.mu.Lock()
defer globalQueue.mu.Unlock()
out := make([]Task, len(globalQueue.tasks))
for i, t := range globalQueue.tasks {
out[i] = *t
}
return out
}
func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment {
if start.IsZero() || end.IsZero() {
return nil
}
if end.Before(start) {
start, end = end, start
}
type interval struct {
start time.Time
end time.Time
}
active := make([]interval, 0, len(tasks))
for _, task := range tasks {
if task.StartedAt == nil {
continue
}
intervalStart := task.StartedAt.UTC()
intervalEnd := now.UTC()
if task.DoneAt != nil {
intervalEnd = task.DoneAt.UTC()
}
if !intervalEnd.After(intervalStart) {
continue
}
if intervalEnd.Before(start) || intervalStart.After(end) {
continue
}
if intervalStart.Before(start) {
intervalStart = start
}
if intervalEnd.After(end) {
intervalEnd = end
}
active = append(active, interval{start: intervalStart, end: intervalEnd})
}
sort.Slice(active, func(i, j int) bool {
if active[i].start.Equal(active[j].start) {
return active[i].end.Before(active[j].end)
}
return active[i].start.Before(active[j].start)
})
merged := make([]interval, 0, len(active))
for _, span := range active {
if len(merged) == 0 {
merged = append(merged, span)
continue
}
last := &merged[len(merged)-1]
if !span.start.After(last.end) {
if span.end.After(last.end) {
last.end = span.end
}
continue
}
merged = append(merged, span)
}
segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
cursor := start
for _, span := range merged {
if span.start.After(cursor) {
segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
}
segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
cursor = span.end
}
if cursor.Before(end) {
segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
}
if len(segments) == 0 {
segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
}
return segments
}
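The overlap handling in chartTimelineSegmentsForRange — sort task intervals by start, then fold any span that begins at or before the end of the previous merged span — is the standard interval-merge pass. The same idea in isolation (hypothetical `mergeIntervals` helper with integer bounds for brevity):

```go
package main

import (
	"fmt"
	"sort"
)

type interval struct{ start, end int }

// mergeIntervals sorts spans by start and coalesces any span that
// begins at or before the end of the previous merged span.
func mergeIntervals(in []interval) []interval {
	sort.Slice(in, func(i, j int) bool { return in[i].start < in[j].start })
	var merged []interval
	for _, span := range in {
		if len(merged) > 0 && span.start <= merged[len(merged)-1].end {
			// Overlapping or touching: extend the previous span.
			if span.end > merged[len(merged)-1].end {
				merged[len(merged)-1].end = span.end
			}
			continue
		}
		merged = append(merged, span)
	}
	return merged
}

func main() {
	fmt.Println(mergeIntervals([]interval{{5, 8}, {1, 3}, {2, 6}})) // [{1 8}]
}
```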
func sampleTimes(samples []platform.LiveMetricSample) []time.Time {
times := make([]time.Time, 0, len(samples))
for _, sample := range samples {
times = append(times, sample.Timestamp)
}
return times
}
func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
min, max := 0.0, 1.0
if yMin != nil && yMax != nil {
min, max = *yMin, *yMax
} else {
min, max = chartSeriesBounds(flattenDatasets(datasets))
if yMin != nil {
min = *yMin
}
if yMax != nil {
max = *yMax
}
}
ticks := chartNiceTicks(min, max, 8)
return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
}
func flattenDatasets(datasets [][]float64) []float64 {
total := 0
for _, ds := range datasets {
total += len(ds)
}
out := make([]float64, 0, total)
for _, ds := range datasets {
out = append(out, ds...)
}
return out
}
func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
legendRows := 0
if chartLegendVisible(seriesCount) && seriesCount > 0 {
cols := 4
if seriesCount < cols {
cols = seriesCount
}
legendRows = (seriesCount + cols - 1) / cols
}
legendHeight := 0
if legendRows > 0 {
legendHeight = legendRows*24 + 24
}
return chartLayout{
Width: 1400,
Height: canvasHeight,
PlotLeft: 96,
PlotRight: 1352,
PlotTop: 72,
PlotBottom: canvasHeight - 60 - legendHeight,
}
}
func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
if len(times) == 0 {
return time.Time{}, time.Time{}
}
start := times[0].UTC()
end := start
for _, ts := range times[1:] {
t := ts.UTC()
if t.Before(start) {
start = t
}
if t.After(end) {
end = t
}
}
return start, end
}
func synthesizeChartTimes(times []time.Time, count int) []time.Time {
if count <= 0 {
return nil
}
if len(times) == count {
return times
}
if len(times) == 1 {
out := make([]time.Time, count)
for i := range out {
out[i] = times[0].Add(time.Duration(i) * time.Minute)
}
return out
}
base := time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
out := make([]time.Time, count)
for i := range out {
out[i] = base.Add(time.Duration(i) * time.Minute)
}
return out
}
func writeSVGOpen(b *strings.Builder, width, height int) {
fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
}
func writeSVGClose(b *strings.Builder) {
b.WriteString("</svg>\n")
}
func writeChartFrame(b *strings.Builder, title, subtitle string, width, height int) {
fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
width/2, sanitizeChartText(title))
if strings.TrimSpace(subtitle) != "" {
fmt.Fprintf(b, `<text x="%d" y="50" text-anchor="middle" font-family="sans-serif" font-size="12" font-weight="600" fill="#64748b">%s</text>`+"\n",
width/2, sanitizeChartText(subtitle))
}
}
func writePlotBorder(b *strings.Builder, layout chartLayout) {
fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
}
func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) {
b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
for _, tick := range scale.Ticks {
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
layout.PlotLeft, y, layout.PlotRight, y)
}
b.WriteString(`</g>` + "\n")
}
func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) {
if pointCount <= 0 {
return
}
start, end := chartTimeBounds(times)
b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, target) {
ts := chartPointTime(times, idx)
x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
x, layout.PlotTop, x, layout.PlotBottom)
}
b.WriteString(`</g>` + "\n")
}
func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) {
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
for _, tick := range scale.Ticks {
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
layout.PlotLeft, y, layout.PlotLeft-6, y)
fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
}
}
func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) {
pointCount := len(labels)
if len(times) > pointCount {
pointCount = len(times)
}
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
for _, idx := range gpuChartLabelIndices(pointCount, target) {
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
label := ""
if idx < len(labels) {
label = labels[idx]
}
fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
}
b.WriteString(`</g>` + "\n")
fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
}
func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) {
if len(values) == 0 {
return
}
var points strings.Builder
for idx, value := range values {
if idx > 0 {
points.WriteByte(' ')
}
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom)
points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
points.WriteByte(',')
points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
}
fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
points.String(), color)
if len(values) == 1 {
x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
return
}
peakIdx := 0
peakValue := values[0]
for idx, value := range values[1:] {
if value >= peakValue {
peakIdx = idx + 1
peakValue = value
}
}
x := chartXForTime(chartPointTime(times, peakIdx), start, end, layout.PlotLeft, layout.PlotRight)
y := chartYForValue(peakValue, scale, layout.PlotTop, layout.PlotBottom)
fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="4.2" fill="%s" stroke="#ffffff" stroke-width="1.6"/>`+"\n", x, y, color)
fmt.Fprintf(b, `<path d="M %.1f %.1f L %.1f %.1f L %.1f %.1f Z" fill="%s" opacity="0.9"/>`+"\n",
x, y-10, x-5, y-18, x+5, y-18, color)
}
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
if !chartLegendVisible(len(series)) || len(series) == 0 {
return
}
cols := 4
if len(series) < cols {
cols = len(series)
}
cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols)
baseY := layout.PlotBottom + 74
for i, item := range series {
row := i / cols
col := i % cols
x := float64(layout.PlotLeft) + cellWidth*float64(col) + 8
y := float64(baseY + row*24)
fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
x, y, x+28, y, item.Color)
fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
x+38, y+4, sanitizeChartText(item.Name))
}
}
func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
if len(segments) == 0 {
return
}
b.WriteString(`<g data-role="timeline-overlay">` + "\n")
for _, segment := range segments {
if segment.Active || !segment.End.After(segment.Start) {
continue
}
x0 := chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)
x1 := chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)
fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
x0, layout.PlotTop, math.Max(1, x1-x0), layout.PlotBottom-layout.PlotTop)
}
b.WriteString(`</g>` + "\n")
}
func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
if len(segments) == 0 {
return
}
seen := map[int]bool{}
b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
for i, segment := range segments {
if i > 0 {
x := int(math.Round(chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)))
if !seen[x] {
seen[x] = true
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
}
}
if i < len(segments)-1 {
x := int(math.Round(chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)))
if !seen[x] {
seen[x] = true
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
}
}
}
b.WriteString(`</g>` + "\n")
}
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
if !end.After(start) {
return float64(left+right) / 2
}
if ts.Before(start) {
ts = start
}
if ts.After(end) {
ts = end
}
ratio := float64(ts.Sub(start)) / float64(end.Sub(start))
return float64(left) + ratio*float64(right-left)
}
func chartPointTime(times []time.Time, idx int) time.Time {
if idx >= 0 && idx < len(times) && !times[idx].IsZero() {
return times[idx].UTC()
}
if len(times) > 0 && !times[0].IsZero() {
return times[0].UTC().Add(time.Duration(idx) * time.Minute)
}
return time.Now().UTC().Add(time.Duration(idx) * time.Minute)
}
func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 {
if scale.Max <= scale.Min {
return float64(plotTop+plotBottom) / 2
}
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotBottom-plotTop)
}
func chartSeriesBounds(values []float64) (float64, float64) {
if len(values) == 0 {
return 0, 1
}
min, max := values[0], values[0]
for _, value := range values[1:] {
if value < min {
min = value
}
if value > max {
max = value
}
}
if min == max {
if max == 0 {
return 0, 1
}
pad := math.Abs(max) * 0.1
if pad == 0 {
pad = 1
}
min -= pad
max += pad
}
if min > 0 {
pad := (max - min) * 0.2
if pad == 0 {
pad = max * 0.1
}
min -= pad
if min < 0 {
min = 0
}
max += pad
}
return min, max
}
func chartNiceTicks(min, max float64, target int) []float64 {
if min == max {
max = min + 1
}
span := max - min
step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
for _, factor := range []float64{1, 2, 5, 10} {
if span/(factor*step) <= float64(target)*1.5 {
step = factor * step
break
}
}
low := math.Floor(min/step) * step
high := math.Ceil(max/step) * step
var ticks []float64
for value := low; value <= high+step*0.001; value += step {
ticks = append(ticks, math.Round(value*1e9)/1e9)
}
return ticks
}
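chartNiceTicks follows the classic "nice numbers" approach: derive a power-of-ten base step from the span, widen it by a 1/2/5/10 factor until roughly the target number of ticks fits, then round the bounds outward to step multiples. A standalone copy of the same algorithm, shown for a 0–97 range with 8 target ticks:

```go
package main

import (
	"fmt"
	"math"
)

// niceTicks mirrors the tick selection above: power-of-ten base step,
// widened by a 1/2/5/10 factor, bounds rounded out to step multiples.
func niceTicks(min, max float64, target int) []float64 {
	if min == max {
		max = min + 1
	}
	span := max - min
	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
	for _, factor := range []float64{1, 2, 5, 10} {
		if span/(factor*step) <= float64(target)*1.5 {
			step *= factor
			break
		}
	}
	low := math.Floor(min/step) * step
	high := math.Ceil(max/step) * step
	var ticks []float64
	// The step*0.001 slack absorbs float accumulation error.
	for v := low; v <= high+step*0.001; v += step {
		ticks = append(ticks, math.Round(v*1e9)/1e9)
	}
	return ticks
}

func main() {
	fmt.Println(niceTicks(0, 97, 8)) // 0, 10, ..., 100
}
```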
func valueClamp(value float64, scale chartScale) float64 {
if value < scale.Min {
return scale.Min
}
if value > scale.Max {
return scale.Max
}
return value
}
func chartStatsLabel(datasets [][]float64) string {
mn, avg, mx := globalStats(datasets)
if mx <= 0 && avg <= 0 && mn <= 0 {
return ""
}
return fmt.Sprintf("min %s avg %s max %s",
chartLegendNumber(mn),
chartLegendNumber(avg),
chartLegendNumber(mx),
)
}
func gpuDisplayLabel(idx int) string {
if name := gpuModelNameByIndex(idx); name != "" {
return fmt.Sprintf("GPU %d — %s", idx, name)
}
return fmt.Sprintf("GPU %d", idx)
}
func gpuModelNameByIndex(idx int) string {
now := time.Now()
gpuLabelCache.mu.Lock()
if now.Sub(gpuLabelCache.loadedAt) > 30*time.Second || gpuLabelCache.byIndex == nil {
gpuLabelCache.loadedAt = now
gpuLabelCache.byIndex = loadGPUModelNames()
}
name := strings.TrimSpace(gpuLabelCache.byIndex[idx])
gpuLabelCache.mu.Unlock()
return name
}
func loadGPUModelNames() map[int]string {
out := map[int]string{}
gpus, err := platform.New().ListNvidiaGPUs()
if err != nil {
return out
}
for _, gpu := range gpus {
name := strings.TrimSpace(gpu.Name)
if name != "" {
out[gpu.Index] = name
}
}
return out
}


@@ -1,24 +1,45 @@
package webui
import (
"os"
"strings"
"sync"
"time"
)
// jobState holds the output lines and completion status of an async job.
type jobState struct {
lines []string
done bool
err string
mu sync.Mutex
// subs is a list of channels that receive new lines as they arrive.
subs []chan string
cancel func() // optional cancel function; nil if job is not cancellable
logPath string
serialPrefix string
}
// abort cancels the job if it has a cancel function and is not yet done.
func (j *jobState) abort() bool {
j.mu.Lock()
defer j.mu.Unlock()
if j.done || j.cancel == nil {
return false
}
j.cancel()
return true
}
func (j *jobState) append(line string) {
j.mu.Lock()
defer j.mu.Unlock()
j.lines = append(j.lines, line)
if j.logPath != "" {
appendJobLog(j.logPath, line)
}
if j.serialPrefix != "" {
taskSerialWriteLine(j.serialPrefix + line)
}
for _, ch := range j.subs {
select {
case ch <- line:
@@ -67,18 +88,57 @@ func (m *jobManager) create(id string) *jobState {
j := &jobState{}
m.jobs[id] = j
// Schedule cleanup after 30 minutes
goRecoverOnce("job cleanup", func() {
time.Sleep(30 * time.Minute)
m.mu.Lock()
delete(m.jobs, id)
m.mu.Unlock()
})
return j
}
// isDone returns true if the job has finished (either successfully or with error).
func (j *jobState) isDone() bool {
j.mu.Lock()
defer j.mu.Unlock()
return j.done
}
func (m *jobManager) get(id string) (*jobState, bool) {
m.mu.Lock()
defer m.mu.Unlock()
j, ok := m.jobs[id]
return j, ok
}
func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
j := &jobState{logPath: logPath}
if len(serialPrefix) > 0 {
j.serialPrefix = serialPrefix[0]
}
if logPath == "" {
return j
}
data, err := os.ReadFile(logPath)
if err != nil || len(data) == 0 {
return j
}
lines := strings.Split(strings.ReplaceAll(string(data), "\r\n", "\n"), "\n")
if len(lines) > 0 && lines[len(lines)-1] == "" {
lines = lines[:len(lines)-1]
}
j.lines = append(j.lines, lines...)
return j
}
func appendJobLog(path, line string) {
if path == "" {
return
}
f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return
}
defer f.Close()
_, _ = f.WriteString(line + "\n")
}
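The subscriber loop in jobState.append is a non-blocking broadcast: a send into each subscriber channel is attempted inside a select, and (presumably via a default branch, cut off at the hunk boundary above) a full channel drops the line rather than stalling the producer. A minimal sketch of that pattern, assuming buffered subscriber channels and a hypothetical `broadcast` helper:

```go
package main

import "fmt"

// broadcast sends line to every subscriber without blocking:
// a subscriber whose buffer is full simply misses this line.
func broadcast(subs []chan string, line string) (delivered int) {
	for _, ch := range subs {
		select {
		case ch <- line:
			delivered++
		default: // buffer full; drop rather than block the producer
		}
	}
	return delivered
}

func main() {
	fast := make(chan string, 1)
	full := make(chan string, 1)
	full <- "old" // already full; the next send would block
	fmt.Println(broadcast([]chan string{fast, full}, "hello")) // 1
}
```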


@@ -0,0 +1,242 @@
package webui
import (
"bufio"
"io"
"log/slog"
"os"
"strings"
"sync"
"time"
"bee/audit/internal/app"
"bee/audit/internal/platform"
)
// kmsgWatcher reads /dev/kmsg and accumulates hardware error events.
// It supports multiple concurrent SAT tasks: a shared event window is open
// while any SAT task is running, and flushed when all tasks complete.
type kmsgWatcher struct {
mu sync.Mutex
activeCount int // number of in-flight SAT tasks
window *kmsgWindow
statusDB *app.ComponentStatusDB
}
type kmsgWindow struct {
targets []string // SAT targets running concurrently
startedAt time.Time
seen map[kmsgEventKey]bool
events []kmsgEvent
}
type kmsgEventKey struct {
id string // BDF or device name
category string
}
type kmsgEvent struct {
timestamp time.Time
raw string
ids []string // BDF addresses or device names extracted
category string
}
func newKmsgWatcher(statusDB *app.ComponentStatusDB) *kmsgWatcher {
return &kmsgWatcher{statusDB: statusDB}
}
// start launches the background kmsg reading goroutine.
func (w *kmsgWatcher) start() {
goRecoverLoop("kmsg watcher", 5*time.Second, w.run)
}
func (w *kmsgWatcher) run() {
for {
f, err := os.Open("/dev/kmsg")
if err != nil {
slog.Warn("kmsg watcher unavailable", "err", err)
time.Sleep(30 * time.Second)
continue
}
// Best-effort seek to end so we only capture events from now forward.
_, _ = f.Seek(0, io.SeekEnd)
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 64*1024), 64*1024)
for scanner.Scan() {
line := scanner.Text()
evt, ok := parseKmsgLine(line)
if !ok {
continue
}
w.mu.Lock()
if w.window != nil {
w.recordEvent(evt)
}
w.mu.Unlock()
}
if err := scanner.Err(); err != nil {
slog.Warn("kmsg watcher stopped", "err", err)
}
_ = f.Close()
time.Sleep(2 * time.Second)
}
}
// recordEvent appends evt to the active window, deduplicating by (id, category).
// Must be called with w.mu held.
func (w *kmsgWatcher) recordEvent(evt kmsgEvent) {
if len(evt.ids) == 0 {
key := kmsgEventKey{id: "", category: evt.category}
if !w.window.seen[key] {
w.window.seen[key] = true
w.window.events = append(w.window.events, evt)
}
return
}
for _, id := range evt.ids {
key := kmsgEventKey{id: id, category: evt.category}
if !w.window.seen[key] {
w.window.seen[key] = true
w.window.events = append(w.window.events, evt)
}
}
}
// NotifyTaskStarted increments the active task counter and opens a shared event window
// if this is the first task starting.
func (w *kmsgWatcher) NotifyTaskStarted(taskID, target string) {
w.mu.Lock()
defer w.mu.Unlock()
if w.activeCount == 0 {
w.window = &kmsgWindow{
startedAt: time.Now(),
seen: make(map[kmsgEventKey]bool),
}
}
w.activeCount++
if w.window != nil {
w.window.targets = append(w.window.targets, target)
}
}
// NotifyTaskFinished decrements the active task counter. When all tasks finish,
// it flushes the accumulated events to the status DB.
func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
w.mu.Lock()
w.activeCount--
var window *kmsgWindow
if w.activeCount <= 0 {
w.activeCount = 0
window = w.window
w.window = nil
}
w.mu.Unlock()
if window == nil || len(window.events) == 0 {
return
}
goRecoverOnce("kmsg watcher flush", func() { w.flushWindow(window) })
}
func (w *kmsgWatcher) flushWindow(window *kmsgWindow) {
if w.statusDB == nil {
return
}
source := "watchdog:kmsg"
// Collect unique component keys from events.
seen := map[string]string{} // componentKey → first raw line
for _, evt := range window.events {
if len(evt.ids) == 0 {
// MCE or un-identified error.
key := "cpu:all"
if evt.category == "memory" {
key = "memory:all"
}
if _, exists := seen[key]; !exists {
seen[key] = evt.raw
}
continue
}
for _, id := range evt.ids {
var key string
switch evt.category {
case "gpu", "pcie":
key = "pcie:" + normalizeBDF(id)
case "storage":
key = "storage:" + id
default:
key = "pcie:" + normalizeBDF(id)
}
if _, exists := seen[key]; !exists {
seen[key] = evt.raw
}
}
}
for key, detail := range seen {
detail = "kernel error during SAT (" + strings.Join(window.targets, ",") + "): " + truncate(detail, 120)
w.statusDB.Record(key, source, "Warning", detail)
}
}
// parseKmsgLine parses a single /dev/kmsg line and returns an event if it matches
// any pattern in platform.HardwareErrorPatterns.
// kmsg format: "<priority>,<sequence>,<timestamp_usec>,-;message text"
func parseKmsgLine(raw string) (kmsgEvent, bool) {
msg := raw
if idx := strings.Index(raw, ";"); idx >= 0 {
msg = strings.TrimSpace(raw[idx+1:])
}
if msg == "" {
return kmsgEvent{}, false
}
for _, p := range platform.HardwareErrorPatterns {
m := p.Re.FindStringSubmatch(msg)
if m == nil {
continue
}
evt := kmsgEvent{
timestamp: time.Now(),
raw: msg,
category: p.Category,
}
if p.BDFGroup > 0 && p.BDFGroup < len(m) {
evt.ids = append(evt.ids, normalizeBDF(m[p.BDFGroup]))
}
if p.DevGroup > 0 && p.DevGroup < len(m) {
evt.ids = append(evt.ids, m[p.DevGroup])
}
return evt, true
}
return kmsgEvent{}, false
}
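The prefix-stripping step above can be exercised in isolation. A minimal, self-contained sketch (the AER regex here is a hypothetical stand-in for an entry in platform.HardwareErrorPatterns, not the package's actual pattern table):

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// extractKmsgMessage strips the "<priority>,<sequence>,<timestamp_usec>,-;"
// prefix that /dev/kmsg prepends to every record, as parseKmsgLine does.
func extractKmsgMessage(raw string) string {
	if idx := strings.Index(raw, ";"); idx >= 0 {
		return strings.TrimSpace(raw[idx+1:])
	}
	return raw
}

// aerRe is a hypothetical PCIe AER pattern; group 1 captures the device BDF.
var aerRe = regexp.MustCompile(`pcieport (\S+): AER:`)

func main() {
	raw := "3,4411,123456789,-;pcieport 0000:c8:00.0: AER: Corrected error received"
	msg := extractKmsgMessage(raw)
	if m := aerRe.FindStringSubmatch(msg); m != nil {
		fmt.Println("category=pcie id=" + m[1])
	}
}
```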
// normalizeBDF normalizes a PCIe BDF to the 4-part form "0000:c8:00.0".
func normalizeBDF(bdf string) string {
bdf = strings.ToLower(strings.TrimSpace(bdf))
if strings.Count(bdf, ":") == 1 {
return "0000:" + bdf
}
return bdf
}
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "..."
}
// isSATTarget returns true for task targets that run hardware acceptance tests.
func isSATTarget(target string) bool {
switch target {
case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
"platform-stress":
return true
}
return false
}


@@ -0,0 +1,393 @@
package webui
import (
"database/sql"
"encoding/csv"
"io"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"time"
"bee/audit/internal/platform"
_ "modernc.org/sqlite"
)
const metricsDBPath = "/appdata/bee/metrics.db"
// MetricsDB persists live metric samples to SQLite.
type MetricsDB struct {
db *sql.DB
}
func (m *MetricsDB) Close() error {
if m == nil || m.db == nil {
return nil
}
return m.db.Close()
}
// openMetricsDB opens (or creates) the metrics database at the given path.
func openMetricsDB(path string) (*MetricsDB, error) {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return nil, err
}
db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
if err != nil {
return nil, err
}
db.SetMaxOpenConns(1)
if err := initMetricsSchema(db); err != nil {
_ = db.Close()
return nil, err
}
return &MetricsDB{db: db}, nil
}
func initMetricsSchema(db *sql.DB) error {
_, err := db.Exec(`
CREATE TABLE IF NOT EXISTS sys_metrics (
ts INTEGER NOT NULL,
cpu_load_pct REAL,
mem_load_pct REAL,
power_w REAL,
PRIMARY KEY (ts)
);
CREATE TABLE IF NOT EXISTS gpu_metrics (
ts INTEGER NOT NULL,
gpu_index INTEGER NOT NULL,
temp_c REAL,
usage_pct REAL,
mem_usage_pct REAL,
power_w REAL,
clock_mhz REAL,
mem_clock_mhz REAL,
PRIMARY KEY (ts, gpu_index)
);
CREATE TABLE IF NOT EXISTS fan_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
rpm REAL,
PRIMARY KEY (ts, name)
);
CREATE TABLE IF NOT EXISTS temp_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
grp TEXT NOT NULL,
celsius REAL,
PRIMARY KEY (ts, name)
);
`)
if err != nil {
return err
}
if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
return err
}
return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
}
func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
rows, err := db.Query("PRAGMA table_info(" + table + ")")
if err != nil {
return err
}
defer rows.Close()
for rows.Next() {
var cid int
var name, ctype string
var notNull, pk int
var dflt sql.NullString
if err := rows.Scan(&cid, &name, &ctype, &notNull, &dflt, &pk); err != nil {
return err
}
if strings.EqualFold(name, column) {
return nil
}
}
if err := rows.Err(); err != nil {
return err
}
_, err = db.Exec("ALTER TABLE " + table + " ADD COLUMN " + column + " " + definition)
return err
}
// Write inserts one sample into all relevant tables.
func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
ts := s.Timestamp.Unix()
tx, err := m.db.Begin()
if err != nil {
return err
}
defer func() { _ = tx.Rollback() }()
_, err = tx.Exec(
`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
)
if err != nil {
return err
}
for _, g := range s.GPUs {
_, err = tx.Exec(
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz) VALUES(?,?,?,?,?,?,?,?)`,
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, g.ClockMHz, g.MemClockMHz,
)
if err != nil {
return err
}
}
for _, f := range s.Fans {
_, err = tx.Exec(
`INSERT OR REPLACE INTO fan_metrics(ts,name,rpm) VALUES(?,?,?)`,
ts, f.Name, f.RPM,
)
if err != nil {
return err
}
}
for _, t := range s.Temps {
_, err = tx.Exec(
`INSERT OR REPLACE INTO temp_metrics(ts,name,grp,celsius) VALUES(?,?,?,?)`,
ts, t.Name, t.Group, t.Celsius,
)
if err != nil {
return err
}
}
return tx.Commit()
}
// LoadRecent returns up to n samples in chronological order (oldest first).
func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
}
// LoadAll returns all persisted samples in chronological order (oldest first).
func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`)
}
// LoadBetween returns samples in chronological order within the given time window.
func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSample, error) {
if m == nil {
return nil, nil
}
if start.IsZero() || end.IsZero() {
return nil, nil
}
if end.Before(start) {
start, end = end, start
}
return m.loadSamples(
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
start.Unix(), end.Unix(),
)
}
// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
rows, err := m.db.Query(query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
type sysRow struct {
ts int64
cpu, mem, pwr float64
}
var sysRows []sysRow
for rows.Next() {
var r sysRow
if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
continue
}
sysRows = append(sysRows, r)
}
if len(sysRows) == 0 {
return nil, nil
}
// Collect min/max ts for range query
minTS := sysRows[0].ts
maxTS := sysRows[len(sysRows)-1].ts
// Load GPU rows in range
type gpuKey struct {
ts int64
idx int
}
gpuData := map[gpuKey]platform.GPUMetricRow{}
gRows, err := m.db.Query(
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,IFNULL(clock_mhz,0),IFNULL(mem_clock_mhz,0) FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
minTS, maxTS,
)
if err == nil {
defer gRows.Close()
for gRows.Next() {
var ts int64
var g platform.GPUMetricRow
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW, &g.ClockMHz, &g.MemClockMHz); err == nil {
gpuData[gpuKey{ts, g.GPUIndex}] = g
}
}
}
// Load fan rows in range
type fanKey struct {
ts int64
name string
}
fanData := map[fanKey]float64{}
fRows, err := m.db.Query(
`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
)
if err == nil {
defer fRows.Close()
for fRows.Next() {
var ts int64
var name string
var rpm float64
if err := fRows.Scan(&ts, &name, &rpm); err == nil {
fanData[fanKey{ts, name}] = rpm
}
}
}
// Load temp rows in range
type tempKey struct {
ts int64
name string
}
tempData := map[tempKey]platform.TempReading{}
tRows, err := m.db.Query(
`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
)
if err == nil {
defer tRows.Close()
for tRows.Next() {
var ts int64
var t platform.TempReading
if err := tRows.Scan(&ts, &t.Name, &t.Group, &t.Celsius); err == nil {
tempData[tempKey{ts, t.Name}] = t
}
}
}
// Collect unique GPU indices and fan/temp names from loaded data.
// Sort each list so that sample reconstruction is deterministic regardless
// of Go's non-deterministic map iteration order.
seenGPU := map[int]bool{}
var gpuIndices []int
for k := range gpuData {
if !seenGPU[k.idx] {
seenGPU[k.idx] = true
gpuIndices = append(gpuIndices, k.idx)
}
}
sort.Ints(gpuIndices)
seenFan := map[string]bool{}
var fanNames []string
for k := range fanData {
if !seenFan[k.name] {
seenFan[k.name] = true
fanNames = append(fanNames, k.name)
}
}
sort.Strings(fanNames)
seenTemp := map[string]bool{}
var tempNames []string
for k := range tempData {
if !seenTemp[k.name] {
seenTemp[k.name] = true
tempNames = append(tempNames, k.name)
}
}
sort.Strings(tempNames)
samples := make([]platform.LiveMetricSample, len(sysRows))
for i, r := range sysRows {
s := platform.LiveMetricSample{
Timestamp: time.Unix(r.ts, 0).UTC(),
CPULoadPct: r.cpu,
MemLoadPct: r.mem,
PowerW: r.pwr,
}
for _, idx := range gpuIndices {
if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
s.GPUs = append(s.GPUs, g)
}
}
for _, name := range fanNames {
if rpm, ok := fanData[fanKey{r.ts, name}]; ok {
s.Fans = append(s.Fans, platform.FanReading{Name: name, RPM: rpm})
}
}
for _, name := range tempNames {
if t, ok := tempData[tempKey{r.ts, name}]; ok {
s.Temps = append(s.Temps, t)
}
}
samples[i] = s
}
return samples, nil
}
// ExportCSV writes all sys+gpu data as CSV to w.
func (m *MetricsDB) ExportCSV(w io.Writer) error {
rows, err := m.db.Query(`
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w,
g.clock_mhz, g.mem_clock_mhz
FROM sys_metrics s
LEFT JOIN gpu_metrics g ON g.ts = s.ts
ORDER BY s.ts, g.gpu_index
`)
if err != nil {
return err
}
defer rows.Close()
cw := csv.NewWriter(w)
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w", "gpu_clock_mhz", "gpu_mem_clock_mhz"})
for rows.Next() {
var ts int64
var cpu, mem, pwr float64
var gpuIdx sql.NullInt64
var gpuTemp, gpuUse, gpuMem, gpuPow, gpuClock, gpuMemClock sql.NullFloat64
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow, &gpuClock, &gpuMemClock); err != nil {
continue
}
row := []string{
strconv.FormatInt(ts, 10),
strconv.FormatFloat(cpu, 'f', 2, 64),
strconv.FormatFloat(mem, 'f', 2, 64),
strconv.FormatFloat(pwr, 'f', 1, 64),
}
if gpuIdx.Valid {
row = append(row,
strconv.FormatInt(gpuIdx.Int64, 10),
strconv.FormatFloat(gpuTemp.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuClock.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMemClock.Float64, 'f', 1, 64),
)
} else {
row = append(row, "", "", "", "", "", "", "")
}
_ = cw.Write(row)
}
cw.Flush()
return cw.Error()
}
func nullFloat(v float64) sql.NullFloat64 {
return sql.NullFloat64{Float64: v, Valid: true}
}


@@ -0,0 +1,174 @@
package webui
import (
"database/sql"
"path/filepath"
"testing"
"time"
"bee/audit/internal/platform"
_ "modernc.org/sqlite"
)
func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
base := time.Unix(1_700_000_000, 0).UTC()
for i := 0; i < 3; i++ {
err := db.Write(platform.LiveMetricSample{
Timestamp: base.Add(time.Duration(i) * time.Second),
CPULoadPct: float64(10 + i),
MemLoadPct: float64(20 + i),
PowerW: float64(300 + i),
GPUs: []platform.GPUMetricRow{
{GPUIndex: 0, PowerW: float64(100 + i)},
{GPUIndex: 2, PowerW: float64(200 + i)},
},
})
if err != nil {
t.Fatalf("Write(%d): %v", i, err)
}
}
all, err := db.LoadAll()
if err != nil {
t.Fatalf("LoadAll: %v", err)
}
if len(all) != 3 {
t.Fatalf("LoadAll len=%d want 3", len(all))
}
for i, sample := range all {
if len(sample.GPUs) != 2 {
t.Fatalf("LoadAll sample %d GPUs=%v want 2 rows", i, sample.GPUs)
}
if sample.GPUs[0].GPUIndex != 0 || sample.GPUs[0].PowerW != float64(100+i) {
t.Fatalf("LoadAll sample %d GPU0=%+v", i, sample.GPUs[0])
}
if sample.GPUs[1].GPUIndex != 2 || sample.GPUs[1].PowerW != float64(200+i) {
t.Fatalf("LoadAll sample %d GPU1=%+v", i, sample.GPUs[1])
}
}
recent, err := db.LoadRecent(2)
if err != nil {
t.Fatalf("LoadRecent: %v", err)
}
if len(recent) != 2 {
t.Fatalf("LoadRecent len=%d want 2", len(recent))
}
if !recent[0].Timestamp.Before(recent[1].Timestamp) {
t.Fatalf("LoadRecent timestamps not ascending: %v >= %v", recent[0].Timestamp, recent[1].Timestamp)
}
for i, sample := range recent {
if len(sample.GPUs) != 2 {
t.Fatalf("LoadRecent sample %d GPUs=%v want 2 rows", i, sample.GPUs)
}
}
}
func TestMetricsDBMigratesLegacyGPUSchema(t *testing.T) {
path := filepath.Join(t.TempDir(), "metrics.db")
raw, err := sql.Open("sqlite", path)
if err != nil {
t.Fatalf("sql.Open: %v", err)
}
_, err = raw.Exec(`
CREATE TABLE gpu_metrics (
ts INTEGER NOT NULL,
gpu_index INTEGER NOT NULL,
temp_c REAL,
usage_pct REAL,
mem_usage_pct REAL,
power_w REAL,
PRIMARY KEY (ts, gpu_index)
);
CREATE TABLE sys_metrics (
ts INTEGER NOT NULL,
cpu_load_pct REAL,
mem_load_pct REAL,
power_w REAL,
PRIMARY KEY (ts)
);
CREATE TABLE fan_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
rpm REAL,
PRIMARY KEY (ts, name)
);
CREATE TABLE temp_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
grp TEXT NOT NULL,
celsius REAL,
PRIMARY KEY (ts, name)
);
`)
if err != nil {
t.Fatalf("create legacy schema: %v", err)
}
_ = raw.Close()
db, err := openMetricsDB(path)
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
now := time.Unix(1_700_000_100, 0).UTC()
err = db.Write(platform.LiveMetricSample{
Timestamp: now,
GPUs: []platform.GPUMetricRow{
{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2600},
},
})
if err != nil {
t.Fatalf("Write: %v", err)
}
samples, err := db.LoadAll()
if err != nil {
t.Fatalf("LoadAll: %v", err)
}
if len(samples) != 1 || len(samples[0].GPUs) != 1 {
t.Fatalf("samples=%+v", samples)
}
if got := samples[0].GPUs[0].ClockMHz; got != 1410 {
t.Fatalf("ClockMHz=%v want 1410", got)
}
if got := samples[0].GPUs[0].MemClockMHz; got != 2600 {
t.Fatalf("MemClockMHz=%v want 2600", got)
}
}
func TestMetricsDBLoadBetweenFiltersWindow(t *testing.T) {
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
base := time.Unix(1_700_000_000, 0).UTC()
for i := 0; i < 5; i++ {
if err := db.Write(platform.LiveMetricSample{
Timestamp: base.Add(time.Duration(i) * time.Minute),
CPULoadPct: float64(i),
}); err != nil {
t.Fatalf("Write(%d): %v", i, err)
}
}
got, err := db.LoadBetween(base.Add(1*time.Minute), base.Add(3*time.Minute))
if err != nil {
t.Fatalf("LoadBetween: %v", err)
}
if len(got) != 3 {
t.Fatalf("LoadBetween len=%d want 3", len(got))
}
if !got[0].Timestamp.Equal(base.Add(1*time.Minute)) || !got[2].Timestamp.Equal(base.Add(3*time.Minute)) {
t.Fatalf("window=%v..%v", got[0].Timestamp, got[2].Timestamp)
}
}

File diff suppressed because it is too large.


@@ -0,0 +1,41 @@
package webui
import (
"fmt"
"os"
"strings"
"time"
)
var taskSerialWriteLine = writeTaskSerialLine
func writeTaskSerialLine(line string) {
line = strings.TrimSpace(line)
if line == "" {
return
}
payload := fmt.Sprintf("%s %s\n", time.Now().UTC().Format("2006-01-02 15:04:05Z"), line)
for _, path := range []string{"/dev/ttyS0", "/dev/ttyS1", "/dev/console"} {
f, err := os.OpenFile(path, os.O_WRONLY|os.O_APPEND, 0)
if err != nil {
continue
}
_, _ = f.WriteString(payload)
_ = f.Close()
return
}
}
func taskSerialPrefix(t *Task) string {
if t == nil {
return "[task] "
}
return fmt.Sprintf("[task %s %s] ", t.ID, t.Name)
}
func taskSerialEvent(t *Task, event string) {
if t == nil {
return
}
taskSerialWriteLine(fmt.Sprintf("%s%s", taskSerialPrefix(t), strings.TrimSpace(event)))
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,42 @@
package webui
import (
"fmt"
"log/slog"
"runtime/debug"
"time"
)
func goRecoverLoop(name string, restartDelay time.Duration, fn func()) {
go func() {
for {
if !runRecoverable(name, fn) {
return
}
if restartDelay > 0 {
time.Sleep(restartDelay)
}
}
}()
}
func goRecoverOnce(name string, fn func()) {
go func() {
_ = runRecoverable(name, fn)
}()
}
func runRecoverable(name string, fn func()) (panicked bool) {
defer func() {
if rec := recover(); rec != nil {
panicked = true
slog.Error("recovered panic",
"component", name,
"panic", fmt.Sprint(rec),
"stack", string(debug.Stack()),
)
}
}()
fn()
return false
}
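The recover helper above can be demonstrated standalone. This sketch (renamed runSafe to avoid confusion with the package's function) shows the deferred-recover idiom that reports whether the callback panicked:

```go
package main

import "fmt"

// runSafe mirrors runRecoverable above: execute fn, swallow any panic,
// and report whether one occurred so callers can decide to restart.
func runSafe(name string, fn func()) (panicked bool) {
	defer func() {
		if rec := recover(); rec != nil {
			panicked = true
			fmt.Printf("recovered panic in %s: %v\n", name, rec)
		}
	}()
	fn()
	return false
}

func main() {
	fmt.Println("panicked:", runSafe("demo", func() { panic("boom") }))
	fmt.Println("panicked:", runSafe("demo", func() {}))
}
```

Note that the named return value is what lets the deferred closure flip the result to true after the panic is recovered.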


@@ -0,0 +1,267 @@
package webui
import (
"encoding/json"
"fmt"
"html"
"net/http"
"os"
"strings"
"time"
"bee/audit/internal/platform"
)
func (h *handler) handleTaskPage(w http.ResponseWriter, r *http.Request) {
id := r.PathValue("id")
task, ok := globalQueue.findByID(id)
if !ok {
http.NotFound(w, r)
return
}
snapshot := *task
body := renderTaskDetailPage(h.opts, snapshot)
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "text/html; charset=utf-8")
_, _ = w.Write([]byte(body))
}
func (h *handler) handleAPITaskChartsIndex(w http.ResponseWriter, r *http.Request) {
task, samples, _, _, ok := h.taskSamplesForRequest(r)
if !ok {
http.NotFound(w, r)
return
}
type taskChartIndexEntry struct {
Title string `json:"title"`
File string `json:"file"`
}
entries := make([]taskChartIndexEntry, 0)
for _, spec := range taskChartSpecsForSamples(samples) {
title, _, ok := renderTaskChartSVG(spec.Path, samples, taskTimelineForTask(task))
if !ok {
continue
}
entries = append(entries, taskChartIndexEntry{Title: title, File: spec.File})
}
w.Header().Set("Cache-Control", "no-store")
w.Header().Set("Content-Type", "application/json; charset=utf-8")
_ = json.NewEncoder(w).Encode(entries)
}
func (h *handler) handleAPITaskChartSVG(w http.ResponseWriter, r *http.Request) {
task, samples, _, _, ok := h.taskSamplesForRequest(r)
if !ok {
http.NotFound(w, r)
return
}
file := strings.TrimPrefix(r.URL.Path, "/api/tasks/"+task.ID+"/chart/")
path, ok := taskChartPathFromFile(file)
if !ok {
http.NotFound(w, r)
return
}
title, buf, hasData := renderTaskChartSVG(path, samples, taskTimelineForTask(task))
if !hasData || len(buf) == 0 || strings.TrimSpace(title) == "" {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
w.Header().Set("Content-Type", "image/svg+xml")
w.Header().Set("Cache-Control", "no-store")
_, _ = w.Write(buf)
}
func renderTaskDetailPage(opts HandlerOptions, task Task) string {
title := task.Name
if strings.TrimSpace(title) == "" {
title = task.ID
}
var body strings.Builder
body.WriteString(`<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">`)
body.WriteString(`<a class="btn btn-secondary btn-sm" href="/tasks">Back to Tasks</a>`)
if task.Status == TaskRunning || task.Status == TaskPending {
body.WriteString(`<button class="btn btn-danger btn-sm" onclick="cancelTaskDetail('` + html.EscapeString(task.ID) + `')">Cancel</button>`)
}
body.WriteString(`<span style="font-size:12px;color:var(--muted)">Artifacts are saved in the task folder under <code>./tasks</code>.</span>`)
body.WriteString(`</div>`)
if report := loadTaskReportFragment(task); report != "" {
body.WriteString(report)
} else {
body.WriteString(`<div class="card"><div class="card-head">Task Summary</div><div class="card-body">`)
body.WriteString(`<div style="font-size:18px;font-weight:700">` + html.EscapeString(title) + `</div>`)
body.WriteString(`<div style="margin-top:8px">` + renderTaskStatusBadge(task.Status) + `</div>`)
if strings.TrimSpace(task.ErrMsg) != "" {
body.WriteString(`<div style="margin-top:8px;color:var(--crit-fg)">` + html.EscapeString(task.ErrMsg) + `</div>`)
}
body.WriteString(`</div></div>`)
}
if task.Status == TaskRunning {
body.WriteString(`<div class="card"><div class="card-head">Live Charts</div><div class="card-body">`)
body.WriteString(`<div id="task-live-charts" style="display:flex;flex-direction:column;gap:16px;color:var(--muted);font-size:13px">Loading charts...</div>`)
body.WriteString(`</div></div>`)
}
if task.Status == TaskRunning || task.Status == TaskPending {
body.WriteString(`<div class="card"><div class="card-head">Live Logs</div><div class="card-body">`)
body.WriteString(`<div id="task-live-log" class="terminal" style="max-height:none;white-space:pre-wrap">Connecting...</div>`)
body.WriteString(`</div></div>`)
body.WriteString(`<script>
function cancelTaskDetail(id) {
fetch('/api/tasks/' + id + '/cancel', {method:'POST'}).then(function(){
var term = document.getElementById('task-live-log');
if (term) {
term.textContent += '\nCancel requested.\n';
term.scrollTop = term.scrollHeight;
}
});
}
function renderTaskLiveCharts(taskId, charts) {
const host = document.getElementById('task-live-charts');
if (!host) return;
if (!Array.isArray(charts) || charts.length === 0) {
host.innerHTML = 'Waiting for metric samples...';
return;
}
const seen = {};
charts.forEach(function(chart) {
seen[chart.file] = true;
let img = host.querySelector('img[data-chart-file="' + chart.file + '"]');
if (img) {
const card = img.closest('.card');
if (card) {
const title = card.querySelector('.card-head');
if (title) title.textContent = chart.title;
}
return;
}
const card = document.createElement('div');
card.className = 'card';
card.style.margin = '0';
card.innerHTML = '<div class="card-head"></div><div class="card-body" style="padding:12px"></div>';
card.querySelector('.card-head').textContent = chart.title;
const body = card.querySelector('.card-body');
img = document.createElement('img');
img.setAttribute('data-task-chart', '1');
img.setAttribute('data-chart-file', chart.file);
img.setAttribute('data-base-src', '/api/tasks/' + taskId + '/chart/' + chart.file);
img.src = '/api/tasks/' + taskId + '/chart/' + chart.file + '?t=' + Date.now();
img.style.width = '100%';
img.style.display = 'block';
img.style.borderRadius = '6px';
img.alt = chart.title;
body.appendChild(img);
host.appendChild(card);
});
Array.from(host.querySelectorAll('img[data-task-chart="1"]')).forEach(function(img) {
const file = img.getAttribute('data-chart-file') || '';
if (seen[file]) return;
const card = img.closest('.card');
if (card) card.remove();
});
}
function loadTaskLiveCharts(taskId) {
fetch('/api/tasks/' + taskId + '/charts').then(function(r){ return r.json(); }).then(function(charts){
renderTaskLiveCharts(taskId, charts);
}).catch(function(){
const host = document.getElementById('task-live-charts');
if (host) host.innerHTML = 'Task charts are unavailable.';
});
}
function refreshTaskLiveCharts() {
document.querySelectorAll('img[data-task-chart="1"]').forEach(function(img){
const base = img.dataset.baseSrc;
if (!base) return;
img.src = base + '?t=' + Date.now();
});
}
var _taskDetailES = new EventSource('/api/tasks/` + html.EscapeString(task.ID) + `/stream');
var _taskDetailTerm = document.getElementById('task-live-log');
var _taskChartTimer = null;
var _taskChartsFrozen = false;
_taskDetailES.onopen = function(){ _taskDetailTerm.textContent = ''; };
_taskDetailES.onmessage = function(e){ _taskDetailTerm.textContent += e.data + "\n"; _taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight; };
_taskDetailES.addEventListener('done', function(e){
if (_taskChartTimer) clearInterval(_taskChartTimer);
_taskDetailES.close();
_taskDetailES = null;
_taskChartsFrozen = true;
_taskDetailTerm.textContent += (e.data ? '\nTask finished with error.\n' : '\nTask finished.\n');
_taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight;
refreshTaskLiveCharts();
});
_taskDetailES.onerror = function(){
if (_taskChartTimer) clearInterval(_taskChartTimer);
if (_taskDetailES) {
_taskDetailES.close();
_taskDetailES = null;
}
};
loadTaskLiveCharts('` + html.EscapeString(task.ID) + `');
_taskChartTimer = setInterval(function(){
if (_taskChartsFrozen) return;
loadTaskLiveCharts('` + html.EscapeString(task.ID) + `');
refreshTaskLiveCharts();
}, 2000);
</script>`)
}
return layoutHead(opts.Title+" — "+title) +
layoutNav("tasks", opts.BuildLabel) +
`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
body.String() +
`</div></div></body></html>`
}
func loadTaskReportFragment(task Task) string {
if strings.TrimSpace(task.ReportHTMLPath) == "" {
return ""
}
data, err := os.ReadFile(task.ReportHTMLPath)
if err != nil || len(data) == 0 {
return ""
}
return string(data)
}
func taskArtifactDownloadLink(task Task, absPath string) string {
if strings.TrimSpace(absPath) == "" {
return ""
}
return fmt.Sprintf(`/export/file?path=%s`, absPath)
}
func (h *handler) taskSamplesForRequest(r *http.Request) (Task, []platform.LiveMetricSample, time.Time, time.Time, bool) {
id := r.PathValue("id")
taskPtr, ok := globalQueue.findByID(id)
if !ok {
return Task{}, nil, time.Time{}, time.Time{}, false
}
task := *taskPtr
start, end := taskTimeWindow(&task)
samples, err := loadTaskMetricSamples(start, end)
if err != nil {
return task, nil, start, end, true
}
return task, samples, start, end, true
}
func taskTimelineForTask(task Task) []chartTimelineSegment {
start, end := taskTimeWindow(&task)
return []chartTimelineSegment{{Start: start, End: end, Active: true}}
}
func taskChartPathFromFile(file string) (string, bool) {
file = strings.TrimSpace(file)
for _, spec := range taskDashboardChartSpecs {
if spec.File == file {
return spec.Path, true
}
}
if strings.HasPrefix(file, "gpu-") && strings.HasSuffix(file, "-overview.svg") {
id := strings.TrimSuffix(strings.TrimPrefix(file, "gpu-"), "-overview.svg")
return "gpu/" + id + "-overview", true
}
return "", false
}


@@ -0,0 +1,343 @@
package webui
import (
"encoding/json"
"fmt"
"html"
"os"
"path/filepath"
"sort"
"strings"
"time"
"bee/audit/internal/platform"
)
var taskReportMetricsDBPath = metricsDBPath
type taskReport struct {
ID string `json:"id"`
Name string `json:"name"`
Target string `json:"target"`
Status string `json:"status"`
CreatedAt time.Time `json:"created_at"`
StartedAt *time.Time `json:"started_at,omitempty"`
DoneAt *time.Time `json:"done_at,omitempty"`
DurationSec int `json:"duration_sec,omitempty"`
Error string `json:"error,omitempty"`
LogFile string `json:"log_file,omitempty"`
Charts []taskReportChart `json:"charts,omitempty"`
GeneratedAt time.Time `json:"generated_at"`
}
type taskReportChart struct {
Title string `json:"title"`
File string `json:"file"`
}
type taskChartSpec struct {
Path string
File string
}
var taskDashboardChartSpecs = []taskChartSpec{
{Path: "server-load", File: "server-load.svg"},
{Path: "server-temp-cpu", File: "server-temp-cpu.svg"},
{Path: "server-temp-ambient", File: "server-temp-ambient.svg"},
{Path: "server-power", File: "server-power.svg"},
{Path: "server-fans", File: "server-fans.svg"},
{Path: "gpu-all-load", File: "gpu-all-load.svg"},
{Path: "gpu-all-memload", File: "gpu-all-memload.svg"},
{Path: "gpu-all-clock", File: "gpu-all-clock.svg"},
{Path: "gpu-all-power", File: "gpu-all-power.svg"},
{Path: "gpu-all-temp", File: "gpu-all-temp.svg"},
}
func taskChartSpecsForSamples(samples []platform.LiveMetricSample) []taskChartSpec {
specs := make([]taskChartSpec, 0, len(taskDashboardChartSpecs)+len(taskGPUIndices(samples)))
specs = append(specs, taskDashboardChartSpecs...)
for _, idx := range taskGPUIndices(samples) {
specs = append(specs, taskChartSpec{
Path: fmt.Sprintf("gpu/%d-overview", idx),
File: fmt.Sprintf("gpu-%d-overview.svg", idx),
})
}
return specs
}
func writeTaskReportArtifacts(t *Task) error {
if t == nil {
return nil
}
ensureTaskReportPaths(t)
if strings.TrimSpace(t.ArtifactsDir) == "" {
return nil
}
if err := os.MkdirAll(t.ArtifactsDir, 0755); err != nil {
return err
}
start, end := taskTimeWindow(t)
samples, _ := loadTaskMetricSamples(start, end)
charts, inlineCharts := writeTaskCharts(t.ArtifactsDir, start, end, samples)
logText := ""
if data, err := os.ReadFile(t.LogPath); err == nil {
logText = string(data)
}
report := taskReport{
ID: t.ID,
Name: t.Name,
Target: t.Target,
Status: t.Status,
CreatedAt: t.CreatedAt,
StartedAt: t.StartedAt,
DoneAt: t.DoneAt,
DurationSec: taskElapsedSec(t, reportDoneTime(t)),
Error: t.ErrMsg,
LogFile: filepath.Base(t.LogPath),
Charts: charts,
GeneratedAt: time.Now().UTC(),
}
if err := writeJSONFile(t.ReportJSONPath, report); err != nil {
return err
}
return os.WriteFile(t.ReportHTMLPath, []byte(renderTaskReportFragment(report, inlineCharts, logText)), 0644)
}
func reportDoneTime(t *Task) time.Time {
if t != nil && t.DoneAt != nil && !t.DoneAt.IsZero() {
return *t.DoneAt
}
return time.Now()
}
func taskTimeWindow(t *Task) (time.Time, time.Time) {
if t == nil {
now := time.Now().UTC()
return now, now
}
start := t.CreatedAt.UTC()
if t.StartedAt != nil && !t.StartedAt.IsZero() {
start = t.StartedAt.UTC()
}
end := time.Now().UTC()
if t.DoneAt != nil && !t.DoneAt.IsZero() {
end = t.DoneAt.UTC()
}
if end.Before(start) {
end = start
}
return start, end
}
func loadTaskMetricSamples(start, end time.Time) ([]platform.LiveMetricSample, error) {
db, err := openMetricsDB(taskReportMetricsDBPath)
if err != nil {
return nil, err
}
defer db.Close()
return db.LoadBetween(start, end)
}
func writeTaskCharts(dir string, start, end time.Time, samples []platform.LiveMetricSample) ([]taskReportChart, map[string]string) {
if len(samples) == 0 {
return nil, nil
}
timeline := []chartTimelineSegment{{Start: start, End: end, Active: true}}
var charts []taskReportChart
inline := make(map[string]string)
for _, spec := range taskChartSpecsForSamples(samples) {
title, svg, ok := renderTaskChartSVG(spec.Path, samples, timeline)
if !ok || len(svg) == 0 {
continue
}
path := filepath.Join(dir, spec.File)
if err := os.WriteFile(path, svg, 0644); err != nil {
continue
}
charts = append(charts, taskReportChart{Title: title, File: spec.File})
inline[spec.File] = string(svg)
}
return charts, inline
}
func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) (string, []byte, bool) {
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
buf, hasData, err := renderGPUOverviewChartSVG(idx, samples, timeline)
if err != nil || !hasData {
return "", nil, false
}
return gpuDisplayLabel(idx) + " Overview", buf, true
}
datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
if !ok {
return "", nil, false
}
buf, err := renderMetricChartSVG(
title,
labels,
sampleTimes(samples),
datasets,
names,
yMin,
yMax,
chartCanvasHeightForPath(path, len(names)),
timeline,
)
if err != nil {
return "", nil, false
}
return title, buf, true
}
func taskGPUIndices(samples []platform.LiveMetricSample) []int {
seen := map[int]bool{}
var out []int
for _, s := range samples {
for _, g := range s.GPUs {
if seen[g.GPUIndex] {
continue
}
seen[g.GPUIndex] = true
out = append(out, g.GPUIndex)
}
}
sort.Ints(out)
return out
}
func writeJSONFile(path string, v any) error {
data, err := json.MarshalIndent(v, "", " ")
if err != nil {
return err
}
return os.WriteFile(path, data, 0644)
}
func renderTaskReportFragment(report taskReport, charts map[string]string, logText string) string {
var b strings.Builder
b.WriteString(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">`)
b.WriteString(`<div class="grid2">`)
b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Task</div><div style="font-size:16px;font-weight:700">` + html.EscapeString(report.Name) + `</div>`)
b.WriteString(`<div style="font-size:13px;color:var(--muted)">` + html.EscapeString(report.Target) + `</div></div>`)
b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Status</div><div>` + renderTaskStatusBadge(report.Status) + `</div>`)
if strings.TrimSpace(report.Error) != "" {
b.WriteString(`<div style="margin-top:8px;font-size:13px;color:var(--crit-fg)">` + html.EscapeString(report.Error) + `</div>`)
}
b.WriteString(`</div></div>`)
b.WriteString(`<div style="margin-top:14px;font-size:13px;color:var(--muted)">`)
b.WriteString(`Started: ` + formatTaskTime(report.StartedAt, report.CreatedAt) + ` | Finished: ` + formatTaskTime(report.DoneAt, time.Time{}) + ` | Duration: ` + formatTaskDuration(report.DurationSec))
b.WriteString(`</div></div></div>`)
if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
b.WriteString(benchmarkCard)
}
if len(report.Charts) > 0 {
for _, chart := range report.Charts {
b.WriteString(`<div class="card"><div class="card-head">` + html.EscapeString(chart.Title) + `</div><div class="card-body" style="padding:12px">`)
b.WriteString(charts[chart.File])
b.WriteString(`</div></div>`)
}
} else {
b.WriteString(`<div class="alert alert-info">No metric samples were captured during this task window.</div>`)
}
b.WriteString(`<div class="card"><div class="card-head">Logs</div><div class="card-body">`)
b.WriteString(`<div class="terminal" style="max-height:none;white-space:pre-wrap">` + html.EscapeString(strings.TrimSpace(logText)) + `</div>`)
b.WriteString(`</div></div>`)
return b.String()
}
func renderTaskBenchmarkResultsCard(target, logText string) string {
if strings.TrimSpace(target) != "nvidia-benchmark" {
return ""
}
resultPath := taskBenchmarkResultPath(logText)
if strings.TrimSpace(resultPath) == "" {
return ""
}
columns, runs := loadBenchmarkHistoryFromPaths([]string{resultPath})
if len(runs) == 0 {
return ""
}
return renderBenchmarkResultsCardFromRuns(
"Benchmark Results",
"Composite score for this benchmark task.",
"No benchmark results were saved for this task.",
columns,
runs,
)
}
func taskBenchmarkResultPath(logText string) string {
archivePath := taskArchivePathFromLog(logText)
if archivePath == "" {
return ""
}
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
if runDir == archivePath {
return ""
}
return filepath.Join(runDir, "result.json")
}
func taskArchivePathFromLog(logText string) string {
lines := strings.Split(logText, "\n")
for i := len(lines) - 1; i >= 0; i-- {
line := strings.TrimSpace(lines[i])
if line == "" || !strings.HasPrefix(line, "Archive:") {
continue
}
path := strings.TrimSpace(strings.TrimPrefix(line, "Archive:"))
if strings.HasPrefix(path, "Archive written to ") {
path = strings.TrimSpace(strings.TrimPrefix(path, "Archive written to "))
}
if strings.HasSuffix(path, ".tar.gz") {
return path
}
}
return ""
}
func renderTaskStatusBadge(status string) string {
className := map[string]string{
TaskRunning: "badge-ok",
TaskPending: "badge-unknown",
TaskDone: "badge-ok",
TaskFailed: "badge-err",
TaskCancelled: "badge-unknown",
}[status]
if className == "" {
className = "badge-unknown"
}
label := strings.TrimSpace(status)
if label == "" {
label = "unknown"
}
return `<span class="badge ` + className + `">` + html.EscapeString(label) + `</span>`
}
func formatTaskTime(ts *time.Time, fallback time.Time) string {
if ts != nil && !ts.IsZero() {
return ts.Local().Format("2006-01-02 15:04:05")
}
if !fallback.IsZero() {
return fallback.Local().Format("2006-01-02 15:04:05")
}
return "n/a"
}
func formatTaskDuration(sec int) string {
if sec <= 0 {
return "n/a"
}
if sec < 60 {
return fmt.Sprintf("%ds", sec)
}
if sec < 3600 {
return fmt.Sprintf("%dm %02ds", sec/60, sec%60)
}
return fmt.Sprintf("%dh %02dm %02ds", sec/3600, (sec%3600)/60, sec%60)
}

File diff suppressed because it is too large


@@ -0,0 +1,744 @@
package webui
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
"bee/audit/internal/app"
"bee/audit/internal/platform"
)
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
dir := t.TempDir()
q := &taskQueue{
statePath: filepath.Join(dir, "tasks-state.json"),
logsDir: filepath.Join(dir, "tasks"),
trigger: make(chan struct{}, 1),
}
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
t.Fatal(err)
}
started := time.Now().Add(-time.Minute)
// A task that was pending (not yet started) must be re-queued on restart.
pendingTask := &Task{
ID: "task-pending",
Name: "Memory Burn-in",
Target: "memory-stress",
Priority: 2,
Status: TaskPending,
CreatedAt: time.Now().Add(-2 * time.Minute),
params: taskParams{Duration: 300, BurnProfile: "smoke"},
}
// A task that was running when bee-web crashed must NOT be re-queued —
// its child processes (e.g. gpu-burn-worker) survive the restart in
// their own process groups and can't be cancelled retroactively.
runningTask := &Task{
ID: "task-running",
Name: "NVIDIA GPU Stress",
Target: "nvidia-stress",
Priority: 1,
Status: TaskRunning,
CreatedAt: time.Now().Add(-3 * time.Minute),
StartedAt: &started,
params: taskParams{Duration: 86400},
}
for _, task := range []*Task{pendingTask, runningTask} {
q.tasks = append(q.tasks, task)
q.assignTaskLogPathLocked(task)
}
q.persistLocked()
recovered := &taskQueue{
statePath: q.statePath,
logsDir: q.logsDir,
trigger: make(chan struct{}, 1),
}
recovered.loadLocked()
if len(recovered.tasks) != 2 {
t.Fatalf("tasks=%d want 2", len(recovered.tasks))
}
byID := map[string]*Task{}
for i := range recovered.tasks {
byID[recovered.tasks[i].ID] = recovered.tasks[i]
}
// Pending task must be re-queued as pending with params intact.
p := byID["task-pending"]
if p == nil {
t.Fatal("task-pending not found")
}
if p.Status != TaskPending {
t.Fatalf("pending task: status=%q want %q", p.Status, TaskPending)
}
if p.StartedAt != nil {
t.Fatalf("pending task: started_at=%v want nil", p.StartedAt)
}
if p.params.Duration != 300 || p.params.BurnProfile != "smoke" {
t.Fatalf("pending task: params=%+v", p.params)
}
if p.LogPath == "" {
t.Fatal("pending task: expected log path")
}
// Running task must be marked failed, not re-queued, to prevent
// launching duplicate workers (e.g. a second set of gpu-burn-workers).
r := byID["task-running"]
if r == nil {
t.Fatal("task-running not found")
}
if r.Status != TaskFailed {
t.Fatalf("running task: status=%q want %q", r.Status, TaskFailed)
}
if r.ErrMsg == "" {
t.Fatal("running task: expected non-empty error message")
}
if r.DoneAt == nil {
t.Fatal("running task: expected done_at to be set")
}
}
func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "task.log")
if err := os.WriteFile(path, []byte("line1\nline2\n"), 0644); err != nil {
t.Fatal(err)
}
j := newTaskJobState(path)
existing, ch := j.subscribe()
if ch == nil {
t.Fatal("expected live subscription channel")
}
if len(existing) != 2 || existing[0] != "line1" || existing[1] != "line2" {
t.Fatalf("existing=%v", existing)
}
}
func TestTaskQueueSnapshotSortsNewestFirst(t *testing.T) {
now := time.Date(2026, 4, 2, 12, 0, 0, 0, time.UTC)
q := &taskQueue{
tasks: []*Task{
{
ID: "old-running",
Name: "Old Running",
Status: TaskRunning,
Priority: 10,
CreatedAt: now.Add(-3 * time.Minute),
},
{
ID: "new-done",
Name: "New Done",
Status: TaskDone,
Priority: 0,
CreatedAt: now.Add(-1 * time.Minute),
},
{
ID: "mid-pending",
Name: "Mid Pending",
Status: TaskPending,
Priority: 1,
CreatedAt: now.Add(-2 * time.Minute),
},
},
}
got := q.snapshot()
if len(got) != 3 {
t.Fatalf("snapshot len=%d want 3", len(got))
}
if got[0].ID != "new-done" || got[1].ID != "mid-pending" || got[2].ID != "old-running" {
t.Fatalf("snapshot order=%q,%q,%q", got[0].ID, got[1].ID, got[2].ID)
}
}
func TestNewJobIDUsesTASKPrefixAndZeroPadding(t *testing.T) {
globalQueue.mu.Lock()
origTasks := globalQueue.tasks
globalQueue.tasks = nil
globalQueue.mu.Unlock()
origCounter := jobCounter.Load()
jobCounter.Store(0)
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = origTasks
globalQueue.mu.Unlock()
jobCounter.Store(origCounter)
})
if got := newJobID("ignored"); got != "TASK-000" {
t.Fatalf("id=%q want TASK-000", got)
}
if got := newJobID("ignored"); got != "TASK-001" {
t.Fatalf("id=%q want TASK-001", got)
}
}
func TestTaskArtifactsDirStartsWithTaskNumber(t *testing.T) {
root := t.TempDir()
task := &Task{
ID: "TASK-007",
Name: "NVIDIA Benchmark",
}
got := filepath.Base(taskArtifactsDir(root, task, TaskDone))
if !strings.HasPrefix(got, "007_") {
t.Fatalf("artifacts dir=%q want prefix 007_", got)
}
}
func TestHandleAPITasksStreamReplaysPersistedLogWithoutLiveJob(t *testing.T) {
dir := t.TempDir()
logPath := filepath.Join(dir, "task.log")
if err := os.WriteFile(logPath, []byte("line1\nline2\n"), 0644); err != nil {
t.Fatal(err)
}
globalQueue.mu.Lock()
origTasks := globalQueue.tasks
globalQueue.tasks = []*Task{{
ID: "done-1",
Name: "Done Task",
Status: TaskDone,
CreatedAt: time.Now(),
LogPath: logPath,
}}
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = origTasks
globalQueue.mu.Unlock()
})
req := httptest.NewRequest(http.MethodGet, "/api/tasks/done-1/stream", nil)
req.SetPathValue("id", "done-1")
rec := httptest.NewRecorder()
h := &handler{}
h.handleAPITasksStream(rec, req)
if rec.Code != http.StatusOK {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
body := rec.Body.String()
if !strings.Contains(body, "data: line1\n\n") || !strings.Contains(body, "data: line2\n\n") {
t.Fatalf("body=%q", body)
}
if !strings.Contains(body, "event: done\n") {
t.Fatalf("missing done event: %q", body)
}
}
func TestHandleAPITasksStreamPendingTaskStartsSSEImmediately(t *testing.T) {
globalQueue.mu.Lock()
origTasks := globalQueue.tasks
globalQueue.tasks = []*Task{{
ID: "pending-1",
Name: "Pending Task",
Status: TaskPending,
CreatedAt: time.Now(),
}}
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = origTasks
globalQueue.mu.Unlock()
})
ctx, cancel := context.WithCancel(context.Background())
req := httptest.NewRequest(http.MethodGet, "/api/tasks/pending-1/stream", nil).WithContext(ctx)
req.SetPathValue("id", "pending-1")
rec := httptest.NewRecorder()
done := make(chan struct{})
go func() {
h := &handler{}
h.handleAPITasksStream(rec, req)
close(done)
}()
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
if strings.Contains(rec.Body.String(), "Task is queued. Waiting for worker...") {
cancel()
<-done
if rec.Code != http.StatusOK {
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
}
return
}
time.Sleep(20 * time.Millisecond)
}
cancel()
<-done
t.Fatalf("stream did not emit queued status promptly, body=%q", rec.Body.String())
}
func TestFinalizeTaskRunCreatesReportFolderAndArtifacts(t *testing.T) {
dir := t.TempDir()
metricsPath := filepath.Join(dir, "metrics.db")
prevMetricsPath := taskReportMetricsDBPath
taskReportMetricsDBPath = metricsPath
t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
db, err := openMetricsDB(metricsPath)
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
base := time.Now().UTC().Add(-45 * time.Second)
if err := db.Write(platform.LiveMetricSample{
Timestamp: base,
CPULoadPct: 42,
MemLoadPct: 35,
PowerW: 510,
}); err != nil {
t.Fatalf("Write: %v", err)
}
_ = db.Close()
q := &taskQueue{
statePath: filepath.Join(dir, "tasks-state.json"),
logsDir: filepath.Join(dir, "tasks"),
trigger: make(chan struct{}, 1),
}
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
t.Fatal(err)
}
started := time.Now().UTC().Add(-90 * time.Second)
task := &Task{
ID: "task-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskRunning,
CreatedAt: started.Add(-10 * time.Second),
StartedAt: &started,
}
q.assignTaskLogPathLocked(task)
appendJobLog(task.LogPath, "line-1")
job := newTaskJobState(task.LogPath)
job.finish("")
q.finalizeTaskRun(task, job)
if task.Status != TaskDone {
t.Fatalf("status=%q want %q", task.Status, TaskDone)
}
if !strings.Contains(filepath.Base(task.ArtifactsDir), "_done") {
t.Fatalf("artifacts dir=%q", task.ArtifactsDir)
}
if _, err := os.Stat(task.ReportJSONPath); err != nil {
t.Fatalf("report json: %v", err)
}
if _, err := os.Stat(task.ReportHTMLPath); err != nil {
t.Fatalf("report html: %v", err)
}
var report taskReport
data, err := os.ReadFile(task.ReportJSONPath)
if err != nil {
t.Fatalf("ReadFile(report.json): %v", err)
}
if err := json.Unmarshal(data, &report); err != nil {
t.Fatalf("Unmarshal(report.json): %v", err)
}
if report.ID != task.ID || report.Status != TaskDone {
t.Fatalf("report=%+v", report)
}
if len(report.Charts) == 0 {
t.Fatalf("expected charts in report, got none")
}
}
func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
dir := t.TempDir()
metricsPath := filepath.Join(dir, "metrics.db")
prevMetricsPath := taskReportMetricsDBPath
taskReportMetricsDBPath = metricsPath
t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
t.Fatal(err)
}
result := platform.NvidiaBenchmarkResult{
GeneratedAt: time.Date(2026, time.April, 6, 12, 0, 0, 0, time.UTC),
BenchmarkProfile: "standard",
OverallStatus: "OK",
GPUs: []platform.BenchmarkGPUResult{
{
Index: 0,
Name: "NVIDIA H100 PCIe",
Scores: platform.BenchmarkScorecard{
CompositeScore: 1176.25,
},
},
},
}
raw, err := json.Marshal(result)
if err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(benchmarkDir, "result.json"), raw, 0644); err != nil {
t.Fatal(err)
}
artifactsDir := filepath.Join(dir, "tasks", "task-bench_done")
if err := os.MkdirAll(artifactsDir, 0755); err != nil {
t.Fatal(err)
}
task := &Task{
ID: "task-bench",
Name: "NVIDIA Benchmark",
Target: "nvidia-benchmark",
Status: TaskDone,
CreatedAt: time.Now().UTC().Add(-time.Minute),
ArtifactsDir: artifactsDir,
}
ensureTaskReportPaths(task)
logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
t.Fatal(err)
}
if err := writeTaskReportArtifacts(task); err != nil {
t.Fatalf("writeTaskReportArtifacts: %v", err)
}
body, err := os.ReadFile(task.ReportHTMLPath)
if err != nil {
t.Fatalf("ReadFile(report.html): %v", err)
}
html := string(body)
for _, needle := range []string{
`Benchmark Results`,
`Composite score for this benchmark task.`,
`GPU #0 — NVIDIA H100 PCIe`,
`1176.25`,
} {
if !strings.Contains(html, needle) {
t.Fatalf("report missing %q: %s", needle, html)
}
}
}
func TestTaskLifecycleMirrorsToSerialConsole(t *testing.T) {
var lines []string
prev := taskSerialWriteLine
taskSerialWriteLine = func(line string) { lines = append(lines, line) }
t.Cleanup(func() { taskSerialWriteLine = prev })
dir := t.TempDir()
q := &taskQueue{
statePath: filepath.Join(dir, "tasks-state.json"),
logsDir: filepath.Join(dir, "tasks"),
trigger: make(chan struct{}, 1),
}
task := &Task{
ID: "task-serial-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskPending,
CreatedAt: time.Now().UTC(),
}
q.enqueue(task)
started := time.Now().UTC()
task.Status = TaskRunning
task.StartedAt = &started
job := newTaskJobState(task.LogPath, taskSerialPrefix(task))
job.append("Starting CPU SAT...")
job.append("CPU stress duration: 60s")
job.finish("")
q.finalizeTaskRun(task, job)
joined := strings.Join(lines, "\n")
for _, needle := range []string{
"queued",
"Starting CPU SAT...",
"CPU stress duration: 60s",
"finished with status=done",
} {
if !strings.Contains(joined, needle) {
t.Fatalf("serial mirror missing %q in %q", needle, joined)
}
}
}
func TestResolveBurnPreset(t *testing.T) {
tests := []struct {
profile string
want burnPreset
}{
{profile: "smoke", want: burnPreset{DurationSec: 5 * 60}},
{profile: "acceptance", want: burnPreset{DurationSec: 60 * 60}},
{profile: "overnight", want: burnPreset{DurationSec: 8 * 60 * 60}},
{profile: "", want: burnPreset{DurationSec: 5 * 60}},
}
for _, tc := range tests {
if got := resolveBurnPreset(tc.profile); got != tc.want {
t.Fatalf("resolveBurnPreset(%q)=%+v want %+v", tc.profile, got, tc.want)
}
}
}
func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
tests := []struct {
loader string
want string
}{
{loader: "", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
{loader: "builtin", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
{loader: "john", want: "NVIDIA GPU Stress (John/OpenCL)"},
{loader: "nccl", want: "NVIDIA GPU Stress (NCCL)"},
}
for _, tc := range tests {
if got := taskDisplayName("nvidia-stress", "acceptance", tc.loader); got != tc.want {
t.Fatalf("taskDisplayName(loader=%q)=%q want %q", tc.loader, got, tc.want)
}
}
}
func TestRunTaskHonorsCancel(t *testing.T) {
blocked := make(chan struct{})
released := make(chan struct{})
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
close(blocked)
select {
case <-ctx.Done():
close(released)
return "", ctx.Err()
case <-time.After(5 * time.Second):
close(released)
return "unexpected", nil
}
}
q := &taskQueue{
opts: &HandlerOptions{App: &app.App{}},
}
tk := &Task{
ID: "cpu-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{Duration: 60},
}
j := &jobState{}
ctx, cancel := context.WithCancel(context.Background())
j.cancel = cancel
tk.job = j
orig := runCPUAcceptancePackCtx
runCPUAcceptancePackCtx = func(_ *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
return aRun(nil, ctx, baseDir, durationSec, logFunc)
}
defer func() { runCPUAcceptancePackCtx = orig }()
done := make(chan struct{})
go func() {
q.runTask(tk, j, ctx)
close(done)
}()
<-blocked
j.abort()
select {
case <-released:
case <-time.After(2 * time.Second):
t.Fatal("task did not observe cancel")
}
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatal("runTask did not return after cancel")
}
}
func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
var gotDuration int
q := &taskQueue{
opts: &HandlerOptions{App: &app.App{}},
}
tk := &Task{
ID: "cpu-burn-1",
Name: "CPU Burn-in",
Target: "cpu",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{BurnProfile: "smoke"},
}
j := &jobState{}
orig := runCPUAcceptancePackCtx
runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
gotDuration = durationSec
return "/tmp/cpu-burn.tar.gz", nil
}
defer func() { runCPUAcceptancePackCtx = orig }()
q.runTask(tk, j, context.Background())
if gotDuration != 5*60 {
t.Fatalf("duration=%d want %d", gotDuration, 5*60)
}
}
func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
dir := t.TempDir()
q := &taskQueue{
opts: &HandlerOptions{ExportDir: dir},
}
tk := &Task{
ID: "support-bundle-1",
Name: "Support Bundle",
Target: "support-bundle",
Status: TaskRunning,
CreatedAt: time.Now(),
}
j := &jobState{}
var gotExportDir string
orig := buildSupportBundle
buildSupportBundle = func(exportDir string) (string, error) {
gotExportDir = exportDir
return filepath.Join(exportDir, "bundle.tar.gz"), nil
}
defer func() { buildSupportBundle = orig }()
q.runTask(tk, j, context.Background())
if gotExportDir != dir {
t.Fatalf("exportDir=%q want %q", gotExportDir, dir)
}
if j.err != "" {
t.Fatalf("unexpected error: %q", j.err)
}
if !strings.Contains(strings.Join(j.lines, "\n"), "Archive: "+filepath.Join(dir, "bundle.tar.gz")) {
t.Fatalf("lines=%v", j.lines)
}
}
func TestTaskElapsedSecClampsInvalidStartedAt(t *testing.T) {
now := time.Date(2026, 4, 1, 19, 10, 0, 0, time.UTC)
created := time.Date(2026, 4, 1, 19, 4, 5, 0, time.UTC)
started := time.Time{}
task := &Task{
Status: TaskRunning,
CreatedAt: created,
StartedAt: &started,
}
if got := taskElapsedSec(task, now); got != 0 {
t.Fatalf("taskElapsedSec(zero start)=%d want 0", got)
}
stale := created.Add(-24 * time.Hour)
task.StartedAt = &stale
if got := taskElapsedSec(task, now); got != int(now.Sub(created).Seconds()) {
t.Fatalf("taskElapsedSec(stale start)=%d want %d", got, int(now.Sub(created).Seconds()))
}
}
func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
q := &taskQueue{
opts: &HandlerOptions{},
}
tk := &Task{
ID: "install-1",
Name: "Install to Disk",
Target: "install",
Status: TaskRunning,
CreatedAt: time.Now(),
params: taskParams{Device: "/dev/sda"},
}
j := &jobState{}
var gotDevice string
var gotLogPath string
orig := installCommand
installCommand = func(ctx context.Context, device string, logPath string) *exec.Cmd {
gotDevice = device
gotLogPath = logPath
return exec.CommandContext(ctx, "sh", "-c", "printf 'line1\nline2\n'")
}
defer func() { installCommand = orig }()
q.runTask(tk, j, context.Background())
if gotDevice != "/dev/sda" {
t.Fatalf("device=%q want /dev/sda", gotDevice)
}
if gotLogPath == "" {
t.Fatal("expected install log path")
}
logs := strings.Join(j.lines, "\n")
if !strings.Contains(logs, "Install log: ") {
t.Fatalf("missing install log line: %v", j.lines)
}
if !strings.Contains(logs, "line1") || !strings.Contains(logs, "line2") {
t.Fatalf("missing streamed output: %v", j.lines)
}
if j.err != "" {
t.Fatalf("unexpected error: %q", j.err)
}
}
func TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow(t *testing.T) {
dir := t.TempDir()
q := &taskQueue{
opts: &HandlerOptions{App: &app.App{}},
statePath: filepath.Join(dir, "tasks-state.json"),
logsDir: filepath.Join(dir, "tasks"),
kmsgWatcher: newKmsgWatcher(nil),
}
tk := &Task{
ID: "cpu-panic-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskRunning,
CreatedAt: time.Now(),
}
j := &jobState{}
orig := runCPUAcceptancePackCtx
runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, _ int, _ func(string)) (string, error) {
panic("boom")
}
defer func() { runCPUAcceptancePackCtx = orig }()
q.executeTask(tk, j, context.Background())
if tk.Status != TaskFailed {
t.Fatalf("status=%q want %q", tk.Status, TaskFailed)
}
if tk.DoneAt == nil {
t.Fatal("expected done_at to be set")
}
if !strings.Contains(tk.ErrMsg, "task panic: boom") {
t.Fatalf("task error=%q", tk.ErrMsg)
}
if !strings.Contains(j.err, "task panic: boom") {
t.Fatalf("job error=%q", j.err)
}
q.kmsgWatcher.mu.Lock()
activeCount := q.kmsgWatcher.activeCount
window := q.kmsgWatcher.window
q.kmsgWatcher.mu.Unlock()
if activeCount != 0 {
t.Fatalf("activeCount=%d want 0", activeCount)
}
if window != nil {
t.Fatalf("expected kmsg window to be cleared, got %+v", window)
}
}


@@ -0,0 +1,16 @@
#!/bin/sh
set -eu
tag="$(git describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
case "${tag}" in
v*)
printf '%s\n' "${tag#v}"
;;
"")
printf 'dev\n'
;;
*)
printf '%s\n' "${tag}"
;;
esac

bible

Submodule bible updated: 456c1f022c...1d89a4918e


@@ -0,0 +1,67 @@
# Charting architecture
## Decision: one chart engine for all live metrics
**Engine:** `github.com/go-analyze/charts` (pure Go, no CGO, SVG output)
**Theme:** `grafana` (dark background, coloured lines)
All live metrics charts in the web UI are server-side SVG images served by Go
and polled by the browser every 2 seconds via `<img src="...?t=now">`.
There is no client-side canvas or JS chart library.
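The server side of this polling contract can be sketched as a plain `net/http` handler. This is a minimal illustration, not the actual bee-web code: `handleServerChartSVG` and `renderSVG` are hypothetical names, and the real handler renders live metric data rather than an empty canvas. The key points it shows are the `image/svg+xml` content type and disabled caching, so the browser's 2-second `<img>` poll (with its `?t=...` cache-buster) always receives a fresh frame.

```go
package main

import (
	"fmt"
	"net/http"
)

// renderSVG is a stand-in for the real chart renderer; here it just
// emits an empty 1400x360 canvas.
func renderSVG() string {
	return `<svg xmlns="http://www.w3.org/2000/svg" width="1400" height="360"></svg>`
}

// handleServerChartSVG re-renders the chart on every request and
// disables caching so each 2-second poll fetches a fresh frame.
func handleServerChartSVG(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "image/svg+xml")
	w.Header().Set("Cache-Control", "no-store")
	fmt.Fprint(w, renderSVG())
}

func main() {
	http.HandleFunc("/api/metrics/chart/server.svg", handleServerChartSVG)
	// http.ListenAndServe(":80", nil) // omitted in this sketch
}
```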
## Rule: live charts must be visually uniform
Live charts are a single UI family, not a set of one-off widgets. New charts and
changes to existing charts must keep the same rendering model and presentation
rules unless there is an explicit architectural decision to diverge.
Default expectations:
- same server-side SVG pipeline for all live metrics charts
- same refresh behaviour and failure handling in the browser
- same canvas size class and card layout
- same legend placement policy across charts
- same axis, title, and summary conventions
- no chart-specific visual exceptions added as a quick fix
Current default for live charts:
- legend below the plot area when a chart has 8 series or fewer
- legend hidden when a chart has more than 8 series
- 10 equal Y-axis steps across the chart height
- 1400 × 360 SVG canvas with legend
- 1400 × 288 SVG canvas without legend
- full-width card rendering in a single-column stack
If one chart needs a different layout or legend behaviour, treat that as a
design-level decision affecting the whole chart family, not as a local tweak to
just one endpoint.
### Why go-analyze/charts
- Pure Go, no CGO — builds cleanly inside the live-build container
- SVG output — crisp at any display resolution, full-width without pixelation
- Grafana theme matches the dark web UI colour scheme
- Active fork of the archived wcharczuk/go-chart
### SAT stress-test charts
The `drawGPUChartSVG` function in `platform/gpu_metrics.go` is a separate
self-contained SVG renderer used **only** for completed SAT run reports
(HTML export, burn-in summaries). It is not used for live metrics.
### Live metrics chart endpoints
| Path | Content |
|------|---------|
| `GET /api/metrics/chart/server.svg` | CPU temp, CPU load %, mem load %, power W, fan RPMs |
| `GET /api/metrics/chart/gpu/{idx}.svg` | GPU temp °C, load %, mem %, power W |
Charts are 1400 × 360 px SVG when the legend is shown, and 1400 × 288 px when
the legend is hidden. The page renders them at `width: 100%` in a
single-column layout so they always fill the viewport width.
### Ring buffers
Each metric is stored in a 120-sample ring buffer (2 minutes of history at 1 Hz).
Buffers are per-server or per-GPU and grow dynamically as new GPUs appear.
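The ring-buffer behaviour described above can be sketched in a few lines of Go. The type and method names here are illustrative, not the actual implementation; the sketch only demonstrates the fixed-capacity, overwrite-oldest semantics of a 120-sample buffer fed at 1 Hz.

```go
package main

import "fmt"

// ringBuffer keeps the most recent capacity samples, overwriting the
// oldest in place once full (illustrative sketch, not the real type).
type ringBuffer struct {
	data []float64
	next int  // index of the next write slot
	full bool // true once the buffer has wrapped at least once
}

func newRingBuffer(capacity int) *ringBuffer {
	return &ringBuffer{data: make([]float64, capacity)}
}

func (r *ringBuffer) Push(v float64) {
	r.data[r.next] = v
	r.next = (r.next + 1) % len(r.data)
	if r.next == 0 {
		r.full = true
	}
}

// Samples returns the buffered values oldest-first.
func (r *ringBuffer) Samples() []float64 {
	if !r.full {
		return append([]float64(nil), r.data[:r.next]...)
	}
	out := make([]float64, 0, len(r.data))
	out = append(out, r.data[r.next:]...) // oldest wrapped samples
	out = append(out, r.data[:r.next]...) // newest samples
	return out
}

func main() {
	rb := newRingBuffer(120)
	for i := 0; i < 125; i++ { // 125 pushes at 1 Hz → only last 120 kept
		rb.Push(float64(i))
	}
	s := rb.Samples()
	fmt.Println(len(s), s[0], s[len(s)-1]) // 120 5 124
}
```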


@@ -60,6 +60,8 @@ Rules:
- Chromium opens `http://localhost/` — the full interactive web UI
- SSH is independent from the desktop path
- serial console support is enabled for VM boot debugging
- Default boot keeps the server-safe graphics path (`nomodeset` + forced `fbdev`) for IPMI/BMC consoles
- Higher-resolution mode selection is expected only when booting through an explicit `bee.display=kms` menu entry, which disables the forced `fbdev` Xorg config before `lightdm`
## ISO build sequence
@@ -81,9 +83,9 @@ build-in-container.sh [--authorized-keys /path/to/keys]
7. `build-cublas.sh`:
   a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
   b. verify packages against repo `Packages.gz`
   c. extract headers for `bee-gpu-burn` worker build
   d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
8. build `bee-gpu-burn` worker against extracted cuBLASLt/cudart headers
9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
@@ -104,7 +106,7 @@ Build host notes:
1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
2. `auto/config` → `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
- NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
- `bee-gpu-burn` worker must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
- The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
- The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
- The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
@@ -126,7 +128,7 @@ Key checks: NVIDIA modules loaded, `nvidia-smi` sees all GPUs, lib symlinks pres
systemd services running, audit completed with NVIDIA enrichment, LAN reachability.
Current validation state:
- local/libvirt VM boot path is validated for `systemd`, SSH, `bee audit`, `bee-network`, and Web UI startup
- real hardware validation is still required before treating the ISO as release-ready
## Overlay mechanism
@@ -153,48 +155,31 @@ Current validation state:
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
Acceptance flows:
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-burn`
- NVIDIA GPU burn-in can use either `bee-gpu-burn` or `bee-john-gpu-stress` (John the Ripper jumbo via OpenCL)
- `bee sat memory` → `memtester` archive
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
- `bee-gpu-burn` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
  - Ampere: `fp16` + `fp32`/TF32 tensor-core load
  - Ada / Hopper: add `fp8`
  - Blackwell+: add `fp4`
  - PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
- Runtime overrides:
  - `BEE_MEMTESTER_SIZE_MB`
  - `BEE_MEMTESTER_PASSES`
## NVIDIA SAT Web UI flow
```
Web UI: Acceptance Tests page → Run Test button
1. POST /api/sat/nvidia/run → returns job_id
2. GET /api/sat/stream?job_id=... (SSE) — streams stdout/stderr lines live
3. After completion — archive written to /appdata/bee/export/bee-sat/
   summary.txt contains overall_status (OK / FAILED) and per-job status values
```
**Critical invariants:**
- `bee-gpu-burn` / `bee-john-gpu-stress` use `exec.CommandContext` — killed on job context cancel.
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.

View File

@@ -21,8 +21,8 @@ Fills gaps where Redfish/logpile is blind:
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
- Machine-readable health summary derived from collector verdicts
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
- NVIDIA SAT includes diagnostic collection plus a lightweight in-image GPU stress step via `bee-gpu-burn`
- `bee-gpu-burn` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
- Automatic boot audit with operator-facing local console and SSH access
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
- SSH access (OpenSSH) always available for inspection and debugging
@@ -70,7 +70,7 @@ Fills gaps where Redfish/logpile is blind:
| SSH | OpenSSH server |
| NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
| NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
| GPU stress backend | `bee-gpu-burn` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
| Builder | Debian 12 host/VM or Debian 12 container image |
## Operator UX

View File

@@ -18,6 +18,8 @@ Use the official proprietary NVIDIA `.run` installer for both kernel modules and
- Kernel modules and nvidia-smi come from a single verified source.
- NVIDIA publishes `.sha256sum` alongside each installer — download and verify before use.
- Driver version pinned in `iso/builder/VERSIONS` as `NVIDIA_DRIVER_VERSION`.
- DCGM must track the CUDA user-mode driver major version exposed by `nvidia-smi`.
- For NVIDIA driver branch `590` with CUDA `13.x`, use DCGM 4 package family `datacenter-gpu-manager-4-cuda13`; legacy `datacenter-gpu-manager` 3.x does not provide a working path for this stack.
- Build process: download `.run`, extract, compile `kernel/` sources against `linux-lts-dev`.
- Modules cached in `dist/nvidia-<version>-<kver>/` — rebuild only on version or kernel change.
- ISO size increases by ~50MB for .ko files + nvidia-smi.
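The download-and-verify step above can be sketched as a small helper. `NVIDIA_DRIVER_SHA256` is an assumed companion pin to `NVIDIA_DRIVER_VERSION` in `VERSIONS` (the real variable name may differ); `--extract-only` is the installer's standard unpack mode:

```sh
# verify_pinned FILE SHA256: refuse to build from an unverified download.
verify_pinned() {
    echo "$2  $1" | sha256sum -c - >/dev/null 2>&1 || {
        echo "checksum mismatch for $1" >&2
        return 1
    }
}

# Usage sketch in build.sh (variable names assumed):
#   run="NVIDIA-Linux-x86_64-${NVIDIA_DRIVER_VERSION}.run"
#   verify_pinned "$run" "$NVIDIA_DRIVER_SHA256"
#   sh "$run" --extract-only   # unpack kernel/ sources for the module build
```

Failing early here is what keeps a bad mirror or truncated download from burning a full ISO build cycle.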

View File

@@ -0,0 +1,224 @@
# Decision: Treat memtest as explicit ISO content, not as trusted live-build magic
**Date:** 2026-04-01
**Status:** resolved
## Context
We have already iterated on `memtest` multiple times and kept cycling between the same ideas.
The commit history shows several distinct attempts:
- `f91bce8` — fixed Bookworm memtest file names to `memtest86+x64.bin` / `memtest86+x64.efi`
- `5857805` — added a binary hook to copy memtest files from the build tree into the ISO root
- `f96b149` — added fallback extraction from the cached `.deb` when `chroot/boot/` stayed empty
- `d43a9ae` — removed the custom hook and switched back to live-build built-in memtest integration
- `60cb8f8` — restored explicit memtest menu entries and added ISO validation
- `3dbc218` / `3869788` — added archived build logs and better memtest diagnostics
Current evidence from the archived `easy-bee-nvidia-v3.14-amd64` logs dated 2026-04-01:
- `lb binary_memtest` does run and installs `memtest86+`
- but the final ISO still does **not** contain `boot/memtest86+x64.bin`
- the final ISO also does **not** contain memtest menu entries in `boot/grub/grub.cfg` or `isolinux/live.cfg`
So the assumption "live-build built-in memtest integration is enough on this stack" is currently false for this project until proven otherwise by a real built ISO.
Additional evidence from the archived `easy-bee-nvidia-v3.17-dirty-amd64` logs dated 2026-04-01:
- the build now completes successfully because memtest is non-blocking by default
- `lb binary_memtest` still runs and installs `memtest86+`
- the project-owned hook `config/hooks/normal/9100-memtest.hook.binary` does execute
- but it executes too early for its current target paths:
- `binary/boot/grub/grub.cfg` is still missing at hook time
- `binary/isolinux/live.cfg` is still missing at hook time
- memtest binaries are also still absent in `binary/boot/`
- later in the build, live-build does create intermediate bootloader configs with memtest lines in the workdir
- but the final ISO still lacks memtest binaries and still lacks memtest lines in extracted ISO `boot/grub/grub.cfg` and `isolinux/live.cfg`
So the assumption "the current normal binary hook path is late enough to patch final memtest artifacts" is also false.
Correction after inspecting the real `easy-bee-nvidia-v3.20-5-g76a9100-amd64.iso`
artifact dated 2026-04-01:
- the final ISO does contain `boot/memtest86+x64.bin`
- the final ISO does contain `boot/memtest86+x64.efi`
- the final ISO does contain memtest menu entries in both `boot/grub/grub.cfg`
and `isolinux/live.cfg`
- so `v3.20-5-g76a9100` was **not** another real memtest regression in the
shipped ISO
- the regression was in the build-time validator/debug path in `build.sh`
Root cause of the false alarm:
- `build.sh` treated "ISO reader command exists" as equivalent to "ISO reader
successfully listed/extracted members"
- `iso_list_files` / `iso_extract_file` failures were collapsed into the same
observable output as "memtest content missing"
- this made a reader failure look identical to a missing memtest payload
- as a result, we re-entered the same memtest investigation loop even though
the real ISO was already correct
Additional correction from the subsequent `v3.21` build logs dated 2026-04-01:
- once ISO reading was fixed, the post-build debug correctly showed the raw ISO
still carried live-build's default memtest layout (`live/memtest.bin`,
`live/memtest.efi`, `boot/grub/memtest.cfg`, `isolinux/memtest.cfg`)
- that mismatch is expected to trigger project recovery, because `bee` requires
`boot/memtest86+x64.bin` / `boot/memtest86+x64.efi` plus matching menu paths
- however, `build.sh` exited before recovery because `set -e` treated a direct
`iso_memtest_present` return code of `1` as fatal
- so the next repeated loop was caused by shell control flow, not by proof that
the recovery design itself was wrong
## Known Failed Attempts
These approaches were already tried and should not be repeated blindly:
1. Built-in live-build memtest only.
Reason it failed:
- `lb binary_memtest` runs, but the final ISO still misses memtest binaries and menu entries.
2. Fixing only the memtest file names for Debian Bookworm.
Reason it failed:
- correct file names alone do not make the files appear in the final ISO.
3. Copying memtest from `chroot/boot/` into `binary/boot/` via a binary hook.
Reason it failed:
- in this stack `chroot/boot/` is often empty for memtest payloads at the relevant time.
4. Fallback extraction from cached `memtest86+` `.deb`.
Reason it failed:
- this was explored already and was not enough to stabilize the final ISO path end-to-end.
5. Restoring explicit memtest menu entries in source bootloader templates only.
Reason it failed:
- memtest lines in source templates or intermediate workdir configs do not guarantee the final ISO contains them.
6. Patching `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` from the current `config/hooks/normal/9100-memtest.hook.binary`.
Reason it failed:
- the hook runs before those files exist, so the hook cannot patch them there.
## What This Means
When revisiting memtest later, start from the constraints above rather than retrying the same patterns:
- do not assume the built-in memtest stage is sufficient
- do not assume `chroot/boot/` will contain memtest payloads
- do not assume source bootloader templates are the last writer of final ISO configs
- do not assume the current normal binary hook timing is late enough for final patching
Any future memtest fix must explicitly identify:
- where the memtest binaries are reliably available at build time
- which exact build stage writes the final bootloader configs that land in the ISO
- and a post-build proof from a real ISO, not only from intermediate workdir files
- whether the ISO inspection step itself succeeded, rather than merely whether
the validator printed a memtest warning
- whether a non-zero probe is intentionally handled inside an `if` / `case`
context rather than accidentally tripping `set -e`
## Decision
For `bee`, memtest must be treated as an explicit ISO artifact with explicit post-build validation.
Project rules from now on:
- Do **not** trust `--memtest memtest86+` by itself.
- A memtest implementation is considered valid only if the produced ISO actually contains:
- `boot/memtest86+x64.bin`
- `boot/memtest86+x64.efi`
- a GRUB menu entry
- an isolinux menu entry
- If live-build built-in integration does not produce those artifacts, use an explicit project-owned mechanism such as:
- a binary hook copying files into `binary/boot/`
- extraction from the cached `memtest86+` `.deb`
- another deterministic build-time copy step
- Do **not** remove such explicit logic later unless a fresh real ISO build proves that built-in integration alone produces all required files and menu entries.
Current implementation direction:
- keep the live-build memtest stage enabled if it helps package acquisition
- do not rely on the current early `binary_hooks` timing for final patching
- prefer a post-`lb build` recovery step in `build.sh` that:
- patches the fully materialized `LB_DIR/binary` tree
- injects memtest binaries there
- ensures final bootloader entries there
- reruns late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) after the patch
- also treat ISO validation tooling as part of the critical path:
- install a stable ISO reader in the builder image
- fail with an explicit reader error if ISO listing/extraction fails
- do not treat reader failure as evidence that memtest is missing
- do not call a probe that may return "needs recovery" as a bare command under
`set -e`; wrap it in explicit control flow
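The last rule is easy to miss in review, so here is a minimal sketch. The probe is stubbed to simulate the "needs recovery" return; in `build.sh` the real function is `iso_memtest_present`:

```sh
#!/bin/sh
set -e

# Stub of the real probe: exit 0 = artifacts present, 1 = needs recovery.
iso_memtest_present() {
    return 1    # simulate "needs recovery"
}

# WRONG: a bare call under `set -e` turns the intentional status 1 into
# a fatal error and aborts the whole build:
#
#   iso_memtest_present
#
# RIGHT: wrap the probe in explicit control flow so a non-zero return is
# an ordinary branch:
if iso_memtest_present; then
    MEMTEST_STATE=ok
else
    MEMTEST_STATE=recovery
fi
echo "memtest state: $MEMTEST_STATE"
# → memtest state: recovery
```

Commands tested by `if`/`case`/`&&`/`||` are exempt from `set -e`, which is exactly why the wrapped form survives while the bare call does not.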
## Consequences
- Future memtest changes must begin by reading this ADR and the commits listed above.
- Future memtest changes must also begin by reading the failed-attempt list above.
- We should stop re-introducing "prefer built-in live-build memtest" as a default assumption without new evidence.
- Memtest validation in `build.sh` is not optional; it is the acceptance gate that prevents another silent regression.
- But validation output is only trustworthy if ISO reading itself succeeded. A
"missing memtest" warning without a successful ISO read is not evidence.
- If we change memtest strategy again, we must update this ADR with the exact build evidence that justified the change.
## Working Solution (confirmed 2026-04-01, commits 76a9100 → 2baf3be)
This approach was confirmed working in ISO `easy-bee-nvidia-v3.20-5-g76a9100-amd64.iso`
and validated again in subsequent builds. The final ISO contains all required memtest artifacts.
### Components
**1. Binary hook `config/hooks/normal/9100-memtest.hook.binary`**
Runs inside the live-build binary phase. Does not patch bootloader files at hook time —
those files may not exist yet. Instead:
- Tries to copy `memtest86+x64.bin` / `memtest86+x64.efi` from `chroot/boot/` first.
- Falls back to extracting from the cached `.deb` (via `dpkg-deb -x`) if `chroot/boot/` is empty.
- Appends GRUB and isolinux menu entries only if the respective cfg files already exist at hook time.
If they do not exist, the hook warns and continues (does not fail).
Controlled by `BEE_REQUIRE_MEMTEST=1` env var to turn warnings into hard errors when needed.
**2. Post-`lb build` recovery step in `build.sh`**
After `lb build` completes, `build.sh` checks whether the fully materialized `binary/` tree
contains all required memtest artifacts. If not:
- Copies/extracts memtest binaries into `binary/boot/`.
- Patches `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` directly.
- Reruns the late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) to rebuild
the ISO with the patched tree.
This is the deterministic safety net: even if the hook runs at the wrong time, the recovery
step handles the final `binary/` tree after live-build has written all bootloader configs.
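The bootloader patch inside the recovery step must also be idempotent, since recovery may run on a tree the hook already touched. A sketch (menu-entry text is illustrative, not copied from `build.sh`):

```sh
# Append a GRUB memtest entry only if the final config lacks one.
ensure_grub_memtest() {
    cfg="$1"
    grep -q 'memtest86+x64\.bin' "$cfg" && return 0
    cat >> "$cfg" <<'EOF'
menuentry "Memory test (memtest86+)" {
    linux /boot/memtest86+x64.bin
}
EOF
}

# Recovery patches the materialized tree, then reruns the late stages:
#   ensure_grub_memtest "$LB_DIR/binary/boot/grub/grub.cfg"
#   lb binary_checksums && lb binary_iso && lb binary_zsync
```

Because the guard greps the final file rather than tracking state, rerunning recovery never produces duplicate menu entries.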
**3. ISO validation hardening**
The memtest probe in `build.sh` is wrapped in explicit `if` / `case` control flow, not called
as a bare command under `set -e`. A non-zero probe return (needs recovery) is intentional and
handled — it does not abort the build prematurely.
ISO reading (`xorriso -indev -ls` / extraction) is treated as a separate prerequisite.
If the reader fails, the validator reports a reader error explicitly, not a memtest warning.
This prevents the false-negative loop that burned 2026-04-01 builds v3.14–v3.19.
### Why this works when earlier attempts did not
The earlier patterns all shared a single flaw: they assumed a single build-time point
(hook or source template) would be the last writer of bootloader configs and memtest payloads.
In live-build on Debian Bookworm that assumption is false — live-build continues writing
bootloader files after custom hooks run, and `chroot/boot/` does not reliably hold memtest payloads.
The recovery step sidesteps the ordering problem entirely: it acts on the fully materialized
`binary/` tree after `lb build` finishes, then rebuilds the ISO from that patched tree.
There is no ordering dependency to get wrong.
### Do not revert
Do not remove the recovery step or the hook without a fresh real ISO build proving
live-build alone produces all four required artifacts:
- `boot/memtest86+x64.bin`
- `boot/memtest86+x64.efi`
- memtest entry in `boot/grub/grub.cfg`
- memtest entry in `isolinux/live.cfg`

View File

@@ -5,3 +5,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
| Date | Decision | Status |
|---|---|---|
| 2026-03-05 | Use NVIDIA proprietary driver | active |
| 2026-04-01 | Treat memtest as explicit ISO content | active |

View File

@@ -0,0 +1,248 @@
# Benchmark clock calibration research
## Status
In progress. Baseline data from production servers pending.
## Background
The benchmark locks GPU clocks to `MaxGraphicsClockMHz` (boost) via `nvidia-smi -lgc`
before the steady-state phase. The metric `low_sm_clock_vs_target` fires when
`avg_steady_clock < locked_target * 0.90`.
Problem: boost clock is the theoretical maximum under ideal cooling. In practice,
even a healthy GPU in a non-ideal server will sustain clocks well below boost.
The 90% threshold has no empirical basis.
## Key observations (2026-04-06)
### H100 PCIe — new card, server not designed for it
- avg clock 1384 MHz, P95 1560 MHz (unstable; boost 1755 MHz)
- Thermal sustain: 0.0 (sw_thermal covers entire steady window)
- Stability: 70.0 — clocks erratic, no equilibrium found
- Degradation: power_capped, thermal_limited, low_sm_clock_vs_target, variance_too_high
### H200 NVL — new card, server not designed for it
- avg clock = P95 = 1635 MHz (perfectly stable)
- Thermal sustain: 0.0 (sw_thermal + sw_power cover entire steady window)
- Stability: 92.0 — found stable thermal equilibrium at 1635 MHz
- Degradation: power_capped, thermal_limited
- Compute: 989 TOPS — card is computing correctly for its frequency
### Key insight
The meaningful distinction is not *whether* the card throttles but *how stably*
it throttles. H200 found a thermal equilibrium (avg == P95, Stability 92),
H100 did not (avg << P95, Stability 70). Both are new cards; the H100's
instability may reflect a more severe thermal mismatch or a card issue.
`sw_power ≈ sw_thermal` pattern = server cooling constraint, card likely OK.
`hw_thermal >> sw_thermal` pattern = card itself overheating, investigate.
## Hypothesis for baseline
After testing on servers designed for their GPUs (proper cooling):
- Healthy GPU under sustained load will run at a stable fraction of boost
- Expected: avg_steady ≈ 80–95% of boost depending on model and TDP class
- Base clock (`clocks.base.gr`) may be a better reference than boost:
a healthy card under real workload should comfortably exceed base clock
## Baseline: H100 PCIe HBM2e — designed server (2026-04-06, 10 samples)
Source: external stress test tool, ~90s runs, designed server, adequate power.
### Healthy fingerprint
- **Power**: hits cap ~340–360W immediately, stays flat throughout — HEALTHY
- **Clock**: starts ~1750 MHz, oscillates and declines to ~1540–1600 MHz by 90s
- Avg steady (visual): **~1580–1620 MHz**
- vs boost 1755 MHz: **~91–92%**
- Oscillation is NORMAL — this is the boost algorithm balancing under power cap
- Stable power + oscillating clocks = healthy power-cap behavior
- **Temperature**: linear rise ~38°C → 75–80°C over 90s (no runaway)
- **Consistency**: all 10 samples within ±20 MHz — very repeatable
### Characteristic pattern
Flat power line + oscillating/declining clock line = GPU correctly managed by
power cap algorithm. Do NOT flag this as instability.
### Clock CV implication
The healthy oscillation WILL produce moderate ClockCVPct (~5–10%).
The current `variance_too_high` threshold (StabilityScore < 85) may fire on
healthy HBM2e PCIe cards. Needs recalibration.
---
## Baseline: H100 HBM3 OEM SXM Custom (restored) — 2 confirmed samples
Source: pytorch_training_loop stress test, 120s (90s stress + 30s cooldown).
Confirmed GPU: NVIDIA H100 80GB HBM3, GH100 rev a1.
### GPU clock reference (from nvidia-smi, idle):
- base_clock_mhz: **1095**
- boost_clock_mhz: **1755** (nvidia-smi `clocks.max.graphics` at idle)
- achieved_max_clock_mhz: **1980** (actual burst max observed by tool)
- Our benchmark locks to `clocks.max.graphics` = likely 1980 MHz for this chip
### Observed under 700W sustained load (both samples nearly identical):
- Power: ~700W flat — SXM slot, adequate power confirmed
- Clock steady range: **~1380–1480 MHz**, avg **~1420–1460 MHz**
- vs 1980 MHz (lock target): **72–74%** — severely below
- vs 1755 MHz (nvidia-smi boost): **81–83%**
- vs 1095 MHz (base): 130% — above base but far below expected for SXM
- Clock/Watt: ~2.1 MHz/W vs HBM2e ~4.6 MHz/W — 2× worse efficiency
- Temperature: 38°C → 79–80°C (same rate as HBM2e)
- Oscillation: present, similar character to HBM2e but at much lower frequency
### Diagnosis
These restored cards are degraded. A healthy H100 SXM in a designed server
(DGX H100, HGX H100) should sustain ~1800–1900 MHz at 700W (~91–96% of 1980).
The 72–74% result is a clear signal of silicon or VRM degradation from the
refurbishment process.
### Clock pattern note
Images 8/9 (previously marked as "HBM3 restored") are now confirmed identical
to images 19/20. Both sample sets show same degraded pattern — same batch.
---
## Baseline matrix (filled where data available)
| GPU model | Config | Avg clock steady | vs boost | Clock/Watt | Notes |
|---|---|---|---|---|---|
| H100 PCIe HBM2e | designed server | 1580–1620 MHz | 91–92% | ~4.6 MHz/W | 10 samples, healthy |
| H100 SXM HBM3 restored | 700W full | 1420–1460 MHz | 72–74% of 1980 | ~2.1 MHz/W | 4 samples confirmed, degraded |
| H100 SXM HBM3 healthy | designed | ~1800–1900 MHz est. | ~91–96% est. | ~2.7 MHz/W est. | need real baseline |
| H200 NVL | designed | TBD | TBD | TBD | need baseline |
---
## H100 official spec (from NVIDIA datasheet)
Source: NVIDIA H100 Tensor Core GPU Datasheet (image 23, 2026-04-06).
All TOPS marked * are with structural sparsity enabled. Divide by 2 for dense.
| Model | FP16 Tensor (dense) | TF32 (dense) | FP8 (dense) | TDP | Memory |
|---|---|---|---|---|---|
| H100 80GB PCIe | 756 TFLOPS | 378 TFLOPS | 1,513 TFLOPS | 350W | HBM2e |
| H100 NVL 94GB PCIe | 990 TFLOPS | 495 TFLOPS | 1,980 TFLOPS | 400W | HBM3 |
| H100 80GB SXM (BQQV) | 989 TFLOPS | 494 TFLOPS | — | 700W | HBM3 |
| H100 94GB SXM (BUBB) | 989 TFLOPS | 494 TFLOPS | — | 700W | HBM2e |
Notes:
- SXM boards do NOT list FP8 peak in this table (field empty)
- fp8_e5m2 is unsupported on H100 PCIe HBM2e — confirmed in our tests
- Tensor Cores: PCIe = 456, SXM = 528 (16% more on SXM)
## Observed efficiency (H100 80GB PCIe, throttled server)
From the report in this session (power+thermal throttle throughout steady):
| Precision | Measured | Spec (dense) | % of spec |
|---|---|---|---|
| fp16_tensor | 329 TOPS | 756 TFLOPS | 44% |
| fp32_tf32 | 115 TOPS | 378 TFLOPS | 30% |
| fp8_e4m3 | 505 TOPS | 1,513 TFLOPS | 33% |
33–44% of spec is expected given sustained power+thermal throttle (avg clock
1384 MHz vs boost 1755 MHz = 79%). The GPU is computing correctly for its
actual frequency — the low TOPS comes from throttle, not silicon defect.
## H200 official spec (from NVIDIA datasheet, image 24, 2026-04-06)
Format: without sparsity / with sparsity.
| Model | FP16 Tensor (dense) | TF32 (dense) | FP8 (dense) | TDP | Memory |
|---|---|---|---|---|---|
| H200 NVL PCIe | 836 TFLOPS | 418 TFLOPS | 1,570 TFLOPS | 600W | HBM3e 141GB |
| H200 SXM | 990 TFLOPS | 495 TFLOPS | 1,979 TFLOPS | 700W | HBM3e 141GB |
## Observed efficiency (H200 NVL PCIe, throttled non-designed server)
Avg clock 1635 MHz (62% of boost ~2619 MHz). Entire steady in thermal throttle.
| Precision | Measured | Spec (dense) | % of spec |
|---|---|---|---|
| fp16_tensor | 340 TOPS | 836 TFLOPS | 41% |
| fp32_tf32 | 120 TOPS | 418 TFLOPS | 29% |
| fp8_e4m3 | 529 TOPS | 1,570 TFLOPS | 34% |
Comparable to H100 PCIe efficiency (33–44%) despite different architecture —
both are throttle-limited. Confirms that % of spec is not a quality signal,
it reflects the thermal environment. tops_per_sm_per_ghz is the right metric.
## Real-world GEMM efficiency reference (2026-04-06, web research)
Sources: SemiAnalysis MI300X vs H100 vs H200 training benchmark; cuBLAS optimization
worklog (hamzaelshafie.bearblog.dev); Lambda AI H100 performance analysis.
### What healthy systems actually achieve:
- H100 SXM in designed server: **~720 TFLOPS FP16 = ~73% of spec**
- cuBLAS large square GEMM (8192³): up to **~83% flop utilization**
- H200 NVL PCIe: no public data, extrapolating ~73% → ~610 TFLOPS FP16
### Our results vs expectation:
| GPU | Our FP16 | Expected (73%) | Our % of spec | Gap |
|---|---|---|---|---|
| H100 PCIe HBM2e | 329 TOPS | ~552 TFLOPS | 44% | ~1.7× below |
| H200 NVL PCIe | 340 TOPS | ~610 TFLOPS | 41% | ~1.8× below |
Our results are roughly **half** of what a healthy system achieves even under throttle.
This is NOT normal — 30–44% is not the industry baseline.
### Likely causes of the gap (in order of probability):
1. **Thermal throttle** — confirmed, sw_thermal covers entire steady window
2. **Power limit below TDP** — GPU may be software-limited below 350W/600W.
Previous user may have set a lower limit via nvidia-smi -pl and it was not
reset. Our normalization sets clock locks but does NOT reset power limit.
Key check: `nvidia-smi -q | grep "Power Limit"` — default vs enforced.
3. **Matrix size** — ruled out. bee-gpu-burn uses 4096×4096×4096 for fp16,
8192×8192×4096 for fp8. These are large enough for peak tensor utilization.
### Power limit gap analysis (H100 PCIe):
- Avg clock 1384 MHz = 79% of boost 1755 MHz
- Expected TOPS at 79% clock: 756 × 0.79 ≈ 597 TFLOPS
- Actually measured: 329 TOPS = 55% of that estimate
- Remaining gap after accounting for clock throttle: ~45%
- Most likely explanation: enforced power limit < 350W TDP, further reducing
sustainable clock beyond what sw_thermal alone would cause.
### Action item:
Add `power.limit` (enforced) AND `power.default_limit` to queryBenchmarkGPUInfo
so result.json shows if the card was pre-configured with a non-default limit.
If enforced < default × 0.95 → add finding "GPU power limit is below default TDP".
### CPU/RAM impact on GPU FLOPS:
None. Pure on-GPU GEMM is fully compute-bound once data is in VRAM.
CPU core count and host RAM are irrelevant.
## Compute efficiency metric (proposed, no hardcode)
Instead of comparing TOPS to a hardcoded spec, compute:
tops_per_sm_per_ghz = measured_tops / (sm_count × avg_clock_ghz)
This is model-agnostic. A GPU computing correctly at its actual frequency
will show a consistent tops_per_sm_per_ghz regardless of throttle level.
A GPU with degraded silicon will show low tops_per_sm_per_ghz even at
normal clocks.
SM count is queryable: nvidia-smi --query-gpu=attribute.multiprocessor_count
(needs to be added to queryBenchmarkGPUInfo).
Reference values to establish after baseline runs:
- H100 PCIe fp16_tensor: TBD tops/SM/GHz
- H100 SXM fp16_tensor: TBD tops/SM/GHz
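A worked instance of the formula, using the throttled H100 PCIe run above (329 TOPS fp16 at avg 1384 MHz) and assuming the H100 PCIe part has 114 SMs — an assumption, since SM count is not recorded in the data here:

```sh
# tops_per_sm_per_ghz = measured_tops / (sm_count * avg_clock_ghz)
awk 'BEGIN {
    tops = 329          # measured fp16_tensor TOPS
    sm   = 114          # assumed SM count for H100 PCIe
    ghz  = 1.384        # avg steady clock
    printf "%.2f tops/SM/GHz\n", tops / (sm * ghz)
}'
# → 2.09 tops/SM/GHz
```

A degraded card like the restored SXM batch would show a visibly lower value at the same clock, which is exactly what this model-agnostic metric is meant to expose.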
## Proposed threshold changes (pending more data)
1. **`low_sm_clock_vs_target`**: raise threshold from 90% to 85% based on observed
9192% on healthy HBM2e. Or remove entirely — sw_power/sw_thermal already
capture the root cause.
2. **`variance_too_high`** (StabilityScore < 85): healthy HBM2e WILL oscillate
under power cap. Consider suppressing this flag when power is flat and usage
is 100% (oscillation is expected). Or lower threshold to 70.
3. **New signal: MHz/Watt efficiency**: if base_graphics_clock_mhz is available,
ratio avg_clock / power_w could identify degraded silicon (HBM3 restored S1
would have been caught by this).
Decision deferred until baseline on SXM designed servers collected.

View File

@@ -0,0 +1,62 @@
# ISO Build Rules
## Verify package names before use
ISO builds take 30–60 minutes. A wrong package name wastes an entire build cycle.
**Rule: before adding any Debian package name to the ISO config, verify it exists and check its file list.**
Use one of:
- `https://packages.debian.org/bookworm/<package-name>` — existence + description
- `https://packages.debian.org/bookworm/amd64/<package-name>/filelist` — exact files installed
- `apt-cache show <package>` inside a Debian bookworm container
This applies to:
- `iso/builder/config/package-lists/*.list.chroot`
- Any package referenced in bootloader configs, hooks, or overlay scripts
## Memtest rule
Do not assume live-build's built-in memtest integration is sufficient for `bee`.
We already tried that path and regressed again on 2026-04-01: `lb binary_memtest`
ran, but the final ISO still lacked memtest binaries and menu entries.
For this project, memtest is accepted only when the produced ISO actually
contains all of the following:
- `boot/memtest86+x64.bin`
- `boot/memtest86+x64.efi`
- a memtest entry in `boot/grub/grub.cfg`
- a memtest entry in `isolinux/live.cfg`
Rules:
- Keep explicit post-build memtest validation in `build.sh`.
- Treat ISO reader success as a separate prerequisite from memtest content.
If the reader cannot list or extract from the ISO, that is a validator
failure, not proof that memtest is missing.
- If built-in integration does not produce the artifacts above, use a
deterministic project-owned copy/extract step instead of hoping live-build
will "start working".
- Do not switch back to built-in-only memtest without fresh build evidence from
a real ISO.
- If you reference memtest files manually, verify the exact package file list
first for the target Debian release.
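The reader-vs-content separation reduces to a tiny classifier over the reader's exit status and listing (helper name and output strings are illustrative, not taken from `build.sh`):

```sh
# classify_iso_state READER_STATUS LISTING
#   reader-error    → the ISO could not be read; memtest state is UNKNOWN
#   memtest-missing → the reader worked and the binary is genuinely absent
classify_iso_state() {
    if [ "$1" -ne 0 ]; then
        echo "reader-error"
        return 2
    fi
    case "$2" in
        *memtest86+x64.bin*) echo "memtest-present" ;;
        *)                   echo "memtest-missing"; return 1 ;;
    esac
}

# Usage with the real reader:
#   listing=$(xorriso -indev "$ISO" -ls /boot 2>&1); status=$?
#   classify_iso_state "$status" "$listing"
```

Keeping the two failure modes as distinct outputs is what prevents a broken reader from masquerading as another memtest regression.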
Known bad loops for this repository:
- Do not retry built-in-only memtest without new evidence. We already proved
that `lb binary_memtest` can run while the final ISO still has no memtest.
- Do not assume fixing memtest file names is enough. Correct names did not fix
the final artifact path.
- Do not assume `chroot/boot/` contains memtest payloads at the time hooks run.
- Do not assume source `grub.cfg` / `live.cfg.in` are the final writers of ISO
bootloader configs.
- Do not assume the current `config/hooks/normal/9100-memtest.hook.binary`
timing is late enough to patch final `binary/boot/grub/grub.cfg` or
`binary/isolinux/live.cfg`; logs from 2026-04-01 showed those files were not
present yet when the hook executed.
- Do not treat a validator warning as ground truth until you have confirmed the
ISO reader actually succeeded. On 2026-04-01 we misdiagnosed another memtest
regression because the final ISO was correct but the validator produced a
false negative.

View File

@@ -0,0 +1,35 @@
# Validate vs Burn: Hardware Impact Policy
## Validate Tests (non-destructive)
Tests on the **Validate** page are purely diagnostic. They:
- **Do not write to disks** — no data is written to storage devices; SMART counters (power-on hours, load cycle count, reallocated sectors) are not incremented.
- **Do not run sustained high load** — commands complete quickly (seconds to minutes) and do not push hardware to thermal or electrical limits.
- **Do not increment hardware wear counters** — GPU memory ECC counters, NVMe wear leveling counters, and similar endurance metrics are unaffected.
- **Are safe to run repeatedly** — on new, production-bound, or already-deployed hardware without concern for reducing lifespan.
### What Validate tests actually do
| Test | What it runs |
|---|---|
| NVIDIA GPU | `nvidia-smi`, `dcgmi diag` (levels 1–4 read-only diagnostics) |
| Memory | `memtester` on a limited allocation; reads/writes to RAM only |
| Storage | `smartctl -a`, `nvme smart-log` — reads SMART data only |
| CPU | `stress-ng` for a bounded duration; CPU-only, no I/O |
| AMD GPU | `rocm-smi --showallinfo`, `dmidecode` — read-only queries |
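As a sketch, the storage row reduces to a read-only loop; `smartctl -a` and `nvme smart-log` only query SMART state and write nothing to the device (device discovery is simplified here):

```sh
# Run read-only SMART queries across the given devices; skip absent ones.
validate_storage() {
    for dev in "$@"; do
        [ -e "$dev" ] || continue
        case "$dev" in
            /dev/nvme*) nvme smart-log "$dev" ;;   # NVMe health page, read-only
            *)          smartctl -a "$dev" ;;      # full SMART report, read-only
        esac
    done
}

# Example: validate_storage /dev/sda /dev/nvme0n1
```

Because every command in the loop is a query, running it repeatedly on production hardware is safe under the policy above.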
## Burn Tests (hardware wear)
Tests on the **Burn** page run hardware at maximum or near-maximum load for extended durations. They:
- **Wear storage**: write-intensive patterns can reduce SSD endurance (P/E cycles).
- **Stress GPU memory**: extended ECC stress tests may surface latent defects but also exercise memory cells.
- **Accelerate thermal cycling**: repeated heat/cool cycles degrade solder joints and capacitors over time.
- **May increment wear counters**: GPU power-on hours, NVMe media wear indicator, and similar metrics will advance.
### Rule
> Run **Validate** freely on any server, at any time, before or after deployment.
> Run **Burn** only when explicitly required (e.g., initial acceptance after repair, or per customer SLA).
> Document when and why Burn tests were run.
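This changeset pins concrete parameters to each mode (dcgmi level 2 vs 3, 60 s vs 30 min of CPU load, 256 MB/1-process vs 1 GB/3-process memtester, SMART short vs long). A minimal sketch of that mapping as a shell helper; the function name and key=value output format are illustrative, not part of the codebase:

```shell
# Sketch only: the Validate vs Stress parameter split kept in one place.
# Values mirror the mode descriptions above; the helper itself is hypothetical.
diag_params() {
    case "$1" in
        validate) echo "dcgmi_level=2 cpu_secs=60 memtester_mb=256 memtester_procs=1 smart=short" ;;
        stress)   echo "dcgmi_level=3 cpu_secs=1800 memtester_mb=1024 memtester_procs=3 smart=long" ;;
        *)        echo "unknown mode: $1" >&2; return 1 ;;
    esac
}

diag_params validate
```

A dispatcher like this keeps the non-destructive defaults in one reviewable spot instead of scattering durations across task definitions.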


@@ -48,6 +48,7 @@ sh iso/builder/build-in-container.sh --cache-dir /path/to/cache
 - The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
 - The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
 - Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
+- The NVIDIA variant installs DCGM 4 packages matched to the CUDA user-mode driver major version. For driver branch `590` / CUDA `13.x`, the package family is `datacenter-gpu-manager-4-cuda13` rather than legacy `datacenter-gpu-manager`.
 - Override the container platform only if you know why:
   ```sh

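The DCGM packaging note above reduces to a small version-to-package mapping. A sketch, assuming the CUDA user-mode major version alone selects the family; only the `cuda13` spelling is confirmed by this change, the `cuda12` case extrapolates the same naming scheme, and the helper name is made up:

```shell
# Hypothetical helper: map CUDA user-mode major version -> DCGM 4 package family.
dcgm_package() {
    case "$1" in
        13|13.*) echo "datacenter-gpu-manager-4-cuda13" ;;
        12|12.*) echo "datacenter-gpu-manager-4-cuda12" ;;   # assumed by analogy
        *)       echo "datacenter-gpu-manager" ;;            # legacy family
    esac
}

dcgm_package 13.0
```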

@@ -17,12 +17,23 @@ RUN apt-get update -qq && apt-get install -y \
 wget \
 curl \
 tar \
+libarchive-tools \
 xz-utils \
 rsync \
 build-essential \
 gcc \
 make \
 perl \
+pkg-config \
+yasm \
+libssl-dev \
+zlib1g-dev \
+libbz2-dev \
+libgmp-dev \
+libpcap-dev \
+libsqlite3-dev \
+libcurl4-openssl-dev \
+ocl-icd-opencl-dev \
 linux-headers-amd64 \
 && rm -rf /var/lib/apt/lists/*


@@ -8,5 +8,18 @@ NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
 CUBLAS_VERSION=13.0.2.14-1
 CUDA_USERSPACE_VERSION=13.0.96-1
+DCGM_VERSION=4.5.3-1
+JOHN_JUMBO_COMMIT=67fcf9fe5a
+ROCM_VERSION=6.3.4
+ROCM_SMI_VERSION=7.4.0.60304-76~22.04
+ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
+ROCM_VALIDATION_SUITE_VERSION=1.1.0.60304-76~22.04
+ROCBLAS_VERSION=4.3.0.60304-76~22.04
+ROCRAND_VERSION=3.2.0.60304-76~22.04
+HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04
+HIPBLASLT_VERSION=0.10.0.60304-76~22.04
+COMGR_VERSION=2.8.0.60304-76~22.04
+HPL_VERSION=2.3
+HPL_SHA256=32c5c17d22330e6f2337b681aded51637fb6008d3f0eb7c277b163fadd612830
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0


@@ -29,9 +29,10 @@ lb config noauto \
 --security true \
 --linux-flavours "amd64" \
 --linux-packages "${LB_LINUX_PACKAGES}" \
---memtest none \
---iso-volume "EASY-BEE" \
---iso-application "EASY-BEE" \
---bootappend-live "boot=live components quiet nomodeset console=tty0 console=ttyS0,115200n8 loglevel=3 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
+--memtest memtest86+ \
+--iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
+--iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
+--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
 --apt-recommends false \
+--chroot-squashfs-compression-type zstd \
 "${@}"


@@ -29,8 +29,13 @@ typedef void *CUfunction;
 typedef void *CUstream;
 #define CU_SUCCESS 0
+#define CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
+#define MAX_STRESS_STREAMS 16
+#define MAX_CUBLAS_PROFILES 5
+#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
+#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
 static const char *ptx_source =
     ".version 6.0\n"
@@ -97,6 +102,9 @@ typedef CUresult (*cuLaunchKernel_fn)(CUfunction,
                                        CUstream,
                                        void **,
                                        void **);
+typedef CUresult (*cuMemGetInfo_fn)(size_t *, size_t *);
+typedef CUresult (*cuStreamCreate_fn)(CUstream *, unsigned int);
+typedef CUresult (*cuStreamDestroy_fn)(CUstream);
 typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
 typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);
@@ -118,6 +126,9 @@ struct cuda_api {
     cuModuleLoadDataEx_fn cuModuleLoadDataEx;
     cuModuleGetFunction_fn cuModuleGetFunction;
     cuLaunchKernel_fn cuLaunchKernel;
+    cuMemGetInfo_fn cuMemGetInfo;
+    cuStreamCreate_fn cuStreamCreate;
+    cuStreamDestroy_fn cuStreamDestroy;
     cuGetErrorName_fn cuGetErrorName;
     cuGetErrorString_fn cuGetErrorString;
 };
@@ -128,9 +139,10 @@ struct stress_report {
     int cc_major;
     int cc_minor;
     int buffer_mb;
+    int stream_count;
     unsigned long iterations;
     uint64_t checksum;
-    char details[1024];
+    char details[16384];
 };
 static int load_symbol(void *lib, const char *name, void **out) {
@@ -144,7 +156,7 @@ static int load_cuda(struct cuda_api *api) {
     if (!api->lib) {
         return 0;
     }
-    return
+    if (!(
         load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
         load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
         load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
@@ -160,7 +172,17 @@ static int load_cuda(struct cuda_api *api) {
         load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
         load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
         load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
-        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel);
+        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel))) {
+        dlclose(api->lib);
+        memset(api, 0, sizeof(*api));
+        return 0;
+    }
+    load_symbol(api->lib, "cuMemGetInfo_v2", (void **)&api->cuMemGetInfo);
+    load_symbol(api->lib, "cuStreamCreate", (void **)&api->cuStreamCreate);
+    if (!load_symbol(api->lib, "cuStreamDestroy_v2", (void **)&api->cuStreamDestroy)) {
+        load_symbol(api->lib, "cuStreamDestroy", (void **)&api->cuStreamDestroy);
+    }
+    return 1;
 }
 static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
@@ -193,14 +215,12 @@ static double now_seconds(void) {
     return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
 }
-#if HAVE_CUBLASLT_HEADERS
 static size_t round_down_size(size_t value, size_t multiple) {
     if (multiple == 0 || value < multiple) {
         return value;
     }
     return value - (value % multiple);
 }
-#endif
 static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *major, int *minor) {
     int cc_major = 0;
@@ -220,6 +240,75 @@ static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *maj
     return 1;
 }
+
+static int query_multiprocessor_count(struct cuda_api *api, CUdevice dev, int *count) {
+    int mp_count = 0;
+    if (!check_rc(api,
+                  "cuDeviceGetAttribute(multiprocessors)",
+                  api->cuDeviceGetAttribute(&mp_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev))) {
+        return 0;
+    }
+    *count = mp_count;
+    return 1;
+}
+
+static size_t clamp_budget_to_free_memory(struct cuda_api *api, size_t requested_bytes) {
+    size_t free_bytes = 0;
+    size_t total_bytes = 0;
+    size_t max_bytes = requested_bytes;
+    if (!api->cuMemGetInfo) {
+        return requested_bytes;
+    }
+    if (api->cuMemGetInfo(&free_bytes, &total_bytes) != CU_SUCCESS || free_bytes == 0) {
+        return requested_bytes;
+    }
+    max_bytes = (free_bytes * 9u) / 10u;
+    if (max_bytes < (size_t)4u * 1024u * 1024u) {
+        max_bytes = (size_t)4u * 1024u * 1024u;
+    }
+    if (requested_bytes > max_bytes) {
+        return max_bytes;
+    }
+    return requested_bytes;
+}
+
+static int choose_stream_count(int mp_count, int planned_profiles, size_t total_budget, int have_streams) {
+    int stream_count = 1;
+    if (!have_streams || mp_count <= 0 || planned_profiles <= 0) {
+        return 1;
+    }
+    stream_count = mp_count / 8;
+    if (stream_count < 2) {
+        stream_count = 2;
+    }
+    if (stream_count > MAX_STRESS_STREAMS) {
+        stream_count = MAX_STRESS_STREAMS;
+    }
+    while (stream_count > 1) {
+        size_t per_stream_budget = total_budget / ((size_t)planned_profiles * (size_t)stream_count);
+        if (per_stream_budget >= MIN_STREAM_BUDGET_BYTES) {
+            break;
+        }
+        stream_count--;
+    }
+    return stream_count;
+}
+
+static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
+    if (!api->cuStreamDestroy) {
+        return;
+    }
+    for (int i = 0; i < count; i++) {
+        if (streams[i]) {
+            api->cuStreamDestroy(streams[i]);
+            streams[i] = NULL;
+        }
+    }
+}
 #if HAVE_CUBLASLT_HEADERS
 static void append_detail(char *buf, size_t cap, const char *fmt, ...) {
     size_t len = strlen(buf);
@@ -242,12 +331,18 @@ static int run_ptx_fallback(struct cuda_api *api,
                             int size_mb,
                             struct stress_report *report) {
     CUcontext ctx = NULL;
-    CUdeviceptr device_mem = 0;
     CUmodule module = NULL;
     CUfunction kernel = NULL;
     uint32_t sample[256];
-    uint32_t words = 0;
+    CUdeviceptr device_mem[MAX_STRESS_STREAMS] = {0};
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
+    uint32_t words[MAX_STRESS_STREAMS] = {0};
+    uint32_t rounds[MAX_STRESS_STREAMS] = {0};
+    void *params[MAX_STRESS_STREAMS][3];
+    size_t bytes_per_stream[MAX_STRESS_STREAMS] = {0};
     unsigned long iterations = 0;
+    int mp_count = 0;
+    int stream_count = 1;
 
     memset(report, 0, sizeof(*report));
     snprintf(report->backend, sizeof(report->backend), "driver-ptx");
@@ -260,64 +355,107 @@ static int run_ptx_fallback(struct cuda_api *api,
         return 0;
     }
-    size_t bytes = (size_t)size_mb * 1024u * 1024u;
-    if (bytes < 4u * 1024u * 1024u) {
-        bytes = 4u * 1024u * 1024u;
-    }
-    if (bytes > (size_t)1024u * 1024u * 1024u) {
-        bytes = (size_t)1024u * 1024u * 1024u;
-    }
-    words = (uint32_t)(bytes / sizeof(uint32_t));
-    if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem, bytes))) {
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
-    if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem, 0, bytes))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
+    size_t requested_bytes = (size_t)size_mb * 1024u * 1024u;
+    if (requested_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        requested_bytes = MIN_PROFILE_BUDGET_BYTES;
+    }
+    size_t total_bytes = clamp_budget_to_free_memory(api, requested_bytes);
+    if (total_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        total_bytes = MIN_PROFILE_BUDGET_BYTES;
+    }
+    report->buffer_mb = (int)(total_bytes / (1024u * 1024u));
+    if (query_multiprocessor_count(api, dev, &mp_count) &&
+        api->cuStreamCreate &&
+        api->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, 1, total_bytes, 1);
+    }
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(api, "cuStreamCreate", api->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(api, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
+    }
+    report->stream_count = stream_count;
+    for (int lane = 0; lane < stream_count; lane++) {
+        size_t slice = total_bytes / (size_t)stream_count;
+        if (lane == stream_count - 1) {
+            slice = total_bytes - ((size_t)lane * (total_bytes / (size_t)stream_count));
+        }
+        slice = round_down_size(slice, sizeof(uint32_t));
+        if (slice < MIN_PROFILE_BUDGET_BYTES) {
+            slice = MIN_PROFILE_BUDGET_BYTES;
+        }
+        bytes_per_stream[lane] = slice;
+        words[lane] = (uint32_t)(slice / sizeof(uint32_t));
+        if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem[lane], slice))) {
+            goto fail;
+        }
+        if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem[lane], 0, slice))) {
+            goto fail;
+        }
+        rounds[lane] = 2048;
+        params[lane][0] = &device_mem[lane];
+        params[lane][1] = &words[lane];
+        params[lane][2] = &rounds[lane];
+    }
     if (!check_rc(api,
                   "cuModuleLoadDataEx",
                   api->cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
     }
     if (!check_rc(api, "cuModuleGetFunction", api->cuModuleGetFunction(&kernel, module, "burn"))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
     }
     unsigned int threads = 256;
-    unsigned int blocks = (unsigned int)((words + threads - 1) / threads);
-    uint32_t rounds = 1024;
-    void *params[] = {&device_mem, &words, &rounds};
-    double start = now_seconds();
-    double deadline = start + (double)seconds;
+    double deadline = now_seconds() + (double)seconds;
+    double next_sync = now_seconds() + 1.0;
     while (now_seconds() < deadline) {
-        if (!check_rc(api,
-                      "cuLaunchKernel",
-                      api->cuLaunchKernel(kernel, blocks, 1, 1, threads, 1, 1, 0, NULL, params, NULL))) {
-            api->cuMemFree(device_mem);
-            api->cuCtxDestroy(ctx);
-            return 0;
-        }
-        iterations++;
+        int launched = 0;
+        for (int lane = 0; lane < stream_count; lane++) {
+            unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
+            if (!check_rc(api,
+                          "cuLaunchKernel",
+                          api->cuLaunchKernel(kernel,
+                                              blocks,
+                                              1,
+                                              1,
+                                              threads,
+                                              1,
+                                              1,
+                                              0,
+                                              streams[lane],
+                                              params[lane],
+                                              NULL))) {
+                goto fail;
+            }
+            launched++;
+            iterations++;
+        }
+        if (launched <= 0) {
+            goto fail;
+        }
+        double now = now_seconds();
+        if (now >= next_sync || now >= deadline) {
+            if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
+                goto fail;
+            }
+            next_sync = now + 1.0;
+        }
     }
-    if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
-    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem, sizeof(sample)))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
+    api->cuCtxSynchronize();
+    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem[0], sizeof(sample)))) {
+        goto fail;
+    }
     for (size_t i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
@@ -326,12 +464,33 @@ static int run_ptx_fallback(struct cuda_api *api,
     report->iterations = iterations;
     snprintf(report->details,
              sizeof(report->details),
-             "profile_int32_fallback=OK iterations=%lu\n",
+             "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d per_stream_mb=%zu iterations=%lu\n",
+             size_mb,
+             report->buffer_mb,
+             report->stream_count,
+             bytes_per_stream[0] / (1024u * 1024u),
              iterations);
-    api->cuMemFree(device_mem);
+    for (int lane = 0; lane < stream_count; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, stream_count);
     api->cuCtxDestroy(ctx);
     return 1;
+fail:
+    for (int lane = 0; lane < MAX_STRESS_STREAMS; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, MAX_STRESS_STREAMS);
+    if (ctx) {
+        api->cuCtxDestroy(ctx);
+    }
+    return 0;
 }
 #if HAVE_CUBLASLT_HEADERS
@@ -418,6 +577,7 @@ struct profile_desc {
 struct prepared_profile {
     struct profile_desc desc;
+    CUstream stream;
     cublasLtMatmulDesc_t op_desc;
     cublasLtMatrixLayout_t a_layout;
     cublasLtMatrixLayout_t b_layout;
@@ -441,6 +601,20 @@ struct prepared_profile {
 };
 static const struct profile_desc k_profiles[] = {
+    {
+        "fp64",
+        "fp64",
+        80,
+        1,
+        0,
+        0,
+        8,
+        CUDA_R_64F,
+        CUDA_R_64F,
+        CUDA_R_64F,
+        CUDA_R_64F,
+        CUBLAS_COMPUTE_64F,
+    },
     {
         "fp32_tf32",
         "fp32",
@@ -617,8 +791,8 @@ static uint64_t choose_square_dim(size_t budget_bytes, size_t bytes_per_cell, in
     if (dim < (uint64_t)multiple) {
         dim = (uint64_t)multiple;
     }
-    if (dim > 8192u) {
-        dim = 8192u;
+    if (dim > 65536u) {
+        dim = 65536u;
     }
     return dim;
 }
@@ -704,10 +878,12 @@ static int prepare_profile(struct cublaslt_api *cublas,
                            cublasLtHandle_t handle,
                            struct cuda_api *cuda,
                            const struct profile_desc *desc,
+                           CUstream stream,
                            size_t profile_budget_bytes,
                            struct prepared_profile *out) {
     memset(out, 0, sizeof(*out));
     out->desc = *desc;
+    out->stream = stream;
     size_t bytes_per_cell = 0;
     bytes_per_cell += bytes_for_elements(desc->a_type, 1);
@@ -935,7 +1111,7 @@ static int run_cublas_profile(cublasLtHandle_t handle,
                                       &profile->heuristic.algo,
                                       (void *)(uintptr_t)profile->workspace_dev,
                                       profile->workspace_size,
-                                      (cudaStream_t)0));
+                                      profile->stream));
 }
 static int run_cublaslt_stress(struct cuda_api *cuda,
@@ -947,13 +1123,21 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                                int size_mb,
                                struct stress_report *report) {
     struct cublaslt_api cublas;
-    struct prepared_profile prepared[sizeof(k_profiles) / sizeof(k_profiles[0])];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
     cublasLtHandle_t handle = NULL;
     CUcontext ctx = NULL;
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
     uint16_t sample[256];
     int cc = cc_major * 10 + cc_minor;
     int planned = 0;
     int active = 0;
+    int mp_count = 0;
+    int stream_count = 1;
+    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int prepared_count = 0;
+    size_t requested_budget = 0;
+    size_t total_budget = 0;
+    size_t per_profile_budget = 0;
     memset(report, 0, sizeof(*report));
     snprintf(report->backend, sizeof(report->backend), "cublasLt");
@@ -986,16 +1170,45 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
         return 0;
     }
-    size_t total_budget = (size_t)size_mb * 1024u * 1024u;
-    if (total_budget < (size_t)planned * 4u * 1024u * 1024u) {
-        total_budget = (size_t)planned * 4u * 1024u * 1024u;
+    requested_budget = (size_t)size_mb * 1024u * 1024u;
+    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
     }
-    size_t per_profile_budget = total_budget / (size_t)planned;
-    if (per_profile_budget < 4u * 1024u * 1024u) {
-        per_profile_budget = 4u * 1024u * 1024u;
+    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
+    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
     }
+    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
+        cuda->cuStreamCreate &&
+        cuda->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+    }
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(cuda, "cuStreamCreate", cuda->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(cuda, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
+    }
+    report->stream_count = stream_count;
+    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
+        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
+    }
+    report->buffer_mb = (int)(total_budget / (1024u * 1024u));
+    append_detail(report->details,
+                  sizeof(report->details),
+                  "requested_mb=%d actual_mb=%d streams=%d mp_count=%d per_worker_mb=%zu\n",
+                  size_mb,
+                  report->buffer_mb,
+                  report->stream_count,
+                  mp_count,
+                  per_profile_budget / (1024u * 1024u));
-    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+    for (int i = 0; i < profile_count; i++) {
         const struct profile_desc *desc = &k_profiles[i];
         if (!(desc->enabled && cc >= desc->min_cc)) {
             append_detail(report->details,
@@ -1005,30 +1218,51 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                           desc->min_cc);
             continue;
         }
-        if (prepare_profile(&cublas, handle, cuda, desc, per_profile_budget, &prepared[i])) {
-            active++;
-            append_detail(report->details,
-                          sizeof(report->details),
-                          "%s=READY dim=%llux%llux%llu block=%s\n",
-                          desc->name,
-                          (unsigned long long)prepared[i].m,
-                          (unsigned long long)prepared[i].n,
-                          (unsigned long long)prepared[i].k,
-                          desc->block_label);
-        } else {
-            append_detail(report->details, sizeof(report->details), "%s=SKIPPED unsupported\n", desc->name);
+        for (int lane = 0; lane < stream_count; lane++) {
+            CUstream stream = streams[lane];
+            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
+                break;
+            }
+            if (prepare_profile(&cublas, handle, cuda, desc, stream, per_profile_budget, &prepared[prepared_count])) {
+                active++;
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=READY dim=%llux%llux%llu block=%s stream=%d\n",
+                              desc->name,
+                              lane,
+                              (unsigned long long)prepared[prepared_count].m,
+                              (unsigned long long)prepared[prepared_count].n,
+                              (unsigned long long)prepared[prepared_count].k,
+                              desc->block_label,
+                              lane);
+                prepared_count++;
+            } else {
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=SKIPPED unsupported\n",
+                              desc->name,
+                              lane);
+            }
         }
     }
     if (active <= 0) {
         cublas.cublasLtDestroy(handle);
+        destroy_streams(cuda, streams, stream_count);
         cuda->cuCtxDestroy(ctx);
         return 0;
     }
+    /* Keep the GPU queue continuously full by submitting kernels without
+     * synchronizing after every wave. A sync barrier after each small batch
+     * creates CPU↔GPU ping-pong gaps that prevent full TDP utilisation,
+     * especially when individual kernels are short. Instead we sync at most
+     * once per second (for error detection) and once at the very end. */
     double deadline = now_seconds() + (double)seconds;
+    double next_sync = now_seconds() + 1.0;
     while (now_seconds() < deadline) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+        int launched = 0;
+        for (int i = 0; i < prepared_count; i++) {
             if (!prepared[i].ready) {
                 continue;
             }
@@ -1037,31 +1271,39 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               sizeof(report->details),
                               "%s=FAILED runtime\n",
                               prepared[i].desc.name);
-                for (size_t j = 0; j < sizeof(prepared) / sizeof(prepared[0]); j++) {
+                for (int j = 0; j < prepared_count; j++) {
                     destroy_profile(&cublas, cuda, &prepared[j]);
                 }
                 cublas.cublasLtDestroy(handle);
+                destroy_streams(cuda, streams, stream_count);
                 cuda->cuCtxDestroy(ctx);
                 return 0;
             }
             prepared[i].iterations++;
             report->iterations++;
-            if (now_seconds() >= deadline) {
-                break;
+            launched++;
+        }
+        if (launched <= 0) {
+            break;
+        }
+        double now = now_seconds();
+        if (now >= next_sync || now >= deadline) {
+            if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
+                for (int i = 0; i < prepared_count; i++) {
+                    destroy_profile(&cublas, cuda, &prepared[i]);
+                }
+                cublas.cublasLtDestroy(handle);
+                destroy_streams(cuda, streams, stream_count);
+                cuda->cuCtxDestroy(ctx);
+                return 0;
             }
+            next_sync = now + 1.0;
         }
     }
-    if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
-            destroy_profile(&cublas, cuda, &prepared[i]);
-        }
-        cublas.cublasLtDestroy(handle);
-        cuda->cuCtxDestroy(ctx);
-        return 0;
-    }
-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    /* Final drain — ensure all queued work finishes before we read results. */
+    cuda->cuCtxSynchronize();
+    for (int i = 0; i < prepared_count; i++) {
         if (!prepared[i].ready) {
             continue;
         }
@@ -1072,7 +1314,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                       prepared[i].iterations);
     }
-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
         if (prepared[i].ready) {
             if (check_rc(cuda, "cuMemcpyDtoH", cuda->cuMemcpyDtoH(sample, prepared[i].d_dev, sizeof(sample)))) {
                 for (size_t j = 0; j < sizeof(sample) / sizeof(sample[0]); j++) {
@@ -1083,10 +1325,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
         }
     }
-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
         destroy_profile(&cublas, cuda, &prepared[i]);
     }
     cublas.cublasLtDestroy(handle);
+    destroy_streams(cuda, streams, stream_count);
     cuda->cuCtxDestroy(ctx);
     return 1;
 }
@@ -1095,13 +1338,16 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 int main(int argc, char **argv) {
     int seconds = 5;
     int size_mb = 64;
+    int device_index = 0;
     for (int i = 1; i < argc; i++) {
         if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
             seconds = atoi(argv[++i]);
         } else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
             size_mb = atoi(argv[++i]);
+        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
+            device_index = atoi(argv[++i]);
         } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N]\n", argv[0]);
+            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
             return 2;
         }
     }
@@ -1111,6 +1357,9 @@ int main(int argc, char **argv) {
     if (size_mb <= 0) {
         size_mb = 64;
     }
+    if (device_index < 0) {
+        device_index = 0;
+    }
     struct cuda_api cuda;
     if (!load_cuda(&cuda)) {
@@ -1133,8 +1382,13 @@ int main(int argc, char **argv) {
         return 1;
     }
+    if (device_index >= count) {
+        fprintf(stderr, "device index %d out of range (found %d CUDA device(s))\n", device_index, count);
+        return 1;
+    }
     CUdevice dev = 0;
-    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, 0))) {
+    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, device_index))) {
         return 1;
     }
@@ -1162,10 +1416,12 @@ int main(int argc, char **argv) {
     }
     printf("device=%s\n", report.device);
+    printf("device_index=%d\n", device_index);
     printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
     printf("backend=%s\n", report.backend);
     printf("duration_s=%d\n", seconds);
     printf("buffer_mb=%d\n", report.buffer_mb);
+    printf("streams=%d\n", report.stream_count);
     printf("iterations=%lu\n", report.iterations);
     printf("checksum=%llu\n", (unsigned long long)report.checksum);
     if (report.details[0] != '\0') {


@@ -1,9 +1,9 @@
 #!/bin/sh
-# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-stress.
+# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-burn worker.
 #
 # Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
 # verifies them against Packages.gz, and extracts the small subset we need:
-# - headers for compiling bee-gpu-stress against cuBLASLt
+# - headers for compiling bee-gpu-burn worker against cuBLASLt
 # - runtime libs for libcublas, libcublasLt, libcudart inside the ISO
 set -e

iso/builder/build-hpl.sh Executable file

@@ -0,0 +1,244 @@
#!/bin/sh
# build-hpl.sh — build HPL (High Performance LINPACK) for the bee LiveCD.
#
# Downloads HPL 2.3 from netlib, downloads OpenBLAS runtime from the Debian 12
# apt repo, and compiles xhpl using a minimal single-process MPI stub so that
# no MPI package is required inside the ISO.
#
# The resulting xhpl binary is a standard HPL binary whose output is compatible
# with the accepted HPL format (WR... Gflops lines).
#
# Output:
# $CACHE_DIR/bin/xhpl
# $CACHE_DIR/lib/libopenblas.so* (runtime, injected into ISO /usr/lib/)
set -e
HPL_VERSION="$1"
HPL_SHA256="$2"
DIST_DIR="$3"
[ -n "$HPL_VERSION" ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
[ -n "$HPL_SHA256" ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <hpl-version> <sha256> <dist-dir>"; exit 1; }
echo "=== HPL ${HPL_VERSION} ==="
CACHE_DIR="${DIST_DIR}/hpl-${HPL_VERSION}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/hpl-downloads"
if [ -x "${CACHE_DIR}/bin/xhpl" ]; then
    echo "=== HPL cached, skipping build ==="
    echo "binary: ${CACHE_DIR}/bin/xhpl"
    exit 0
fi
mkdir -p "${DOWNLOAD_CACHE_DIR}" "${CACHE_DIR}/bin" "${CACHE_DIR}/lib"
# ── download HPL source ────────────────────────────────────────────────────────
HPL_TAR="${DOWNLOAD_CACHE_DIR}/hpl-${HPL_VERSION}.tar.gz"
HPL_URL="https://www.netlib.org/benchmark/hpl/hpl-${HPL_VERSION}.tar.gz"
if [ ! -f "${HPL_TAR}" ]; then
    echo "=== downloading HPL ${HPL_VERSION} ==="
    wget --show-progress -O "${HPL_TAR}" "${HPL_URL}"
fi
actual_sha="$(sha256sum "${HPL_TAR}" | awk '{print $1}')"
if [ "${actual_sha}" != "${HPL_SHA256}" ]; then
    echo "ERROR: sha256 mismatch for hpl-${HPL_VERSION}.tar.gz" >&2
    echo "  expected: ${HPL_SHA256}" >&2
    echo "  actual:   ${actual_sha}" >&2
    rm -f "${HPL_TAR}"
    exit 1
fi
echo "sha256 OK: hpl-${HPL_VERSION}.tar.gz"
# ── download OpenBLAS from Debian 12 apt repo ─────────────────────────────────
REPO_BASE="https://deb.debian.org/debian/pool/main/o/openblas"
PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz"
OPENBLAS_PKG="libopenblas0-openmp"
echo "=== fetching Debian 12 Packages.gz ==="
wget -q -O "${PACKAGES_GZ}" \
    "https://deb.debian.org/debian/dists/bookworm/main/binary-amd64/Packages.gz"
lookup_deb() {
    pkg="$1"
    gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" '
        /^Package: /  { cur=$2 }
        /^Filename: / { file=$2 }
        /^SHA256: /   { sha=$2 }
        /^$/ {
            if (cur == pkg) { print file " " sha; exit }
            cur=""; file=""; sha=""
        }
        END {
            if (cur == pkg) print file " " sha
        }'
}
meta="$(lookup_deb "${OPENBLAS_PKG}")"
[ -n "$meta" ] || { echo "ERROR: ${OPENBLAS_PKG} not found in Packages.gz"; exit 1; }
repo_file="$(printf '%s' "$meta" | awk '{print $1}')"
repo_sha="$(printf '%s' "$meta" | awk '{print $2}')"
OPENBLAS_DEB="${DOWNLOAD_CACHE_DIR}/$(basename "${repo_file}")"
if [ -f "${OPENBLAS_DEB}" ]; then
    actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')"
    [ "$actual" = "$repo_sha" ] || rm -f "${OPENBLAS_DEB}"
fi
if [ ! -f "${OPENBLAS_DEB}" ]; then
    echo "=== downloading ${OPENBLAS_PKG} ==="
    wget --show-progress -O "${OPENBLAS_DEB}" "https://deb.debian.org/debian/${repo_file}"
    actual="$(sha256sum "${OPENBLAS_DEB}" | awk '{print $1}')"
    [ "$actual" = "$repo_sha" ] || { echo "ERROR: sha256 mismatch for ${OPENBLAS_PKG}"; rm -f "${OPENBLAS_DEB}"; exit 1; }
fi
# extract libopenblas shared libs
TMP_DEB=$(mktemp -d)
trap 'rm -rf "${TMP_DEB}" "${BUILD_TMP:-}"' EXIT INT TERM
(
    cd "${TMP_DEB}"
    ar x "${OPENBLAS_DEB}"
    tar xf data.tar.*
)
find "${TMP_DEB}" \( -name 'libopenblas*.so*' \) \( -type f -o -type l \) \
    -exec cp -a {} "${CACHE_DIR}/lib/" \;
echo "=== OpenBLAS libs: $(ls "${CACHE_DIR}/lib/" | wc -l) files ==="
# also need libopenblas-dev header for compilation (we only need the .so symlink)
OPENBLAS_SO="$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libopenblas.so.*' -type f | sort | head -1)"
[ -n "${OPENBLAS_SO}" ] || { echo "ERROR: libopenblas.so not extracted"; exit 1; }
SONAME="$(basename "${OPENBLAS_SO}")"
ln -sf "${SONAME}" "${CACHE_DIR}/lib/libopenblas.so" 2>/dev/null || true
ln -sf "${SONAME}" "${CACHE_DIR}/lib/libblas.so" 2>/dev/null || true
# ── build HPL ─────────────────────────────────────────────────────────────────
BUILD_TMP=$(mktemp -d)
cd "${BUILD_TMP}"
tar xf "${HPL_TAR}"
SRC_DIR="$(find . -maxdepth 1 -type d -name 'hpl-*' | head -1)"
[ -n "${SRC_DIR}" ] || { echo "ERROR: HPL source dir not found"; exit 1; }
cd "${SRC_DIR}"
# Write a minimal single-process MPI stub so we don't need an MPI package.
# HPL only needs these functions for single-process execution.
cat > "${BUILD_TMP}/mpi_stub.c" <<'MPISTUB'
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
typedef int MPI_Comm;
typedef int MPI_Datatype;
typedef int MPI_Op;
typedef int MPI_Status;
typedef int MPI_Request;
#define MPI_COMM_WORLD 0
#define MPI_SUCCESS 0
#define MPI_DOUBLE 6
#define MPI_INT 5
#define MPI_SUM 0
#define MPI_MAX 1
#define MPI_MIN 2
#define MPI_BYTE 1
#define MPI_ANY_SOURCE -1
#define MPI_ANY_TAG -1
#define MPI_STATUS_IGNORE ((MPI_Status*)0)
int MPI_Init(int *argc, char ***argv) { (void)argc; (void)argv; return MPI_SUCCESS; }
int MPI_Finalize(void) { return MPI_SUCCESS; }
int MPI_Comm_rank(MPI_Comm c, int *rank) { (void)c; *rank = 0; return MPI_SUCCESS; }
int MPI_Comm_size(MPI_Comm c, int *size) { (void)c; *size = 1; return MPI_SUCCESS; }
int MPI_Bcast(void *b, int n, MPI_Datatype t, int r, MPI_Comm c)
{ (void)b;(void)n;(void)t;(void)r;(void)c; return MPI_SUCCESS; }
int MPI_Reduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, int root, MPI_Comm c) {
    (void)op; (void)root; (void)c;
    size_t sz = (t==MPI_DOUBLE) ? sizeof(double) : (t==MPI_INT) ? sizeof(int) : 1;
    memcpy(r, s, (size_t)n * sz);
    return MPI_SUCCESS;
}
int MPI_Allreduce(const void *s, void *r, int n, MPI_Datatype t, MPI_Op op, MPI_Comm c)
{ return MPI_Reduce(s,r,n,t,op,0,c); }
int MPI_Send(const void *b, int n, MPI_Datatype t, int d, int tag, MPI_Comm c)
{ (void)b;(void)n;(void)t;(void)d;(void)tag;(void)c; return MPI_SUCCESS; }
int MPI_Recv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Status *st)
{ (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)st; return MPI_SUCCESS; }
int MPI_Sendrecv(const void *sb, int sn, MPI_Datatype st2, int dest, int stag,
void *rb, int rn, MPI_Datatype rt, int src, int rtag,
MPI_Comm c, MPI_Status *status)
{ (void)sb;(void)sn;(void)st2;(void)dest;(void)stag;
(void)rb;(void)rn;(void)rt;(void)src;(void)rtag;(void)c;(void)status;
return MPI_SUCCESS; }
int MPI_Irecv(void *b, int n, MPI_Datatype t, int s, int tag, MPI_Comm c, MPI_Request *req)
{ (void)b;(void)n;(void)t;(void)s;(void)tag;(void)c;(void)req; return MPI_SUCCESS; }
int MPI_Wait(MPI_Request *req, MPI_Status *st)
{ (void)req;(void)st; return MPI_SUCCESS; }
int MPI_Abort(MPI_Comm c, int code) { (void)c; exit(code); }
double MPI_Wtime(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
}
MPISTUB
# Write Make.bee — HPL makefile configuration
cat > Make.bee <<MAKEFILE
SHELL = /bin/sh
CD = cd
CP = cp
LN_S = ln -s
MKDIR = mkdir -p
RM = /bin/rm -f
TOUCH = touch
ARCH = bee
# Directories
TOPdir = \$(shell pwd)
INCdir = \$(TOPdir)/include
BINdir = \$(TOPdir)/bin/\$(ARCH)
LIBdir = \$(TOPdir)/lib/\$(ARCH)
HPLlib = \$(LIBdir)/libhpl.a
# Compiler
CC = gcc
CCNOOPT = \$(HPL_DEFS)
CCFLAGS = \$(HPL_DEFS) -O3 -march=native -funroll-loops -fomit-frame-pointer
# Linker
LINKER = gcc
LINKFLAGS = \$(CCFLAGS)
# MPI (single-process stub — no actual MPI needed)
MPdir =
MPinc = -I${BUILD_TMP}
MPlib = ${BUILD_TMP}/mpi_stub.o
# BLAS (OpenBLAS)
LAdir = ${CACHE_DIR}/lib
LAinc =
LAlib = -L\$(LAdir) -Wl,-rpath,/usr/lib -lopenblas
HPL_OPTS =
HPL_DEFS = \$(HPL_OPTS) -DHPL_CALL_CBLAS
MAKEFILE
echo "=== Make.bee written ==="
# compile MPI stub
gcc -O2 -c -o "${BUILD_TMP}/mpi_stub.o" "${BUILD_TMP}/mpi_stub.c"
# build HPL
echo "=== building HPL ${HPL_VERSION} ==="
make -j"$(nproc)" arch=bee 2>&1 | tail -20
XHPL_BIN="bin/bee/xhpl"
[ -x "${XHPL_BIN}" ] || { echo "ERROR: xhpl not found after build"; exit 1; }
cp "${XHPL_BIN}" "${CACHE_DIR}/bin/xhpl"
chmod +x "${CACHE_DIR}/bin/xhpl"
echo "=== HPL build complete ==="
echo "binary: ${CACHE_DIR}/bin/xhpl"
echo "libs: $(ls "${CACHE_DIR}/lib/")"
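The commit message describes bee-hpl generating HPL.dat at runtime with N auto-sized to 80% of total RAM and NB=256. A minimal sketch of that sizing step (hypothetical helper, not the actual bee-hpl code; the 80% factor, NB=256, and P=1 Q=1 come from the commit message):

```shell
#!/bin/sh
# Sketch: derive the HPL problem size N from MemTotal. Pass MemTotal in KiB
# as $1, or read it from /proc/meminfo. The HPL matrix is N x N doubles
# (8 bytes each), so we solve 8*N^2 = 0.8 * RAM for N.
NB=256
mem_kb=${1:-$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)}
n=$(awk -v kb="$mem_kb" 'BEGIN { printf "%d", sqrt(kb * 1024 * 0.80 / 8) }')
# Round N down to a multiple of NB so the block size divides N evenly.
n=$(( n / NB * NB ))
printf 'N=%d NB=%d P=1 Q=1\n' "$n" "$NB"
```

With 16 GiB of RAM (MemTotal ≈ 16384000 KiB) this yields N=40960, a clean multiple of 256.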


@@ -11,6 +11,8 @@ BUILDER_PLATFORM="${BEE_BUILDER_PLATFORM:-linux/amd64}"
 CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
 AUTH_KEYS=""
 REBUILD_IMAGE=0
+CLEAN_CACHE=0
+VARIANT="all"
 . "${BUILDER_DIR}/VERSIONS"
@@ -28,14 +30,47 @@ while [ $# -gt 0 ]; do
         AUTH_KEYS="$2"
         shift 2
         ;;
+    --clean-build)
+        CLEAN_CACHE=1
+        REBUILD_IMAGE=1
+        shift
+        ;;
+    --variant)
+        VARIANT="$2"
+        shift 2
+        ;;
     *)
         echo "unknown arg: $1" >&2
-        echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--authorized-keys /path/to/authorized_keys]" >&2
+        echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|nvidia-legacy|amd|nogpu|all]" >&2
         exit 1
         ;;
     esac
 done
+case "$VARIANT" in
+nvidia|nvidia-legacy|amd|nogpu|all) ;;
+*) echo "unknown variant: $VARIANT (expected nvidia, nvidia-legacy, amd, nogpu, or all)" >&2; exit 1 ;;
+esac
+if [ "$CLEAN_CACHE" = "1" ]; then
+    echo "=== cleaning build cache: ${CACHE_DIR} ==="
+    rm -rf "${CACHE_DIR:?}/go-build" \
+        "${CACHE_DIR:?}/go-mod" \
+        "${CACHE_DIR:?}/tmp" \
+        "${CACHE_DIR:?}/bee" \
+        "${CACHE_DIR:?}/lb-packages"
+    echo "=== cleaning live-build work dirs ==="
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia-legacy"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-amd"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nogpu"
+    rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia"
+    rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia-legacy"
+    rm -rf "${REPO_ROOT}/dist/overlay-stage-amd"
+    rm -rf "${REPO_ROOT}/dist/overlay-stage-nogpu"
+    echo "=== caches cleared, proceeding with build ==="
+fi
 if ! command -v "$CONTAINER_TOOL" >/dev/null 2>&1; then
     echo "container tool not found: $CONTAINER_TOOL" >&2
     exit 1
@@ -90,34 +125,79 @@ else
     echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
 fi
-set -- \
-    run --rm --privileged \
-    --platform "${BUILDER_PLATFORM}" \
-    -v "${REPO_ROOT}:/work" \
-    -v "${CACHE_DIR}:/cache" \
-    -e BEE_CONTAINER_BUILD=1 \
-    -e GOCACHE=/cache/go-build \
-    -e GOMODCACHE=/cache/go-mod \
-    -e TMPDIR=/cache/tmp \
-    -e BEE_CACHE_DIR=/cache/bee \
-    -w /work \
-    "${IMAGE_REF}" \
-    sh /work/iso/builder/build.sh
-if [ -n "$AUTH_KEYS" ]; then
-    set -- run --rm --privileged \
-        --platform "${BUILDER_PLATFORM}" \
-        -v "${REPO_ROOT}:/work" \
-        -v "${CACHE_DIR}:/cache" \
-        -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
-        -e BEE_CONTAINER_BUILD=1 \
-        -e GOCACHE=/cache/go-build \
-        -e GOMODCACHE=/cache/go-mod \
-        -e TMPDIR=/cache/tmp \
-        -e BEE_CACHE_DIR=/cache/bee \
-        -w /work \
-        "${IMAGE_REF}" \
-        sh /work/iso/builder/build.sh --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
-fi
-"$CONTAINER_TOOL" "$@"
+# Build base docker run args (without --authorized-keys)
+build_run_args() {
+    _variant="$1"
+    _auth_arg=""
+    if [ -n "$AUTH_KEYS" ]; then
+        _auth_arg="--authorized-keys /tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    fi
+    echo "run --rm --privileged \
+        --platform ${BUILDER_PLATFORM} \
+        -v ${REPO_ROOT}:/work \
+        -v ${CACHE_DIR}:/cache \
+        ${AUTH_KEYS:+-v ${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro} \
+        -e BEE_CONTAINER_BUILD=1 \
+        -e GOCACHE=/cache/go-build \
+        -e GOMODCACHE=/cache/go-mod \
+        -e TMPDIR=/cache/tmp \
+        -e BEE_CACHE_DIR=/cache/bee \
+        -w /work \
+        ${IMAGE_REF} \
+        sh /work/iso/builder/build.sh --variant ${_variant} ${_auth_arg}"
+}
+run_variant() {
+    _v="$1"
+    echo "=== building variant: ${_v} ==="
+    if [ -n "$AUTH_KEYS" ]; then
+        "$CONTAINER_TOOL" run --rm --privileged \
+            --platform "${BUILDER_PLATFORM}" \
+            -v "${REPO_ROOT}:/work" \
+            -v "${CACHE_DIR}:/cache" \
+            -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
+            -e BEE_CONTAINER_BUILD=1 \
+            -e GOCACHE=/cache/go-build \
+            -e GOMODCACHE=/cache/go-mod \
+            -e TMPDIR=/cache/tmp \
+            -e BEE_CACHE_DIR=/cache/bee \
+            -w /work \
+            "${IMAGE_REF}" \
+            sh /work/iso/builder/build.sh --variant "${_v}" \
+            --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    else
+        "$CONTAINER_TOOL" run --rm --privileged \
+            --platform "${BUILDER_PLATFORM}" \
+            -v "${REPO_ROOT}:/work" \
+            -v "${CACHE_DIR}:/cache" \
+            -e BEE_CONTAINER_BUILD=1 \
+            -e GOCACHE=/cache/go-build \
+            -e GOMODCACHE=/cache/go-mod \
+            -e TMPDIR=/cache/tmp \
+            -e BEE_CACHE_DIR=/cache/bee \
+            -w /work \
+            "${IMAGE_REF}" \
+            sh /work/iso/builder/build.sh --variant "${_v}"
+    fi
+}
+case "$VARIANT" in
+nvidia)
+    run_variant nvidia
+    ;;
+nvidia-legacy)
+    run_variant nvidia-legacy
+    ;;
+amd)
+    run_variant amd
+    ;;
+nogpu)
+    run_variant nogpu
+    ;;
+all)
+    run_variant nvidia
+    run_variant nvidia-legacy
+    run_variant amd
+    run_variant nogpu
+    ;;
+esac
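The --clean-build branch above leans on `${VAR:?}` expansion to guard its rm -rf calls: if CACHE_DIR were ever unset or empty, `"${CACHE_DIR:?}/go-build"` aborts the script instead of expanding to `/go-build`. A standalone illustration of the guard (demo values only, not project code):

```shell
#!/bin/sh
# ${VAR:?} makes a non-interactive shell exit when VAR is unset or empty,
# so rm -rf "${CACHE_DIR:?}/go-build" can never collapse to rm -rf "/go-build".
if sh -c ': "${BEE_DEMO_DIR:?}"' 2>/dev/null; then
    echo "guard did not trigger (BEE_DEMO_DIR was set)"
else
    echo "guard triggered: empty/unset BEE_DEMO_DIR rejected"
fi
BEE_DEMO_DIR=/tmp/demo-cache
echo "expands to: ${BEE_DEMO_DIR:?}/go-build"
```

The same expansion works in any POSIX shell, which is why the build script can use it without bash.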

iso/builder/build-john.sh (new file, 55 lines)

@@ -0,0 +1,55 @@
#!/bin/sh
# build-john.sh — build John the Ripper jumbo with OpenCL support for the LiveCD.
#
# Downloads a pinned source snapshot from the official openwall/john repository,
# builds it inside the builder container, and caches the resulting run/ tree.
set -e
JOHN_COMMIT="$1"
DIST_DIR="$2"
[ -n "$JOHN_COMMIT" ] || { echo "usage: $0 <john-commit> <dist-dir>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <john-commit> <dist-dir>"; exit 1; }
echo "=== John the Ripper jumbo ${JOHN_COMMIT} ==="
CACHE_DIR="${DIST_DIR}/john-${JOHN_COMMIT}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/john-downloads"
SRC_TAR="${DOWNLOAD_CACHE_DIR}/john-${JOHN_COMMIT}.tar.gz"
SRC_URL="https://github.com/openwall/john/archive/${JOHN_COMMIT}.tar.gz"
if [ -x "${CACHE_DIR}/run/john" ] && [ -f "${CACHE_DIR}/run/john.conf" ]; then
    echo "=== john cached, skipping build ==="
    echo "run dir: ${CACHE_DIR}/run"
    exit 0
fi
mkdir -p "${DOWNLOAD_CACHE_DIR}"
if [ ! -f "${SRC_TAR}" ]; then
    echo "=== downloading john source snapshot ==="
    wget --show-progress -O "${SRC_TAR}" "${SRC_URL}"
fi
BUILD_TMP=$(mktemp -d)
trap 'rm -rf "${BUILD_TMP}"' EXIT INT TERM
cd "${BUILD_TMP}"
tar xf "${SRC_TAR}"
SRC_DIR=$(find . -maxdepth 1 -type d -name 'john-*' | head -1)
[ -n "${SRC_DIR}" ] || { echo "ERROR: john source directory not found"; exit 1; }
cd "${SRC_DIR}/src"
echo "=== configuring john ==="
./configure
echo "=== building john ==="
make clean >/dev/null 2>&1 || true
make -j"$(nproc)"
mkdir -p "${CACHE_DIR}"
cp -a "../run" "${CACHE_DIR}/run"
chmod +x "${CACHE_DIR}/run/john"
echo "=== john build complete ==="
echo "run dir: ${CACHE_DIR}/run"


@@ -9,6 +9,7 @@
 #
 # Output layout:
 #   $CACHE_DIR/bin/all_reduce_perf
+#   $CACHE_DIR/lib/libcudart.so*   copied from the nvcc toolchain used to build nccl-tests
 set -e
@@ -16,11 +17,13 @@ NCCL_TESTS_VERSION="$1"
 NCCL_VERSION="$2"
 NCCL_CUDA_VERSION="$3"
 DIST_DIR="$4"
+NVCC_VERSION="${5:-}"
+DEBIAN_VERSION="${6:-12}"
-[ -n "$NCCL_TESTS_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
-[ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
-[ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
-[ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
+[ -n "$NCCL_TESTS_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir> [nvcc-version] [debian-version]"; exit 1; }
+[ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir> [nvcc-version] [debian-version]"; exit 1; }
+[ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir> [nvcc-version] [debian-version]"; exit 1; }
+[ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir> [nvcc-version] [debian-version]"; exit 1; }
 echo "=== nccl-tests ${NCCL_TESTS_VERSION} ==="
@@ -28,29 +31,47 @@ CACHE_DIR="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
 CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
 DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"
-if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ]; then
+if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ] && [ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' 2>/dev/null | wc -l)" -gt 0 ]; then
     echo "=== nccl-tests cached, skipping build ==="
     echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
     exit 0
 fi
-# Resolve nvcc path (cuda-nvcc-12-8 installs to /usr/local/cuda-12.8/bin/nvcc)
+# Resolve nvcc path (cuda-nvcc-X-Y installs to /usr/local/cuda-X.Y/bin/nvcc)
+NVCC_VERSION_PATH="$(echo "${NVCC_VERSION}" | tr '.' '.')"
 NVCC=""
-for candidate in nvcc /usr/local/cuda-12.8/bin/nvcc /usr/local/cuda-12/bin/nvcc /usr/local/cuda/bin/nvcc; do
+for candidate in nvcc "/usr/local/cuda-${NVCC_VERSION_PATH}/bin/nvcc" /usr/local/cuda-12/bin/nvcc /usr/local/cuda/bin/nvcc; do
     if command -v "$candidate" >/dev/null 2>&1 || [ -x "$candidate" ]; then
         NVCC="$candidate"
         break
     fi
 done
-[ -n "$NVCC" ] || { echo "ERROR: nvcc not found — install cuda-nvcc-13-0"; exit 1; }
+[ -n "$NVCC" ] || { echo "ERROR: nvcc not found — install cuda-nvcc-$(echo "${NVCC_VERSION}" | tr '.' '-')"; exit 1; }
 echo "nvcc: $NVCC"
 # Determine CUDA_HOME from nvcc location
 CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
 echo "CUDA_HOME: $CUDA_HOME"
+find_cudart_dir() {
+    for dir in \
+        "${CUDA_HOME}/targets/x86_64-linux/lib" \
+        "${CUDA_HOME}/targets/x86_64-linux/lib/stubs" \
+        "${CUDA_HOME}/lib64" \
+        "${CUDA_HOME}/lib"; do
+        if [ -d "$dir" ] && find "$dir" -maxdepth 1 -name 'libcudart.so*' -type f | grep -q .; then
+            printf '%s\n' "$dir"
+            return 0
+        fi
+    done
+    return 1
+}
+CUDART_DIR="$(find_cudart_dir)" || { echo "ERROR: libcudart.so* not found under ${CUDA_HOME}"; exit 1; }
+echo "cudart dir: $CUDART_DIR"
 # Download libnccl-dev for nccl.h
-REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64"
+REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian${DEBIAN_VERSION}/x86_64"
 DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
 DEV_URL="${REPO_BASE}/${DEV_PKG}"
@@ -133,6 +154,11 @@ mkdir -p "${CACHE_DIR}/bin"
 cp "./build/all_reduce_perf" "${CACHE_DIR}/bin/all_reduce_perf"
 chmod +x "${CACHE_DIR}/bin/all_reduce_perf"
+mkdir -p "${CACHE_DIR}/lib"
+find "${CUDART_DIR}" -maxdepth 1 -name 'libcudart.so*' -type f -exec cp -a {} "${CACHE_DIR}/lib/" \;
+[ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' -type f | wc -l)" -gt 0 ] || { echo "ERROR: libcudart runtime copy failed"; exit 1; }
 echo "=== nccl-tests build complete ==="
 echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
 ls -lh "${CACHE_DIR}/bin/all_reduce_perf"
+ls -lh "${CACHE_DIR}/lib/"libcudart.so* 2>/dev/null || true
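The nvcc-not-found error path in this script rebuilds the apt package name from the dotted version with `tr '.' '-'`. In isolation (the version value here is an example, not pinned by the diff):

```shell
#!/bin/sh
# Map a dotted CUDA version to the matching cuda-nvcc apt package name,
# the same transform the script's error message uses ("12.8" is a demo value).
NVCC_VERSION="12.8"
pkg="cuda-nvcc-$(echo "${NVCC_VERSION}" | tr '.' '-')"
echo "$pkg"
```

Running this prints `cuda-nvcc-12-8`, matching NVIDIA's per-minor-version package naming in its CUDA apt repositories.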


@@ -1,8 +1,10 @@
 #!/bin/sh
-# build-nvidia-module.sh — compile NVIDIA proprietary driver modules for Debian 12
+# build-nvidia-module.sh — compile NVIDIA kernel modules for Debian 12
 #
 # Downloads the official NVIDIA .run installer, extracts kernel modules and
-# userspace tools (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA.
+# userspace tools (nvidia-smi, libnvidia-ml). Supports both:
+#   - open        -> kernel-open/ sources from the .run installer
+#   - proprietary -> traditional proprietary kernel sources from the .run installer
 #
 # Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
 # are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
@@ -10,17 +12,26 @@
 # Output layout:
 #   $CACHE_DIR/modules/ — nvidia*.ko files
 #   $CACHE_DIR/bin/     — nvidia-smi, nvidia-debugdump
-#   $CACHE_DIR/lib/     — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
+#   $CACHE_DIR/lib/     — libnvidia-ml.so*, libcuda.so*, OpenCL-related libs
 set -e
 NVIDIA_VERSION="$1"
 DIST_DIR="$2"
 DEBIAN_KERNEL_ABI="$3"
+NVIDIA_FLAVOR="${4:-open}"
-[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
-[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
-[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
+[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
+[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
+[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
+case "$NVIDIA_FLAVOR" in
+open|proprietary) ;;
+*)
+    echo "unsupported NVIDIA flavor: $NVIDIA_FLAVOR (expected open or proprietary)" >&2
+    exit 1
+    ;;
+esac
 KVER="${DEBIAN_KERNEL_ABI}-amd64"
 # On Debian, kernel headers are split into two packages:
@@ -31,7 +42,22 @@ KVER="${DEBIAN_KERNEL_ABI}-amd64"
 KDIR_ARCH="/usr/src/linux-headers-${KVER}"
 KDIR_COMMON="/usr/src/linux-headers-${DEBIAN_KERNEL_ABI}-common"
-echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="
+echo "=== NVIDIA ${NVIDIA_VERSION} (${NVIDIA_FLAVOR}) for kernel ${KVER} ==="
+CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_FLAVOR}-${NVIDIA_VERSION}-${KVER}"
+CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
+DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
+EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
+CACHE_LAYOUT_VERSION="3"
+CACHE_LAYOUT_MARKER="${CACHE_DIR}/.cache-layout-v${CACHE_LAYOUT_VERSION}"
+if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
+   && [ -f "$CACHE_LAYOUT_MARKER" ] \
+   && [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
+    echo "=== NVIDIA cached, skipping build ==="
+    echo "cache: $CACHE_DIR"
+    echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
+    exit 0
+fi
 if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then
     echo "=== installing linux-headers-${KVER} ==="
@@ -42,18 +68,6 @@ fi
 echo "kernel headers (arch):   $KDIR_ARCH"
 echo "kernel headers (common): $KDIR_COMMON"
-CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
-CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
-DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
-EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
-if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
-   && [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
-    echo "=== NVIDIA cached, skipping build ==="
-    echo "cache: $CACHE_DIR"
-    echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
-    exit 0
-fi
 # Download official NVIDIA .run installer with sha256 verification
 BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
 mkdir -p "$DOWNLOAD_CACHE_DIR" "$EXTRACT_CACHE_DIR"
@@ -87,12 +101,18 @@ EXTRACT_DIR="${EXTRACT_CACHE_DIR}/nvidia-extract-${NVIDIA_VERSION}"
 rm -rf "$EXTRACT_DIR"
 "$RUN_FILE" --extract-only --target "$EXTRACT_DIR"
-# Find kernel source directory (proprietary: kernel/, open: kernel-open/)
+# Find kernel source directory for the selected flavor.
 KERNEL_SRC=""
-for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do
-    [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
-done
-[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found in:"; ls "$EXTRACT_DIR/"; exit 1; }
+if [ "$NVIDIA_FLAVOR" = "open" ]; then
+    for d in "$EXTRACT_DIR/kernel-open" "$EXTRACT_DIR/kernel-open/"*; do
+        [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
+    done
+else
+    for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do
+        [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
+    done
+fi
+[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found for flavor ${NVIDIA_FLAVOR} in:"; ls "$EXTRACT_DIR/"; exit 1; }
 echo "kernel source: $KERNEL_SRC"
 # Build kernel modules
@@ -130,17 +150,30 @@ else
     echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
 fi
-# Copy ALL userspace library files.
-# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
-# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
-    count=0
-    for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
-        cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
-    done
-    if [ "$count" -eq 0 ]; then
-        echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR"
-        ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true
-        exit 1
-    fi
-done
+# Copy NVIDIA userspace libraries broadly instead of whitelisting a few names.
+# Newer driver branches add extra runtime deps (for example OpenCL/compiler side
+# libraries). If we only copy a narrow allowlist, clinfo/John can see nvidia.icd
+# but still fail with "no OpenCL platforms" because one dependent .so is absent.
+copied_libs=0
+for f in $(find "$EXTRACT_DIR" -maxdepth 1 \( -name 'libnvidia*.so.*' -o -name 'libcuda.so.*' \) -type f 2>/dev/null | sort); do
+    cp "$f" "$CACHE_DIR/lib/"
+    copied_libs=$((copied_libs+1))
+done
+if [ "$copied_libs" -eq 0 ]; then
+    echo "ERROR: no NVIDIA userspace libraries found in $EXTRACT_DIR"
+    ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -40 || true
+    exit 1
+fi
+for lib in \
+    libnvidia-ml \
+    libcuda \
+    libnvidia-ptxjitcompiler \
+    libnvidia-opencl; do
+    if ! ls "$CACHE_DIR/lib/${lib}.so."* >/dev/null 2>&1; then
+        echo "ERROR: required ${lib}.so.* not found in extracted userspace libs"
+        ls "$CACHE_DIR/lib/" | sort >&2 || true
+        exit 1
+    fi
+done
@@ -149,16 +182,17 @@ done
 ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
 [ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
-# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
-    versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
-    [ -n "$versioned" ] || continue
-    base=$(basename "$versioned")
-    ln -sf "$base" "$CACHE_DIR/lib/${lib}.so.1"
-    ln -sf "${lib}.so.1" "$CACHE_DIR/lib/${lib}.so" 2>/dev/null || true
-    echo "${lib}: .so.1 -> $base"
-done
+# Create soname symlinks for every copied versioned library.
+for versioned in "$CACHE_DIR"/lib/*.so.*; do
+    [ -f "$versioned" ] || continue
+    base=$(basename "$versioned")
+    stem=${base%%.so.*}
+    ln -sf "$base" "$CACHE_DIR/lib/${stem}.so.1"
+    ln -sf "${stem}.so.1" "$CACHE_DIR/lib/${stem}.so" 2>/dev/null || true
+done
+touch "$CACHE_LAYOUT_MARKER"
 echo "=== NVIDIA build complete ==="
 echo "cache: $CACHE_DIR"
 echo "modules: $ko_count .ko files"
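The generic symlink loop in this hunk derives each library's stem with POSIX parameter expansion: `${base%%.so.*}` strips the longest suffix beginning at `.so.`, so one loop covers every copied library without a per-name allowlist. A standalone sketch of the derivation (version numbers here are illustrative, not from the diff):

```shell
#!/bin/sh
# ${base%%.so.*} removes the longest trailing match of ".so.*", leaving the
# bare stem used to name the .so.1 and .so symlinks.
for base in libnvidia-ml.so.570.86.16 libcuda.so.570.86.16 libopenblas.so.0.3; do
    stem=${base%%.so.*}
    echo "${base} -> ${stem}.so.1 -> ${stem}.so"
done
```

Using `%%` (longest match) rather than `%` matters: with `%` the pattern `.so.*` would only drop the final version component, leaving stems like `libnvidia-ml.so.570.86`.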

File diff suppressed because it is too large.


@@ -0,0 +1,29 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v2.0.22 (GNU/Linux)
mQINBGJYmlEBEAC6nJmeqByeReM+MSy4palACCnfOg4pOxffrrkldxz4jrDOZNK4
q8KG+ZbXrkdP0e9qTFRvZzN+A6Jw3ySfoiKXRBw5l2Zp81AYkghV641OpWNjZOyL
syKEtST9LR1ttHv1ZI71pj8NVG/EnpimZPOblEJ1OpibJJCXLrbn+qcJ8JNuGTSK
6v2aLBmhR8VR/aSJpmkg7fFjcGklweTI8+Ibj72HuY9JRD/+dtUoSh7z037mWo56
ee02lPFRD0pHOEAlLSXxFO/SDqRVMhcgHk0a8roCF+9h5Ni7ZUyxlGK/uHkqN7ED
/U/ATpGKgvk4t23eTpdRC8FXAlBZQyf/xnhQXsyF/z7+RV5CL0o1zk1LKgo+5K32
5ka5uZb6JSIrEPUaCPEMXu6EEY8zSFnCrRS/Vjkfvc9ViYZWzJ387WTjAhMdS7wd
PmdDWw2ASGUP4FrfCireSZiFX+ZAOspKpZdh0P5iR5XSx14XDt3jNK2EQQboaJAD
uqksItatOEYNu4JsCbc24roJvJtGhpjTnq1/dyoy6K433afU0DS2ZPLthLpGqeyK
MKNY7a2WjxhRmCSu5Zok/fGKcO62XF8a3eSj4NzCRv8LM6mG1Oekz6Zz+tdxHg19
ufHO0et7AKE5q+5VjE438Xpl4UWbM/Voj6VPJ9uzywDcnZXpeOqeTQh2pQARAQAB
tCBjdWRhdG9vbHMgPGN1ZGF0b29sc0BudmlkaWEuY29tPokCOQQTAQIAIwUCYlia
UQIbAwcLCQgHAwIBBhUIAgkKCwQWAgMBAh4BAheAAAoJEKS0aZY7+GPM1y4QALKh
BqSozrYbe341Qu7SyxHQgjRCGi4YhI3bHCMj5F6vEOHnwiFH6YmFkxCYtqcGjca6
iw7cCYMow/hgKLAPwkwSJ84EYpGLWx62+20rMM4OuZwauSUcY/kE2WgnQ74zbh3+
MHs56zntJFfJ9G+NYidvwDWeZn5HIzR4CtxaxRgpiykg0s3ps6X0U+vuVcLnutBF
7r81astvlVQERFbce/6KqHK+yj843Qrhb3JEolUoOETK06nD25bVtnAxe0QEyA90
9MpRNLfR6BdjPpxqhphDcMOhJfyubAroQUxG/7S+Yw+mtEqHrL/dz9iEYqodYiSo
zfi0b+HFI59sRkTfOBDBwb3kcARExwnvLJmqijiVqWkoJ3H67oA0XJN2nelucw+A
Hb+Jt9BWjyzKWlLFDnVHdGicyRJ0I8yqi32w8hGeXmu3tU58VWJrkXEXadBftmci
pemb6oZ/r5SCkW6kxr2PsNWcJoebUdynyOQGbVwpMtJAnjOYp0ObKOANbcIg+tsi
kyCIO5TiY3ADbBDPCeZK8xdcugXoW5WFwACGC0z+Cn0mtw8z3VGIPAMSCYmLusgW
t2+EpikwrP2inNp5Pc+YdczRAsa4s30Jpyv/UHEG5P9GKnvofaxJgnU56lJIRPzF
iCUGy6cVI0Fq777X/ME1K6A/bzZ4vRYNx8rUmVE5
=DO7z
-----END PGP PUBLIC KEY BLOCK-----


@@ -0,0 +1 @@
deb https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /

Binary file not shown.


@@ -0,0 +1 @@
deb https://repo.radeon.com/rocm/apt/%%ROCM_VERSION%% jammy main


@@ -8,7 +8,7 @@ else
 fi
 if loadfont $font ; then
-    set gfxmode=800x600
+    set gfxmode=1920x1080,1280x1024,auto
     set gfxpayload=keep
     insmod efi_gop
     insmod efi_uga

Some files were not shown because too many files have changed in this diff.