Compare commits
105 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d2eadedff2 | ||
|
|
a98c4d7461 | ||
|
|
2354ae367d | ||
|
|
0d0e1f55a7 | ||
|
|
35f4c53887 | ||
|
|
981315e6fd | ||
|
|
fc5c100a29 | ||
| 6e94216f3b | |||
| 53455063b9 | |||
| 4602f97836 | |||
| c65d3ae3b1 | |||
| 7a21c370e4 | |||
| a493e3ab5b | |||
| 19b4803ec7 | |||
| 1bdfb1e9ca | |||
| c5d6b30177 | |||
| 5b9015451e | |||
| d1a6863ceb | |||
| f9aa05de8e | |||
| a9ccea8cca | |||
| fc5c985fb5 | |||
| 5eb3baddb4 | |||
| a6ac13b5d3 | |||
| 4003cb7676 | |||
| 2875313ba0 | |||
| f1621efee4 | |||
| 4461249cc3 | |||
| e609fbbc26 | |||
| cc2b49ea41 | |||
| 33e0a5bef2 | |||
| 38e79143eb | |||
| 25af2df23a | |||
| 20abff7f90 | |||
| a14ec8631c | |||
| f58c7e58d3 | |||
| bf47c8dbd2 | |||
| 143b7dca5d | |||
| 9826d437a5 | |||
|
|
f3c14cd893 | ||
|
|
728270dc8e | ||
|
|
8692f825bc | ||
|
|
11f52ac710 | ||
|
|
1cb398fe83 | ||
|
|
7a843be6b0 | ||
|
|
7f6386dccc | ||
|
|
eea2591bcc | ||
|
|
295a19b93a | ||
|
|
444a7d16cc | ||
|
|
fd722692a4 | ||
|
|
99cece524c | ||
|
|
c27449c60e | ||
|
|
5ef879e307 | ||
|
|
e7df63bae1 | ||
|
|
17ff3811f8 | ||
|
|
fc7fe0b08e | ||
|
|
3cf75a541a | ||
|
|
1f750d3edd | ||
|
|
b2b0444131 | ||
| dbab43db90 | |||
| bcb7fe5fe9 | |||
| d21d9d191b | |||
| ef45246ea0 | |||
| 348db35119 | |||
| 1dd7f243f5 | |||
| 938e499ac2 | |||
| 964ab39656 | |||
| c2aecc6ce9 | |||
| 439b86ce59 | |||
| eb60100297 | |||
|
|
2baf3be640 | ||
|
|
d92f8f41d0 | ||
|
|
76a9100779 | ||
|
|
1b6d592bf3 | ||
|
|
c95bbff23b | ||
|
|
4e4debd4da | ||
|
|
5839f870b7 | ||
|
|
b447717a5a | ||
|
|
f6f4923ac9 | ||
|
|
c394845b34 | ||
|
|
3472afea32 | ||
|
|
942f11937f | ||
|
|
b5b34983f1 | ||
| 45221d1e9a | |||
| 3869788bac | |||
| 3dbc2184ef | |||
| 60cb8f889a | |||
| c9ee078622 | |||
| ea660500c9 | |||
| d43a9aeec7 | |||
|
|
f5622e351e | ||
|
|
a20806afc8 | ||
|
|
4f9b6b3bcd | ||
|
|
c850b39b01 | ||
|
|
6dee8f3509 | ||
|
|
20f834aa96 | ||
| 105d92df8b | |||
| f96b149875 | |||
| 5ee120158e | |||
| 09fe0e2e9e | |||
| ace1a9dba6 | |||
| 905c581ece | |||
| 7c2a0135d2 | |||
| 407c1cd1c4 | |||
| e15bcc91c5 | |||
| 98f0cf0d52 |
4
PLAN.md
4
PLAN.md
@@ -343,9 +343,9 @@ Planned code shape:
|
|||||||
- `bee tui` can rerun the audit manually
|
- `bee tui` can rerun the audit manually
|
||||||
- `bee tui` can export the latest audit JSON to removable media
|
- `bee tui` can export the latest audit JSON to removable media
|
||||||
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
|
- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
|
||||||
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
|
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-burn`
|
||||||
- SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
|
- SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
|
||||||
- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
|
- Memory SAT runtime defaults can be overridden via `BEE_MEMTESTER_*`
|
||||||
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
- removable export requires explicit target selection, mount, confirmation, copy, and cleanup
|
||||||
|
|
||||||
### 2.6 — Vendor utilities and optional assets
|
### 2.6 — Vendor utilities and optional assets
|
||||||
|
|||||||
@@ -1,7 +1,10 @@
|
|||||||
LISTEN ?= :8080
|
LISTEN ?= :8080
|
||||||
AUDIT_PATH ?=
|
AUDIT_PATH ?=
|
||||||
|
EXPORT_DIR ?= $(CURDIR)/.tmp/export
|
||||||
|
VERSION ?= $(shell sh ./scripts/resolve-version.sh)
|
||||||
|
GO_LDFLAGS := -X main.Version=$(VERSION)
|
||||||
|
|
||||||
RUN_ARGS := web --listen $(LISTEN)
|
RUN_ARGS := web --listen $(LISTEN) --export-dir $(EXPORT_DIR)
|
||||||
ifneq ($(AUDIT_PATH),)
|
ifneq ($(AUDIT_PATH),)
|
||||||
RUN_ARGS += --audit-path $(AUDIT_PATH)
|
RUN_ARGS += --audit-path $(AUDIT_PATH)
|
||||||
endif
|
endif
|
||||||
@@ -9,10 +12,11 @@ endif
|
|||||||
.PHONY: run build test
|
.PHONY: run build test
|
||||||
|
|
||||||
run:
|
run:
|
||||||
go run ./cmd/bee $(RUN_ARGS)
|
mkdir -p $(EXPORT_DIR)
|
||||||
|
go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
|
||||||
|
|
||||||
build:
|
build:
|
||||||
go build -o bee ./cmd/bee
|
go build -ldflags "$(GO_LDFLAGS)" -o bee ./cmd/bee
|
||||||
|
|
||||||
test:
|
test:
|
||||||
go test ./...
|
go test ./...
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
@@ -21,30 +22,7 @@ var Version = "dev"
|
|||||||
func buildLabel() string {
|
func buildLabel() string {
|
||||||
label := strings.TrimSpace(Version)
|
label := strings.TrimSpace(Version)
|
||||||
if label == "" {
|
if label == "" {
|
||||||
label = "dev"
|
return "dev"
|
||||||
}
|
|
||||||
if info, ok := debug.ReadBuildInfo(); ok {
|
|
||||||
var revision string
|
|
||||||
var modified bool
|
|
||||||
for _, setting := range info.Settings {
|
|
||||||
switch setting.Key {
|
|
||||||
case "vcs.revision":
|
|
||||||
revision = setting.Value
|
|
||||||
case "vcs.modified":
|
|
||||||
modified = setting.Value == "true"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if revision != "" {
|
|
||||||
short := revision
|
|
||||||
if len(short) > 12 {
|
|
||||||
short = short[:12]
|
|
||||||
}
|
|
||||||
label += " (" + short
|
|
||||||
if modified {
|
|
||||||
label += "+"
|
|
||||||
}
|
|
||||||
label += ")"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return label
|
return label
|
||||||
}
|
}
|
||||||
@@ -53,10 +31,19 @@ func main() {
|
|||||||
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
|
||||||
}
|
}
|
||||||
|
|
||||||
func run(args []string, stdout, stderr io.Writer) int {
|
func run(args []string, stdout, stderr io.Writer) (exitCode int) {
|
||||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||||
Level: slog.LevelInfo,
|
Level: slog.LevelInfo,
|
||||||
})))
|
})))
|
||||||
|
defer func() {
|
||||||
|
if rec := recover(); rec != nil {
|
||||||
|
slog.Error("fatal panic",
|
||||||
|
"panic", fmt.Sprint(rec),
|
||||||
|
"stack", string(debug.Stack()),
|
||||||
|
)
|
||||||
|
exitCode = 1
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
if len(args) == 0 {
|
if len(args) == 0 {
|
||||||
printRootUsage(stderr)
|
printRootUsage(stderr)
|
||||||
@@ -82,6 +69,8 @@ func run(args []string, stdout, stderr io.Writer) int {
|
|||||||
return runWeb(args[1:], stdout, stderr)
|
return runWeb(args[1:], stdout, stderr)
|
||||||
case "sat":
|
case "sat":
|
||||||
return runSAT(args[1:], stdout, stderr)
|
return runSAT(args[1:], stdout, stderr)
|
||||||
|
case "benchmark":
|
||||||
|
return runBenchmark(args[1:], stdout, stderr)
|
||||||
case "version", "--version", "-version":
|
case "version", "--version", "-version":
|
||||||
fmt.Fprintln(stdout, Version)
|
fmt.Fprintln(stdout, Version)
|
||||||
return 0
|
return 0
|
||||||
@@ -98,8 +87,9 @@ func printRootUsage(w io.Writer) {
|
|||||||
bee preflight --output stdout|file:<path>
|
bee preflight --output stdout|file:<path>
|
||||||
bee export --target <device>
|
bee export --target <device>
|
||||||
bee support-bundle --output stdout|file:<path>
|
bee support-bundle --output stdout|file:<path>
|
||||||
bee web --listen :80 --audit-path `+app.DefaultAuditJSONPath+`
|
bee web --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
|
||||||
bee sat nvidia|memory|storage|cpu [--duration <seconds>]
|
bee sat nvidia|memory|storage|cpu [--duration <seconds>]
|
||||||
|
bee benchmark nvidia [--profile standard|stability|overnight]
|
||||||
bee version
|
bee version
|
||||||
bee help [command]`)
|
bee help [command]`)
|
||||||
}
|
}
|
||||||
@@ -118,6 +108,8 @@ func runHelp(args []string, stdout, stderr io.Writer) int {
|
|||||||
return runWeb([]string{"--help"}, stdout, stdout)
|
return runWeb([]string{"--help"}, stdout, stdout)
|
||||||
case "sat":
|
case "sat":
|
||||||
return runSAT([]string{"--help"}, stdout, stderr)
|
return runSAT([]string{"--help"}, stdout, stderr)
|
||||||
|
case "benchmark":
|
||||||
|
return runBenchmark([]string{"--help"}, stdout, stderr)
|
||||||
case "version":
|
case "version":
|
||||||
fmt.Fprintln(stdout, "usage: bee version")
|
fmt.Fprintln(stdout, "usage: bee version")
|
||||||
return 0
|
return 0
|
||||||
@@ -304,7 +296,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
|
|||||||
fs := flag.NewFlagSet("web", flag.ContinueOnError)
|
fs := flag.NewFlagSet("web", flag.ContinueOnError)
|
||||||
fs.SetOutput(stderr)
|
fs.SetOutput(stderr)
|
||||||
listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
|
listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
|
||||||
auditPath := fs.String("audit-path", app.DefaultAuditJSONPath, "path to the latest audit JSON snapshot")
|
auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot")
|
||||||
exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
|
exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
|
||||||
title := fs.String("title", "Bee Hardware Audit", "page title")
|
title := fs.String("title", "Bee Hardware Audit", "page title")
|
||||||
fs.Usage = func() {
|
fs.Usage = func() {
|
||||||
@@ -356,6 +348,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
fs := flag.NewFlagSet("sat", flag.ContinueOnError)
|
fs := flag.NewFlagSet("sat", flag.ContinueOnError)
|
||||||
fs.SetOutput(stderr)
|
fs.SetOutput(stderr)
|
||||||
duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
|
duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
|
||||||
|
diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
|
||||||
if err := fs.Parse(args[1:]); err != nil {
|
if err := fs.Parse(args[1:]); err != nil {
|
||||||
if err == flag.ErrHelp {
|
if err == flag.ErrHelp {
|
||||||
return 0
|
return 0
|
||||||
@@ -370,7 +363,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
target := args[0]
|
target := args[0]
|
||||||
if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
|
if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
|
||||||
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
|
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
|
||||||
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>]")
|
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]")
|
||||||
return 2
|
return 2
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -382,7 +375,12 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
|
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
|
||||||
switch target {
|
switch target {
|
||||||
case "nvidia":
|
case "nvidia":
|
||||||
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
level := *diagLevel
|
||||||
|
if level > 0 {
|
||||||
|
_, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine)
|
||||||
|
} else {
|
||||||
|
archive, err = application.RunNvidiaAcceptancePack("", logLine)
|
||||||
|
}
|
||||||
case "memory":
|
case "memory":
|
||||||
archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
|
archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
|
||||||
case "storage":
|
case "storage":
|
||||||
@@ -401,3 +399,85 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
|
|||||||
slog.Info("sat archive written", "target", target, "path", archive)
|
slog.Info("sat archive written", "target", target, "path", archive)
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func runBenchmark(args []string, stdout, stderr io.Writer) int {
|
||||||
|
if len(args) == 0 {
|
||||||
|
fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
|
||||||
|
fmt.Fprintln(stdout, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
target := args[0]
|
||||||
|
if target != "nvidia" {
|
||||||
|
fmt.Fprintf(stderr, "bee benchmark: unknown target %q\n", target)
|
||||||
|
fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
fs := flag.NewFlagSet("benchmark", flag.ContinueOnError)
|
||||||
|
fs.SetOutput(stderr)
|
||||||
|
profile := fs.String("profile", platform.NvidiaBenchmarkProfileStandard, "benchmark profile: standard, stability, overnight")
|
||||||
|
devices := fs.String("devices", "", "comma-separated GPU indices to include")
|
||||||
|
exclude := fs.String("exclude", "", "comma-separated GPU indices to exclude")
|
||||||
|
sizeMB := fs.Int("size-mb", 0, "per-GPU benchmark buffer size in MB (0 = auto)")
|
||||||
|
skipNCCL := fs.Bool("skip-nccl", false, "skip multi-GPU NCCL interconnect benchmark")
|
||||||
|
if err := fs.Parse(args[1:]); err != nil {
|
||||||
|
if err == flag.ErrHelp {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
if fs.NArg() != 0 {
|
||||||
|
fmt.Fprintf(stderr, "bee benchmark: unexpected arguments\n")
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
includeIndices, err := parseBenchmarkIndexCSV(*devices)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(stderr, "bee benchmark: invalid --devices: %v\n", err)
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
excludeIndices, err := parseBenchmarkIndexCSV(*exclude)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Fprintf(stderr, "bee benchmark: invalid --exclude: %v\n", err)
|
||||||
|
return 2
|
||||||
|
}
|
||||||
|
|
||||||
|
application := app.New(platform.New())
|
||||||
|
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
|
||||||
|
archive, err := application.RunNvidiaBenchmark("", platform.NvidiaBenchmarkOptions{
|
||||||
|
Profile: *profile,
|
||||||
|
SizeMB: *sizeMB,
|
||||||
|
GPUIndices: includeIndices,
|
||||||
|
ExcludeGPUIndices: excludeIndices,
|
||||||
|
RunNCCL: !*skipNCCL,
|
||||||
|
}, logLine)
|
||||||
|
if err != nil {
|
||||||
|
slog.Error("run benchmark", "target", target, "err", err)
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
slog.Info("benchmark archive written", "target", target, "path", archive)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseBenchmarkIndexCSV(raw string) ([]int, error) {
|
||||||
|
raw = strings.TrimSpace(raw)
|
||||||
|
if raw == "" {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
var indices []int
|
||||||
|
for _, part := range strings.Split(raw, ",") {
|
||||||
|
part = strings.TrimSpace(part)
|
||||||
|
if part == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
value, err := strconv.Atoi(part)
|
||||||
|
if err != nil || value < 0 {
|
||||||
|
return nil, fmt.Errorf("bad gpu index %q", part)
|
||||||
|
}
|
||||||
|
indices = append(indices, value)
|
||||||
|
}
|
||||||
|
return indices, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -46,8 +46,6 @@ func TestRunUnknownCommand(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRunVersion(t *testing.T) {
|
func TestRunVersion(t *testing.T) {
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
old := Version
|
old := Version
|
||||||
Version = "test-version"
|
Version = "test-version"
|
||||||
t.Cleanup(func() { Version = old })
|
t.Cleanup(func() { Version = old })
|
||||||
@@ -62,6 +60,16 @@ func TestRunVersion(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildLabelUsesVersionAsIs(t *testing.T) {
|
||||||
|
old := Version
|
||||||
|
Version = "1.2.3"
|
||||||
|
t.Cleanup(func() { Version = old })
|
||||||
|
|
||||||
|
if got := buildLabel(); got != "1.2.3" {
|
||||||
|
t.Fatalf("buildLabel=%q want %q", got, "1.2.3")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRunExportRequiresTarget(t *testing.T) {
|
func TestRunExportRequiresTarget(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -19,17 +19,18 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
DefaultExportDir = "/appdata/bee/export"
|
DefaultExportDir = "/appdata/bee/export"
|
||||||
DefaultAuditJSONPath = DefaultExportDir + "/bee-audit.json"
|
DefaultAuditJSONPath = DefaultExportDir + "/bee-audit.json"
|
||||||
DefaultAuditLogPath = DefaultExportDir + "/bee-audit.log"
|
DefaultAuditLogPath = DefaultExportDir + "/bee-audit.log"
|
||||||
DefaultWebLogPath = DefaultExportDir + "/bee-web.log"
|
DefaultWebLogPath = DefaultExportDir + "/bee-web.log"
|
||||||
DefaultNetworkLogPath = DefaultExportDir + "/bee-network.log"
|
DefaultNetworkLogPath = DefaultExportDir + "/bee-network.log"
|
||||||
DefaultNvidiaLogPath = DefaultExportDir + "/bee-nvidia.log"
|
DefaultNvidiaLogPath = DefaultExportDir + "/bee-nvidia.log"
|
||||||
DefaultSSHLogPath = DefaultExportDir + "/bee-sshsetup.log"
|
DefaultSSHLogPath = DefaultExportDir + "/bee-sshsetup.log"
|
||||||
DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json"
|
DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json"
|
||||||
DefaultRuntimeLogPath = DefaultExportDir + "/runtime-health.log"
|
DefaultRuntimeLogPath = DefaultExportDir + "/runtime-health.log"
|
||||||
DefaultTechDumpDir = DefaultExportDir + "/techdump"
|
DefaultTechDumpDir = DefaultExportDir + "/techdump"
|
||||||
DefaultSATBaseDir = DefaultExportDir + "/bee-sat"
|
DefaultSATBaseDir = DefaultExportDir + "/bee-sat"
|
||||||
|
DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
|
||||||
)
|
)
|
||||||
|
|
||||||
type App struct {
|
type App struct {
|
||||||
@@ -40,6 +41,8 @@ type App struct {
|
|||||||
sat satRunner
|
sat satRunner
|
||||||
runtime runtimeChecker
|
runtime runtimeChecker
|
||||||
installer installer
|
installer installer
|
||||||
|
// StatusDB is the unified component health store (nil if unavailable).
|
||||||
|
StatusDB *ComponentStatusDB
|
||||||
}
|
}
|
||||||
|
|
||||||
type ActionResult struct {
|
type ActionResult struct {
|
||||||
@@ -80,6 +83,7 @@ type installer interface {
|
|||||||
ListInstallDisks() ([]platform.InstallDisk, error)
|
ListInstallDisks() ([]platform.InstallDisk, error)
|
||||||
InstallToDisk(ctx context.Context, device string, logFile string) error
|
InstallToDisk(ctx context.Context, device string, logFile string) error
|
||||||
IsLiveMediaInRAM() bool
|
IsLiveMediaInRAM() bool
|
||||||
|
LiveBootSource() platform.LiveBootSource
|
||||||
RunInstallToRAM(ctx context.Context, logFunc func(string)) error
|
RunInstallToRAM(ctx context.Context, logFunc func(string)) error
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -100,6 +104,10 @@ func (a *App) IsLiveMediaInRAM() bool {
|
|||||||
return a.installer.IsLiveMediaInRAM()
|
return a.installer.IsLiveMediaInRAM()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) LiveBootSource() platform.LiveBootSource {
|
||||||
|
return a.installer.LiveBootSource()
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
return a.installer.RunInstallToRAM(ctx, logFunc)
|
return a.installer.RunInstallToRAM(ctx, logFunc)
|
||||||
}
|
}
|
||||||
@@ -107,6 +115,13 @@ func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
|||||||
type satRunner interface {
|
type satRunner interface {
|
||||||
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
|
||||||
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
|
||||||
|
RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
|
||||||
RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
@@ -114,10 +129,13 @@ type satRunner interface {
|
|||||||
DetectGPUVendor() string
|
DetectGPUVendor() string
|
||||||
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
ListAMDGPUs() ([]platform.AMDGPUInfo, error)
|
||||||
RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
|
RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
|
RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
|
||||||
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
|
||||||
|
RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
|
||||||
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -127,7 +145,7 @@ type runtimeChecker interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func New(platform *platform.System) *App {
|
func New(platform *platform.System) *App {
|
||||||
return &App{
|
a := &App{
|
||||||
network: platform,
|
network: platform,
|
||||||
services: platform,
|
services: platform,
|
||||||
exports: platform,
|
exports: platform,
|
||||||
@@ -136,19 +154,32 @@ func New(platform *platform.System) *App {
|
|||||||
runtime: platform,
|
runtime: platform,
|
||||||
installer: platform,
|
installer: platform,
|
||||||
}
|
}
|
||||||
|
if db, err := OpenComponentStatusDB(DefaultExportDir + "/component-status.json"); err == nil {
|
||||||
|
a.StatusDB = db
|
||||||
|
}
|
||||||
|
return a
|
||||||
}
|
}
|
||||||
|
|
||||||
// ApplySATOverlay parses a raw audit JSON, overlays the latest SAT results,
|
// ApplySATOverlay parses a raw audit JSON, overlays the latest SAT results,
|
||||||
// and returns the updated JSON. Used by the web UI to serve always-fresh status.
|
// and returns the updated JSON. Used by the web UI to serve always-fresh status.
|
||||||
func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
|
func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
|
||||||
var snap schema.HardwareIngestRequest
|
snap, err := readAuditSnapshot(auditJSON)
|
||||||
if err := json.Unmarshal(auditJSON, &snap); err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
applyLatestSATStatuses(&snap.Hardware, DefaultSATBaseDir)
|
applyLatestSATStatuses(&snap.Hardware, DefaultSATBaseDir, nil)
|
||||||
return json.MarshalIndent(snap, "", " ")
|
return json.MarshalIndent(snap, "", " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func readAuditSnapshot(auditJSON []byte) (schema.HardwareIngestRequest, error) {
|
||||||
|
var snap schema.HardwareIngestRequest
|
||||||
|
if err := json.Unmarshal(auditJSON, &snap); err != nil {
|
||||||
|
return schema.HardwareIngestRequest{}, err
|
||||||
|
}
|
||||||
|
collector.NormalizeSnapshot(&snap.Hardware, snap.CollectedAt)
|
||||||
|
return snap, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
|
func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
|
||||||
if runtimeMode == runtimeenv.ModeLiveCD {
|
if runtimeMode == runtimeenv.ModeLiveCD {
|
||||||
if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
|
if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
|
||||||
@@ -156,7 +187,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
result := collector.Run(runtimeMode)
|
result := collector.Run(runtimeMode)
|
||||||
applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir)
|
applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB)
|
||||||
if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
|
if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
|
||||||
result.Runtime = &health
|
result.Runtime = &health
|
||||||
}
|
}
|
||||||
@@ -171,10 +202,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
|
|||||||
return "stdout", err
|
return "stdout", err
|
||||||
case strings.HasPrefix(output, "file:"):
|
case strings.HasPrefix(output, "file:"):
|
||||||
path := strings.TrimPrefix(output, "file:")
|
path := strings.TrimPrefix(output, "file:")
|
||||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return path, nil
|
return path, nil
|
||||||
@@ -199,10 +227,7 @@ func (a *App) RunRuntimePreflight(output string) (string, error) {
|
|||||||
return "stdout", err
|
return "stdout", err
|
||||||
case strings.HasPrefix(output, "file:"):
|
case strings.HasPrefix(output, "file:"):
|
||||||
path := strings.TrimPrefix(output, "file:")
|
path := strings.TrimPrefix(output, "file:")
|
||||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
|
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
return path, nil
|
return path, nil
|
||||||
@@ -272,6 +297,9 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
if normalized, normErr := ApplySATOverlay(data); normErr == nil {
|
||||||
|
data = normalized
|
||||||
|
}
|
||||||
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
if err := os.WriteFile(tmpPath, data, 0644); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -505,6 +533,63 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
|
|||||||
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
|
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaTargetedStressValidatePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
|
||||||
|
return a.RunNvidiaBenchmarkCtx(context.Background(), baseDir, opts, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultBenchmarkBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaTargetedPowerPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaPulseTestPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaBandwidthPack(ctx, baseDir, gpuIndices, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunNvidiaStressPack(ctx, baseDir, opts, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
|
||||||
}
|
}
|
||||||
@@ -577,6 +662,20 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
|
|||||||
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
|
||||||
}
|
}
|
||||||
@@ -611,6 +710,13 @@ func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platfor
|
|||||||
return a.sat.RunFanStressTest(ctx, baseDir, opts)
|
return a.sat.RunFanStressTest(ctx, baseDir, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) {
|
||||||
|
if strings.TrimSpace(baseDir) == "" {
|
||||||
|
baseDir = DefaultSATBaseDir
|
||||||
|
}
|
||||||
|
return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
|
func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
|
||||||
path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
|
path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
|
||||||
body := "Results: " + path
|
body := "Results: " + path
|
||||||
@@ -697,6 +803,7 @@ func (a *App) HealthSummaryResult() ActionResult {
|
|||||||
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
return ActionResult{Title: "Health summary", Body: "Audit JSON is unreadable."}
|
||||||
}
|
}
|
||||||
|
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
|
||||||
|
|
||||||
summary := collector.BuildHealthSummary(snapshot.Hardware)
|
summary := collector.BuildHealthSummary(snapshot.Hardware)
|
||||||
var body strings.Builder
|
var body strings.Builder
|
||||||
@@ -731,6 +838,7 @@ func (a *App) MainBanner() string {
|
|||||||
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
if err := json.Unmarshal(raw, &snapshot); err != nil {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
collector.NormalizeSnapshot(&snapshot.Hardware, snapshot.CollectedAt)
|
||||||
|
|
||||||
var lines []string
|
var lines []string
|
||||||
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
if system := formatSystemLine(snapshot.Hardware.Board); system != "" {
|
||||||
@@ -825,6 +933,12 @@ func latestSATSummaries() []string {
|
|||||||
prefix string
|
prefix string
|
||||||
}{
|
}{
|
||||||
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
|
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
|
||||||
|
{label: "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)", prefix: "gpu-nvidia-targeted-stress-"},
|
||||||
|
{label: "NVIDIA Max Compute Load (dcgmproftester)", prefix: "gpu-nvidia-compute-"},
|
||||||
|
{label: "NVIDIA Targeted Power (dcgmi diag targeted_power)", prefix: "gpu-nvidia-targeted-power-"},
|
||||||
|
{label: "NVIDIA Pulse Test (dcgmi diag pulse_test)", prefix: "gpu-nvidia-pulse-"},
|
||||||
|
{label: "NVIDIA Interconnect Test (NCCL all_reduce_perf)", prefix: "gpu-nvidia-nccl-"},
|
||||||
|
{label: "NVIDIA Bandwidth Test (NVBandwidth)", prefix: "gpu-nvidia-bandwidth-"},
|
||||||
{label: "Memory SAT", prefix: "memory-"},
|
{label: "Memory SAT", prefix: "memory-"},
|
||||||
{label: "Storage SAT", prefix: "storage-"},
|
{label: "Storage SAT", prefix: "storage-"},
|
||||||
{label: "CPU SAT", prefix: "cpu-"},
|
{label: "CPU SAT", prefix: "cpu-"},
|
||||||
|
|||||||
@@ -120,14 +120,21 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type fakeSAT struct {
|
type fakeSAT struct {
|
||||||
runNvidiaFn func(string) (string, error)
|
runNvidiaFn func(string) (string, error)
|
||||||
runMemoryFn func(string) (string, error)
|
runNvidiaBenchmarkFn func(string, platform.NvidiaBenchmarkOptions) (string, error)
|
||||||
runStorageFn func(string) (string, error)
|
runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error)
|
||||||
runCPUFn func(string, int) (string, error)
|
runNvidiaComputeFn func(string, int, []int) (string, error)
|
||||||
detectVendorFn func() string
|
runNvidiaPowerFn func(string, int, []int) (string, error)
|
||||||
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error)
|
runNvidiaPulseFn func(string, int, []int) (string, error)
|
||||||
runAMDPackFn func(string) (string, error)
|
runNvidiaBandwidthFn func(string, []int) (string, error)
|
||||||
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
|
runNvidiaTargetedStressFn func(string, int, []int) (string, error)
|
||||||
|
runMemoryFn func(string) (string, error)
|
||||||
|
runStorageFn func(string) (string, error)
|
||||||
|
runCPUFn func(string, int) (string, error)
|
||||||
|
detectVendorFn func() string
|
||||||
|
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error)
|
||||||
|
runAMDPackFn func(string) (string, error)
|
||||||
|
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
|
||||||
@@ -138,6 +145,55 @@ func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir s
|
|||||||
return f.runNvidiaFn(baseDir)
|
return f.runNvidiaFn(baseDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaBenchmarkFn != nil {
|
||||||
|
return f.runNvidiaBenchmarkFn(baseDir, opts)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaTargetedStressFn != nil {
|
||||||
|
return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaComputeFn != nil {
|
||||||
|
return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaTargetedPowerPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaPowerFn != nil {
|
||||||
|
return f.runNvidiaPowerFn(baseDir, durationSec, gpuIndices)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaPulseTestPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaPulseFn != nil {
|
||||||
|
return f.runNvidiaPulseFn(baseDir, durationSec, gpuIndices)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaBandwidthPack(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaBandwidthFn != nil {
|
||||||
|
return f.runNvidiaBandwidthFn(baseDir, gpuIndices)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
|
||||||
|
if f.runNvidiaStressFn != nil {
|
||||||
|
return f.runNvidiaStressFn(baseDir, opts)
|
||||||
|
}
|
||||||
|
return f.runNvidiaFn(baseDir)
|
||||||
|
}
|
||||||
|
|
||||||
func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
|
||||||
if f.listNvidiaGPUsFn != nil {
|
if f.listNvidiaGPUsFn != nil {
|
||||||
return f.listNvidiaGPUsFn()
|
return f.listNvidiaGPUsFn()
|
||||||
@@ -181,6 +237,14 @@ func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(
|
|||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RunAMDMemIntegrityPack is a no-op stub satisfying the SAT interface in tests.
func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
	return "", nil
}
|
||||||
|
|
||||||
|
// RunAMDMemBandwidthPack is a no-op stub satisfying the SAT interface in tests.
func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
	return "", nil
}
|
||||||
|
|
||||||
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
@@ -195,6 +259,10 @@ func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStr
|
|||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RunPlatformStress is a no-op stub satisfying the SAT interface in tests.
func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.PlatformStressOptions, _ func(string)) (string, error) {
	return "", nil
}
|
||||||
|
|
||||||
func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
|
func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
@@ -640,13 +708,50 @@ func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestApplySATOverlayFiltersIgnoredLegacyDevices verifies that ApplySATOverlay
// drops legacy/virtual devices (virtual BMC disks, QAT co-processors) from an
// audit snapshot while preserving real storage and GPU entries.
func TestApplySATOverlayFiltersIgnoredLegacyDevices(t *testing.T) {
	tmp := t.TempDir()
	// Redirect the SAT base dir to a scratch location; restore it on cleanup so
	// later tests are unaffected.
	oldSATBaseDir := DefaultSATBaseDir
	DefaultSATBaseDir = filepath.Join(tmp, "sat")
	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })

	raw := `{
"collected_at": "2026-03-15T10:00:00Z",
"hardware": {
"board": {"serial_number": "SRV123"},
"storage": [
{"model": "Virtual HDisk0", "serial_number": "AAAABBBBCCCC3"},
{"model": "PASCARI", "serial_number": "DISK1", "status": "OK"}
],
"pcie_devices": [
{"device_class": "Co-processor", "model": "402xx Series QAT", "status": "OK"},
{"device_class": "VideoController", "model": "NVIDIA H100", "status": "OK"}
]
}
}`

	got, err := ApplySATOverlay([]byte(raw))
	if err != nil {
		t.Fatalf("ApplySATOverlay error: %v", err)
	}
	text := string(got)
	if contains(text, "Virtual HDisk0") {
		t.Fatalf("overlaid audit should drop virtual hdisk:\n%s", text)
	}
	if contains(text, "\"device_class\": \"Co-processor\"") {
		t.Fatalf("overlaid audit should drop co-processors:\n%s", text)
	}
	if !contains(text, "PASCARI") || !contains(text, "NVIDIA H100") {
		t.Fatalf("overlaid audit should keep real devices:\n%s", text)
	}
}
|
||||||
|
|
||||||
func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
exportDir := filepath.Join(tmp, "export")
|
exportDir := filepath.Join(tmp, "export")
|
||||||
if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
|
if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"ok":true}`), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"collected_at":"2026-03-15T10:00:00Z","hardware":{"board":{"serial_number":"SRV123"},"storage":[{"model":"Virtual HDisk0","serial_number":"AAAABBBBCCCC3"},{"model":"PASCARI","serial_number":"DISK1"}],"pcie_devices":[{"device_class":"Co-processor","model":"402xx Series QAT"},{"device_class":"VideoController","model":"NVIDIA H100"}]}}`), 0644); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
|
||||||
@@ -678,6 +783,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
|
|
||||||
tr := tar.NewReader(gzr)
|
tr := tar.NewReader(gzr)
|
||||||
var names []string
|
var names []string
|
||||||
|
var auditJSON string
|
||||||
for {
|
for {
|
||||||
hdr, err := tr.Next()
|
hdr, err := tr.Next()
|
||||||
if errors.Is(err, io.EOF) {
|
if errors.Is(err, io.EOF) {
|
||||||
@@ -687,6 +793,33 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
t.Fatalf("read tar entry: %v", err)
|
t.Fatalf("read tar entry: %v", err)
|
||||||
}
|
}
|
||||||
names = append(names, hdr.Name)
|
names = append(names, hdr.Name)
|
||||||
|
if contains(hdr.Name, "/export/bee-audit.json") {
|
||||||
|
body, err := io.ReadAll(tr)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("read audit entry: %v", err)
|
||||||
|
}
|
||||||
|
auditJSON = string(body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, want := range []string{
|
||||||
|
"/system/ip-link.txt",
|
||||||
|
"/system/ip-link-stats.txt",
|
||||||
|
"/system/ethtool-info.txt",
|
||||||
|
"/system/ethtool-link.txt",
|
||||||
|
"/system/ethtool-module.txt",
|
||||||
|
"/system/mstflint-query.txt",
|
||||||
|
} {
|
||||||
|
var found bool
|
||||||
|
for _, name := range names {
|
||||||
|
if contains(name, want) {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
t.Fatalf("support bundle missing %s, names=%v", want, names)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var foundRaw bool
|
var foundRaw bool
|
||||||
@@ -701,6 +834,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
|
|||||||
if !foundRaw {
|
if !foundRaw {
|
||||||
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
|
t.Fatalf("support bundle missing raw SAT log, names=%v", names)
|
||||||
}
|
}
|
||||||
|
if contains(auditJSON, "Virtual HDisk0") || contains(auditJSON, "\"device_class\": \"Co-processor\"") {
|
||||||
|
t.Fatalf("support bundle should normalize ignored devices:\n%s", auditJSON)
|
||||||
|
}
|
||||||
|
if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
|
||||||
|
t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMainBanner(t *testing.T) {
|
func TestMainBanner(t *testing.T) {
|
||||||
@@ -714,6 +853,10 @@ func TestMainBanner(t *testing.T) {
|
|||||||
product := "PowerEdge R760"
|
product := "PowerEdge R760"
|
||||||
cpuModel := "Intel Xeon Gold 6430"
|
cpuModel := "Intel Xeon Gold 6430"
|
||||||
memoryType := "DDR5"
|
memoryType := "DDR5"
|
||||||
|
memorySerialA := "DIMM-A"
|
||||||
|
memorySerialB := "DIMM-B"
|
||||||
|
storageSerialA := "DISK-A"
|
||||||
|
storageSerialB := "DISK-B"
|
||||||
gpuClass := "VideoController"
|
gpuClass := "VideoController"
|
||||||
gpuModel := "NVIDIA H100"
|
gpuModel := "NVIDIA H100"
|
||||||
|
|
||||||
@@ -729,12 +872,12 @@ func TestMainBanner(t *testing.T) {
|
|||||||
{Model: &cpuModel},
|
{Model: &cpuModel},
|
||||||
},
|
},
|
||||||
Memory: []schema.HardwareMemory{
|
Memory: []schema.HardwareMemory{
|
||||||
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialA},
|
||||||
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType},
|
{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialB},
|
||||||
},
|
},
|
||||||
Storage: []schema.HardwareStorage{
|
Storage: []schema.HardwareStorage{
|
||||||
{Present: &trueValue, SizeGB: intPtr(3840)},
|
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialA},
|
||||||
{Present: &trueValue, SizeGB: intPtr(3840)},
|
{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialB},
|
||||||
},
|
},
|
||||||
PCIeDevices: []schema.HardwarePCIeDevice{
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
||||||
{DeviceClass: &gpuClass, Model: &gpuModel},
|
{DeviceClass: &gpuClass, Model: &gpuModel},
|
||||||
|
|||||||
48
audit/internal/app/atomic_write.go
Normal file
48
audit/internal/app/atomic_write.go
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
package app
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
)
|
||||||
|
|
||||||
|
func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
|
||||||
|
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||||
|
return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmpPath := path + ".tmp"
|
||||||
|
f, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("open temp %s: %w", tmpPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
success := false
|
||||||
|
defer func() {
|
||||||
|
_ = f.Close()
|
||||||
|
if !success {
|
||||||
|
_ = os.Remove(tmpPath)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if _, err := f.Write(data); err != nil {
|
||||||
|
return fmt.Errorf("write temp %s: %w", tmpPath, err)
|
||||||
|
}
|
||||||
|
if err := f.Sync(); err != nil {
|
||||||
|
return fmt.Errorf("sync temp %s: %w", tmpPath, err)
|
||||||
|
}
|
||||||
|
if err := f.Close(); err != nil {
|
||||||
|
return fmt.Errorf("close temp %s: %w", tmpPath, err)
|
||||||
|
}
|
||||||
|
if err := os.Rename(tmpPath, path); err != nil {
|
||||||
|
return fmt.Errorf("rename %s -> %s: %w", tmpPath, path, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if dir, err := os.Open(filepath.Dir(path)); err == nil {
|
||||||
|
_ = dir.Sync()
|
||||||
|
_ = dir.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
success = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
71
audit/internal/app/atomic_write_test.go
Normal file
71
audit/internal/app/atomic_write_test.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package app
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestAtomicWriteFileReplacesTargetWithoutLeavingTmp checks that a successful
// atomicWriteFile replaces existing content and does not leave a temp file.
func TestAtomicWriteFileReplacesTargetWithoutLeavingTmp(t *testing.T) {
	path := filepath.Join(t.TempDir(), "bee-audit.json")
	// Seed the target so the test exercises replacement, not creation.
	if err := os.WriteFile(path, []byte("old\n"), 0644); err != nil {
		t.Fatalf("seed file: %v", err)
	}

	if err := atomicWriteFile(path, []byte("new\n"), 0644); err != nil {
		t.Fatalf("atomicWriteFile: %v", err)
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read final: %v", err)
	}
	if string(raw) != "new\n" {
		t.Fatalf("final content=%q want %q", string(raw), "new\n")
	}
	// The temp file must be gone once the rename has succeeded.
	if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
		t.Fatalf("tmp file should be absent after success, err=%v", err)
	}
}
|
||||||
|
|
||||||
|
// TestRunRuntimePreflightWritesAtomically checks that RunRuntimePreflight
// persists the runtime health snapshot via the atomic writer: the target file
// contains valid JSON and no .tmp sibling remains afterwards.
func TestRunRuntimePreflightWritesAtomically(t *testing.T) {
	path := filepath.Join(t.TempDir(), "runtime-health.json")
	a := &App{
		runtime: fakeRuntime{
			// Stubbed collector returns a healthy snapshot echoing exportDir.
			collectFn: func(exportDir string) (schema.RuntimeHealth, error) {
				return schema.RuntimeHealth{
					Status:      "OK",
					ExportDir:   exportDir,
					DriverReady: true,
					CUDAReady:   true,
				}, nil
			},
		},
	}

	// "file:" prefix directs the preflight output to an explicit path.
	got, err := a.RunRuntimePreflight("file:" + path)
	if err != nil {
		t.Fatalf("RunRuntimePreflight: %v", err)
	}
	if got != path {
		t.Fatalf("path=%q want %q", got, path)
	}
	if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
		t.Fatalf("tmp file should be absent after success, err=%v", err)
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read runtime file: %v", err)
	}
	var health schema.RuntimeHealth
	if err := json.Unmarshal(raw, &health); err != nil {
		t.Fatalf("json unmarshal: %v", err)
	}
	if health.Status != "OK" {
		t.Fatalf("status=%q want OK", health.Status)
	}
}
|
||||||
268
audit/internal/app/component_status_db.go
Normal file
268
audit/internal/app/component_status_db.go
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
package app
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"
)
|
||||||
|
|
||||||
|
// ComponentStatusDB is a persistent, append-only store of hardware component health records.
// Records are keyed by component identity strings (e.g. "pcie:0000:c8:00.0", "storage:nvme0n1").
// Once a component is marked Warning or Critical, subsequent OK entries do not downgrade it —
// the component stays at the highest observed severity until explicitly reset.
//
// All methods are safe for concurrent use; mu guards records and the backing file.
type ComponentStatusDB struct {
	path    string     // JSON file the DB is loaded from and saved to
	mu      sync.Mutex // guards records and serializes saves
	records map[string]*ComponentStatusRecord
}

// ComponentStatusRecord holds the current and historical health of one hardware component.
type ComponentStatusRecord struct {
	ComponentKey string `json:"component_key"`
	Status       string `json:"status"` // "OK", "Warning", "Critical", "Unknown"
	// LastCheckedAt advances on every observation; LastChangedAt only when
	// Status itself changes.
	LastCheckedAt time.Time `json:"last_checked_at"`
	LastChangedAt time.Time `json:"last_changed_at"`
	ErrorSummary  string    `json:"error_summary,omitempty"`
	// History records every observation ever made for this component.
	// NOTE(review): history grows without bound — confirm whether a cap or
	// rotation is needed for long-lived installs.
	History []ComponentStatusEntry `json:"history"`
}

// ComponentStatusEntry is one observation written to a component's history.
type ComponentStatusEntry struct {
	At     time.Time `json:"at"`
	Status string    `json:"status"`
	Source string    `json:"source"` // e.g. "sat:nvidia", "sat:memory", "watchdog:kmsg"
	Detail string    `json:"detail,omitempty"`
}
|
||||||
|
|
||||||
|
// OpenComponentStatusDB opens (or creates) the JSON status DB at path.
// The parent directory is created if missing; a missing file yields an empty DB.
func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) {
	db := &ComponentStatusDB{
		path:    path,
		records: make(map[string]*ComponentStatusRecord),
	}
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return nil, err
	}
	data, err := os.ReadFile(path)
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	if len(data) > 0 {
		var records []ComponentStatusRecord
		// Unmarshal errors are deliberately swallowed: a corrupt file starts
		// an empty DB. NOTE(review): the corrupt file will be overwritten on
		// the next save — confirm that losing prior history here is intended.
		if err := json.Unmarshal(data, &records); err == nil {
			for i := range records {
				db.records[records[i].ComponentKey] = &records[i]
			}
		}
	}
	return db, nil
}
|
||||||
|
|
||||||
|
// Record writes one observation for the given component key.
// source is a short label like "sat:nvidia" or "watchdog:kmsg".
// status is "OK", "Warning", "Critical", or "Unknown".
// OK never downgrades an existing Warning or Critical status.
// A nil receiver or blank key is a no-op.
func (db *ComponentStatusDB) Record(key, source, status, detail string) {
	if db == nil || strings.TrimSpace(key) == "" {
		return
	}
	db.mu.Lock()
	defer db.mu.Unlock()

	now := time.Now().UTC()
	rec, exists := db.records[key]
	if !exists {
		rec = &ComponentStatusRecord{ComponentKey: key}
		db.records[key] = rec
	}
	// Every observation bumps the check timestamp, even if status is unchanged.
	rec.LastCheckedAt = now

	entry := ComponentStatusEntry{At: now, Status: status, Source: source, Detail: detail}
	rec.History = append(rec.History, entry)

	// Status merge: OK never downgrades Warning/Critical.
	newSev := componentSeverity(status)
	curSev := componentSeverity(rec.Status)
	if newSev > curSev {
		rec.Status = status
		rec.LastChangedAt = now
		rec.ErrorSummary = detail
	} else if rec.Status == "" {
		// First-ever observation: adopt it even if severity ranks as 0.
		rec.Status = status
		rec.LastChangedAt = now
	}

	// Best-effort persistence: a failed save must not break the caller's
	// health-reporting flow. NOTE(review): consider at least logging this.
	_ = db.saveLocked()
}
|
||||||
|
|
||||||
|
// Get returns the current record for a component key.
|
||||||
|
func (db *ComponentStatusDB) Get(key string) (ComponentStatusRecord, bool) {
|
||||||
|
if db == nil {
|
||||||
|
return ComponentStatusRecord{}, false
|
||||||
|
}
|
||||||
|
db.mu.Lock()
|
||||||
|
defer db.mu.Unlock()
|
||||||
|
r, ok := db.records[key]
|
||||||
|
if !ok {
|
||||||
|
return ComponentStatusRecord{}, false
|
||||||
|
}
|
||||||
|
return *r, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// All returns a snapshot of all records.
|
||||||
|
func (db *ComponentStatusDB) All() []ComponentStatusRecord {
|
||||||
|
if db == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
db.mu.Lock()
|
||||||
|
defer db.mu.Unlock()
|
||||||
|
out := make([]ComponentStatusRecord, 0, len(db.records))
|
||||||
|
for _, r := range db.records {
|
||||||
|
out = append(out, *r)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func (db *ComponentStatusDB) saveLocked() error {
|
||||||
|
records := make([]ComponentStatusRecord, 0, len(db.records))
|
||||||
|
for _, r := range db.records {
|
||||||
|
records = append(records, *r)
|
||||||
|
}
|
||||||
|
data, err := json.MarshalIndent(records, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(db.path, data, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// componentSeverity ranks a status string numerically so that higher values
// win during merges. Unrecognized statuses rank lowest (0).
func componentSeverity(status string) int {
	trimmed := strings.TrimSpace(status)
	if trimmed == "Critical" {
		return 3
	}
	if trimmed == "Warning" {
		return 2
	}
	if trimmed == "OK" {
		return 1
	}
	return 0
}
|
||||||
|
|
||||||
|
// ApplySATResultToDB reads a SAT summary.txt from the run directory next to archivePath
// and writes component status records to db for the given SAT target.
// archivePath may be either a bare .tar.gz path or "Archive written to /path/foo.tar.gz".
// Missing or unparsable summaries are silently ignored (best-effort).
func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) {
	if db == nil || strings.TrimSpace(archivePath) == "" {
		return
	}
	archivePath = extractArchivePath(archivePath)
	if archivePath == "" {
		return
	}
	// The run directory is the archive path minus its .tar.gz suffix.
	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
	data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
	if err != nil {
		return
	}
	kv := parseSATKV(string(data))
	overall := strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
	if overall == "" {
		return
	}

	source := "sat:" + target
	dbStatus := satStatusToDBStatus(overall)

	// Map SAT target to component keys.
	switch target {
	case "nvidia", "nvidia-targeted-stress", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
		"nvidia-interconnect", "nvidia-bandwidth", "amd", "nvidia-stress",
		"amd-stress", "amd-mem", "amd-bandwidth":
		// All GPU-flavored targets roll up under a per-target pcie:gpu key.
		db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall)
	case "memory", "memory-stress", "sat-stress":
		db.Record("memory:all", source, dbStatus, target+" SAT: "+overall)
	case "cpu", "platform-stress":
		db.Record("cpu:all", source, dbStatus, target+" SAT: "+overall)
	case "storage":
		// Try to record per-device if available in summary.
		// Keys shaped like "<dev>_..._status" yield one record per device;
		// otherwise fall back to a single storage:all record.
		recordedAny := false
		for key, val := range kv {
			if !strings.HasSuffix(key, "_status") || key == "overall_status" {
				continue
			}
			base := strings.TrimSuffix(key, "_status")
			idx := strings.Index(base, "_")
			if idx <= 0 {
				continue
			}
			// Device name is the portion before the first underscore.
			devName := base[:idx]
			devStatus := satStatusToDBStatus(strings.ToUpper(strings.TrimSpace(val)))
			db.Record("storage:"+devName, source, devStatus, "storage SAT: "+val)
			recordedAny = true
		}
		if !recordedAny {
			db.Record("storage:all", source, dbStatus, "storage SAT: "+overall)
		}
	}
	// Unrecognized targets fall through without recording anything.
}
|
||||||
|
|
||||||
|
func satStatusToDBStatus(overall string) string {
|
||||||
|
switch overall {
|
||||||
|
case "OK":
|
||||||
|
return "OK"
|
||||||
|
case "FAILED":
|
||||||
|
return "Warning"
|
||||||
|
case "PARTIAL", "UNSUPPORTED":
|
||||||
|
return "Unknown"
|
||||||
|
default:
|
||||||
|
return "Unknown"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractArchivePath extracts a bare .tar.gz path from a string that may be
|
||||||
|
// "Archive written to /path/foo.tar.gz" or already a bare path.
|
||||||
|
func ExtractArchivePath(s string) string {
|
||||||
|
return extractArchivePath(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadSATOverallStatus reads the overall_status value from the summary.txt
|
||||||
|
// file located in the run directory alongside archivePath.
|
||||||
|
// Returns "" if the file cannot be read.
|
||||||
|
func ReadSATOverallStatus(archivePath string) string {
|
||||||
|
if strings.TrimSpace(archivePath) == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
|
||||||
|
data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
kv := parseSATKV(string(data))
|
||||||
|
return strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractArchivePath(s string) string {
|
||||||
|
s = strings.TrimSpace(s)
|
||||||
|
if strings.HasSuffix(s, ".tar.gz") {
|
||||||
|
parts := strings.Fields(s)
|
||||||
|
if len(parts) > 0 {
|
||||||
|
return parts[len(parts)-1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseSATKV(raw string) map[string]string {
|
||||||
|
kv := make(map[string]string)
|
||||||
|
for _, line := range strings.Split(raw, "\n") {
|
||||||
|
k, v, ok := strings.Cut(strings.TrimSpace(line), "=")
|
||||||
|
if ok {
|
||||||
|
kv[strings.TrimSpace(k)] = strings.TrimSpace(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return kv
|
||||||
|
}
|
||||||
@@ -9,7 +9,7 @@ import (
|
|||||||
"bee/audit/internal/schema"
|
"bee/audit/internal/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string) {
|
func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string, db *ComponentStatusDB) {
|
||||||
if snap == nil || strings.TrimSpace(baseDir) == "" {
|
if snap == nil || strings.TrimSpace(baseDir) == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -28,6 +28,8 @@ func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string) {
|
|||||||
if summary, ok := loadLatestSATSummary(baseDir, "storage-"); ok {
|
if summary, ok := loadLatestSATSummary(baseDir, "storage-"); ok {
|
||||||
applyStorageSAT(snap.Storage, summary)
|
applyStorageSAT(snap.Storage, summary)
|
||||||
}
|
}
|
||||||
|
// Apply unified component status DB — overlaid last so it can only upgrade severity.
|
||||||
|
applyComponentStatusDB(snap, db)
|
||||||
}
|
}
|
||||||
|
|
||||||
type satSummary struct {
|
type satSummary struct {
|
||||||
@@ -206,6 +208,86 @@ func matchesGPUVendor(dev schema.HardwarePCIeDevice, vendor string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func applyComponentStatusDB(snap *schema.HardwareSnapshot, db *ComponentStatusDB) {
|
||||||
|
if snap == nil || db == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, rec := range db.All() {
|
||||||
|
key := rec.ComponentKey
|
||||||
|
status := dbStatusToSATStatus(rec.Status)
|
||||||
|
if status == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
detail := rec.ErrorSummary
|
||||||
|
ts := rec.LastChangedAt.UTC().Format("2006-01-02T15:04:05Z")
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case strings.HasPrefix(key, "pcie:"):
|
||||||
|
bdf := strings.TrimPrefix(key, "pcie:")
|
||||||
|
bdf = strings.TrimPrefix(bdf, "gpu:") // strip sub-type if present
|
||||||
|
// bdf may be empty (e.g. "pcie:gpu:nvidia") — skip BDF matching
|
||||||
|
if sanitizeBDFForLookup(bdf) == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
normalized := sanitizeBDFForLookup(bdf)
|
||||||
|
for i := range snap.PCIeDevices {
|
||||||
|
if snap.PCIeDevices[i].BDF == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if sanitizeBDFForLookup(*snap.PCIeDevices[i].BDF) == normalized {
|
||||||
|
mergeComponentStatus(&snap.PCIeDevices[i].HardwareComponentStatus, ts, status, detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case strings.HasPrefix(key, "storage:"):
|
||||||
|
devName := strings.TrimPrefix(key, "storage:")
|
||||||
|
if devName == "all" {
|
||||||
|
for i := range snap.Storage {
|
||||||
|
mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for i := range snap.Storage {
|
||||||
|
linuxDev, _ := snap.Storage[i].Telemetry["linux_device"].(string)
|
||||||
|
if filepath.Base(strings.TrimSpace(linuxDev)) == devName {
|
||||||
|
mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case strings.HasPrefix(key, "memory:"):
|
||||||
|
for i := range snap.Memory {
|
||||||
|
mergeComponentStatus(&snap.Memory[i].HardwareComponentStatus, ts, status, detail)
|
||||||
|
}
|
||||||
|
case strings.HasPrefix(key, "cpu:"):
|
||||||
|
for i := range snap.CPUs {
|
||||||
|
mergeComponentStatus(&snap.CPUs[i].HardwareComponentStatus, ts, status, detail)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// dbStatusToSATStatus converts ComponentStatusDB status strings to the format
|
||||||
|
// expected by mergeComponentStatus (which uses "OK", "Warning", "Critical", "Unknown").
|
||||||
|
func dbStatusToSATStatus(s string) string {
|
||||||
|
switch strings.TrimSpace(s) {
|
||||||
|
case "OK", "Warning", "Critical", "Unknown":
|
||||||
|
return s
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sanitizeBDFForLookup normalises a PCIe BDF address to a canonical lower-case form
|
||||||
|
// suitable for comparison. "c8:00.0" → "0000:c8:00.0"; already-full BDFs are left as-is.
|
||||||
|
func sanitizeBDFForLookup(bdf string) string {
|
||||||
|
bdf = strings.ToLower(strings.TrimSpace(bdf))
|
||||||
|
if bdf == "" || bdf == "gpu" || strings.ContainsAny(bdf, " \t") {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if strings.Count(bdf, ":") == 1 {
|
||||||
|
bdf = "0000:" + bdf
|
||||||
|
}
|
||||||
|
return bdf
|
||||||
|
}
|
||||||
|
|
||||||
func ptrString(v *string) string {
|
func ptrString(v *string) string {
|
||||||
if v == nil {
|
if v == nil {
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func TestApplyLatestSATStatusesMarksStorageByDevice(t *testing.T) {
|
|||||||
usb := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/sda"}}
|
usb := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/sda"}}
|
||||||
snap := schema.HardwareSnapshot{Storage: []schema.HardwareStorage{nvme, usb}}
|
snap := schema.HardwareSnapshot{Storage: []schema.HardwareStorage{nvme, usb}}
|
||||||
|
|
||||||
applyLatestSATStatuses(&snap, baseDir)
|
applyLatestSATStatuses(&snap, baseDir, nil)
|
||||||
|
|
||||||
if snap.Storage[0].Status == nil || *snap.Storage[0].Status != "OK" {
|
if snap.Storage[0].Status == nil || *snap.Storage[0].Status != "OK" {
|
||||||
t.Fatalf("nvme status=%v want OK", snap.Storage[0].Status)
|
t.Fatalf("nvme status=%v want OK", snap.Storage[0].Status)
|
||||||
@@ -53,7 +53,7 @@ func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
|
|||||||
}},
|
}},
|
||||||
}
|
}
|
||||||
|
|
||||||
applyLatestSATStatuses(&snap, baseDir)
|
applyLatestSATStatuses(&snap, baseDir, nil)
|
||||||
|
|
||||||
if snap.PCIeDevices[0].Status == nil || *snap.PCIeDevices[0].Status != "Critical" {
|
if snap.PCIeDevices[0].Status == nil || *snap.PCIeDevices[0].Status != "Critical" {
|
||||||
t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
|
t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ var supportBundleServices = []string{
|
|||||||
"bee-network.service",
|
"bee-network.service",
|
||||||
"bee-nvidia.service",
|
"bee-nvidia.service",
|
||||||
"bee-preflight.service",
|
"bee-preflight.service",
|
||||||
|
"bee-selfheal.service",
|
||||||
|
"bee-selfheal.timer",
|
||||||
"bee-sshsetup.service",
|
"bee-sshsetup.service",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,15 +29,118 @@ var supportBundleCommands = []struct {
|
|||||||
cmd []string
|
cmd []string
|
||||||
}{
|
}{
|
||||||
{name: "system/uname.txt", cmd: []string{"uname", "-a"}},
|
{name: "system/uname.txt", cmd: []string{"uname", "-a"}},
|
||||||
|
{name: "system/cmdline.txt", cmd: []string{"cat", "/proc/cmdline"}},
|
||||||
{name: "system/lsmod.txt", cmd: []string{"lsmod"}},
|
{name: "system/lsmod.txt", cmd: []string{"lsmod"}},
|
||||||
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
|
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
|
||||||
|
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
|
||||||
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
|
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
|
||||||
|
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
|
||||||
|
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
|
||||||
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
|
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
|
||||||
{name: "system/mount.txt", cmd: []string{"mount"}},
|
{name: "system/mount.txt", cmd: []string{"mount"}},
|
||||||
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
|
{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
|
||||||
{name: "system/dmesg-tail.txt", cmd: []string{"sh", "-c", "dmesg | tail -n 200"}},
|
{name: "system/dmesg.txt", cmd: []string{"dmesg"}},
|
||||||
|
{name: "system/nvidia-smi-q.txt", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
|
||||||
|
for d in /sys/bus/pci/devices/*/; do
|
||||||
|
vendor=$(cat "$d/vendor" 2>/dev/null)
|
||||||
|
[ "$vendor" = "0x10de" ] || continue
|
||||||
|
dev=$(basename "$d")
|
||||||
|
echo "=== $dev ==="
|
||||||
|
for f in current_link_speed current_link_width max_link_speed max_link_width; do
|
||||||
|
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool -i "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v ethtool >/dev/null 2>&1; then
|
||||||
|
echo "ethtool not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/class/net/*; do
|
||||||
|
[ -e "$path" ] || continue
|
||||||
|
iface=$(basename "$path")
|
||||||
|
[ "$iface" = "lo" ] && continue
|
||||||
|
found=1
|
||||||
|
echo "=== $iface ==="
|
||||||
|
ethtool -m "$iface" 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no interfaces found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
|
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
|
||||||
|
if ! command -v mstflint >/dev/null 2>&1; then
|
||||||
|
echo "mstflint not found"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
found=0
|
||||||
|
for path in /sys/bus/pci/devices/*; do
|
||||||
|
[ -e "$path/vendor" ] || continue
|
||||||
|
vendor=$(cat "$path/vendor" 2>/dev/null)
|
||||||
|
[ "$vendor" = "0x15b3" ] || continue
|
||||||
|
bdf=$(basename "$path")
|
||||||
|
found=1
|
||||||
|
echo "=== $bdf ==="
|
||||||
|
mstflint -d "$bdf" q 2>&1 || true
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
if [ "$found" -eq 0 ]; then
|
||||||
|
echo "no Mellanox/NVIDIA networking devices found"
|
||||||
|
fi
|
||||||
|
`}},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var supportBundleOptionalFiles = []struct {
|
||||||
|
name string
|
||||||
|
src string
|
||||||
|
}{
|
||||||
|
{name: "system/kern.log", src: "/var/log/kern.log"},
|
||||||
|
{name: "system/syslog.txt", src: "/var/log/syslog"},
|
||||||
|
}
|
||||||
|
|
||||||
|
const supportBundleGlob = "bee-support-*.tar.gz"
|
||||||
|
|
||||||
func BuildSupportBundle(exportDir string) (string, error) {
|
func BuildSupportBundle(exportDir string) (string, error) {
|
||||||
exportDir = strings.TrimSpace(exportDir)
|
exportDir = strings.TrimSpace(exportDir)
|
||||||
if exportDir == "" {
|
if exportDir == "" {
|
||||||
@@ -75,6 +180,9 @@ func BuildSupportBundle(exportDir string) (string, error) {
|
|||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for _, item := range supportBundleOptionalFiles {
|
||||||
|
_ = copyOptionalFile(item.src, filepath.Join(stageRoot, item.name))
|
||||||
|
}
|
||||||
if err := writeManifest(filepath.Join(stageRoot, "manifest.txt"), exportDir, stageRoot); err != nil {
|
if err := writeManifest(filepath.Join(stageRoot, "manifest.txt"), exportDir, stageRoot); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -86,34 +194,64 @@ func BuildSupportBundle(exportDir string) (string, error) {
|
|||||||
return archivePath, nil
|
return archivePath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func LatestSupportBundlePath() (string, error) {
|
||||||
|
return latestSupportBundlePath(os.TempDir())
|
||||||
|
}
|
||||||
|
|
||||||
func cleanupOldSupportBundles(dir string) error {
|
func cleanupOldSupportBundles(dir string) error {
|
||||||
matches, err := filepath.Glob(filepath.Join(dir, "bee-support-*.tar.gz"))
|
matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
type entry struct {
|
entries := supportBundleEntries(matches)
|
||||||
path string
|
for path, mod := range entries {
|
||||||
mod time.Time
|
if time.Since(mod) > 24*time.Hour {
|
||||||
|
_ = os.Remove(path)
|
||||||
|
delete(entries, path)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
list := make([]entry, 0, len(matches))
|
ordered := orderSupportBundles(entries)
|
||||||
|
if len(ordered) > 3 {
|
||||||
|
for _, old := range ordered[3:] {
|
||||||
|
_ = os.Remove(old)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func latestSupportBundlePath(dir string) (string, error) {
|
||||||
|
matches, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
ordered := orderSupportBundles(supportBundleEntries(matches))
|
||||||
|
if len(ordered) == 0 {
|
||||||
|
return "", os.ErrNotExist
|
||||||
|
}
|
||||||
|
return ordered[0], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func supportBundleEntries(matches []string) map[string]time.Time {
|
||||||
|
entries := make(map[string]time.Time, len(matches))
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
info, err := os.Stat(match)
|
info, err := os.Stat(match)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if time.Since(info.ModTime()) > 24*time.Hour {
|
entries[match] = info.ModTime()
|
||||||
_ = os.Remove(match)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
list = append(list, entry{path: match, mod: info.ModTime()})
|
|
||||||
}
|
}
|
||||||
sort.Slice(list, func(i, j int) bool { return list[i].mod.After(list[j].mod) })
|
return entries
|
||||||
if len(list) > 3 {
|
}
|
||||||
for _, old := range list[3:] {
|
|
||||||
_ = os.Remove(old.path)
|
func orderSupportBundles(entries map[string]time.Time) []string {
|
||||||
}
|
ordered := make([]string, 0, len(entries))
|
||||||
|
for path := range entries {
|
||||||
|
ordered = append(ordered, path)
|
||||||
}
|
}
|
||||||
return nil
|
sort.Slice(ordered, func(i, j int) bool {
|
||||||
|
return entries[ordered[i]].After(entries[ordered[j]])
|
||||||
|
})
|
||||||
|
return ordered
|
||||||
}
|
}
|
||||||
|
|
||||||
func writeJournalDump(dst string) error {
|
func writeJournalDump(dst string) error {
|
||||||
@@ -152,6 +290,24 @@ func writeCommandOutput(dst string, cmd []string) error {
|
|||||||
return os.WriteFile(dst, raw, 0644)
|
return os.WriteFile(dst, raw, 0644)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func copyOptionalFile(src, dst string) error {
|
||||||
|
in, err := os.Open(src)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer in.Close()
|
||||||
|
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
out, err := os.Create(dst)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer out.Close()
|
||||||
|
_, err = io.Copy(out, in)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
func writeManifest(dst, exportDir, stageRoot string) error {
|
func writeManifest(dst, exportDir, stageRoot string) error {
|
||||||
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
|
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -215,7 +371,7 @@ func copyDirContents(srcDir, dstDir string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
||||||
return copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
|
if err := copyDirContentsFiltered(srcDir, dstDir, func(rel string, info os.FileInfo) bool {
|
||||||
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
|
cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
|
||||||
if cleanRel == "" {
|
if cleanRel == "" {
|
||||||
return true
|
return true
|
||||||
@@ -227,7 +383,25 @@ func copyExportDirForSupportBundle(srcDir, dstDir string) error {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
})
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return normalizeSupportBundleAuditJSON(filepath.Join(dstDir, "bee-audit.json"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeSupportBundleAuditJSON(path string) error {
|
||||||
|
data, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
normalized, err := ApplySATOverlay(data)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return os.WriteFile(path, normalized, 0644)
|
||||||
}
|
}
|
||||||
|
|
||||||
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
|
func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
|
||||||
|
|||||||
@@ -1,10 +1,18 @@
|
|||||||
package collector
|
package collector
|
||||||
|
|
||||||
import "bee/audit/internal/schema"
|
import (
|
||||||
|
"bee/audit/internal/schema"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func NormalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
||||||
|
finalizeSnapshot(snap, collectedAt)
|
||||||
|
}
|
||||||
|
|
||||||
func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
|
||||||
snap.Memory = filterMemory(snap.Memory)
|
snap.Memory = filterMemory(snap.Memory)
|
||||||
snap.Storage = filterStorage(snap.Storage)
|
snap.Storage = filterStorage(snap.Storage)
|
||||||
|
snap.PCIeDevices = filterPCIe(snap.PCIeDevices)
|
||||||
snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
|
snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
|
||||||
|
|
||||||
setComponentStatusMetadata(snap, collectedAt)
|
setComponentStatusMetadata(snap, collectedAt)
|
||||||
@@ -33,11 +41,25 @@ func filterStorage(disks []schema.HardwareStorage) []schema.HardwareStorage {
|
|||||||
if disk.SerialNumber == nil || *disk.SerialNumber == "" {
|
if disk.SerialNumber == nil || *disk.SerialNumber == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if disk.Model != nil && isVirtualHDiskModel(*disk.Model) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
out = append(out, disk)
|
out = append(out, disk)
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func filterPCIe(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
|
||||||
|
out := make([]schema.HardwarePCIeDevice, 0, len(devs))
|
||||||
|
for _, dev := range devs {
|
||||||
|
if dev.DeviceClass != nil && strings.Contains(strings.ToLower(strings.TrimSpace(*dev.DeviceClass)), "co-processor") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out = append(out, dev)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
|
func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
|
||||||
out := make([]schema.HardwarePowerSupply, 0, len(psus))
|
out := make([]schema.HardwarePowerSupply, 0, len(psus))
|
||||||
for _, psu := range psus {
|
for _, psu := range psus {
|
||||||
|
|||||||
@@ -10,6 +10,10 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
present := true
|
present := true
|
||||||
status := statusOK
|
status := statusOK
|
||||||
serial := "SN-1"
|
serial := "SN-1"
|
||||||
|
virtualModel := "Virtual HDisk1"
|
||||||
|
realModel := "PASCARI"
|
||||||
|
coProcessorClass := "Co-processor"
|
||||||
|
gpuClass := "VideoController"
|
||||||
|
|
||||||
snap := schema.HardwareSnapshot{
|
snap := schema.HardwareSnapshot{
|
||||||
Memory: []schema.HardwareMemory{
|
Memory: []schema.HardwareMemory{
|
||||||
@@ -17,9 +21,15 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
},
|
},
|
||||||
Storage: []schema.HardwareStorage{
|
Storage: []schema.HardwareStorage{
|
||||||
|
{Model: &virtualModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
{Model: &realModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
},
|
},
|
||||||
|
PCIeDevices: []schema.HardwarePCIeDevice{
|
||||||
|
{DeviceClass: &coProcessorClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
{DeviceClass: &gpuClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
|
},
|
||||||
PowerSupplies: []schema.HardwarePowerSupply{
|
PowerSupplies: []schema.HardwarePowerSupply{
|
||||||
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
|
||||||
@@ -31,9 +41,12 @@ func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
|
|||||||
if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
|
if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
|
t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
|
||||||
}
|
}
|
||||||
if len(snap.Storage) != 1 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
|
if len(snap.Storage) != 2 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
|
t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
|
||||||
}
|
}
|
||||||
|
if len(snap.PCIeDevices) != 1 || snap.PCIeDevices[0].DeviceClass == nil || *snap.PCIeDevices[0].DeviceClass != gpuClass {
|
||||||
|
t.Fatalf("pcie finalize mismatch: %+v", snap.PCIeDevices)
|
||||||
|
}
|
||||||
if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
|
if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
|
||||||
t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
|
t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,18 +2,21 @@ package collector
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bee/audit/internal/schema"
|
"bee/audit/internal/schema"
|
||||||
|
"context"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const mellanoxVendorID = 0x15b3
|
const mellanoxVendorID = 0x15b3
|
||||||
|
const nicProbeTimeout = 2 * time.Second
|
||||||
|
|
||||||
var (
|
var (
|
||||||
mstflintQuery = func(bdf string) (string, error) {
|
mstflintQuery = func(bdf string) (string, error) {
|
||||||
out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -21,7 +24,7 @@ var (
|
|||||||
}
|
}
|
||||||
|
|
||||||
ethtoolInfoQuery = func(iface string) (string, error) {
|
ethtoolInfoQuery = func(iface string) (string, error) {
|
||||||
out, err := exec.Command("ethtool", "-i", iface).Output()
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -29,6 +32,14 @@ var (
|
|||||||
}
|
}
|
||||||
|
|
||||||
netIfacesByBDF = listNetIfacesByBDF
|
netIfacesByBDF = listNetIfacesByBDF
|
||||||
|
readNetCarrierFile = func(iface string) (string, error) {
|
||||||
|
path := filepath.Join("/sys/class/net", iface, "carrier")
|
||||||
|
raw, err := os.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(string(raw)), nil
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
|
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
|
||||||
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
|
|||||||
}
|
}
|
||||||
return ifaces
|
return ifaces
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||||
|
defer cancel()
|
||||||
|
return exec.CommandContext(ctx, name, args...).Output()
|
||||||
|
}
|
||||||
|
|
||||||
|
func interfaceHasCarrier(iface string) bool {
|
||||||
|
raw, err := readNetCarrierFile(iface)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return strings.TrimSpace(raw) == "1"
|
||||||
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ import (
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
ethtoolModuleQuery = func(iface string) (string, error) {
|
ethtoolModuleQuery = func(iface string) (string, error) {
|
||||||
out, err := raidToolQuery("ethtool", "-m", iface)
|
out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if out, err := ethtoolModuleQuery(iface); err == nil {
|
if interfaceHasCarrier(iface) {
|
||||||
if injectSFPDOMTelemetry(&devs[i], out) {
|
if out, err := ethtoolModuleQuery(iface); err == nil {
|
||||||
enriched++
|
if injectSFPDOMTelemetry(&devs[i], out) {
|
||||||
continue
|
enriched++
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
|
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
|
||||||
|
|||||||
@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
origReadMAC := readNetAddressFile
|
origReadMAC := readNetAddressFile
|
||||||
origEth := ethtoolInfoQuery
|
origEth := ethtoolInfoQuery
|
||||||
origModule := ethtoolModuleQuery
|
origModule := ethtoolModuleQuery
|
||||||
|
origCarrier := readNetCarrierFile
|
||||||
t.Cleanup(func() {
|
t.Cleanup(func() {
|
||||||
queryPCILSPCIDetail = origDetail
|
queryPCILSPCIDetail = origDetail
|
||||||
readPCIVPDFile = origVPD
|
readPCIVPDFile = origVPD
|
||||||
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
readNetAddressFile = origReadMAC
|
readNetAddressFile = origReadMAC
|
||||||
ethtoolInfoQuery = origEth
|
ethtoolInfoQuery = origEth
|
||||||
ethtoolModuleQuery = origModule
|
ethtoolModuleQuery = origModule
|
||||||
|
readNetCarrierFile = origCarrier
|
||||||
})
|
})
|
||||||
|
|
||||||
queryPCILSPCIDetail = func(bdf string) (string, error) {
|
queryPCILSPCIDetail = func(bdf string) (string, error) {
|
||||||
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return "aa:bb:cc:dd:ee:ff", nil
|
return "aa:bb:cc:dd:ee:ff", nil
|
||||||
}
|
}
|
||||||
|
readNetCarrierFile = func(string) (string, error) { return "1", nil }
|
||||||
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
||||||
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
|
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
|
||||||
|
|
||||||
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
|
||||||
|
origIfaces := netIfacesByBDF
|
||||||
|
origReadMAC := readNetAddressFile
|
||||||
|
origEth := ethtoolInfoQuery
|
||||||
|
origModule := ethtoolModuleQuery
|
||||||
|
origCarrier := readNetCarrierFile
|
||||||
|
t.Cleanup(func() {
|
||||||
|
netIfacesByBDF = origIfaces
|
||||||
|
readNetAddressFile = origReadMAC
|
||||||
|
ethtoolInfoQuery = origEth
|
||||||
|
ethtoolModuleQuery = origModule
|
||||||
|
readNetCarrierFile = origCarrier
|
||||||
|
})
|
||||||
|
|
||||||
|
netIfacesByBDF = func(string) []string { return []string{"eth0"} }
|
||||||
|
readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
|
||||||
|
readNetCarrierFile = func(string) (string, error) { return "0", nil }
|
||||||
|
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
|
||||||
|
ethtoolModuleQuery = func(string) (string, error) {
|
||||||
|
t.Fatal("ethtool -m should not be called without carrier")
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
||||||
|
class := "EthernetController"
|
||||||
|
bdf := "0000:18:00.0"
|
||||||
|
devs := []schema.HardwarePCIeDevice{{
|
||||||
|
DeviceClass: &class,
|
||||||
|
BDF: &bdf,
|
||||||
|
}}
|
||||||
|
|
||||||
|
out := enrichPCIeWithNICTelemetry(devs)
|
||||||
|
if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
|
||||||
|
t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestDBMValue(t *testing.T) {
|
func TestDBMValue(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
in string
|
in string
|
||||||
|
|||||||
@@ -13,14 +13,18 @@ import (
|
|||||||
const nvidiaVendorID = 0x10de
|
const nvidiaVendorID = 0x10de
|
||||||
|
|
||||||
type nvidiaGPUInfo struct {
|
type nvidiaGPUInfo struct {
|
||||||
BDF string
|
BDF string
|
||||||
Serial string
|
Serial string
|
||||||
VBIOS string
|
VBIOS string
|
||||||
TemperatureC *float64
|
TemperatureC *float64
|
||||||
PowerW *float64
|
PowerW *float64
|
||||||
ECCUncorrected *int64
|
ECCUncorrected *int64
|
||||||
ECCCorrected *int64
|
ECCCorrected *int64
|
||||||
HWSlowdown *bool
|
HWSlowdown *bool
|
||||||
|
PCIeLinkGenCurrent *int
|
||||||
|
PCIeLinkGenMax *int
|
||||||
|
PCIeLinkWidthCur *int
|
||||||
|
PCIeLinkWidthMax *int
|
||||||
}
|
}
|
||||||
|
|
||||||
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
|
// enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
|
||||||
@@ -94,7 +98,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
|
|||||||
func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
|
func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
|
||||||
out, err := exec.Command(
|
out, err := exec.Command(
|
||||||
"nvidia-smi",
|
"nvidia-smi",
|
||||||
"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown",
|
"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
|
||||||
"--format=csv,noheader,nounits",
|
"--format=csv,noheader,nounits",
|
||||||
).Output()
|
).Output()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -118,8 +122,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
|
|||||||
if len(rec) == 0 {
|
if len(rec) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if len(rec) < 9 {
|
if len(rec) < 13 {
|
||||||
return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 9", len(rec))
|
return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 13", len(rec))
|
||||||
}
|
}
|
||||||
|
|
||||||
bdf := normalizePCIeBDF(rec[1])
|
bdf := normalizePCIeBDF(rec[1])
|
||||||
@@ -128,14 +132,18 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
info := nvidiaGPUInfo{
|
info := nvidiaGPUInfo{
|
||||||
BDF: bdf,
|
BDF: bdf,
|
||||||
Serial: strings.TrimSpace(rec[2]),
|
Serial: strings.TrimSpace(rec[2]),
|
||||||
VBIOS: strings.TrimSpace(rec[3]),
|
VBIOS: strings.TrimSpace(rec[3]),
|
||||||
TemperatureC: parseMaybeFloat(rec[4]),
|
TemperatureC: parseMaybeFloat(rec[4]),
|
||||||
PowerW: parseMaybeFloat(rec[5]),
|
PowerW: parseMaybeFloat(rec[5]),
|
||||||
ECCUncorrected: parseMaybeInt64(rec[6]),
|
ECCUncorrected: parseMaybeInt64(rec[6]),
|
||||||
ECCCorrected: parseMaybeInt64(rec[7]),
|
ECCCorrected: parseMaybeInt64(rec[7]),
|
||||||
HWSlowdown: parseMaybeBool(rec[8]),
|
HWSlowdown: parseMaybeBool(rec[8]),
|
||||||
|
PCIeLinkGenCurrent: parseMaybeInt(rec[9]),
|
||||||
|
PCIeLinkGenMax: parseMaybeInt(rec[10]),
|
||||||
|
PCIeLinkWidthCur: parseMaybeInt(rec[11]),
|
||||||
|
PCIeLinkWidthMax: parseMaybeInt(rec[12]),
|
||||||
}
|
}
|
||||||
result[bdf] = info
|
result[bdf] = info
|
||||||
}
|
}
|
||||||
@@ -167,6 +175,22 @@ func parseMaybeInt64(v string) *int64 {
|
|||||||
return &n
|
return &n
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseMaybeInt(v string) *int {
|
||||||
|
v = strings.TrimSpace(v)
|
||||||
|
if v == "" || strings.EqualFold(v, "n/a") || strings.EqualFold(v, "not supported") || strings.EqualFold(v, "[not supported]") {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
n, err := strconv.Atoi(v)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &n
|
||||||
|
}
|
||||||
|
|
||||||
|
func pcieLinkGenLabel(gen int) string {
|
||||||
|
return fmt.Sprintf("Gen%d", gen)
|
||||||
|
}
|
||||||
|
|
||||||
func parseMaybeBool(v string) *bool {
|
func parseMaybeBool(v string) *bool {
|
||||||
v = strings.TrimSpace(strings.ToLower(v))
|
v = strings.TrimSpace(strings.ToLower(v))
|
||||||
switch v {
|
switch v {
|
||||||
@@ -231,4 +255,22 @@ func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
|
|||||||
if info.HWSlowdown != nil {
|
if info.HWSlowdown != nil {
|
||||||
dev.HWSlowdown = info.HWSlowdown
|
dev.HWSlowdown = info.HWSlowdown
|
||||||
}
|
}
|
||||||
|
// Override PCIe link speed/width with nvidia-smi driver values.
|
||||||
|
// sysfs current_link_speed reflects the instantaneous physical link state and
|
||||||
|
// can show Gen1 when the GPU is idle due to ASPM power management. The driver
|
||||||
|
// knows the negotiated speed regardless of the current power state.
|
||||||
|
if info.PCIeLinkGenCurrent != nil {
|
||||||
|
s := pcieLinkGenLabel(*info.PCIeLinkGenCurrent)
|
||||||
|
dev.LinkSpeed = &s
|
||||||
|
}
|
||||||
|
if info.PCIeLinkGenMax != nil {
|
||||||
|
s := pcieLinkGenLabel(*info.PCIeLinkGenMax)
|
||||||
|
dev.MaxLinkSpeed = &s
|
||||||
|
}
|
||||||
|
if info.PCIeLinkWidthCur != nil {
|
||||||
|
dev.LinkWidth = info.PCIeLinkWidthCur
|
||||||
|
}
|
||||||
|
if info.PCIeLinkWidthMax != nil {
|
||||||
|
dev.MaxLinkWidth = info.PCIeLinkWidthMax
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestParseNVIDIASMIQuery(t *testing.T) {
|
func TestParseNVIDIASMIQuery(t *testing.T) {
|
||||||
raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active\n"
|
raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
|
||||||
byBDF, err := parseNVIDIASMIQuery(raw)
|
byBDF, err := parseNVIDIASMIQuery(raw)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("parse failed: %v", err)
|
t.Fatalf("parse failed: %v", err)
|
||||||
@@ -28,6 +28,12 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
|
|||||||
if gpu.HWSlowdown == nil || *gpu.HWSlowdown {
|
if gpu.HWSlowdown == nil || *gpu.HWSlowdown {
|
||||||
t.Fatalf("hw slowdown: got %v, want false", gpu.HWSlowdown)
|
t.Fatalf("hw slowdown: got %v, want false", gpu.HWSlowdown)
|
||||||
}
|
}
|
||||||
|
if gpu.PCIeLinkGenCurrent == nil || *gpu.PCIeLinkGenCurrent != 4 {
|
||||||
|
t.Fatalf("pcie link gen current: got %v, want 4", gpu.PCIeLinkGenCurrent)
|
||||||
|
}
|
||||||
|
if gpu.PCIeLinkGenMax == nil || *gpu.PCIeLinkGenMax != 4 {
|
||||||
|
t.Fatalf("pcie link gen max: got %v, want 4", gpu.PCIeLinkGenMax)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNormalizePCIeBDF(t *testing.T) {
|
func TestNormalizePCIeBDF(t *testing.T) {
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
|
|||||||
"host bridge",
|
"host bridge",
|
||||||
"isa bridge",
|
"isa bridge",
|
||||||
"pci bridge",
|
"pci bridge",
|
||||||
|
"co-processor",
|
||||||
"performance counter",
|
"performance counter",
|
||||||
"performance counters",
|
"performance counters",
|
||||||
"ram memory",
|
"ram memory",
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
|
|||||||
{name: "audio", class: "Audio device", want: false},
|
{name: "audio", class: "Audio device", want: false},
|
||||||
{name: "host bridge", class: "Host bridge", want: false},
|
{name: "host bridge", class: "Host bridge", want: false},
|
||||||
{name: "pci bridge", class: "PCI bridge", want: false},
|
{name: "pci bridge", class: "PCI bridge", want: false},
|
||||||
|
{name: "co-processor", class: "Co-processor", want: false},
|
||||||
{name: "smbus", class: "SMBus", want: false},
|
{name: "smbus", class: "SMBus", want: false},
|
||||||
{name: "perf", class: "Performance counters", want: false},
|
{name: "perf", class: "Performance counters", want: false},
|
||||||
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
|
{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
|
||||||
@@ -76,6 +77,20 @@ func TestParseLspci_filtersAMDChipsetNoise(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseLspci_filtersCoProcessors(t *testing.T) {
|
||||||
|
input := "" +
|
||||||
|
"Slot:\t0000:01:00.0\nClass:\tCo-processor\nVendor:\tIntel Corporation\nDevice:\t402xx Series QAT\n\n" +
|
||||||
|
"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
||||||
|
|
||||||
|
devs := parseLspci(input)
|
||||||
|
if len(devs) != 1 {
|
||||||
|
t.Fatalf("expected 1 remaining device, got %d", len(devs))
|
||||||
|
}
|
||||||
|
if devs[0].Model == nil || *devs[0].Model != "H100" {
|
||||||
|
t.Fatalf("unexpected remaining device: %+v", devs[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
|
func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
|
||||||
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
input := "Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
|
||||||
|
|
||||||
|
|||||||
@@ -77,11 +77,28 @@ func discoverStorageDevices() []lsblkDevice {
|
|||||||
if dev.Type != "disk" {
|
if dev.Type != "disk" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if isVirtualBMCDisk(dev) {
|
||||||
|
slog.Debug("storage: skipping BMC virtual disk", "name", dev.Name, "model", dev.Model)
|
||||||
|
continue
|
||||||
|
}
|
||||||
disks = append(disks, dev)
|
disks = append(disks, dev)
|
||||||
}
|
}
|
||||||
return disks
|
return disks
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isVirtualBMCDisk returns true for BMC/IPMI virtual USB mass storage devices
|
||||||
|
// that appear as disks but are not real hardware (e.g. iDRAC Virtual HDisk*).
|
||||||
|
// These have zero reported size, a generic fake serial, and a model name that
|
||||||
|
// starts with "Virtual HDisk".
|
||||||
|
func isVirtualBMCDisk(dev lsblkDevice) bool {
|
||||||
|
return isVirtualHDiskModel(dev.Model)
|
||||||
|
}
|
||||||
|
|
||||||
|
func isVirtualHDiskModel(model string) bool {
|
||||||
|
model = strings.ToLower(strings.TrimSpace(model))
|
||||||
|
return strings.HasPrefix(model, "virtual hdisk")
|
||||||
|
}
|
||||||
|
|
||||||
func lsblkDevices() []lsblkDevice {
|
func lsblkDevices() []lsblkDevice {
|
||||||
out, err := exec.Command("lsblk", "-J", "-d",
|
out, err := exec.Command("lsblk", "-J", "-d",
|
||||||
"-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL").Output()
|
"-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL").Output()
|
||||||
|
|||||||
1009
audit/internal/platform/benchmark.go
Normal file
1009
audit/internal/platform/benchmark.go
Normal file
File diff suppressed because it is too large
Load Diff
199
audit/internal/platform/benchmark_report.go
Normal file
199
audit/internal/platform/benchmark_report.go
Normal file
@@ -0,0 +1,199 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
|
||||||
|
return renderBenchmarkReportWithCharts(result, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
type benchmarkReportChart struct {
|
||||||
|
Title string
|
||||||
|
Content string
|
||||||
|
}
|
||||||
|
|
||||||
|
var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
|
||||||
|
|
||||||
|
func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "Bee NVIDIA Benchmark Report\n")
|
||||||
|
fmt.Fprintf(&b, "===========================\n\n")
|
||||||
|
fmt.Fprintf(&b, "Generated: %s\n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
|
||||||
|
fmt.Fprintf(&b, "Host: %s\n", result.Hostname)
|
||||||
|
fmt.Fprintf(&b, "Profile: %s\n", result.BenchmarkProfile)
|
||||||
|
fmt.Fprintf(&b, "Overall status: %s\n", result.OverallStatus)
|
||||||
|
fmt.Fprintf(&b, "Selected GPUs: %s\n", joinIndexList(result.SelectedGPUIndices))
|
||||||
|
fmt.Fprintf(&b, "Normalization: %s\n\n", result.Normalization.Status)
|
||||||
|
|
||||||
|
if len(result.Findings) > 0 {
|
||||||
|
fmt.Fprintf(&b, "Executive Summary\n")
|
||||||
|
fmt.Fprintf(&b, "-----------------\n")
|
||||||
|
for _, finding := range result.Findings {
|
||||||
|
fmt.Fprintf(&b, "- %s\n", finding)
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Warnings) > 0 {
|
||||||
|
fmt.Fprintf(&b, "Warnings\n")
|
||||||
|
fmt.Fprintf(&b, "--------\n")
|
||||||
|
for _, warning := range result.Warnings {
|
||||||
|
fmt.Fprintf(&b, "- %s\n", warning)
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(&b, "Per GPU Scorecard\n")
|
||||||
|
fmt.Fprintf(&b, "-----------------\n")
|
||||||
|
for _, gpu := range result.GPUs {
|
||||||
|
fmt.Fprintf(&b, "GPU %d %s\n", gpu.Index, gpu.Name)
|
||||||
|
fmt.Fprintf(&b, " Status: %s\n", gpu.Status)
|
||||||
|
fmt.Fprintf(&b, " Composite score: %.2f\n", gpu.Scores.CompositeScore)
|
||||||
|
fmt.Fprintf(&b, " Compute score: %.2f\n", gpu.Scores.ComputeScore)
|
||||||
|
fmt.Fprintf(&b, " Power sustain: %.1f\n", gpu.Scores.PowerSustainScore)
|
||||||
|
fmt.Fprintf(&b, " Thermal sustain: %.1f\n", gpu.Scores.ThermalSustainScore)
|
||||||
|
fmt.Fprintf(&b, " Stability: %.1f\n", gpu.Scores.StabilityScore)
|
||||||
|
if gpu.Scores.InterconnectScore > 0 {
|
||||||
|
fmt.Fprintf(&b, " Interconnect: %.1f\n", gpu.Scores.InterconnectScore)
|
||||||
|
}
|
||||||
|
if len(gpu.DegradationReasons) > 0 {
|
||||||
|
fmt.Fprintf(&b, " Degradation reasons: %s\n", strings.Join(gpu.DegradationReasons, ", "))
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, " Avg power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.AvgPowerW, gpu.Steady.AvgTempC, gpu.Steady.AvgGraphicsClockMHz)
|
||||||
|
fmt.Fprintf(&b, " P95 power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.P95PowerW, gpu.Steady.P95TempC, gpu.Steady.P95GraphicsClockMHz)
|
||||||
|
if len(gpu.PrecisionResults) > 0 {
|
||||||
|
fmt.Fprintf(&b, " Precision results:\n")
|
||||||
|
for _, precision := range gpu.PrecisionResults {
|
||||||
|
if precision.Supported {
|
||||||
|
fmt.Fprintf(&b, " - %s: %.2f TOPS lanes=%d iterations=%d\n", precision.Name, precision.TeraOpsPerSec, precision.Lanes, precision.Iterations)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&b, " - %s: unsupported (%s)\n", precision.Name, precision.Notes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, " Throttle counters (us): sw_power=%d sw_thermal=%d sync_boost=%d hw_thermal=%d hw_power_brake=%d\n",
|
||||||
|
gpu.Throttle.SWPowerCapUS,
|
||||||
|
gpu.Throttle.SWThermalSlowdownUS,
|
||||||
|
gpu.Throttle.SyncBoostUS,
|
||||||
|
gpu.Throttle.HWThermalSlowdownUS,
|
||||||
|
gpu.Throttle.HWPowerBrakeSlowdownUS,
|
||||||
|
)
|
||||||
|
if len(gpu.Notes) > 0 {
|
||||||
|
fmt.Fprintf(&b, " Notes:\n")
|
||||||
|
for _, note := range gpu.Notes {
|
||||||
|
fmt.Fprintf(&b, " - %s\n", note)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if result.Interconnect != nil {
|
||||||
|
fmt.Fprintf(&b, "Interconnect\n")
|
||||||
|
fmt.Fprintf(&b, "------------\n")
|
||||||
|
fmt.Fprintf(&b, "Status: %s\n", result.Interconnect.Status)
|
||||||
|
if result.Interconnect.Supported {
|
||||||
|
fmt.Fprintf(&b, "Avg algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.AvgBusBWGBps)
|
||||||
|
fmt.Fprintf(&b, "Max algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.MaxAlgBWGBps, result.Interconnect.MaxBusBWGBps)
|
||||||
|
}
|
||||||
|
for _, note := range result.Interconnect.Notes {
|
||||||
|
fmt.Fprintf(&b, "- %s\n", note)
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(charts) > 0 {
|
||||||
|
fmt.Fprintf(&b, "Terminal Charts\n")
|
||||||
|
fmt.Fprintf(&b, "---------------\n")
|
||||||
|
for _, chart := range charts {
|
||||||
|
content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
|
||||||
|
if content == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "%s\n", chart.Title)
|
||||||
|
fmt.Fprintf(&b, "%s\n", strings.Repeat("~", len(chart.Title)))
|
||||||
|
fmt.Fprintf(&b, "%s\n\n", content)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(&b, "Methodology\n")
|
||||||
|
fmt.Fprintf(&b, "-----------\n")
|
||||||
|
fmt.Fprintf(&b, "- Profile %s uses standardized baseline, warmup, steady-state, interconnect, and cooldown phases.\n", result.BenchmarkProfile)
|
||||||
|
fmt.Fprintf(&b, "- Single-GPU compute score comes from bee-gpu-burn cuBLASLt output when available.\n")
|
||||||
|
fmt.Fprintf(&b, "- Thermal and power limitations are inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
|
||||||
|
fmt.Fprintf(&b, "- result.json is the canonical machine-readable source for this benchmark run.\n\n")
|
||||||
|
|
||||||
|
fmt.Fprintf(&b, "Raw Files\n")
|
||||||
|
fmt.Fprintf(&b, "---------\n")
|
||||||
|
fmt.Fprintf(&b, "- result.json\n")
|
||||||
|
fmt.Fprintf(&b, "- report.txt\n")
|
||||||
|
fmt.Fprintf(&b, "- summary.txt\n")
|
||||||
|
fmt.Fprintf(&b, "- verbose.log\n")
|
||||||
|
fmt.Fprintf(&b, "- gpu-*-baseline-metrics.csv/html/term.txt\n")
|
||||||
|
fmt.Fprintf(&b, "- gpu-*-warmup.log\n")
|
||||||
|
fmt.Fprintf(&b, "- gpu-*-steady.log\n")
|
||||||
|
fmt.Fprintf(&b, "- gpu-*-steady-metrics.csv/html/term.txt\n")
|
||||||
|
fmt.Fprintf(&b, "- gpu-*-cooldown-metrics.csv/html/term.txt\n")
|
||||||
|
if result.Interconnect != nil {
|
||||||
|
fmt.Fprintf(&b, "- nccl-all-reduce.log\n")
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
|
||||||
|
phases := []struct {
|
||||||
|
name string
|
||||||
|
label string
|
||||||
|
}{
|
||||||
|
{name: "baseline", label: "Baseline"},
|
||||||
|
{name: "steady", label: "Steady State"},
|
||||||
|
{name: "cooldown", label: "Cooldown"},
|
||||||
|
}
|
||||||
|
var charts []benchmarkReportChart
|
||||||
|
for _, idx := range gpuIndices {
|
||||||
|
for _, phase := range phases {
|
||||||
|
path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-%s-metrics-term.txt", idx, phase.name))
|
||||||
|
raw, err := os.ReadFile(path)
|
||||||
|
if err != nil || len(raw) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
charts = append(charts, benchmarkReportChart{
|
||||||
|
Title: fmt.Sprintf("GPU %d %s", idx, phase.label),
|
||||||
|
Content: string(raw),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return charts
|
||||||
|
}
|
||||||
|
|
||||||
|
func stripANSIEscapeSequences(raw string) string {
|
||||||
|
return ansiEscapePattern.ReplaceAllString(raw, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
|
||||||
|
fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
|
||||||
|
fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
|
||||||
|
fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
|
||||||
|
fmt.Fprintf(&b, "normalization_status=%s\n", result.Normalization.Status)
|
||||||
|
var best float64
|
||||||
|
for i, gpu := range result.GPUs {
|
||||||
|
fmt.Fprintf(&b, "gpu_%d_status=%s\n", gpu.Index, gpu.Status)
|
||||||
|
fmt.Fprintf(&b, "gpu_%d_composite_score=%.2f\n", gpu.Index, gpu.Scores.CompositeScore)
|
||||||
|
if i == 0 || gpu.Scores.CompositeScore > best {
|
||||||
|
best = gpu.Scores.CompositeScore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Fprintf(&b, "best_composite_score=%.2f\n", best)
|
||||||
|
if result.Interconnect != nil {
|
||||||
|
fmt.Fprintf(&b, "interconnect_status=%s\n", result.Interconnect.Status)
|
||||||
|
fmt.Fprintf(&b, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps)
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
179
audit/internal/platform/benchmark_test.go
Normal file
179
audit/internal/platform/benchmark_test.go
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestResolveBenchmarkProfile(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
cases := []struct {
|
||||||
|
name string
|
||||||
|
profile string
|
||||||
|
want benchmarkProfileSpec
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "default",
|
||||||
|
profile: "",
|
||||||
|
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "stability",
|
||||||
|
profile: "stability",
|
||||||
|
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "overnight",
|
||||||
|
profile: "overnight",
|
||||||
|
want: benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
got := resolveBenchmarkProfile(tc.profile)
|
||||||
|
if got != tc.want {
|
||||||
|
t.Fatalf("profile=%q got %+v want %+v", tc.profile, got, tc.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
|
||||||
|
Profile: "stability",
|
||||||
|
RunNCCL: false,
|
||||||
|
})
|
||||||
|
if opts.Profile != NvidiaBenchmarkProfileStability {
|
||||||
|
t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
|
||||||
|
}
|
||||||
|
if opts.RunNCCL {
|
||||||
|
t.Fatalf("RunNCCL should stay false when explicitly disabled")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBenchmarkBurnLog(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
raw := strings.Join([]string{
|
||||||
|
"loader=bee-gpu-burn",
|
||||||
|
"[gpu 0] device=NVIDIA H100",
|
||||||
|
"[gpu 0] compute_capability=9.0",
|
||||||
|
"[gpu 0] backend=cublasLt",
|
||||||
|
"[gpu 0] duration_s=10",
|
||||||
|
"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
|
||||||
|
"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
|
||||||
|
"[gpu 0] fp16_tensor_iterations=200",
|
||||||
|
"[gpu 0] fp8_e4m3_iterations=50",
|
||||||
|
"[gpu 0] status=OK",
|
||||||
|
}, "\n")
|
||||||
|
|
||||||
|
got := parseBenchmarkBurnLog(raw)
|
||||||
|
if got.Backend != "cublasLt" {
|
||||||
|
t.Fatalf("backend=%q want cublasLt", got.Backend)
|
||||||
|
}
|
||||||
|
if got.ComputeCapability != "9.0" {
|
||||||
|
t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
|
||||||
|
}
|
||||||
|
if len(got.Profiles) != 2 {
|
||||||
|
t.Fatalf("profiles=%d want 2", len(got.Profiles))
|
||||||
|
}
|
||||||
|
if got.Profiles[0].TeraOpsPerSec <= 0 {
|
||||||
|
t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
|
||||||
|
}
|
||||||
|
if got.Profiles[1].Category != "fp8" {
|
||||||
|
t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
result := NvidiaBenchmarkResult{
|
||||||
|
BenchmarkVersion: benchmarkVersion,
|
||||||
|
BenchmarkProfile: NvidiaBenchmarkProfileStandard,
|
||||||
|
OverallStatus: "PARTIAL",
|
||||||
|
SelectedGPUIndices: []int{0},
|
||||||
|
Normalization: BenchmarkNormalization{
|
||||||
|
Status: "partial",
|
||||||
|
},
|
||||||
|
Findings: []string{"GPU 0 spent measurable time under SW power cap."},
|
||||||
|
GPUs: []BenchmarkGPUResult{
|
||||||
|
{
|
||||||
|
Index: 0,
|
||||||
|
Name: "NVIDIA H100",
|
||||||
|
Status: "OK",
|
||||||
|
Steady: BenchmarkTelemetrySummary{
|
||||||
|
AvgPowerW: 680,
|
||||||
|
AvgTempC: 79,
|
||||||
|
AvgGraphicsClockMHz: 1725,
|
||||||
|
P95PowerW: 700,
|
||||||
|
P95TempC: 82,
|
||||||
|
P95GraphicsClockMHz: 1800,
|
||||||
|
},
|
||||||
|
Scores: BenchmarkScorecard{
|
||||||
|
ComputeScore: 1200,
|
||||||
|
PowerSustainScore: 96,
|
||||||
|
ThermalSustainScore: 88,
|
||||||
|
StabilityScore: 92,
|
||||||
|
CompositeScore: 1176,
|
||||||
|
},
|
||||||
|
PrecisionResults: []BenchmarkPrecisionResult{
|
||||||
|
{Name: "fp16_tensor", Supported: true, TeraOpsPerSec: 700},
|
||||||
|
},
|
||||||
|
Throttle: BenchmarkThrottleCounters{
|
||||||
|
SWPowerCapUS: 1000000,
|
||||||
|
},
|
||||||
|
DegradationReasons: []string{"power_capped"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
report := renderBenchmarkReport(result)
|
||||||
|
for _, needle := range []string{
|
||||||
|
"Executive Summary",
|
||||||
|
"GPU 0 spent measurable time under SW power cap.",
|
||||||
|
"Composite score: 1176.00",
|
||||||
|
"fp16_tensor: 700.00 TOPS",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(report, needle) {
|
||||||
|
t.Fatalf("report missing %q\n%s", needle, report)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
|
||||||
|
BenchmarkProfile: NvidiaBenchmarkProfileStandard,
|
||||||
|
OverallStatus: "OK",
|
||||||
|
SelectedGPUIndices: []int{0},
|
||||||
|
Normalization: BenchmarkNormalization{
|
||||||
|
Status: "full",
|
||||||
|
},
|
||||||
|
}, []benchmarkReportChart{
|
||||||
|
{
|
||||||
|
Title: "GPU 0 Steady State",
|
||||||
|
Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, needle := range []string{
|
||||||
|
"Terminal Charts",
|
||||||
|
"GPU 0 Steady State",
|
||||||
|
"GPU 0 chart",
|
||||||
|
"42┤───",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(report, needle) {
|
||||||
|
t.Fatalf("report missing %q\n%s", needle, report)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(report, "\x1b[31m") {
|
||||||
|
t.Fatalf("report should not contain ANSI escapes\n%s", report)
|
||||||
|
}
|
||||||
|
}
|
||||||
132
audit/internal/platform/benchmark_types.go
Normal file
132
audit/internal/platform/benchmark_types.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
const (
|
||||||
|
NvidiaBenchmarkProfileStandard = "standard"
|
||||||
|
NvidiaBenchmarkProfileStability = "stability"
|
||||||
|
NvidiaBenchmarkProfileOvernight = "overnight"
|
||||||
|
)
|
||||||
|
|
||||||
|
type NvidiaBenchmarkOptions struct {
|
||||||
|
Profile string
|
||||||
|
SizeMB int
|
||||||
|
GPUIndices []int
|
||||||
|
ExcludeGPUIndices []int
|
||||||
|
RunNCCL bool
|
||||||
|
}
|
||||||
|
|
||||||
|
type NvidiaBenchmarkResult struct {
|
||||||
|
BenchmarkVersion string `json:"benchmark_version"`
|
||||||
|
GeneratedAt time.Time `json:"generated_at"`
|
||||||
|
Hostname string `json:"hostname,omitempty"`
|
||||||
|
BenchmarkProfile string `json:"benchmark_profile"`
|
||||||
|
OverallStatus string `json:"overall_status"`
|
||||||
|
SelectedGPUIndices []int `json:"selected_gpu_indices"`
|
||||||
|
Findings []string `json:"findings,omitempty"`
|
||||||
|
Warnings []string `json:"warnings,omitempty"`
|
||||||
|
Normalization BenchmarkNormalization `json:"normalization"`
|
||||||
|
GPUs []BenchmarkGPUResult `json:"gpus"`
|
||||||
|
Interconnect *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type BenchmarkNormalization struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Notes []string `json:"notes,omitempty"`
|
||||||
|
GPUs []BenchmarkNormalizationGPU `json:"gpus,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type BenchmarkNormalizationGPU struct {
|
||||||
|
Index int `json:"index"`
|
||||||
|
PersistenceMode string `json:"persistence_mode,omitempty"`
|
||||||
|
GPUClockLockMHz float64 `json:"gpu_clock_lock_mhz,omitempty"`
|
||||||
|
GPUClockLockStatus string `json:"gpu_clock_lock_status,omitempty"`
|
||||||
|
MemoryClockLockMHz float64 `json:"memory_clock_lock_mhz,omitempty"`
|
||||||
|
MemoryClockLockStatus string `json:"memory_clock_lock_status,omitempty"`
|
||||||
|
Notes []string `json:"notes,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type BenchmarkGPUResult struct {
|
||||||
|
Index int `json:"index"`
|
||||||
|
UUID string `json:"uuid,omitempty"`
|
||||||
|
Name string `json:"name,omitempty"`
|
||||||
|
BusID string `json:"bus_id,omitempty"`
|
||||||
|
VBIOS string `json:"vbios,omitempty"`
|
||||||
|
ComputeCapability string `json:"compute_capability,omitempty"`
|
||||||
|
Backend string `json:"backend,omitempty"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
PowerLimitW float64 `json:"power_limit_w,omitempty"`
|
||||||
|
MaxGraphicsClockMHz float64 `json:"max_graphics_clock_mhz,omitempty"`
|
||||||
|
MaxMemoryClockMHz float64 `json:"max_memory_clock_mhz,omitempty"`
|
||||||
|
LockedGraphicsClockMHz float64 `json:"locked_graphics_clock_mhz,omitempty"`
|
||||||
|
LockedMemoryClockMHz float64 `json:"locked_memory_clock_mhz,omitempty"`
|
||||||
|
Baseline BenchmarkTelemetrySummary `json:"baseline"`
|
||||||
|
Steady BenchmarkTelemetrySummary `json:"steady"`
|
||||||
|
Cooldown BenchmarkTelemetrySummary `json:"cooldown"`
|
||||||
|
Throttle BenchmarkThrottleCounters `json:"throttle_counters"`
|
||||||
|
PrecisionResults []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
|
||||||
|
Scores BenchmarkScorecard `json:"scores"`
|
||||||
|
DegradationReasons []string `json:"degradation_reasons,omitempty"`
|
||||||
|
Notes []string `json:"notes,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// BenchmarkTelemetrySummary aggregates GPU telemetry samples taken over one
// phase of a benchmark run: averages and p95s of temperature, power and
// clocks, utilization averages, and stability metrics.
type BenchmarkTelemetrySummary struct {
	// DurationSec is the phase length in seconds; Samples is the sample count.
	DurationSec float64 `json:"duration_sec"`
	Samples     int     `json:"samples"`

	// Temperature (°C), power (W) and clock (MHz) statistics.
	AvgTempC            float64 `json:"avg_temp_c"`
	P95TempC            float64 `json:"p95_temp_c"`
	AvgPowerW           float64 `json:"avg_power_w"`
	P95PowerW           float64 `json:"p95_power_w"`
	AvgGraphicsClockMHz float64 `json:"avg_graphics_clock_mhz"`
	P95GraphicsClockMHz float64 `json:"p95_graphics_clock_mhz"`
	AvgMemoryClockMHz   float64 `json:"avg_memory_clock_mhz"`
	P95MemoryClockMHz   float64 `json:"p95_memory_clock_mhz"`

	// Utilization averages, in percent.
	AvgUsagePct    float64 `json:"avg_usage_pct"`
	AvgMemUsagePct float64 `json:"avg_mem_usage_pct"`

	// Stability metrics: per-signal coefficient of variation (%) and
	// clock drift (%) across the phase.
	ClockCVPct    float64 `json:"clock_cv_pct"`
	PowerCVPct    float64 `json:"power_cv_pct"`
	TempCVPct     float64 `json:"temp_cv_pct"`
	ClockDriftPct float64 `json:"clock_drift_pct"`
}
|
||||||
|
|
||||||
|
// BenchmarkThrottleCounters records cumulative throttle durations in
// microseconds, per throttle cause. NOTE(review): the field names mirror
// NVML violation-counter categories — confirm the collection source.
type BenchmarkThrottleCounters struct {
	SWPowerCapUS           uint64 `json:"sw_power_cap_us"`
	SWThermalSlowdownUS    uint64 `json:"sw_thermal_slowdown_us"`
	SyncBoostUS            uint64 `json:"sync_boost_us"`
	HWThermalSlowdownUS    uint64 `json:"hw_thermal_slowdown_us"`
	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
}
|
||||||
|
|
||||||
|
// BenchmarkPrecisionResult describes the result of one precision benchmark:
// whether the precision is supported on the GPU and, when run, the problem
// size and the measured throughput.
type BenchmarkPrecisionResult struct {
	Name      string `json:"name"`
	Category  string `json:"category"`
	Supported bool   `json:"supported"`
	Lanes     int    `json:"lanes,omitempty"`

	// Problem dimensions and iteration count for the timed run
	// (presumably GEMM M/N/K — confirm against the benchmark kernel).
	M          uint64 `json:"m,omitempty"`
	N          uint64 `json:"n,omitempty"`
	K          uint64 `json:"k,omitempty"`
	Iterations uint64 `json:"iterations,omitempty"`

	// TeraOpsPerSec is the measured throughput in Tera-operations/second.
	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
	Notes         string  `json:"notes,omitempty"`
}
|
||||||
|
|
||||||
|
// BenchmarkScorecard bundles the per-category benchmark scores together with
// the overall composite score.
type BenchmarkScorecard struct {
	ComputeScore        float64 `json:"compute_score"`
	PowerSustainScore   float64 `json:"power_sustain_score"`
	ThermalSustainScore float64 `json:"thermal_sustain_score"`
	StabilityScore      float64 `json:"stability_score"`
	InterconnectScore   float64 `json:"interconnect_score"`
	CompositeScore      float64 `json:"composite_score"`
}
|
||||||
|
|
||||||
|
// BenchmarkInterconnectResult holds the outcome of the multi-GPU
// interconnect bandwidth test: whether it was attempted/supported, which
// GPUs participated, and the measured bandwidths in GB/s.
type BenchmarkInterconnectResult struct {
	Status    string `json:"status"`
	Attempted bool   `json:"attempted"`
	Supported bool   `json:"supported"`
	// SelectedGPUIndices lists the GPUs that took part in the test.
	SelectedGPUIndices []int `json:"selected_gpu_indices,omitempty"`

	// Algorithm-bandwidth and bus-bandwidth statistics, in GB/s.
	AvgAlgBWGBps float64 `json:"avg_algbw_gbps,omitempty"`
	MaxAlgBWGBps float64 `json:"max_algbw_gbps,omitempty"`
	AvgBusBWGBps float64 `json:"avg_busbw_gbps,omitempty"`
	MaxBusBWGBps float64 `json:"max_busbw_gbps,omitempty"`

	Notes []string `json:"notes,omitempty"`
}
|
||||||
139
audit/internal/platform/error_patterns.go
Normal file
139
audit/internal/platform/error_patterns.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "regexp"
|
||||||
|
|
||||||
|
// ErrorPattern describes a kernel log pattern that indicates a hardware error.
// Add new patterns by appending to HardwareErrorPatterns — no other code changes needed.
type ErrorPattern struct {
	// Name is a short machine-readable label for logging and deduplication.
	Name string
	// Re is the compiled regular expression matched against a single kmsg line.
	Re *regexp.Regexp
	// Category groups related errors: "gpu", "pcie", "storage", "mce", "memory", "cpu".
	Category string
	// Severity is "warning" for recoverable/uncertain faults, "critical" for definitive failures.
	Severity string
	// BDFGroup is the capture group index (1-based) that contains a PCIe BDF address
	// (e.g. "0000:c8:00.0"). 0 means no BDF is captured by this pattern.
	BDFGroup int
	// DevGroup is the capture group index (1-based) that contains a device name
	// (e.g. "sda", "nvme0"). 0 means no device name is captured by this pattern.
	DevGroup int
}
|
||||||
|
|
||||||
|
// HardwareErrorPatterns is the global list of kernel log patterns that indicate hardware faults.
// To add a new pattern: append a new ErrorPattern struct to this slice.
// All patterns are case-insensitive ((?i)); all current entries are "warning" severity.
var HardwareErrorPatterns = []ErrorPattern{
	// ── GPU / NVIDIA ────────────────────────────────────────────────────────────
	{
		Name:     "nvidia-rminitadapter",
		Re:       mustPat(`(?i)NVRM:.*GPU\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d)`),
		Category: "gpu",
		Severity: "warning",
		BDFGroup: 1,
	},
	{
		Name:     "nvidia-msi-fail",
		Re:       mustPat(`(?i)NVRM:.*Failed to enable MSI`),
		Category: "gpu",
		Severity: "warning",
	},
	{
		Name:     "nvidia-aer",
		Re:       mustPat(`(?i)nvidia\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
		Category: "gpu",
		Severity: "warning",
		BDFGroup: 1,
	},
	{
		Name:     "nvidia-xid",
		Re:       mustPat(`(?i)NVRM:.*Xid.*\b([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d)`),
		Category: "gpu",
		Severity: "warning",
		BDFGroup: 1,
	},

	// ── PCIe AER (generic) ──────────────────────────────────────────────────────
	{
		Name:     "pcie-aer",
		Re:       mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
		Category: "pcie",
		Severity: "warning",
		BDFGroup: 1,
	},
	{
		// No driver-name anchor: any line pairing a BDF with "uncorrectable".
		Name:     "pcie-uncorrectable",
		Re:       mustPat(`(?i)([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*[Uu]ncorrectable`),
		Category: "pcie",
		Severity: "warning",
		BDFGroup: 1,
	},
	{
		Name:     "pcie-link-down",
		Re:       mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*[Ll]ink.*[Dd]own`),
		Category: "pcie",
		Severity: "warning",
		BDFGroup: 1,
	},

	// ── Storage ─────────────────────────────────────────────────────────────────
	{
		Name:     "blk-io-error",
		Re:       mustPat(`(?i)blk_update_request.*I/O error.*dev\s+(\w+)`),
		Category: "storage",
		Severity: "warning",
		DevGroup: 1,
	},
	{
		Name:     "nvme-timeout",
		Re:       mustPat(`(?i)nvme\s+(\w+):.*timeout`),
		Category: "storage",
		Severity: "warning",
		DevGroup: 1,
	},
	{
		// SCSI target addresses (e.g. "sd 0:0:0:0:") carry no simple device
		// name, so no DevGroup here.
		Name:     "scsi-failed",
		Re:       mustPat(`(?i)sd\s+[\da-f:]+:.*FAILED`),
		Category: "storage",
		Severity: "warning",
	},
	{
		Name:     "nvme-reset",
		Re:       mustPat(`(?i)nvme\s+(\w+):.*reset`),
		Category: "storage",
		Severity: "warning",
		DevGroup: 1,
	},

	// ── Machine Check Exceptions ────────────────────────────────────────────────
	{
		Name:     "mce-hardware-error",
		Re:       mustPat(`(?i)mce:.*[Hh]ardware [Ee]rror`),
		Category: "mce",
		Severity: "warning",
	},
	{
		Name:     "mce-corrected",
		Re:       mustPat(`(?i)mce:.*[Cc]orrected`),
		Category: "mce",
		Severity: "warning",
	},

	// ── Memory ─────────────────────────────────────────────────────────────────
	{
		Name:     "edac-ue",
		Re:       mustPat(`(?i)EDAC.*[Uu]ncorrectable`),
		Category: "memory",
		Severity: "warning",
	},
	{
		Name:     "edac-ce",
		Re:       mustPat(`(?i)EDAC.*[Cc]orrectable`),
		Category: "memory",
		Severity: "warning",
	},
}
|
||||||
|
|
||||||
|
// mustPat compiles a pattern at package-init time, panicking on an invalid
// regexp (a programmer error, per the Must* convention).
func mustPat(s string) *regexp.Regexp {
	return regexp.MustCompile(s)
}
|
||||||
@@ -20,12 +20,13 @@ type GPUMetricRow struct {
|
|||||||
MemUsagePct float64 `json:"mem_usage_pct"`
|
MemUsagePct float64 `json:"mem_usage_pct"`
|
||||||
PowerW float64 `json:"power_w"`
|
PowerW float64 `json:"power_w"`
|
||||||
ClockMHz float64 `json:"clock_mhz"`
|
ClockMHz float64 `json:"clock_mhz"`
|
||||||
|
MemClockMHz float64 `json:"mem_clock_mhz"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
|
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
|
||||||
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
||||||
args := []string{
|
args := []string{
|
||||||
"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics",
|
"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics,clocks.current.memory",
|
||||||
"--format=csv,noheader,nounits",
|
"--format=csv,noheader,nounits",
|
||||||
}
|
}
|
||||||
if len(gpuIndices) > 0 {
|
if len(gpuIndices) > 0 {
|
||||||
@@ -46,7 +47,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
parts := strings.Split(line, ", ")
|
parts := strings.Split(line, ", ")
|
||||||
if len(parts) < 6 {
|
if len(parts) < 7 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
|
||||||
@@ -57,6 +58,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
|
|||||||
MemUsagePct: parseGPUFloat(parts[3]),
|
MemUsagePct: parseGPUFloat(parts[3]),
|
||||||
PowerW: parseGPUFloat(parts[4]),
|
PowerW: parseGPUFloat(parts[4]),
|
||||||
ClockMHz: parseGPUFloat(parts[5]),
|
ClockMHz: parseGPUFloat(parts[5]),
|
||||||
|
MemClockMHz: parseGPUFloat(parts[6]),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return rows, nil
|
return rows, nil
|
||||||
@@ -139,10 +141,10 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
|
|||||||
// WriteGPUMetricsCSV writes collected rows as a CSV file.
|
// WriteGPUMetricsCSV writes collected rows as a CSV file.
|
||||||
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
|
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
|
||||||
var b bytes.Buffer
|
var b bytes.Buffer
|
||||||
b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz\n")
|
b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
|
||||||
for _, r := range rows {
|
for _, r := range rows {
|
||||||
fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.0f\n",
|
fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
|
||||||
r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.PowerW, r.ClockMHz)
|
r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
|
||||||
}
|
}
|
||||||
return os.WriteFile(path, b.Bytes(), 0644)
|
return os.WriteFile(path, b.Bytes(), 0644)
|
||||||
}
|
}
|
||||||
@@ -197,7 +199,7 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
|
|||||||
const PW = plotX2 - plotX1
|
const PW = plotX2 - plotX1
|
||||||
const PH = plotY2 - plotY1
|
const PH = plotY2 - plotY1
|
||||||
// Outer axes
|
// Outer axes
|
||||||
const tempAxisX = 60 // temp axis line
|
const tempAxisX = 60 // temp axis line
|
||||||
const clockAxisX = 900 // clock axis line
|
const clockAxisX = 900 // clock axis line
|
||||||
|
|
||||||
colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
|
colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
|
||||||
|
|||||||
@@ -11,10 +11,10 @@ import (
|
|||||||
|
|
||||||
// InstallDisk describes a candidate disk for installation.
|
// InstallDisk describes a candidate disk for installation.
|
||||||
type InstallDisk struct {
|
type InstallDisk struct {
|
||||||
Device string // e.g. /dev/sda
|
Device string // e.g. /dev/sda
|
||||||
Model string
|
Model string
|
||||||
Size string // human-readable, e.g. "500G"
|
Size string // human-readable, e.g. "500G"
|
||||||
SizeBytes int64 // raw byte count from lsblk
|
SizeBytes int64 // raw byte count from lsblk
|
||||||
MountedParts []string // partition mount points currently active
|
MountedParts []string // partition mount points currently active
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,6 +117,61 @@ func findLiveBootDevice() string {
|
|||||||
return "/dev/" + strings.TrimSpace(string(out2))
|
return "/dev/" + strings.TrimSpace(string(out2))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// findmntField runs findmnt and returns the requested single-column output
// for the given mount target, or "" if findmnt fails (target not mounted,
// tool missing, etc.). Shared backend for mountSource and mountFSType.
func findmntField(target, field string) string {
	out, err := exec.Command("findmnt", "-n", "-o", field, target).Output()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(out))
}

// mountSource returns the SOURCE backing the mount at target, or "" if it
// cannot be determined.
func mountSource(target string) string {
	return findmntField(target, "SOURCE")
}

// mountFSType returns the filesystem type of the mount at target, or "" if
// it cannot be determined.
func mountFSType(target string) string {
	return findmntField(target, "FSTYPE")
}
|
||||||
|
|
||||||
|
// lsblkField returns one lsblk column (e.g. "TYPE", "TRAN") for a whole
// block device, or "" on empty input or lsblk failure. Shared backend for
// blockDeviceType and blockDeviceTransport.
func lsblkField(device, column string) string {
	if strings.TrimSpace(device) == "" {
		return ""
	}
	out, err := exec.Command("lsblk", "-dn", "-o", column, device).Output()
	if err != nil {
		return ""
	}
	return strings.TrimSpace(string(out))
}

// blockDeviceType returns the lsblk TYPE of device (e.g. "disk", "rom"),
// or "" if it cannot be determined.
func blockDeviceType(device string) string {
	return lsblkField(device, "TYPE")
}

// blockDeviceTransport returns the lsblk TRAN (transport, e.g. "usb",
// "nvme") of device, or "" if it cannot be determined.
func blockDeviceTransport(device string) string {
	return lsblkField(device, "TRAN")
}
|
||||||
|
|
||||||
|
// inferLiveBootKind classifies where the live medium is served from, based
// on the mount's filesystem type and source plus the backing device's lsblk
// type and transport. It returns one of "ram", "cdrom", "usb", "disk", or
// "unknown". Earlier cases win: tmpfs beats everything, a ROM device beats
// a USB transport, and /dev/sr* is treated as a CD-ROM even without lsblk data.
func inferLiveBootKind(fsType, source, deviceType, transport string) string {
	// Trim once up front instead of re-trimming in every case.
	fsType = strings.TrimSpace(fsType)
	source = strings.TrimSpace(source)
	deviceType = strings.TrimSpace(deviceType)
	transport = strings.TrimSpace(transport)

	switch {
	case strings.EqualFold(fsType, "tmpfs"):
		return "ram"
	case strings.EqualFold(deviceType, "rom"):
		return "cdrom"
	case strings.EqualFold(transport, "usb"):
		return "usb"
	case strings.HasPrefix(source, "/dev/sr"):
		return "cdrom"
	case strings.HasPrefix(source, "/dev/"):
		return "disk"
	default:
		return "unknown"
	}
}
|
||||||
|
|
||||||
// MinInstallBytes returns the minimum recommended disk size for installation:
|
// MinInstallBytes returns the minimum recommended disk size for installation:
|
||||||
// squashfs size × 1.5 to allow for extracted filesystem and bootloader.
|
// squashfs size × 1.5 to allow for extracted filesystem and bootloader.
|
||||||
// Returns 0 if the squashfs is not available (non-live environment).
|
// Returns 0 if the squashfs is not available (non-live environment).
|
||||||
|
|||||||
@@ -12,11 +12,40 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func (s *System) IsLiveMediaInRAM() bool {
|
func (s *System) IsLiveMediaInRAM() bool {
|
||||||
out, err := exec.Command("findmnt", "-n", "-o", "FSTYPE", "/run/live/medium").Output()
|
fsType := mountFSType("/run/live/medium")
|
||||||
if err != nil {
|
if fsType == "" {
|
||||||
return toramActive()
|
return toramActive()
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(string(out)) == "tmpfs"
|
return strings.EqualFold(fsType, "tmpfs")
|
||||||
|
}
|
||||||
|
|
||||||
|
// LiveBootSource reports where the live medium mounted at /run/live/medium
// is being served from: the mount source, backing device, whether it lives
// in RAM, and an inferred Kind ("ram", "usb", "cdrom", "disk", "unknown").
func (s *System) LiveBootSource() LiveBootSource {
	fsType := mountFSType("/run/live/medium")
	source := mountSource("/run/live/medium")
	device := findLiveBootDevice()
	status := LiveBootSource{
		InRAM:  strings.EqualFold(fsType, "tmpfs"),
		Source: source,
		Device: device,
	}
	// Nothing discoverable about the mount at all: fall back to toramActive()
	// to decide whether the medium was copied to RAM at boot.
	if fsType == "" && source == "" && device == "" {
		if toramActive() {
			status.InRAM = true
			status.Kind = "ram"
			status.Source = "tmpfs"
			return status
		}
		status.Kind = "unknown"
		return status
	}
	status.Kind = inferLiveBootKind(fsType, source, blockDeviceType(device), blockDeviceTransport(device))
	if status.Kind == "" {
		status.Kind = "unknown"
	}
	// A tmpfs mount has no meaningful block-device SOURCE; report "tmpfs".
	if status.InRAM && strings.TrimSpace(status.Source) == "" {
		status.Source = "tmpfs"
	}
	return status
}
|
||||||
|
|
||||||
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
|
||||||
@@ -91,10 +120,45 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
|
|||||||
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log("Verifying live medium now served from RAM...")
|
||||||
|
status := s.LiveBootSource()
|
||||||
|
if err := verifyInstallToRAMStatus(status); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
|
||||||
log("Done. Installation media can be safely disconnected.")
|
log("Done. Installation media can be safely disconnected.")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func verifyInstallToRAMStatus(status LiveBootSource) error {
|
||||||
|
if status.InRAM {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
|
||||||
|
}
|
||||||
|
|
||||||
|
func describeLiveBootSource(status LiveBootSource) string {
|
||||||
|
source := strings.TrimSpace(status.Device)
|
||||||
|
if source == "" {
|
||||||
|
source = strings.TrimSpace(status.Source)
|
||||||
|
}
|
||||||
|
if source == "" {
|
||||||
|
source = "unknown source"
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(status.Kind) {
|
||||||
|
case "ram":
|
||||||
|
return "RAM"
|
||||||
|
case "usb":
|
||||||
|
return "USB (" + source + ")"
|
||||||
|
case "cdrom":
|
||||||
|
return "CD-ROM (" + source + ")"
|
||||||
|
case "disk":
|
||||||
|
return "disk (" + source + ")"
|
||||||
|
default:
|
||||||
|
return source
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
|
||||||
in, err := os.Open(src)
|
in, err := os.Open(src)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
57
audit/internal/platform/install_to_ram_test.go
Normal file
57
audit/internal/platform/install_to_ram_test.go
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// TestInferLiveBootKind exercises the kind-inference table across the
// supported media classes: RAM (tmpfs), USB, CD-ROM, plain disk, and the
// unknown fallback for non-device sources.
func TestInferLiveBootKind(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name       string
		fsType     string
		source     string
		deviceType string
		transport  string
		want       string
	}{
		{name: "ram tmpfs", fsType: "tmpfs", source: "/dev/shm/bee-live", want: "ram"},
		{name: "usb disk", source: "/dev/sdb1", deviceType: "disk", transport: "usb", want: "usb"},
		{name: "cdrom rom", source: "/dev/sr0", deviceType: "rom", want: "cdrom"},
		{name: "disk sata", source: "/dev/nvme0n1p1", deviceType: "disk", transport: "nvme", want: "disk"},
		{name: "unknown", source: "overlay", want: "unknown"},
	}
	for _, tc := range tests {
		tc := tc // pin loop variable for the subtest closure (pre-Go 1.22 semantics)
		t.Run(tc.name, func(t *testing.T) {
			got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
			if got != tc.want {
				t.Fatalf("inferLiveBootKind(%q,%q,%q,%q)=%q want %q", tc.fsType, tc.source, tc.deviceType, tc.transport, got, tc.want)
			}
		})
	}
}
|
||||||
|
|
||||||
|
// TestVerifyInstallToRAMStatus checks both the success path (media in RAM)
// and the failure path, pinning the exact error message.
func TestVerifyInstallToRAMStatus(t *testing.T) {
	t.Parallel()

	if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
		t.Fatalf("expected success for RAM-backed status, got %v", err)
	}
	err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
	if err == nil {
		t.Fatal("expected verification failure when media is still on USB")
	}
	if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)" {
		t.Fatalf("error=%q", got)
	}
}
|
||||||
|
|
||||||
|
// TestDescribeLiveBootSource checks the bare "RAM" label and the fallback
// to the raw source string for an unknown kind.
func TestDescribeLiveBootSource(t *testing.T) {
	t.Parallel()

	if got := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"}); got != "RAM" {
		t.Fatalf("got %q want RAM", got)
	}
	if got := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}); got != "/run/live/medium" {
		t.Fatalf("got %q want /run/live/medium", got)
	}
}
|
||||||
68
audit/internal/platform/kill_workers.go
Normal file
68
audit/internal/platform/kill_workers.go
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// workerPatterns are substrings matched against /proc/<pid>/cmdline to identify
// bee test worker processes that should be killed by KillTestWorkers.
var workerPatterns = []string{
	"bee-gpu-burn",
	"stress-ng",
	"stressapptest",
	"memtester",
	// DCGM diagnostic workers — nvvs is spawned by dcgmi diag and survives
	// if dcgmi is killed mid-run, leaving the GPU occupied (DCGM_ST_IN_USE).
	"nvvs",
	"dcgmi",
}
|
||||||
|
|
||||||
|
// KilledProcess describes a process that was sent SIGKILL.
type KilledProcess struct {
	// PID is the process ID that received the signal.
	PID int `json:"pid"`
	// Name is the executable base name that matched a worker pattern.
	Name string `json:"name"`
}
|
||||||
|
|
||||||
|
// KillTestWorkers scans /proc for running test worker processes and sends
|
||||||
|
// SIGKILL to each one found. It returns a list of killed processes.
|
||||||
|
// Errors for individual processes (e.g. already exited) are silently ignored.
|
||||||
|
func KillTestWorkers() []KilledProcess {
|
||||||
|
entries, err := os.ReadDir("/proc")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var killed []KilledProcess
|
||||||
|
for _, e := range entries {
|
||||||
|
if !e.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pid, err := strconv.Atoi(e.Name())
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// /proc/*/cmdline uses NUL bytes as argument separators.
|
||||||
|
args := strings.SplitN(strings.ReplaceAll(string(cmdline), "\x00", " "), " ", 2)
|
||||||
|
exe := strings.TrimSpace(args[0])
|
||||||
|
base := exe
|
||||||
|
if idx := strings.LastIndexByte(exe, '/'); idx >= 0 {
|
||||||
|
base = exe[idx+1:]
|
||||||
|
}
|
||||||
|
for _, pat := range workerPatterns {
|
||||||
|
if strings.Contains(base, pat) || strings.Contains(exe, pat) {
|
||||||
|
_ = syscall.Kill(pid, syscall.SIGKILL)
|
||||||
|
killed = append(killed, KilledProcess{PID: pid, Name: base})
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return killed
|
||||||
|
}
|
||||||
@@ -68,18 +68,20 @@ func SampleLiveMetrics() LiveMetricSample {
|
|||||||
|
|
||||||
// sampleCPULoadPct reads two /proc/stat snapshots 200ms apart and returns
|
// sampleCPULoadPct reads two /proc/stat snapshots 200ms apart and returns
|
||||||
// the overall CPU utilisation percentage.
|
// the overall CPU utilisation percentage.
|
||||||
var cpuStatPrev [2]uint64 // [total, idle]
|
|
||||||
|
|
||||||
func sampleCPULoadPct() float64 {
|
func sampleCPULoadPct() float64 {
|
||||||
total, idle := readCPUStat()
|
total0, idle0 := readCPUStat()
|
||||||
if total == 0 {
|
if total0 == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
prevTotal, prevIdle := cpuStatPrev[0], cpuStatPrev[1]
|
time.Sleep(200 * time.Millisecond)
|
||||||
cpuStatPrev = [2]uint64{total, idle}
|
total1, idle1 := readCPUStat()
|
||||||
if prevTotal == 0 {
|
if total1 == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
return cpuLoadPctBetween(total0, idle0, total1, idle1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func cpuLoadPctBetween(prevTotal, prevIdle, total, idle uint64) float64 {
|
||||||
dt := float64(total - prevTotal)
|
dt := float64(total - prevTotal)
|
||||||
di := float64(idle - prevIdle)
|
di := float64(idle - prevIdle)
|
||||||
if dt <= 0 {
|
if dt <= 0 {
|
||||||
|
|||||||
@@ -42,3 +42,53 @@ func TestCompactAmbientTempName(t *testing.T) {
|
|||||||
t.Fatalf("got %q", got)
|
t.Fatalf("got %q", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestCPULoadPctBetween validates the CPU utilisation computation between
// two /proc/stat snapshots, including the no-progress case and the clamp to
// zero when the idle delta exceeds the total delta.
func TestCPULoadPctBetween(t *testing.T) {
	tests := []struct {
		name      string
		prevTotal uint64
		prevIdle  uint64
		total     uint64
		idle      uint64
		want      float64
	}{
		{name: "busy half", prevTotal: 100, prevIdle: 40, total: 200, idle: 90, want: 50},
		{name: "fully busy", prevTotal: 100, prevIdle: 40, total: 200, idle: 40, want: 100},
		{name: "no progress", prevTotal: 100, prevIdle: 40, total: 100, idle: 40, want: 0},
		{name: "idle delta larger than total clamps to zero", prevTotal: 100, prevIdle: 40, total: 200, idle: 150, want: 0},
	}

	for _, tc := range tests {
		if got := cpuLoadPctBetween(tc.prevTotal, tc.prevIdle, tc.total, tc.idle); got != tc.want {
			t.Fatalf("%s: cpuLoadPctBetween(...)=%v want %v", tc.name, got, tc.want)
		}
	}
}
|
||||||
|
|||||||
203
audit/internal/platform/nvidia_stress.go
Normal file
203
audit/internal/platform/nvidia_stress.go
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RunNvidiaStressPack runs the NVIDIA GPU stress acceptance pack: it captures
// nvidia-smi state before the run, executes the configured stress loader,
// then captures a post-run GPU summary, all wrapped in NVIDIA persistence
// mode. The result is whatever runAcceptancePackCtx returns (presumably the
// produced archive path — confirm at the call site).
func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts NvidiaStressOptions, logFunc func(string)) (string, error) {
	// Fill in defaults and canonicalize the loader name before building the job.
	normalizeNvidiaStressOptions(&opts)

	job, err := buildNvidiaStressJob(opts)
	if err != nil {
		return "", err
	}

	return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), withNvidiaPersistenceMode(
		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		satJob{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
		job,
		satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	), logFunc)
}
|
||||||
|
|
||||||
|
func nvidiaStressArchivePrefix(loader string) string {
|
||||||
|
switch strings.TrimSpace(strings.ToLower(loader)) {
|
||||||
|
case NvidiaStressLoaderJohn:
|
||||||
|
return "gpu-nvidia-john"
|
||||||
|
case NvidiaStressLoaderNCCL:
|
||||||
|
return "gpu-nvidia-nccl"
|
||||||
|
default:
|
||||||
|
return "gpu-nvidia-burn"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
|
||||||
|
selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
|
||||||
|
if err != nil {
|
||||||
|
return satJob{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
loader := strings.TrimSpace(strings.ToLower(opts.Loader))
|
||||||
|
switch loader {
|
||||||
|
case "", NvidiaStressLoaderBuiltin:
|
||||||
|
cmd := []string{
|
||||||
|
"bee-gpu-burn",
|
||||||
|
"--seconds", strconv.Itoa(opts.DurationSec),
|
||||||
|
"--size-mb", strconv.Itoa(opts.SizeMB),
|
||||||
|
}
|
||||||
|
if len(selected) > 0 {
|
||||||
|
cmd = append(cmd, "--devices", joinIndexList(selected))
|
||||||
|
}
|
||||||
|
return satJob{
|
||||||
|
name: "03-bee-gpu-burn.log",
|
||||||
|
cmd: cmd,
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
}, nil
|
||||||
|
case NvidiaStressLoaderJohn:
|
||||||
|
cmd := []string{
|
||||||
|
"bee-john-gpu-stress",
|
||||||
|
"--seconds", strconv.Itoa(opts.DurationSec),
|
||||||
|
}
|
||||||
|
if len(selected) > 0 {
|
||||||
|
cmd = append(cmd, "--devices", joinIndexList(selected))
|
||||||
|
}
|
||||||
|
return satJob{
|
||||||
|
name: "03-john-gpu-stress.log",
|
||||||
|
cmd: cmd,
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
}, nil
|
||||||
|
case NvidiaStressLoaderNCCL:
|
||||||
|
cmd := []string{
|
||||||
|
"bee-nccl-gpu-stress",
|
||||||
|
"--seconds", strconv.Itoa(opts.DurationSec),
|
||||||
|
}
|
||||||
|
if len(selected) > 0 {
|
||||||
|
cmd = append(cmd, "--devices", joinIndexList(selected))
|
||||||
|
}
|
||||||
|
return satJob{
|
||||||
|
name: "03-bee-nccl-gpu-stress.log",
|
||||||
|
cmd: cmd,
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
}, nil
|
||||||
|
default:
|
||||||
|
return satJob{}, fmt.Errorf("unknown NVIDIA stress loader %q", opts.Loader)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeNvidiaStressOptions(opts *NvidiaStressOptions) {
|
||||||
|
if opts.DurationSec <= 0 {
|
||||||
|
opts.DurationSec = 300
|
||||||
|
}
|
||||||
|
// SizeMB=0 means "auto" — bee-gpu-burn will query per-GPU memory at runtime.
|
||||||
|
switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
|
||||||
|
case "", NvidiaStressLoaderBuiltin:
|
||||||
|
opts.Loader = NvidiaStressLoaderBuiltin
|
||||||
|
case NvidiaStressLoaderJohn:
|
||||||
|
opts.Loader = NvidiaStressLoaderJohn
|
||||||
|
case NvidiaStressLoaderNCCL:
|
||||||
|
opts.Loader = NvidiaStressLoaderNCCL
|
||||||
|
default:
|
||||||
|
opts.Loader = NvidiaStressLoaderBuiltin
|
||||||
|
}
|
||||||
|
opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
|
||||||
|
opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveNvidiaGPUSelection(include, exclude []int) ([]int, error) {
|
||||||
|
all, err := listNvidiaGPUIndices()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(all) == 0 {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
|
||||||
|
}
|
||||||
|
|
||||||
|
selected := all
|
||||||
|
if len(include) > 0 {
|
||||||
|
want := make(map[int]struct{}, len(include))
|
||||||
|
for _, idx := range include {
|
||||||
|
want[idx] = struct{}{}
|
||||||
|
}
|
||||||
|
selected = selected[:0]
|
||||||
|
for _, idx := range all {
|
||||||
|
if _, ok := want[idx]; ok {
|
||||||
|
selected = append(selected, idx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(exclude) > 0 {
|
||||||
|
skip := make(map[int]struct{}, len(exclude))
|
||||||
|
for _, idx := range exclude {
|
||||||
|
skip[idx] = struct{}{}
|
||||||
|
}
|
||||||
|
filtered := selected[:0]
|
||||||
|
for _, idx := range selected {
|
||||||
|
if _, ok := skip[idx]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, idx)
|
||||||
|
}
|
||||||
|
selected = filtered
|
||||||
|
}
|
||||||
|
if len(selected) == 0 {
|
||||||
|
return nil, fmt.Errorf("no NVIDIA GPUs selected after applying filters")
|
||||||
|
}
|
||||||
|
out := append([]int(nil), selected...)
|
||||||
|
sort.Ints(out)
|
||||||
|
return out, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func listNvidiaGPUIndices() ([]int, error) {
|
||||||
|
out, err := satExecCommand("nvidia-smi", "--query-gpu=index", "--format=csv,noheader,nounits").Output()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi: %w", err)
|
||||||
|
}
|
||||||
|
var indices []int
|
||||||
|
for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
idx, err := strconv.Atoi(line)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
indices = append(indices, idx)
|
||||||
|
}
|
||||||
|
return dedupeSortedIndices(indices), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// dedupeSortedIndices returns the distinct non-negative values from the
// input in ascending order. A nil or empty input yields nil; negative
// indices are dropped.
func dedupeSortedIndices(values []int) []int {
	if len(values) == 0 {
		return nil
	}
	unique := make(map[int]struct{}, len(values))
	for _, v := range values {
		if v >= 0 {
			unique[v] = struct{}{}
		}
	}
	out := make([]int, 0, len(unique))
	for v := range unique {
		out = append(out, v)
	}
	sort.Ints(out)
	return out
}
|
||||||
|
|
||||||
|
// joinIndexList renders the indices as a comma-separated decimal list,
// e.g. [0 2 7] -> "0,2,7". An empty slice produces "".
func joinIndexList(values []int) string {
	var b strings.Builder
	for i, v := range values {
		if i > 0 {
			b.WriteByte(',')
		}
		b.WriteString(strconv.Itoa(v))
	}
	return b.String()
}
|
||||||
569
audit/internal/platform/platform_stress.go
Normal file
569
audit/internal/platform/platform_stress.go
Normal file
@@ -0,0 +1,569 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/tar"
|
||||||
|
"bytes"
|
||||||
|
"compress/gzip"
|
||||||
|
"context"
|
||||||
|
"encoding/csv"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PlatformStressCycle defines one load+idle cycle of the thermal test:
// a burst of simultaneous CPU+GPU stress followed by an abrupt cut to idle
// while telemetry continues, to observe cooling recovery.
type PlatformStressCycle struct {
	LoadSec int // seconds of simultaneous CPU+GPU stress
	IdleSec int // seconds of idle monitoring after load cut
}
|
||||||
|
|
||||||
|
// PlatformStressOptions controls the thermal cycling test.
type PlatformStressOptions struct {
	// Cycles are run in order; the run stops early if the context is cancelled.
	Cycles []PlatformStressCycle
	// Components selects which stressors run. If empty: run all.
	// Recognized values: "cpu", "gpu" (exact, lowercase match).
	Components []string
}
|
||||||
|
|
||||||
|
// platformStressRow is one second of telemetry captured during a cycle.
type platformStressRow struct {
	ElapsedSec float64 // seconds since the whole test started (shared timeline across cycles)
	Cycle      int     // 1-based cycle number this sample belongs to
	Phase      string  // "load" | "idle"
	CPULoadPct float64 // CPU load percentage, from LiveMetricSample
	MaxCPUTempC float64 // hottest "cpu"-group temperature reading, °C
	MaxGPUTempC float64 // hottest GPU temperature (sensor group or per-GPU telemetry), °C
	SysPowerW  float64 // system power draw, watts
	FanMinRPM  float64 // slowest fan this second (0 when no fan data)
	FanMaxRPM  float64 // fastest fan this second
	GPUThrottled bool  // GPU throttle flag; NOTE(review): not populated by sampleToPlatformRow in this file — confirm upstream setter
}
|
||||||
|
|
||||||
|
// RunPlatformStress runs repeated load+idle thermal cycling.
// Each cycle starts CPU (stressapptest) and GPU stress simultaneously,
// runs for LoadSec, then cuts load abruptly and monitors for IdleSec.
//
// Per-second telemetry is logged live via logFunc, written to metrics.csv,
// summarized into summary.txt, and the run directory is packed into a
// tar.gz under baseDir. Returns the archive path on success.
func (s *System) RunPlatformStress(
	ctx context.Context,
	baseDir string,
	opts PlatformStressOptions,
	logFunc func(string),
) (string, error) {
	if logFunc == nil {
		logFunc = func(string) {}
	}
	if len(opts.Cycles) == 0 {
		return "", fmt.Errorf("no cycles defined")
	}
	if err := os.MkdirAll(baseDir, 0755); err != nil {
		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
	}

	// One timestamped working directory per run; removed after packing.
	stamp := time.Now().UTC().Format("20060102-150405")
	runDir := filepath.Join(baseDir, "platform-stress-"+stamp)
	if err := os.MkdirAll(runDir, 0755); err != nil {
		return "", fmt.Errorf("mkdir run dir: %w", err)
	}

	// Empty Components means "stress everything".
	hasCPU := len(opts.Components) == 0 || containsComponent(opts.Components, "cpu")
	hasGPU := len(opts.Components) == 0 || containsComponent(opts.Components, "gpu")

	vendor := s.DetectGPUVendor()
	logFunc(fmt.Sprintf("Platform Thermal Cycling — %d cycle(s), GPU vendor: %s, cpu=%v gpu=%v", len(opts.Cycles), vendor, hasCPU, hasGPU))

	var rows []platformStressRow
	start := time.Now() // shared timeline origin for all cycles/phases

	var analyses []cycleAnalysis

	for i, cycle := range opts.Cycles {
		// Stop cleanly between cycles on cancellation; results so far are kept.
		if ctx.Err() != nil {
			break
		}
		cycleNum := i + 1
		logFunc(fmt.Sprintf("--- Cycle %d/%d: load=%ds, idle=%ds ---", cycleNum, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec))

		// ── LOAD PHASE ───────────────────────────────────────────────────────
		// The timeout both bounds the phase and kills the stressors (their
		// Cancel hooks SIGKILL the process groups).
		loadCtx, loadCancel := context.WithTimeout(ctx, time.Duration(cycle.LoadSec)*time.Second)
		var wg sync.WaitGroup

		// CPU stress
		if hasCPU {
			wg.Add(1)
			go func() {
				defer wg.Done()
				cpuCmd, err := buildCPUStressCmd(loadCtx)
				if err != nil {
					// Tool missing or failed to start: log and continue GPU-only.
					logFunc("CPU stress: " + err.Error())
					return
				}
				_ = cpuCmd.Wait() // exits when loadCtx times out (SIGKILL)
			}()
		}

		// GPU stress
		if hasGPU {
			wg.Add(1)
			go func() {
				defer wg.Done()
				gpuCmd := buildGPUStressCmd(loadCtx, vendor, cycle.LoadSec)
				if gpuCmd == nil {
					// No usable GPU stress tool for this vendor; skip silently.
					return
				}
				_ = gpuCmd.Wait()
			}()
		}

		// Monitoring goroutine for load phase.
		// collectPhase blocks on this goroutine until loadCtx expires, so the
		// stressors run exactly alongside the sampling window.
		loadRows := collectPhase(loadCtx, cycleNum, "load", start)
		for _, r := range loadRows {
			logFunc(formatPlatformRow(r))
		}
		rows = append(rows, loadRows...)
		loadCancel()
		wg.Wait() // ensure both stressors are fully reaped before idling

		if len(loadRows) > 0 {
			logFunc(fmt.Sprintf("Cycle %d load ended (%.0fs)", cycleNum, loadRows[len(loadRows)-1].ElapsedSec))
		}

		// ── IDLE PHASE ───────────────────────────────────────────────────────
		idleCtx, idleCancel := context.WithTimeout(ctx, time.Duration(cycle.IdleSec)*time.Second)
		idleRows := collectPhase(idleCtx, cycleNum, "idle", start)
		for _, r := range idleRows {
			logFunc(formatPlatformRow(r))
		}
		rows = append(rows, idleRows...)
		idleCancel()

		// Per-cycle analysis
		an := analyzePlatformCycle(loadRows, idleRows)
		analyses = append(analyses, an)
		logFunc(fmt.Sprintf("Cycle %d: maxCPU=%.1f°C maxGPU=%.1f°C power=%.0fW throttled=%v fanDrop=%.0f%%",
			cycleNum, an.maxCPUTemp, an.maxGPUTemp, an.maxPower, an.throttled, an.fanDropPct))
	}

	// Write CSV (best effort — a failed write still produces the summary).
	csvData := writePlatformCSV(rows)
	_ = os.WriteFile(filepath.Join(runDir, "metrics.csv"), csvData, 0644)

	// Write summary
	summary := writePlatformSummary(opts, analyses)
	logFunc("--- Summary ---")
	for _, line := range strings.Split(summary, "\n") {
		if line != "" {
			logFunc(line)
		}
	}
	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)

	// Pack tar.gz; the unpacked run directory is removed once the archive exists.
	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
	if err := packPlatformDir(runDir, archivePath); err != nil {
		return "", fmt.Errorf("pack archive: %w", err)
	}
	_ = os.RemoveAll(runDir)
	return archivePath, nil
}
|
||||||
|
|
||||||
|
// collectPhase samples live metrics every second until ctx is done.
|
||||||
|
func collectPhase(ctx context.Context, cycle int, phase string, testStart time.Time) []platformStressRow {
|
||||||
|
var rows []platformStressRow
|
||||||
|
ticker := time.NewTicker(time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return rows
|
||||||
|
case <-ticker.C:
|
||||||
|
sample := SampleLiveMetrics()
|
||||||
|
rows = append(rows, sampleToPlatformRow(sample, cycle, phase, testStart))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func sampleToPlatformRow(s LiveMetricSample, cycle int, phase string, testStart time.Time) platformStressRow {
|
||||||
|
r := platformStressRow{
|
||||||
|
ElapsedSec: time.Since(testStart).Seconds(),
|
||||||
|
Cycle: cycle,
|
||||||
|
Phase: phase,
|
||||||
|
CPULoadPct: s.CPULoadPct,
|
||||||
|
SysPowerW: s.PowerW,
|
||||||
|
}
|
||||||
|
for _, t := range s.Temps {
|
||||||
|
switch t.Group {
|
||||||
|
case "cpu":
|
||||||
|
if t.Celsius > r.MaxCPUTempC {
|
||||||
|
r.MaxCPUTempC = t.Celsius
|
||||||
|
}
|
||||||
|
case "gpu":
|
||||||
|
if t.Celsius > r.MaxGPUTempC {
|
||||||
|
r.MaxGPUTempC = t.Celsius
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, g := range s.GPUs {
|
||||||
|
if g.TempC > r.MaxGPUTempC {
|
||||||
|
r.MaxGPUTempC = g.TempC
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(s.Fans) > 0 {
|
||||||
|
r.FanMinRPM = s.Fans[0].RPM
|
||||||
|
r.FanMaxRPM = s.Fans[0].RPM
|
||||||
|
for _, f := range s.Fans[1:] {
|
||||||
|
if f.RPM < r.FanMinRPM {
|
||||||
|
r.FanMinRPM = f.RPM
|
||||||
|
}
|
||||||
|
if f.RPM > r.FanMaxRPM {
|
||||||
|
r.FanMaxRPM = f.RPM
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatPlatformRow(r platformStressRow) string {
|
||||||
|
throttle := ""
|
||||||
|
if r.GPUThrottled {
|
||||||
|
throttle = " THROTTLE"
|
||||||
|
}
|
||||||
|
fans := ""
|
||||||
|
if r.FanMinRPM > 0 {
|
||||||
|
fans = fmt.Sprintf(" fans=%.0f-%.0fRPM", r.FanMinRPM, r.FanMaxRPM)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("[%5.0fs] cycle=%d phase=%-4s cpu=%.0f%% cpuT=%.1f°C gpuT=%.1f°C pwr=%.0fW%s%s",
|
||||||
|
r.ElapsedSec, r.Cycle, r.Phase, r.CPULoadPct, r.MaxCPUTempC, r.MaxGPUTempC, r.SysPowerW, fans, throttle)
|
||||||
|
}
|
||||||
|
|
||||||
|
// analyzePlatformCycle condenses one cycle's telemetry into a cycleAnalysis:
// peak CPU/GPU temperature, peak power, and throttle detection over the load
// phase, plus how sharply the fans spun down within 15 seconds of the load
// being cut (a proxy for overly aggressive fan-control recovery).
func analyzePlatformCycle(loadRows, idleRows []platformStressRow) cycleAnalysis {
	var an cycleAnalysis
	// Peaks are taken over the load phase only; idle rows feed the fan check.
	for _, r := range loadRows {
		if r.MaxCPUTempC > an.maxCPUTemp {
			an.maxCPUTemp = r.MaxCPUTempC
		}
		if r.MaxGPUTempC > an.maxGPUTemp {
			an.maxGPUTemp = r.MaxGPUTempC
		}
		if r.SysPowerW > an.maxPower {
			an.maxPower = r.SysPowerW
		}
		if r.GPUThrottled {
			an.throttled = true
		}
	}
	// Fan RPM at cut = avg of last 5 load rows
	if n := len(loadRows); n > 0 {
		window := loadRows
		if n > 5 {
			window = loadRows[n-5:]
		}
		var sum float64
		var cnt int
		for _, r := range window {
			// Rows with no fan data (FanMinRPM == 0) are excluded from the average.
			if r.FanMinRPM > 0 {
				// Midpoint of min/max approximates the overall fan speed.
				sum += (r.FanMinRPM + r.FanMaxRPM) / 2
				cnt++
			}
		}
		if cnt > 0 {
			an.fanAtCutAvg = sum / float64(cnt)
		}
	}
	// Fan RPM min in first 15s of idle
	an.fanMin15s = an.fanAtCutAvg
	var cutElapsed float64
	if len(loadRows) > 0 {
		cutElapsed = loadRows[len(loadRows)-1].ElapsedSec
	}
	for _, r := range idleRows {
		// Idle rows are appended in time order, so stop past the 15s window.
		if r.ElapsedSec > cutElapsed+15 {
			break
		}
		avg := (r.FanMinRPM + r.FanMaxRPM) / 2
		// avg == 0 means no fan data for that second; skip it.
		if avg > 0 && (an.fanMin15s == 0 || avg < an.fanMin15s) {
			an.fanMin15s = avg
		}
	}
	if an.fanAtCutAvg > 0 {
		an.fanDropPct = (an.fanAtCutAvg - an.fanMin15s) / an.fanAtCutAvg * 100
	}
	return an
}
|
||||||
|
|
||||||
|
// cycleAnalysis summarizes one load+idle cycle for the report.
type cycleAnalysis struct {
	maxCPUTemp  float64 // peak CPU temperature (°C) during the load phase
	maxGPUTemp  float64 // peak GPU temperature (°C) during the load phase
	maxPower    float64 // peak system power (W) during the load phase
	throttled   bool    // true if any load-phase sample had GPUThrottled set
	fanAtCutAvg float64 // average fan RPM over the last (up to) 5 load samples
	fanMin15s   float64 // lowest average fan RPM within 15s after load cut
	fanDropPct  float64 // percent drop from fanAtCutAvg to fanMin15s
}
|
||||||
|
|
||||||
|
func writePlatformSummary(opts PlatformStressOptions, analyses []cycleAnalysis) string {
|
||||||
|
var b strings.Builder
|
||||||
|
fmt.Fprintf(&b, "Platform Thermal Cycling — %d cycle(s)\n", len(opts.Cycles))
|
||||||
|
fmt.Fprintf(&b, "%s\n\n", strings.Repeat("=", 48))
|
||||||
|
|
||||||
|
totalThrottle := 0
|
||||||
|
totalFanWarn := 0
|
||||||
|
for i, an := range analyses {
|
||||||
|
cycle := opts.Cycles[i]
|
||||||
|
fmt.Fprintf(&b, "Cycle %d/%d (load=%ds, idle=%ds)\n", i+1, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec)
|
||||||
|
fmt.Fprintf(&b, " Max CPU temp: %.1f°C\n", an.maxCPUTemp)
|
||||||
|
fmt.Fprintf(&b, " Max GPU temp: %.1f°C\n", an.maxGPUTemp)
|
||||||
|
fmt.Fprintf(&b, " Max sys power: %.0f W\n", an.maxPower)
|
||||||
|
if an.throttled {
|
||||||
|
fmt.Fprintf(&b, " Throttle: DETECTED\n")
|
||||||
|
totalThrottle++
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&b, " Throttle: none\n")
|
||||||
|
}
|
||||||
|
if an.fanAtCutAvg > 0 {
|
||||||
|
fmt.Fprintf(&b, " Fan at load cut: %.0f RPM avg\n", an.fanAtCutAvg)
|
||||||
|
fmt.Fprintf(&b, " Fan min (first 15s idle): %.0f RPM (drop %.0f%%)\n", an.fanMin15s, an.fanDropPct)
|
||||||
|
if an.fanDropPct > 20 {
|
||||||
|
fmt.Fprintf(&b, " Fan response: WARN — fast spindown (>20%% drop in 15s)\n")
|
||||||
|
totalFanWarn++
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&b, " Fan response: OK\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(&b, "%s\n", strings.Repeat("=", 48))
|
||||||
|
if totalThrottle > 0 {
|
||||||
|
fmt.Fprintf(&b, "Overall: FAIL — throttle detected in %d/%d cycles\n", totalThrottle, len(analyses))
|
||||||
|
} else if totalFanWarn > 0 {
|
||||||
|
fmt.Fprintf(&b, "Overall: WARN — fast fan spindown in %d/%d cycles (cooling recovery risk)\n", totalFanWarn, len(analyses))
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&b, "Overall: PASS\n")
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func writePlatformCSV(rows []platformStressRow) []byte {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
w := csv.NewWriter(&buf)
|
||||||
|
_ = w.Write([]string{
|
||||||
|
"elapsed_sec", "cycle", "phase",
|
||||||
|
"cpu_load_pct", "max_cpu_temp_c", "max_gpu_temp_c",
|
||||||
|
"sys_power_w", "fan_min_rpm", "fan_max_rpm", "gpu_throttled",
|
||||||
|
})
|
||||||
|
for _, r := range rows {
|
||||||
|
throttled := "0"
|
||||||
|
if r.GPUThrottled {
|
||||||
|
throttled = "1"
|
||||||
|
}
|
||||||
|
_ = w.Write([]string{
|
||||||
|
strconv.FormatFloat(r.ElapsedSec, 'f', 1, 64),
|
||||||
|
strconv.Itoa(r.Cycle),
|
||||||
|
r.Phase,
|
||||||
|
strconv.FormatFloat(r.CPULoadPct, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(r.MaxCPUTempC, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(r.MaxGPUTempC, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(r.SysPowerW, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(r.FanMinRPM, 'f', 0, 64),
|
||||||
|
strconv.FormatFloat(r.FanMaxRPM, 'f', 0, 64),
|
||||||
|
throttled,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
w.Flush()
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildCPUStressCmd creates a stressapptest command that runs until ctx is cancelled.
// The returned command is already started; callers only Wait on it.
func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
	path, err := satLookPath("stressapptest")
	if err != nil {
		return nil, fmt.Errorf("stressapptest not found: %w", err)
	}
	// Use a very long duration; the context timeout will kill it at the right time.
	cmdArgs := []string{"-s", "86400", "-W", "--cc_test"}
	if threads := platformStressCPUThreads(); threads > 0 {
		cmdArgs = append(cmdArgs, "-m", strconv.Itoa(threads))
	}
	if mb := platformStressMemoryMB(); mb > 0 {
		cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb))
	}
	cmd := exec.CommandContext(ctx, path, cmdArgs...)
	// Own process group so the Cancel hook can kill stressapptest and any
	// workers it forks in one shot.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	cmd.Cancel = func() error {
		if cmd.Process != nil {
			// Negative PID signals the whole process group.
			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
		}
		return nil
	}
	// Output is discarded; telemetry comes from the sampling loop, not the tool.
	cmd.Stdout = nil
	cmd.Stderr = nil
	// nice 15 keeps the stressor from starving the monitoring goroutines.
	if err := startLowPriorityCmd(cmd, 15); err != nil {
		return nil, fmt.Errorf("stressapptest start: %w", err)
	}
	return cmd, nil
}
|
||||||
|
|
||||||
|
// buildGPUStressCmd creates a GPU stress command appropriate for the detected vendor.
|
||||||
|
// Returns nil if no GPU stress tool is available (CPU-only cycling still useful).
|
||||||
|
func buildGPUStressCmd(ctx context.Context, vendor string, durSec int) *exec.Cmd {
|
||||||
|
switch strings.ToLower(vendor) {
|
||||||
|
case "amd":
|
||||||
|
return buildAMDGPUStressCmd(ctx, durSec)
|
||||||
|
case "nvidia":
|
||||||
|
return buildNvidiaGPUStressCmd(ctx, durSec)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildAMDGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
|
||||||
|
rvsArgs, err := resolveRVSCommand()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
rvsPath := rvsArgs[0]
|
||||||
|
cfg := fmt.Sprintf(`actions:
|
||||||
|
- name: gst_platform
|
||||||
|
device: all
|
||||||
|
module: gst
|
||||||
|
parallel: true
|
||||||
|
duration: %d`, durSec*1000) + `
|
||||||
|
copy_matrix: false
|
||||||
|
target_stress: 90
|
||||||
|
matrix_size_a: 8640
|
||||||
|
matrix_size_b: 8640
|
||||||
|
matrix_size_c: 8640
|
||||||
|
`
|
||||||
|
cfgFile := "/tmp/bee-platform-gst.conf"
|
||||||
|
_ = os.WriteFile(cfgFile, []byte(cfg), 0644)
|
||||||
|
cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
|
||||||
|
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||||
|
cmd.Cancel = func() error {
|
||||||
|
if cmd.Process != nil {
|
||||||
|
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
cmd.Stdout = nil
|
||||||
|
cmd.Stderr = nil
|
||||||
|
_ = startLowPriorityCmd(cmd, 10)
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildNvidiaGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
|
||||||
|
path, err := satLookPath("bee-gpu-burn")
|
||||||
|
if err != nil {
|
||||||
|
path, err = satLookPath("bee-gpu-stress")
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// Pass exact duration so bee-gpu-burn exits on its own when the cycle ends.
|
||||||
|
// Process group kill via Setpgid+Cancel is kept as a safety net for cases
|
||||||
|
// where the context is cancelled early (user stop, parent timeout).
|
||||||
|
cmd := exec.CommandContext(ctx, path, "--seconds", strconv.Itoa(durSec))
|
||||||
|
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||||
|
cmd.Cancel = func() error {
|
||||||
|
if cmd.Process != nil {
|
||||||
|
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
cmd.Stdout = nil
|
||||||
|
cmd.Stderr = nil
|
||||||
|
_ = startLowPriorityCmd(cmd, 10)
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
// startLowPriorityCmd starts cmd and then lowers its scheduling priority to
// the given nice value so stress workloads don't starve the monitoring path.
// A Setpriority failure is deliberately ignored — the command still runs,
// just without the niceness adjustment.
func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
	if err := cmd.Start(); err != nil {
		return err
	}
	if cmd.Process != nil {
		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
	}
	return nil
}
|
||||||
|
|
||||||
|
func platformStressCPUThreads() int {
|
||||||
|
if n := envInt("BEE_PLATFORM_STRESS_THREADS", 0); n > 0 {
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
cpus := runtime.NumCPU()
|
||||||
|
switch {
|
||||||
|
case cpus <= 2:
|
||||||
|
return 1
|
||||||
|
case cpus <= 8:
|
||||||
|
return cpus - 1
|
||||||
|
default:
|
||||||
|
return cpus - 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func platformStressMemoryMB() int {
|
||||||
|
if mb := envInt("BEE_PLATFORM_STRESS_MB", 0); mb > 0 {
|
||||||
|
return mb
|
||||||
|
}
|
||||||
|
free := freeMemBytes()
|
||||||
|
if free <= 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
mb := int((free * 60) / 100 / (1024 * 1024))
|
||||||
|
if mb < 1024 {
|
||||||
|
return 1024
|
||||||
|
}
|
||||||
|
return mb
|
||||||
|
}
|
||||||
|
|
||||||
|
// containsComponent reports whether name appears in components
// (exact, case-sensitive match).
func containsComponent(components []string, name string) bool {
	for i := range components {
		if components[i] == name {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// packPlatformDir writes every regular file directly inside dir into a
// gzip-compressed tar archive at dest, under a top-level directory named
// after dir's base name. Subdirectories are skipped and unreadable files
// are silently omitted (best effort).
//
// The tar and gzip writers are closed explicitly so that flush errors
// surface to the caller — the previous deferred Close calls swallowed them,
// meaning a truncated/corrupt archive could be reported as success.
func packPlatformDir(dir, dest string) error {
	f, err := os.Create(dest)
	if err != nil {
		return err
	}
	// Safety net for early returns; the success path closes explicitly below.
	defer f.Close()

	gz := gzip.NewWriter(f)
	tw := tar.NewWriter(gz)

	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	base := filepath.Base(dir)
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		data, err := os.ReadFile(filepath.Join(dir, e.Name()))
		if err != nil {
			// Best effort: skip files that vanished or are unreadable.
			continue
		}
		hdr := &tar.Header{
			Name:    filepath.Join(base, e.Name()),
			Size:    int64(len(data)),
			Mode:    0644,
			ModTime: time.Now(),
		}
		if err := tw.WriteHeader(hdr); err != nil {
			return err
		}
		if _, err := tw.Write(data); err != nil {
			return err
		}
	}
	// Close in order (tar, then gzip, then file) and propagate each error:
	// these Closes flush buffered data, so failures mean a corrupt archive.
	if err := tw.Close(); err != nil {
		return err
	}
	if err := gz.Close(); err != nil {
		return err
	}
	return f.Close()
}
|
||||||
34
audit/internal/platform/platform_stress_test.go
Normal file
34
audit/internal/platform/platform_stress_test.go
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestPlatformStressCPUThreadsOverride verifies that the
// BEE_PLATFORM_STRESS_THREADS env override wins regardless of host core count.
func TestPlatformStressCPUThreadsOverride(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
	if got := platformStressCPUThreads(); got != 7 {
		t.Fatalf("platformStressCPUThreads=%d want 7", got)
	}
}
|
||||||
|
|
||||||
|
// TestPlatformStressCPUThreadsDefaultLeavesHeadroom checks the default
// thread count: at least 1, never above NumCPU, and — on machines with more
// than two cores — strictly below NumCPU so monitoring keeps headroom.
func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
	// Empty env value disables the override path.
	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
	got := platformStressCPUThreads()
	if got < 1 {
		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
	}
	if got > runtime.NumCPU() {
		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, runtime.NumCPU())
	}
	// Catches the got == NumCPU case the previous check allows.
	if runtime.NumCPU() > 2 && got >= runtime.NumCPU() {
		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, runtime.NumCPU())
	}
}
|
||||||
|
|
||||||
|
// TestPlatformStressMemoryMBOverride verifies that the
// BEE_PLATFORM_STRESS_MB env override bypasses the free-memory heuristic.
func TestPlatformStressMemoryMBOverride(t *testing.T) {
	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
	if got := platformStressMemoryMB(); got != 8192 {
		t.Fatalf("platformStressMemoryMB=%d want 8192", got)
	}
}
|
||||||
@@ -135,9 +135,15 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
|
|||||||
case "nvidia":
|
case "nvidia":
|
||||||
tools = append(tools, s.CheckTools([]string{
|
tools = append(tools, s.CheckTools([]string{
|
||||||
"nvidia-smi",
|
"nvidia-smi",
|
||||||
|
"dcgmi",
|
||||||
|
"nv-hostengine",
|
||||||
"nvidia-bug-report.sh",
|
"nvidia-bug-report.sh",
|
||||||
"bee-gpu-stress",
|
"bee-gpu-burn",
|
||||||
|
"bee-john-gpu-stress",
|
||||||
|
"bee-nccl-gpu-stress",
|
||||||
|
"all_reduce_perf",
|
||||||
})...)
|
})...)
|
||||||
|
tools = append(tools, resolvedToolStatus("dcgmproftester", dcgmProfTesterCandidates...))
|
||||||
case "amd":
|
case "amd":
|
||||||
tool := ToolStatus{Name: "rocm-smi"}
|
tool := ToolStatus{Name: "rocm-smi"}
|
||||||
if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
|
if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
|
||||||
@@ -152,11 +158,37 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
|
|||||||
return tools
|
return tools
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func resolvedToolStatus(display string, candidates ...string) ToolStatus {
|
||||||
|
for _, candidate := range candidates {
|
||||||
|
path, err := exec.LookPath(candidate)
|
||||||
|
if err == nil {
|
||||||
|
return ToolStatus{Name: display, Path: path, OK: true}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ToolStatus{Name: display}
|
||||||
|
}
|
||||||
|
|
||||||
func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
|
func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
|
||||||
lsmodText := commandText("lsmod")
|
lsmodText := commandText("lsmod")
|
||||||
|
|
||||||
switch vendor {
|
switch vendor {
|
||||||
case "nvidia":
|
case "nvidia":
|
||||||
|
if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
|
||||||
|
health.NvidiaGSPMode = strings.TrimSpace(string(raw))
|
||||||
|
if health.NvidiaGSPMode == "gsp-stuck" {
|
||||||
|
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||||
|
Code: "nvidia_gsp_stuck",
|
||||||
|
Severity: "critical",
|
||||||
|
Description: "NVIDIA GSP firmware init timed out and the kernel module is stuck. Reboot and select 'GSP=off' in the boot menu.",
|
||||||
|
})
|
||||||
|
} else if health.NvidiaGSPMode == "gsp-off" {
|
||||||
|
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||||
|
Code: "nvidia_gsp_disabled",
|
||||||
|
Severity: "warning",
|
||||||
|
Description: "NVIDIA GSP firmware disabled (fallback). Power management runs via CPU path — power draw readings may differ from reference hardware.",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
|
health.DriverReady = strings.Contains(lsmodText, "nvidia ")
|
||||||
if !health.DriverReady {
|
if !health.DriverReady {
|
||||||
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
health.Issues = append(health.Issues, schema.RuntimeIssue{
|
||||||
@@ -176,8 +208,8 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe
|
|||||||
health.DriverReady = true
|
health.DriverReady = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
|
if _, lookErr := exec.LookPath("bee-gpu-burn"); lookErr == nil {
|
||||||
out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
|
out, err := exec.Command("bee-gpu-burn", "--seconds", "1", "--size-mb", "1").CombinedOutput()
|
||||||
if err == nil {
|
if err == nil {
|
||||||
health.CUDAReady = true
|
health.CUDAReady = true
|
||||||
} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
|
} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
|
||||||
|
|||||||
@@ -16,14 +16,16 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
satExecCommand = exec.Command
|
satExecCommand = exec.Command
|
||||||
satLookPath = exec.LookPath
|
satLookPath = exec.LookPath
|
||||||
satGlob = filepath.Glob
|
satGlob = filepath.Glob
|
||||||
satStat = os.Stat
|
satStat = os.Stat
|
||||||
|
satFreeMemBytes = freeMemBytes
|
||||||
|
|
||||||
rocmSMIExecutableGlobs = []string{
|
rocmSMIExecutableGlobs = []string{
|
||||||
"/opt/rocm/bin/rocm-smi",
|
"/opt/rocm/bin/rocm-smi",
|
||||||
@@ -37,6 +39,12 @@ var (
|
|||||||
"/opt/rocm/bin/rvs",
|
"/opt/rocm/bin/rvs",
|
||||||
"/opt/rocm-*/bin/rvs",
|
"/opt/rocm-*/bin/rvs",
|
||||||
}
|
}
|
||||||
|
dcgmProfTesterCandidates = []string{
|
||||||
|
"dcgmproftester",
|
||||||
|
"dcgmproftester13",
|
||||||
|
"dcgmproftester12",
|
||||||
|
"dcgmproftester11",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
|
// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
|
||||||
@@ -75,15 +83,15 @@ func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {
|
|||||||
|
|
||||||
// NvidiaGPU holds basic GPU info from nvidia-smi.
|
// NvidiaGPU holds basic GPU info from nvidia-smi.
|
||||||
type NvidiaGPU struct {
|
type NvidiaGPU struct {
|
||||||
Index int
|
Index int `json:"index"`
|
||||||
Name string
|
Name string `json:"name"`
|
||||||
MemoryMB int
|
MemoryMB int `json:"memory_mb"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
|
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
|
||||||
type AMDGPUInfo struct {
|
type AMDGPUInfo struct {
|
||||||
Index int
|
Index int `json:"index"`
|
||||||
Name string
|
Name string `json:"name"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
|
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
|
||||||
@@ -136,6 +144,54 @@ func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFu
|
|||||||
}, logFunc)
|
}, logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RunAMDMemIntegrityPack runs the official RVS MEM module as a validate-style memory integrity test.
// It captures rocm-smi state before and after a fixed 60-second (60000 ms)
// MEM run across all devices, and returns via runAcceptancePackCtx.
func (s *System) RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
	if err := ensureAMDRuntimeReady(); err != nil {
		return "", err
	}
	cfgFile := "/tmp/bee-amd-mem.conf"
	// RVS action config; duration is in milliseconds.
	cfg := `actions:
- name: mem_integrity
  device: all
  module: mem
  parallel: true
  duration: 60000
  copy_matrix: false
  target_stress: 90
  matrix_size: 8640
`
	// Best effort: a write failure surfaces as an rvs error in the pack log.
	_ = os.WriteFile(cfgFile, []byte(cfg), 0644)
	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-mem", []satJob{
		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
		{name: "02-rvs-mem.log", cmd: []string{"rvs", "-c", cfgFile}},
		{name: "03-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
	}, logFunc)
}
|
||||||
|
|
||||||
|
// RunAMDMemBandwidthPack runs AMD's memory/interconnect bandwidth-oriented tools.
|
||||||
|
func (s *System) RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
|
if err := ensureAMDRuntimeReady(); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
cfgFile := "/tmp/bee-amd-babel.conf"
|
||||||
|
cfg := `actions:
|
||||||
|
- name: babel_mem_bw
|
||||||
|
device: all
|
||||||
|
module: babel
|
||||||
|
parallel: true
|
||||||
|
copy_matrix: true
|
||||||
|
target_stress: 90
|
||||||
|
matrix_size: 134217728
|
||||||
|
`
|
||||||
|
_ = os.WriteFile(cfgFile, []byte(cfg), 0644)
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-bandwidth", []satJob{
|
||||||
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
|
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
||||||
|
{name: "03-rvs-babel.log", cmd: []string{"rvs", "-c", cfgFile}},
|
||||||
|
{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
|
||||||
|
}, logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
// RunAMDStressPack runs an AMD GPU burn-in pack.
|
||||||
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
|
||||||
func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
|
||||||
@@ -146,8 +202,16 @@ func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationS
|
|||||||
if err := ensureAMDRuntimeReady(); err != nil {
|
if err := ensureAMDRuntimeReady(); err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
// Write RVS GST config to a temp file
|
// Enable copy_matrix so the same GST run drives VRAM traffic in addition to compute.
|
||||||
rvsCfg := fmt.Sprintf(`actions:
|
rvsCfg := amdStressRVSConfig(seconds)
|
||||||
|
cfgFile := "/tmp/bee-amd-gst.conf"
|
||||||
|
_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
|
||||||
|
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", amdStressJobs(seconds, cfgFile), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func amdStressRVSConfig(seconds int) string {
|
||||||
|
return fmt.Sprintf(`actions:
|
||||||
- name: gst_stress
|
- name: gst_stress
|
||||||
device: all
|
device: all
|
||||||
module: gst
|
module: gst
|
||||||
@@ -159,15 +223,15 @@ func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationS
|
|||||||
matrix_size_b: 8640
|
matrix_size_b: 8640
|
||||||
matrix_size_c: 8640
|
matrix_size_c: 8640
|
||||||
`, seconds*1000)
|
`, seconds*1000)
|
||||||
cfgFile := "/tmp/bee-amd-gst.conf"
|
}
|
||||||
_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
|
|
||||||
|
|
||||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
|
func amdStressJobs(seconds int, cfgFile string) []satJob {
|
||||||
|
return []satJob{
|
||||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||||
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
||||||
{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
|
{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
|
||||||
{name: fmt.Sprintf("04-rocm-smi-after.log"), cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
|
{name: fmt.Sprintf("04-rocm-smi-after.log"), cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
|
||||||
}, logFunc)
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListNvidiaGPUs returns GPUs visible to nvidia-smi.
|
// ListNvidiaGPUs returns GPUs visible to nvidia-smi.
|
||||||
@@ -199,6 +263,9 @@ func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
|
|||||||
MemoryMB: memMB,
|
MemoryMB: memMB,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
sort.Slice(gpus, func(i, j int) bool {
|
||||||
|
return gpus[i].Index < gpus[j].Index
|
||||||
|
})
|
||||||
return gpus, nil
|
return gpus, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,13 +278,87 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
|
|||||||
if gpuCount < 1 {
|
if gpuCount < 1 {
|
||||||
gpuCount = 1
|
gpuCount = 1
|
||||||
}
|
}
|
||||||
return runAcceptancePackCtx(ctx, baseDir, "nccl-tests", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "nccl-tests", withNvidiaPersistenceMode(
|
||||||
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
{name: "02-all-reduce-perf.log", cmd: []string{
|
satJob{name: "02-all-reduce-perf.log", cmd: []string{
|
||||||
"all_reduce_perf", "-b", "512M", "-e", "4G", "-f", "2",
|
"all_reduce_perf", "-b", "512M", "-e", "4G", "-f", "2",
|
||||||
"-g", strconv.Itoa(gpuCount), "--iters", "20",
|
"-g", strconv.Itoa(gpuCount), "--iters", "20",
|
||||||
}},
|
}},
|
||||||
}, logFunc)
|
), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
selected, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
|
||||||
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
satJob{name: "02-dcgmi-version.log", cmd: []string{"dcgmi", "-v"}},
|
||||||
|
satJob{
|
||||||
|
name: "03-dcgmproftester.log",
|
||||||
|
cmd: profCmd,
|
||||||
|
env: nvidiaVisibleDevicesEnv(selected),
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
},
|
||||||
|
satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
|
||||||
|
), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
selected, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-power", withNvidiaPersistenceMode(
|
||||||
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
satJob{
|
||||||
|
name: "02-dcgmi-targeted-power.log",
|
||||||
|
cmd: nvidiaDCGMNamedDiagCommand("targeted_power", normalizeNvidiaBurnDuration(durationSec), selected),
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
},
|
||||||
|
satJob{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
|
||||||
|
), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
selected, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-pulse", withNvidiaPersistenceMode(
|
||||||
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
satJob{
|
||||||
|
name: "02-dcgmi-pulse-test.log",
|
||||||
|
cmd: nvidiaDCGMNamedDiagCommand("pulse_test", normalizeNvidiaBurnDuration(durationSec), selected),
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
},
|
||||||
|
satJob{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
|
||||||
|
), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
selected, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-bandwidth", withNvidiaPersistenceMode(
|
||||||
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
satJob{
|
||||||
|
name: "02-dcgmi-nvbandwidth.log",
|
||||||
|
cmd: nvidiaDCGMNamedDiagCommand("nvbandwidth", 0, selected),
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
},
|
||||||
|
satJob{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
|
||||||
|
), logFunc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
|
||||||
@@ -229,7 +370,68 @@ func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (
|
|||||||
// gpuIndices: specific GPU indices to test (empty = all GPUs).
|
// gpuIndices: specific GPU indices to test (empty = all GPUs).
|
||||||
// ctx cancellation kills the running job.
|
// ctx cancellation kills the running job.
|
||||||
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
|
func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
|
resolvedGPUIndices, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *System) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
|
||||||
|
selected, err := resolveDCGMGPUIndices(gpuIndices)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
|
||||||
|
// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
|
||||||
|
if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
|
||||||
|
for _, p := range killed {
|
||||||
|
logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-stress", withNvidiaPersistenceMode(
|
||||||
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
|
satJob{
|
||||||
|
name: "02-dcgmi-targeted-stress.log",
|
||||||
|
cmd: nvidiaDCGMNamedDiagCommand("targeted_stress", normalizeNvidiaBurnDuration(durationSec), selected),
|
||||||
|
collectGPU: true,
|
||||||
|
gpuIndices: selected,
|
||||||
|
},
|
||||||
|
satJob{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
|
||||||
|
), logFunc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
|
||||||
|
if len(gpuIndices) > 0 {
|
||||||
|
return dedupeSortedIndices(gpuIndices), nil
|
||||||
|
}
|
||||||
|
all, err := listNvidiaGPUIndices()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(all) == 0 {
|
||||||
|
return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
|
||||||
|
}
|
||||||
|
return all, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func memoryStressSizeArg() string {
|
||||||
|
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
||||||
|
return fmt.Sprintf("%dM", mb)
|
||||||
|
}
|
||||||
|
availBytes := satFreeMemBytes()
|
||||||
|
if availBytes <= 0 {
|
||||||
|
return "80%"
|
||||||
|
}
|
||||||
|
availMB := availBytes / (1024 * 1024)
|
||||||
|
targetMB := (availMB * 2) / 3
|
||||||
|
if targetMB >= 256 {
|
||||||
|
targetMB = (targetMB / 256) * 256
|
||||||
|
}
|
||||||
|
if targetMB <= 0 {
|
||||||
|
return "80%"
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%dM", targetMB)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
|
||||||
@@ -247,11 +449,9 @@ func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durati
|
|||||||
if seconds <= 0 {
|
if seconds <= 0 {
|
||||||
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
|
seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
|
||||||
}
|
}
|
||||||
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
|
// Base the default on current MemAvailable and keep headroom for the OS and
|
||||||
sizeArg := "80%"
|
// concurrent stressors so mixed burn runs do not trip the OOM killer.
|
||||||
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 {
|
sizeArg := memoryStressSizeArg()
|
||||||
sizeArg = fmt.Sprintf("%dM", mb)
|
|
||||||
}
|
|
||||||
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
|
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
|
||||||
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
{name: "01-free-before.log", cmd: []string{"free", "-h"}},
|
||||||
{name: "02-stress-ng-vm.log", cmd: []string{
|
{name: "02-stress-ng-vm.log", cmd: []string{
|
||||||
@@ -368,16 +568,24 @@ type satStats struct {
|
|||||||
Unsupported int
|
Unsupported int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func withNvidiaPersistenceMode(jobs ...satJob) []satJob {
|
||||||
|
out := make([]satJob, 0, len(jobs)+1)
|
||||||
|
out = append(out, satJob{
|
||||||
|
name: "00-nvidia-smi-persistence-mode.log",
|
||||||
|
cmd: []string{"nvidia-smi", "-pm", "1"},
|
||||||
|
})
|
||||||
|
out = append(out, jobs...)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
func nvidiaSATJobs() []satJob {
|
func nvidiaSATJobs() []satJob {
|
||||||
seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
|
return withNvidiaPersistenceMode(
|
||||||
sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
return []satJob{
|
satJob{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
satJob{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
||||||
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
satJob{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
|
||||||
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
satJob{name: "05-bee-gpu-burn.log", cmd: []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}},
|
||||||
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
|
)
|
||||||
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
||||||
@@ -392,11 +600,39 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
|
|||||||
}
|
}
|
||||||
diagArgs = append(diagArgs, "-i", strings.Join(ids, ","))
|
diagArgs = append(diagArgs, "-i", strings.Join(ids, ","))
|
||||||
}
|
}
|
||||||
return []satJob{
|
return withNvidiaPersistenceMode(
|
||||||
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
|
||||||
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
satJob{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
|
||||||
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
satJob{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
|
||||||
{name: "04-dcgmi-diag.log", cmd: diagArgs},
|
satJob{name: "04-dcgmi-diag.log", cmd: diagArgs},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func nvidiaDCGMNamedDiagCommand(name string, durationSec int, gpuIndices []int) []string {
|
||||||
|
args := []string{"dcgmi", "diag", "-r", name}
|
||||||
|
if durationSec > 0 {
|
||||||
|
args = append(args, "-p", fmt.Sprintf("%s.test_duration=%d", name, durationSec))
|
||||||
|
}
|
||||||
|
if len(gpuIndices) > 0 {
|
||||||
|
args = append(args, "-i", joinIndexList(gpuIndices))
|
||||||
|
}
|
||||||
|
return args
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeNvidiaBurnDuration(durationSec int) int {
|
||||||
|
if durationSec <= 0 {
|
||||||
|
return 300
|
||||||
|
}
|
||||||
|
return durationSec
|
||||||
|
}
|
||||||
|
|
||||||
|
func nvidiaVisibleDevicesEnv(gpuIndices []int) []string {
|
||||||
|
if len(gpuIndices) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return []string{
|
||||||
|
"CUDA_DEVICE_ORDER=PCI_BUS_ID",
|
||||||
|
"CUDA_VISIBLE_DEVICES=" + joinIndexList(gpuIndices),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -438,6 +674,9 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
|
|||||||
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
|
||||||
return "", writeErr
|
return "", writeErr
|
||||||
}
|
}
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return "", ctx.Err()
|
||||||
|
}
|
||||||
status, rc := classifySATResult(job.name, out, err)
|
status, rc := classifySATResult(job.name, out, err)
|
||||||
stats.Add(status)
|
stats.Add(status)
|
||||||
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
|
key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
|
||||||
@@ -477,6 +716,13 @@ func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string
|
|||||||
}
|
}
|
||||||
|
|
||||||
c := exec.CommandContext(ctx, resolvedCmd[0], resolvedCmd[1:]...)
|
c := exec.CommandContext(ctx, resolvedCmd[0], resolvedCmd[1:]...)
|
||||||
|
c.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
|
||||||
|
c.Cancel = func() error {
|
||||||
|
if c.Process != nil {
|
||||||
|
_ = syscall.Kill(-c.Process.Pid, syscall.SIGKILL)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
if len(env) > 0 {
|
if len(env) > 0 {
|
||||||
c.Env = append(os.Environ(), env...)
|
c.Env = append(os.Environ(), env...)
|
||||||
}
|
}
|
||||||
@@ -562,6 +808,7 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
|
|||||||
}
|
}
|
||||||
if strings.Contains(text, "unsupported") ||
|
if strings.Contains(text, "unsupported") ||
|
||||||
strings.Contains(text, "not supported") ||
|
strings.Contains(text, "not supported") ||
|
||||||
|
strings.Contains(text, "not found in path") ||
|
||||||
strings.Contains(text, "invalid opcode") ||
|
strings.Contains(text, "invalid opcode") ||
|
||||||
strings.Contains(text, "unknown command") ||
|
strings.Contains(text, "unknown command") ||
|
||||||
strings.Contains(text, "not implemented") ||
|
strings.Contains(text, "not implemented") ||
|
||||||
@@ -630,7 +877,11 @@ func resolveSATCommand(cmd []string) ([]string, error) {
|
|||||||
case "rvs":
|
case "rvs":
|
||||||
return resolveRVSCommand(cmd[1:]...)
|
return resolveRVSCommand(cmd[1:]...)
|
||||||
}
|
}
|
||||||
return cmd, nil
|
path, err := satLookPath(cmd[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("%s not found in PATH: %w", cmd[0], err)
|
||||||
|
}
|
||||||
|
return append([]string{path}, cmd[1:]...), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func resolveRVSCommand(args ...string) ([]string, error) {
|
func resolveRVSCommand(args ...string) ([]string, error) {
|
||||||
@@ -664,6 +915,15 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
|
|||||||
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func resolveDCGMProfTesterCommand(args ...string) ([]string, error) {
|
||||||
|
for _, candidate := range dcgmProfTesterCandidates {
|
||||||
|
if path, err := satLookPath(candidate); err == nil {
|
||||||
|
return append([]string{path}, args...), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, errors.New("dcgmproftester not found in PATH")
|
||||||
|
}
|
||||||
|
|
||||||
func ensureAMDRuntimeReady() error {
|
func ensureAMDRuntimeReady() error {
|
||||||
if _, err := os.Stat("/dev/kfd"); err == nil {
|
if _, err := os.Stat("/dev/kfd"); err == nil {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -51,6 +51,18 @@ type FanStressRow struct {
|
|||||||
SysPowerW float64 // DCMI system power reading
|
SysPowerW float64 // DCMI system power reading
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type cachedPowerReading struct {
|
||||||
|
Value float64
|
||||||
|
UpdatedAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
systemPowerCacheMu sync.Mutex
|
||||||
|
systemPowerCache cachedPowerReading
|
||||||
|
)
|
||||||
|
|
||||||
|
const systemPowerHoldTTL = 15 * time.Second
|
||||||
|
|
||||||
// RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
|
// RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
|
||||||
// temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
|
// temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
|
||||||
// Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
|
// Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
|
||||||
@@ -130,26 +142,21 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
|
|||||||
stats.OK++
|
stats.OK++
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadPhase runs bee-gpu-stress for durSec; sampler stamps phaseName on each row.
|
// loadPhase runs bee-gpu-burn for durSec; sampler stamps phaseName on each row.
|
||||||
loadPhase := func(phaseName, stepName string, durSec int) {
|
loadPhase := func(phaseName, stepName string, durSec int) {
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
setPhase(phaseName)
|
setPhase(phaseName)
|
||||||
var env []string
|
|
||||||
if len(opts.GPUIndices) > 0 {
|
|
||||||
ids := make([]string, len(opts.GPUIndices))
|
|
||||||
for i, idx := range opts.GPUIndices {
|
|
||||||
ids[i] = strconv.Itoa(idx)
|
|
||||||
}
|
|
||||||
env = []string{"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ",")}
|
|
||||||
}
|
|
||||||
cmd := []string{
|
cmd := []string{
|
||||||
"bee-gpu-stress",
|
"bee-gpu-burn",
|
||||||
"--seconds", strconv.Itoa(durSec),
|
"--seconds", strconv.Itoa(durSec),
|
||||||
"--size-mb", strconv.Itoa(opts.SizeMB),
|
"--size-mb", strconv.Itoa(opts.SizeMB),
|
||||||
}
|
}
|
||||||
out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, env, nil)
|
if len(opts.GPUIndices) > 0 {
|
||||||
|
cmd = append(cmd, "--devices", joinIndexList(dedupeSortedIndices(opts.GPUIndices)))
|
||||||
|
}
|
||||||
|
out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, nil, nil)
|
||||||
_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
|
_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
|
||||||
if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
|
if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
|
||||||
fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
|
fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
|
||||||
@@ -323,8 +330,9 @@ func sampleFanSpeeds() ([]FanReading, error) {
|
|||||||
|
|
||||||
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
// parseFanSpeeds parses "ipmitool sdr type Fan" output.
|
||||||
// Handles two formats:
|
// Handles two formats:
|
||||||
// Old: "FAN1 | 2400.000 | RPM | ok" (value in col[1], unit in col[2])
|
//
|
||||||
// New: "FAN1 | 41h | ok | 29.1 | 4340 RPM" (value+unit combined in last col)
|
// Old: "FAN1 | 2400.000 | RPM | ok" (value in col[1], unit in col[2])
|
||||||
|
// New: "FAN1 | 41h | ok | 29.1 | 4340 RPM" (value+unit combined in last col)
|
||||||
func parseFanSpeeds(raw string) []FanReading {
|
func parseFanSpeeds(raw string) []FanReading {
|
||||||
var fans []FanReading
|
var fans []FanReading
|
||||||
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
|
||||||
@@ -512,11 +520,17 @@ func sampleCPUTempViaSensors() float64 {
|
|||||||
|
|
||||||
// sampleSystemPower reads system power draw via DCMI.
|
// sampleSystemPower reads system power draw via DCMI.
|
||||||
func sampleSystemPower() float64 {
|
func sampleSystemPower() float64 {
|
||||||
|
now := time.Now()
|
||||||
|
current := 0.0
|
||||||
out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
|
out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
|
||||||
if err != nil {
|
if err == nil {
|
||||||
return 0
|
current = parseDCMIPowerReading(string(out))
|
||||||
}
|
}
|
||||||
return parseDCMIPowerReading(string(out))
|
systemPowerCacheMu.Lock()
|
||||||
|
defer systemPowerCacheMu.Unlock()
|
||||||
|
value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
|
||||||
|
systemPowerCache = updated
|
||||||
|
return value
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
|
// parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
|
||||||
@@ -539,6 +553,17 @@ func parseDCMIPowerReading(raw string) float64 {
|
|||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
|
||||||
|
if current > 0 {
|
||||||
|
cache = cachedPowerReading{Value: current, UpdatedAt: now}
|
||||||
|
return current, cache
|
||||||
|
}
|
||||||
|
if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
|
||||||
|
return cache.Value, cache
|
||||||
|
}
|
||||||
|
return 0, cache
|
||||||
|
}
|
||||||
|
|
||||||
// analyzeThrottling returns true if any GPU reported an active throttle reason
|
// analyzeThrottling returns true if any GPU reported an active throttle reason
|
||||||
// during either load phase.
|
// during either load phase.
|
||||||
func analyzeThrottling(rows []FanStressRow) bool {
|
func analyzeThrottling(rows []FanStressRow) bool {
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import "testing"
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
func TestParseFanSpeeds(t *testing.T) {
|
func TestParseFanSpeeds(t *testing.T) {
|
||||||
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
|
raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
|
||||||
@@ -25,3 +28,40 @@ func TestFirstFanInputValue(t *testing.T) {
|
|||||||
t.Fatalf("got=%v ok=%v", got, ok)
|
t.Fatalf("got=%v ok=%v", got, ok)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseDCMIPowerReading(t *testing.T) {
|
||||||
|
raw := `
|
||||||
|
Instantaneous power reading: 512 Watts
|
||||||
|
Minimum during sampling period: 498 Watts
|
||||||
|
`
|
||||||
|
if got := parseDCMIPowerReading(raw); got != 512 {
|
||||||
|
t.Fatalf("parseDCMIPowerReading()=%v want 512", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEffectiveSystemPowerReading(t *testing.T) {
|
||||||
|
now := time.Now()
|
||||||
|
cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}
|
||||||
|
|
||||||
|
got, updated := effectiveSystemPowerReading(cache, 0, now)
|
||||||
|
if got != 480 {
|
||||||
|
t.Fatalf("got=%v want cached 480", got)
|
||||||
|
}
|
||||||
|
if updated.Value != 480 {
|
||||||
|
t.Fatalf("updated=%+v", updated)
|
||||||
|
}
|
||||||
|
|
||||||
|
got, updated = effectiveSystemPowerReading(cache, 530, now)
|
||||||
|
if got != 530 {
|
||||||
|
t.Fatalf("got=%v want 530", got)
|
||||||
|
}
|
||||||
|
if updated.Value != 530 {
|
||||||
|
t.Fatalf("updated=%+v", updated)
|
||||||
|
}
|
||||||
|
|
||||||
|
expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
|
||||||
|
got, _ = effectiveSystemPowerReading(expired, 0, now)
|
||||||
|
if got != 0 {
|
||||||
|
t.Fatalf("expired cache returned %v want 0", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package platform
|
package platform
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestStorageSATCommands(t *testing.T) {
|
func TestStorageSATCommands(t *testing.T) {
|
||||||
@@ -27,24 +30,68 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
|
|||||||
|
|
||||||
jobs := nvidiaSATJobs()
|
jobs := nvidiaSATJobs()
|
||||||
|
|
||||||
if len(jobs) != 5 {
|
if len(jobs) != 6 {
|
||||||
t.Fatalf("jobs=%d want 5", len(jobs))
|
t.Fatalf("jobs=%d want 6", len(jobs))
|
||||||
}
|
}
|
||||||
if got := jobs[4].cmd[0]; got != "bee-gpu-stress" {
|
if got := jobs[0].cmd[0]; got != "nvidia-smi" {
|
||||||
t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
|
t.Fatalf("preflight command=%q want nvidia-smi", got)
|
||||||
}
|
}
|
||||||
if got := jobs[3].cmd[1]; got != "--output-file" {
|
if got := strings.Join(jobs[0].cmd, " "); got != "nvidia-smi -pm 1" {
|
||||||
|
t.Fatalf("preflight=%q want %q", got, "nvidia-smi -pm 1")
|
||||||
|
}
|
||||||
|
if got := jobs[5].cmd[0]; got != "bee-gpu-burn" {
|
||||||
|
t.Fatalf("gpu stress command=%q want bee-gpu-burn", got)
|
||||||
|
}
|
||||||
|
if got := jobs[4].cmd[1]; got != "--output-file" {
|
||||||
t.Fatalf("bug report flag=%q want --output-file", got)
|
t.Fatalf("bug report flag=%q want --output-file", got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
|
func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
|
||||||
t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
|
t.Parallel()
|
||||||
t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
|
|
||||||
|
|
||||||
|
cfg := amdStressRVSConfig(123)
|
||||||
|
if !strings.Contains(cfg, "module: gst") {
|
||||||
|
t.Fatalf("config missing gst module:\n%s", cfg)
|
||||||
|
}
|
||||||
|
if strings.Contains(cfg, "module: mem") {
|
||||||
|
t.Fatalf("config should not include mem module:\n%s", cfg)
|
||||||
|
}
|
||||||
|
if !strings.Contains(cfg, "copy_matrix: false") {
|
||||||
|
t.Fatalf("config should use copy_matrix=false:\n%s", cfg)
|
||||||
|
}
|
||||||
|
if strings.Count(cfg, "duration: 123000") != 1 {
|
||||||
|
t.Fatalf("config should apply duration once:\n%s", cfg)
|
||||||
|
}
|
||||||
|
for _, field := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
|
||||||
|
if !strings.Contains(cfg, field) {
|
||||||
|
t.Fatalf("config missing %s:\n%s", field, cfg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAMDStressJobsIncludeBandwidthAndGST(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
jobs := amdStressJobs(300, "/tmp/test-amd-gst.conf")
|
||||||
|
if len(jobs) != 4 {
|
||||||
|
t.Fatalf("jobs=%d want 4", len(jobs))
|
||||||
|
}
|
||||||
|
if got := jobs[1].cmd[0]; got != "rocm-bandwidth-test" {
|
||||||
|
t.Fatalf("jobs[1]=%q want rocm-bandwidth-test", got)
|
||||||
|
}
|
||||||
|
if got := jobs[2].cmd[0]; got != "rvs" {
|
||||||
|
t.Fatalf("jobs[2]=%q want rvs", got)
|
||||||
|
}
|
||||||
|
if got := jobs[2].cmd[2]; got != "/tmp/test-amd-gst.conf" {
|
||||||
|
t.Fatalf("jobs[2] cfg=%q want /tmp/test-amd-gst.conf", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNvidiaSATJobsUseBuiltinBurnDefaults(t *testing.T) {
|
||||||
jobs := nvidiaSATJobs()
|
jobs := nvidiaSATJobs()
|
||||||
got := jobs[4].cmd
|
got := jobs[5].cmd
|
||||||
want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
|
want := []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}
|
||||||
if len(got) != len(want) {
|
if len(got) != len(want) {
|
||||||
t.Fatalf("cmd len=%d want %d", len(got), len(want))
|
t.Fatalf("cmd len=%d want %d", len(got), len(want))
|
||||||
}
|
}
|
||||||
@@ -55,6 +102,189 @@ func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNvidiaDCGMJobsEnablePersistenceModeBeforeDiag(t *testing.T) {
|
||||||
|
jobs := nvidiaDCGMJobs(3, []int{2, 0})
|
||||||
|
if len(jobs) != 5 {
|
||||||
|
t.Fatalf("jobs=%d want 5", len(jobs))
|
||||||
|
}
|
||||||
|
if got := strings.Join(jobs[0].cmd, " "); got != "nvidia-smi -pm 1" {
|
||||||
|
t.Fatalf("preflight=%q want %q", got, "nvidia-smi -pm 1")
|
||||||
|
}
|
||||||
|
if got := strings.Join(jobs[4].cmd, " "); got != "dcgmi diag -r 3 -i 2,0" {
|
||||||
|
t.Fatalf("diag=%q want %q", got, "dcgmi diag -r 3 -i 2,0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
oldExecCommand := satExecCommand
|
||||||
|
satExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
if name == "nvidia-smi" {
|
||||||
|
return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
|
||||||
|
}
|
||||||
|
return exec.Command(name, args...)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satExecCommand = oldExecCommand })
|
||||||
|
|
||||||
|
job, err := buildNvidiaStressJob(NvidiaStressOptions{
|
||||||
|
DurationSec: 600,
|
||||||
|
Loader: NvidiaStressLoaderJohn,
|
||||||
|
ExcludeGPUIndices: []int{1},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("buildNvidiaStressJob error: %v", err)
|
||||||
|
}
|
||||||
|
wantCmd := []string{"bee-john-gpu-stress", "--seconds", "600", "--devices", "0,2"}
|
||||||
|
if len(job.cmd) != len(wantCmd) {
|
||||||
|
t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
|
||||||
|
}
|
||||||
|
for i := range wantCmd {
|
||||||
|
if job.cmd[i] != wantCmd[i] {
|
||||||
|
t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := joinIndexList(job.gpuIndices); got != "0,2" {
|
||||||
|
t.Fatalf("gpuIndices=%q want 0,2", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
oldExecCommand := satExecCommand
|
||||||
|
satExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
if name == "nvidia-smi" {
|
||||||
|
return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
|
||||||
|
}
|
||||||
|
return exec.Command(name, args...)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satExecCommand = oldExecCommand })
|
||||||
|
|
||||||
|
job, err := buildNvidiaStressJob(NvidiaStressOptions{
|
||||||
|
DurationSec: 120,
|
||||||
|
Loader: NvidiaStressLoaderNCCL,
|
||||||
|
GPUIndices: []int{2, 0},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("buildNvidiaStressJob error: %v", err)
|
||||||
|
}
|
||||||
|
wantCmd := []string{"bee-nccl-gpu-stress", "--seconds", "120", "--devices", "0,2"}
|
||||||
|
if len(job.cmd) != len(wantCmd) {
|
||||||
|
t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
|
||||||
|
}
|
||||||
|
for i := range wantCmd {
|
||||||
|
if job.cmd[i] != wantCmd[i] {
|
||||||
|
t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := joinIndexList(job.gpuIndices); got != "0,2" {
|
||||||
|
t.Fatalf("gpuIndices=%q want 0,2", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveDCGMGPUIndicesUsesDetectedGPUsWhenUnset(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
oldExecCommand := satExecCommand
|
||||||
|
satExecCommand = func(name string, args ...string) *exec.Cmd {
|
||||||
|
if name == "nvidia-smi" {
|
||||||
|
return exec.Command("sh", "-c", "printf '2\n0\n1\n'")
|
||||||
|
}
|
||||||
|
return exec.Command(name, args...)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satExecCommand = oldExecCommand })
|
||||||
|
|
||||||
|
got, err := resolveDCGMGPUIndices(nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
|
||||||
|
}
|
||||||
|
if want := "0,1,2"; joinIndexList(got) != want {
|
||||||
|
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
got, err := resolveDCGMGPUIndices([]int{3, 1, 3})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveDCGMGPUIndices error: %v", err)
|
||||||
|
}
|
||||||
|
if want := "1,3"; joinIndexList(got) != want {
|
||||||
|
t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
|
||||||
|
oldLookPath := satLookPath
|
||||||
|
satLookPath = func(file string) (string, error) {
|
||||||
|
switch file {
|
||||||
|
case "dcgmproftester13":
|
||||||
|
return "/usr/bin/dcgmproftester13", nil
|
||||||
|
default:
|
||||||
|
return "", exec.ErrNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satLookPath = oldLookPath })
|
||||||
|
|
||||||
|
cmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveDCGMProfTesterCommand error: %v", err)
|
||||||
|
}
|
||||||
|
if len(cmd) != 4 {
|
||||||
|
t.Fatalf("cmd len=%d want 4 (%v)", len(cmd), cmd)
|
||||||
|
}
|
||||||
|
if cmd[0] != "/usr/bin/dcgmproftester13" {
|
||||||
|
t.Fatalf("cmd[0]=%q want /usr/bin/dcgmproftester13", cmd[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
|
||||||
|
cmd := nvidiaDCGMNamedDiagCommand("targeted_power", 900, []int{3, 1})
|
||||||
|
want := []string{"dcgmi", "diag", "-r", "targeted_power", "-p", "targeted_power.test_duration=900", "-i", "3,1"}
|
||||||
|
if len(cmd) != len(want) {
|
||||||
|
t.Fatalf("cmd len=%d want %d (%v)", len(cmd), len(want), cmd)
|
||||||
|
}
|
||||||
|
for i := range want {
|
||||||
|
if cmd[i] != want[i] {
|
||||||
|
t.Fatalf("cmd[%d]=%q want %q", i, cmd[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
|
||||||
|
env := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
|
||||||
|
if len(env) != 2 {
|
||||||
|
t.Fatalf("env len=%d want 2 (%v)", len(env), env)
|
||||||
|
}
|
||||||
|
if env[0] != "CUDA_DEVICE_ORDER=PCI_BUS_ID" {
|
||||||
|
t.Fatalf("env[0]=%q want CUDA_DEVICE_ORDER=PCI_BUS_ID", env[0])
|
||||||
|
}
|
||||||
|
if env[1] != "CUDA_VISIBLE_DEVICES=0,2,4" {
|
||||||
|
t.Fatalf("env[1]=%q want CUDA_VISIBLE_DEVICES=0,2,4", env[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
loader string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{loader: NvidiaStressLoaderBuiltin, want: "gpu-nvidia-burn"},
|
||||||
|
{loader: NvidiaStressLoaderJohn, want: "gpu-nvidia-john"},
|
||||||
|
{loader: NvidiaStressLoaderNCCL, want: "gpu-nvidia-nccl"},
|
||||||
|
{loader: "", want: "gpu-nvidia-burn"},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
if got := nvidiaStressArchivePrefix(tt.loader); got != tt.want {
|
||||||
|
t.Fatalf("loader=%q prefix=%q want %q", tt.loader, got, tt.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestEnvIntFallback(t *testing.T) {
|
func TestEnvIntFallback(t *testing.T) {
|
||||||
os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
|
os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
|
||||||
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
|
if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
|
||||||
@@ -70,6 +300,37 @@ func TestEnvIntFallback(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMemoryStressSizeArgUsesAvailableMemory(t *testing.T) {
|
||||||
|
oldFreeMemBytes := satFreeMemBytes
|
||||||
|
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
|
||||||
|
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||||
|
|
||||||
|
if got := memoryStressSizeArg(); got != "65536M" {
|
||||||
|
t.Fatalf("sizeArg=%q want 65536M", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMemoryStressSizeArgRespectsOverride(t *testing.T) {
|
||||||
|
oldFreeMemBytes := satFreeMemBytes
|
||||||
|
satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
|
||||||
|
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||||
|
t.Setenv("BEE_VM_STRESS_SIZE_MB", "4096")
|
||||||
|
|
||||||
|
if got := memoryStressSizeArg(); got != "4096M" {
|
||||||
|
t.Fatalf("sizeArg=%q want 4096M", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown(t *testing.T) {
|
||||||
|
oldFreeMemBytes := satFreeMemBytes
|
||||||
|
satFreeMemBytes = func() int64 { return 0 }
|
||||||
|
t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
|
||||||
|
|
||||||
|
if got := memoryStressSizeArg(); got != "80%" {
|
||||||
|
t.Fatalf("sizeArg=%q want 80%%", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestClassifySATResult(t *testing.T) {
|
func TestClassifySATResult(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
@@ -80,8 +341,8 @@ func TestClassifySATResult(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
|
{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
|
||||||
{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
|
{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
|
||||||
{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
|
{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
|
||||||
{name: "cuda not ready", job: "bee-gpu-stress", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
|
{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
@@ -94,6 +355,38 @@ func TestClassifySATResult(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunAcceptancePackCtxReturnsContextErrorWithoutArchive(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
t.Cleanup(cancel)
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
time.Sleep(100 * time.Millisecond)
|
||||||
|
cancel()
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
archive, err := runAcceptancePackCtx(ctx, dir, "cancelled-pack", []satJob{
|
||||||
|
{name: "01-sleep.log", cmd: []string{"sh", "-c", "sleep 5"}},
|
||||||
|
}, nil)
|
||||||
|
<-done
|
||||||
|
|
||||||
|
if !errors.Is(err, context.Canceled) {
|
||||||
|
t.Fatalf("err=%v want context.Canceled", err)
|
||||||
|
}
|
||||||
|
if archive != "" {
|
||||||
|
t.Fatalf("archive=%q want empty", archive)
|
||||||
|
}
|
||||||
|
matches, globErr := filepath.Glob(filepath.Join(dir, "cancelled-pack-*.tar.gz"))
|
||||||
|
if globErr != nil {
|
||||||
|
t.Fatalf("Glob error: %v", globErr)
|
||||||
|
}
|
||||||
|
if len(matches) != 0 {
|
||||||
|
t.Fatalf("archives=%v want none", matches)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseStorageDevicesSkipsUSBDisks(t *testing.T) {
|
func TestParseStorageDevicesSkipsUSBDisks(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
@@ -130,6 +423,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
|
||||||
|
oldLookPath := satLookPath
|
||||||
|
satLookPath = func(file string) (string, error) {
|
||||||
|
if file == "stress-ng" {
|
||||||
|
return "/usr/bin/stress-ng", nil
|
||||||
|
}
|
||||||
|
return "", exec.ErrNotFound
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satLookPath = oldLookPath })
|
||||||
|
|
||||||
|
cmd, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("resolveSATCommand error: %v", err)
|
||||||
|
}
|
||||||
|
if len(cmd) != 3 {
|
||||||
|
t.Fatalf("cmd len=%d want 3 (%v)", len(cmd), cmd)
|
||||||
|
}
|
||||||
|
if cmd[0] != "/usr/bin/stress-ng" {
|
||||||
|
t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", cmd[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
|
||||||
|
oldLookPath := satLookPath
|
||||||
|
satLookPath = func(file string) (string, error) {
|
||||||
|
return "", exec.ErrNotFound
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { satLookPath = oldLookPath })
|
||||||
|
|
||||||
|
_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "stress-ng not found in PATH") {
|
||||||
|
t.Fatalf("error=%q", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
|
func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
|
||||||
tmp := t.TempDir()
|
tmp := t.TempDir()
|
||||||
execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")
|
execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")
|
||||||
|
|||||||
@@ -10,17 +10,30 @@ import (
|
|||||||
func (s *System) ListBeeServices() ([]string, error) {
|
func (s *System) ListBeeServices() ([]string, error) {
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
var out []string
|
var out []string
|
||||||
for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
|
for _, pattern := range []string{
|
||||||
|
"/etc/systemd/system/bee-*.service",
|
||||||
|
"/lib/systemd/system/bee-*.service",
|
||||||
|
"/etc/systemd/system/bee-*.timer",
|
||||||
|
"/lib/systemd/system/bee-*.timer",
|
||||||
|
} {
|
||||||
matches, err := filepath.Glob(pattern)
|
matches, err := filepath.Glob(pattern)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
name := strings.TrimSuffix(filepath.Base(match), ".service")
|
base := filepath.Base(match)
|
||||||
|
name := base
|
||||||
|
if strings.HasSuffix(base, ".service") {
|
||||||
|
name = strings.TrimSuffix(base, ".service")
|
||||||
|
}
|
||||||
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
|
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
|
||||||
if strings.HasSuffix(name, "@") {
|
if strings.HasSuffix(name, "@") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
|
||||||
|
if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if !seen[name] {
|
if !seen[name] {
|
||||||
seen[name] = true
|
seen[name] = true
|
||||||
out = append(out, name)
|
out = append(out, name)
|
||||||
@@ -48,7 +61,9 @@ func (s *System) ServiceState(name string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
|
func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
|
||||||
raw, err := exec.Command("systemctl", string(action), name).CombinedOutput()
|
// bee-web runs as the bee user; sudo is required to control system services.
|
||||||
|
// /etc/sudoers.d/bee grants bee NOPASSWD:ALL.
|
||||||
|
raw, err := exec.Command("sudo", "systemctl", string(action), name).CombinedOutput()
|
||||||
return string(raw), err
|
return string(raw), err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,13 @@ package platform
|
|||||||
|
|
||||||
type System struct{}
|
type System struct{}
|
||||||
|
|
||||||
|
type LiveBootSource struct {
|
||||||
|
InRAM bool `json:"in_ram"`
|
||||||
|
Kind string `json:"kind"`
|
||||||
|
Source string `json:"source,omitempty"`
|
||||||
|
Device string `json:"device,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type InterfaceInfo struct {
|
type InterfaceInfo struct {
|
||||||
Name string
|
Name string
|
||||||
State string
|
State string
|
||||||
@@ -37,12 +44,12 @@ type StaticIPv4Config struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type RemovableTarget struct {
|
type RemovableTarget struct {
|
||||||
Device string
|
Device string `json:"device"`
|
||||||
FSType string
|
FSType string `json:"fs_type"`
|
||||||
Size string
|
Size string `json:"size"`
|
||||||
Label string
|
Label string `json:"label"`
|
||||||
Model string
|
Model string `json:"model"`
|
||||||
Mountpoint string
|
Mountpoint string `json:"mountpoint"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ToolStatus struct {
|
type ToolStatus struct {
|
||||||
@@ -51,6 +58,20 @@ type ToolStatus struct {
|
|||||||
OK bool
|
OK bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const (
|
||||||
|
NvidiaStressLoaderBuiltin = "builtin"
|
||||||
|
NvidiaStressLoaderJohn = "john"
|
||||||
|
NvidiaStressLoaderNCCL = "nccl"
|
||||||
|
)
|
||||||
|
|
||||||
|
type NvidiaStressOptions struct {
|
||||||
|
DurationSec int
|
||||||
|
SizeMB int
|
||||||
|
Loader string
|
||||||
|
GPUIndices []int
|
||||||
|
ExcludeGPUIndices []int
|
||||||
|
}
|
||||||
|
|
||||||
func New() *System {
|
func New() *System {
|
||||||
return &System{}
|
return &System{}
|
||||||
}
|
}
|
||||||
|
|||||||
31
audit/internal/platform/types_test.go
Normal file
31
audit/internal/platform/types_test.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package platform
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
data, err := json.Marshal(RemovableTarget{
|
||||||
|
Device: "/dev/sdb1",
|
||||||
|
FSType: "exfat",
|
||||||
|
Size: "1.8T",
|
||||||
|
Label: "USB",
|
||||||
|
Model: "Flash",
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("marshal: %v", err)
|
||||||
|
}
|
||||||
|
raw := string(data)
|
||||||
|
for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
|
||||||
|
if !strings.Contains(raw, key) {
|
||||||
|
t.Fatalf("json missing key %s: %s", key, raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
|
||||||
|
t.Fatalf("json still contains Go field names: %s", raw)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -20,6 +20,7 @@ type RuntimeHealth struct {
|
|||||||
ExportDir string `json:"export_dir,omitempty"`
|
ExportDir string `json:"export_dir,omitempty"`
|
||||||
DriverReady bool `json:"driver_ready,omitempty"`
|
DriverReady bool `json:"driver_ready,omitempty"`
|
||||||
CUDAReady bool `json:"cuda_ready,omitempty"`
|
CUDAReady bool `json:"cuda_ready,omitempty"`
|
||||||
|
NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
|
||||||
NetworkStatus string `json:"network_status,omitempty"`
|
NetworkStatus string `json:"network_status,omitempty"`
|
||||||
Issues []RuntimeIssue `json:"issues,omitempty"`
|
Issues []RuntimeIssue `json:"issues,omitempty"`
|
||||||
Tools []RuntimeToolStatus `json:"tools,omitempty"`
|
Tools []RuntimeToolStatus `json:"tools,omitempty"`
|
||||||
|
|||||||
@@ -2,27 +2,258 @@ package webui
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"context"
|
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ansiEscapeRE = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b[()][A-Z0-9]|\x1b[DABC]`)
|
||||||
|
var apiListNvidiaGPUs = func(a *app.App) ([]platform.NvidiaGPU, error) {
|
||||||
|
if a == nil {
|
||||||
|
return nil, fmt.Errorf("app not configured")
|
||||||
|
}
|
||||||
|
return a.ListNvidiaGPUs()
|
||||||
|
}
|
||||||
|
|
||||||
// ── Job ID counter ────────────────────────────────────────────────────────────
|
// ── Job ID counter ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
var jobCounter atomic.Uint64
|
var jobCounter atomic.Uint64
|
||||||
|
|
||||||
func newJobID(prefix string) string {
|
func newJobID(_ string) string {
|
||||||
return fmt.Sprintf("%s-%d", prefix, jobCounter.Add(1))
|
start := int((jobCounter.Add(1) - 1) % 1000)
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
defer globalQueue.mu.Unlock()
|
||||||
|
for offset := 0; offset < 1000; offset++ {
|
||||||
|
n := (start + offset) % 1000
|
||||||
|
id := fmt.Sprintf("TASK-%03d", n)
|
||||||
|
if !taskIDInUseLocked(id) {
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("TASK-%03d", start)
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskIDInUseLocked(id string) bool {
|
||||||
|
for _, t := range globalQueue.tasks {
|
||||||
|
if t != nil && t.ID == id {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
type taskRunResponse struct {
|
||||||
|
TaskID string `json:"task_id,omitempty"`
|
||||||
|
JobID string `json:"job_id,omitempty"`
|
||||||
|
TaskIDs []string `json:"task_ids,omitempty"`
|
||||||
|
JobIDs []string `json:"job_ids,omitempty"`
|
||||||
|
TaskCount int `json:"task_count,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type nvidiaTaskSelection struct {
|
||||||
|
GPUIndices []int
|
||||||
|
Label string
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeTaskRunResponse(w http.ResponseWriter, tasks []*Task) {
|
||||||
|
if len(tasks) == 0 {
|
||||||
|
writeJSON(w, taskRunResponse{})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ids := make([]string, 0, len(tasks))
|
||||||
|
for _, t := range tasks {
|
||||||
|
if t == nil || strings.TrimSpace(t.ID) == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ids = append(ids, t.ID)
|
||||||
|
}
|
||||||
|
resp := taskRunResponse{TaskCount: len(ids)}
|
||||||
|
if len(ids) > 0 {
|
||||||
|
resp.TaskID = ids[0]
|
||||||
|
resp.JobID = ids[0]
|
||||||
|
resp.TaskIDs = ids
|
||||||
|
resp.JobIDs = ids
|
||||||
|
}
|
||||||
|
writeJSON(w, resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
func shouldSplitHomogeneousNvidiaTarget(target string) bool {
|
||||||
|
switch strings.TrimSpace(target) {
|
||||||
|
case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
|
||||||
|
"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
|
||||||
|
"nvidia-bandwidth", "nvidia-stress":
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) {
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
return nil, fmt.Errorf("no NVIDIA GPUs detected")
|
||||||
|
}
|
||||||
|
indexed := make(map[int]platform.NvidiaGPU, len(gpus))
|
||||||
|
allIndices := make([]int, 0, len(gpus))
|
||||||
|
for _, gpu := range gpus {
|
||||||
|
indexed[gpu.Index] = gpu
|
||||||
|
allIndices = append(allIndices, gpu.Index)
|
||||||
|
}
|
||||||
|
sort.Ints(allIndices)
|
||||||
|
|
||||||
|
selected := allIndices
|
||||||
|
if len(include) > 0 {
|
||||||
|
selected = make([]int, 0, len(include))
|
||||||
|
seen := make(map[int]struct{}, len(include))
|
||||||
|
for _, idx := range include {
|
||||||
|
if _, ok := indexed[idx]; !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if _, dup := seen[idx]; dup {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[idx] = struct{}{}
|
||||||
|
selected = append(selected, idx)
|
||||||
|
}
|
||||||
|
sort.Ints(selected)
|
||||||
|
}
|
||||||
|
if len(exclude) > 0 {
|
||||||
|
skip := make(map[int]struct{}, len(exclude))
|
||||||
|
for _, idx := range exclude {
|
||||||
|
skip[idx] = struct{}{}
|
||||||
|
}
|
||||||
|
filtered := selected[:0]
|
||||||
|
for _, idx := range selected {
|
||||||
|
if _, ok := skip[idx]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
filtered = append(filtered, idx)
|
||||||
|
}
|
||||||
|
selected = filtered
|
||||||
|
}
|
||||||
|
if len(selected) == 0 {
|
||||||
|
return nil, fmt.Errorf("no NVIDIA GPUs selected")
|
||||||
|
}
|
||||||
|
|
||||||
|
modelGroups := make(map[string][]platform.NvidiaGPU)
|
||||||
|
modelOrder := make([]string, 0)
|
||||||
|
for _, idx := range selected {
|
||||||
|
gpu := indexed[idx]
|
||||||
|
model := strings.TrimSpace(gpu.Name)
|
||||||
|
if model == "" {
|
||||||
|
model = fmt.Sprintf("GPU %d", gpu.Index)
|
||||||
|
}
|
||||||
|
if _, ok := modelGroups[model]; !ok {
|
||||||
|
modelOrder = append(modelOrder, model)
|
||||||
|
}
|
||||||
|
modelGroups[model] = append(modelGroups[model], gpu)
|
||||||
|
}
|
||||||
|
sort.Slice(modelOrder, func(i, j int) bool {
|
||||||
|
left := modelGroups[modelOrder[i]]
|
||||||
|
right := modelGroups[modelOrder[j]]
|
||||||
|
if len(left) == 0 || len(right) == 0 {
|
||||||
|
return modelOrder[i] < modelOrder[j]
|
||||||
|
}
|
||||||
|
return left[0].Index < right[0].Index
|
||||||
|
})
|
||||||
|
|
||||||
|
var groups []nvidiaTaskSelection
|
||||||
|
var singles []nvidiaTaskSelection
|
||||||
|
for _, model := range modelOrder {
|
||||||
|
group := modelGroups[model]
|
||||||
|
sort.Slice(group, func(i, j int) bool { return group[i].Index < group[j].Index })
|
||||||
|
indices := make([]int, 0, len(group))
|
||||||
|
for _, gpu := range group {
|
||||||
|
indices = append(indices, gpu.Index)
|
||||||
|
}
|
||||||
|
if len(indices) >= 2 {
|
||||||
|
groups = append(groups, nvidiaTaskSelection{
|
||||||
|
GPUIndices: indices,
|
||||||
|
Label: fmt.Sprintf("%s; GPUs %s", model, joinTaskIndices(indices)),
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
gpu := group[0]
|
||||||
|
singles = append(singles, nvidiaTaskSelection{
|
||||||
|
GPUIndices: []int{gpu.Index},
|
||||||
|
Label: fmt.Sprintf("GPU %d — %s", gpu.Index, model),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return append(groups, singles...), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func joinTaskIndices(indices []int) string {
|
||||||
|
parts := make([]string, 0, len(indices))
|
||||||
|
for _, idx := range indices {
|
||||||
|
parts = append(parts, fmt.Sprintf("%d", idx))
|
||||||
|
}
|
||||||
|
return strings.Join(parts, ",")
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatSplitTaskName(baseName, selectionLabel string) string {
|
||||||
|
baseName = strings.TrimSpace(baseName)
|
||||||
|
selectionLabel = strings.TrimSpace(selectionLabel)
|
||||||
|
if baseName == "" {
|
||||||
|
return selectionLabel
|
||||||
|
}
|
||||||
|
if selectionLabel == "" {
|
||||||
|
return baseName
|
||||||
|
}
|
||||||
|
return baseName + " (" + selectionLabel + ")"
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildNvidiaTaskSet(target string, priority int, createdAt time.Time, params taskParams, baseName string, appRef *app.App, idPrefix string) ([]*Task, error) {
|
||||||
|
if !shouldSplitHomogeneousNvidiaTarget(target) {
|
||||||
|
t := &Task{
|
||||||
|
ID: newJobID(idPrefix),
|
||||||
|
Name: baseName,
|
||||||
|
Target: target,
|
||||||
|
Priority: priority,
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: createdAt,
|
||||||
|
params: params,
|
||||||
|
}
|
||||||
|
return []*Task{t}, nil
|
||||||
|
}
|
||||||
|
gpus, err := apiListNvidiaGPUs(appRef)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
selections, err := expandHomogeneousNvidiaSelections(gpus, params.GPUIndices, params.ExcludeGPUIndices)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tasks := make([]*Task, 0, len(selections))
|
||||||
|
for _, selection := range selections {
|
||||||
|
taskParamsCopy := params
|
||||||
|
taskParamsCopy.GPUIndices = append([]int(nil), selection.GPUIndices...)
|
||||||
|
taskParamsCopy.ExcludeGPUIndices = nil
|
||||||
|
displayName := formatSplitTaskName(baseName, selection.Label)
|
||||||
|
taskParamsCopy.DisplayName = displayName
|
||||||
|
tasks = append(tasks, &Task{
|
||||||
|
ID: newJobID(idPrefix),
|
||||||
|
Name: displayName,
|
||||||
|
Target: target,
|
||||||
|
Priority: priority,
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: createdAt,
|
||||||
|
params: taskParamsCopy,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return tasks, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── SSE helpers ───────────────────────────────────────────────────────────────
|
// ── SSE helpers ───────────────────────────────────────────────────────────────
|
||||||
@@ -58,6 +289,10 @@ func streamJob(w http.ResponseWriter, r *http.Request, j *jobState) {
|
|||||||
if !sseStart(w) {
|
if !sseStart(w) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
streamSubscribedJob(w, r, j)
|
||||||
|
}
|
||||||
|
|
||||||
|
func streamSubscribedJob(w http.ResponseWriter, r *http.Request, j *jobState) {
|
||||||
existing, ch := j.subscribe()
|
existing, ch := j.subscribe()
|
||||||
for _, line := range existing {
|
for _, line := range existing {
|
||||||
sseWrite(w, "", line)
|
sseWrite(w, "", line)
|
||||||
@@ -81,31 +316,59 @@ func streamJob(w http.ResponseWriter, r *http.Request, j *jobState) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// runCmdJob runs an exec.Cmd as a background job, streaming stdout+stderr lines.
|
// streamCmdJob runs an exec.Cmd and streams stdout+stderr lines into j.
|
||||||
func runCmdJob(j *jobState, cmd *exec.Cmd) {
|
func streamCmdJob(j *jobState, cmd *exec.Cmd) error {
|
||||||
pr, pw := io.Pipe()
|
pr, pw := io.Pipe()
|
||||||
cmd.Stdout = pw
|
cmd.Stdout = pw
|
||||||
cmd.Stderr = pw
|
cmd.Stderr = pw
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
j.finish(err.Error())
|
_ = pw.Close()
|
||||||
return
|
_ = pr.Close()
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Lower the CPU scheduling priority of stress/audit subprocesses to nice+10
|
||||||
|
// so the X server and kernel interrupt handling remain responsive under load
|
||||||
|
// (prevents KVM/IPMI graphical console from freezing during GPU stress tests).
|
||||||
|
if cmd.Process != nil {
|
||||||
|
_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, 10)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scanDone := make(chan error, 1)
|
||||||
go func() {
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
if rec := recover(); rec != nil {
|
||||||
|
scanDone <- fmt.Errorf("stream scanner panic: %v", rec)
|
||||||
|
}
|
||||||
|
}()
|
||||||
scanner := bufio.NewScanner(pr)
|
scanner := bufio.NewScanner(pr)
|
||||||
|
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
j.append(scanner.Text())
|
// Split on \r to handle progress-bar style output (e.g. \r overwrites)
|
||||||
|
// and strip ANSI escape codes so logs are readable in the browser.
|
||||||
|
parts := strings.Split(scanner.Text(), "\r")
|
||||||
|
for _, part := range parts {
|
||||||
|
line := ansiEscapeRE.ReplaceAllString(part, "")
|
||||||
|
if line != "" {
|
||||||
|
j.append(line)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if err := scanner.Err(); err != nil && !errors.Is(err, io.ErrClosedPipe) {
|
||||||
|
scanDone <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
scanDone <- nil
|
||||||
}()
|
}()
|
||||||
|
|
||||||
err := cmd.Wait()
|
err := cmd.Wait()
|
||||||
_ = pw.Close()
|
_ = pw.Close()
|
||||||
|
scanErr := <-scanDone
|
||||||
|
_ = pr.Close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
j.finish(err.Error())
|
return err
|
||||||
} else {
|
|
||||||
j.finish("")
|
|
||||||
}
|
}
|
||||||
|
return scanErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Audit ─────────────────────────────────────────────────────────────────────
|
// ── Audit ─────────────────────────────────────────────────────────────────────
|
||||||
@@ -153,42 +416,95 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var body struct {
|
var body struct {
|
||||||
Duration int `json:"duration"`
|
Duration int `json:"duration"`
|
||||||
DiagLevel int `json:"diag_level"`
|
DiagLevel int `json:"diag_level"`
|
||||||
GPUIndices []int `json:"gpu_indices"`
|
GPUIndices []int `json:"gpu_indices"`
|
||||||
Profile string `json:"profile"`
|
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
|
||||||
DisplayName string `json:"display_name"`
|
Loader string `json:"loader"`
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
|
PlatformComponents []string `json:"platform_components"`
|
||||||
}
|
}
|
||||||
if r.ContentLength > 0 {
|
if r.Body != nil {
|
||||||
_ = json.NewDecoder(r.Body).Decode(&body)
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
name := taskNames[target]
|
name := taskDisplayName(target, body.Profile, body.Loader)
|
||||||
if name == "" {
|
|
||||||
name = target
|
|
||||||
}
|
|
||||||
t := &Task{
|
|
||||||
ID: newJobID("sat-" + target),
|
|
||||||
Name: name,
|
|
||||||
Target: target,
|
|
||||||
Status: TaskPending,
|
|
||||||
CreatedAt: time.Now(),
|
|
||||||
params: taskParams{
|
|
||||||
Duration: body.Duration,
|
|
||||||
DiagLevel: body.DiagLevel,
|
|
||||||
GPUIndices: body.GPUIndices,
|
|
||||||
BurnProfile: body.Profile,
|
|
||||||
DisplayName: body.DisplayName,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if strings.TrimSpace(body.DisplayName) != "" {
|
if strings.TrimSpace(body.DisplayName) != "" {
|
||||||
t.Name = body.DisplayName
|
name = body.DisplayName
|
||||||
}
|
}
|
||||||
globalQueue.enqueue(t)
|
params := taskParams{
|
||||||
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
Duration: body.Duration,
|
||||||
|
DiagLevel: body.DiagLevel,
|
||||||
|
GPUIndices: body.GPUIndices,
|
||||||
|
ExcludeGPUIndices: body.ExcludeGPUIndices,
|
||||||
|
Loader: body.Loader,
|
||||||
|
BurnProfile: body.Profile,
|
||||||
|
DisplayName: body.DisplayName,
|
||||||
|
PlatformComponents: body.PlatformComponents,
|
||||||
|
}
|
||||||
|
tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, t := range tasks {
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
}
|
||||||
|
writeTaskRunResponse(w, tasks)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var body struct {
|
||||||
|
Profile string `json:"profile"`
|
||||||
|
SizeMB int `json:"size_mb"`
|
||||||
|
GPUIndices []int `json:"gpu_indices"`
|
||||||
|
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
|
||||||
|
RunNCCL *bool `json:"run_nccl"`
|
||||||
|
DisplayName string `json:"display_name"`
|
||||||
|
}
|
||||||
|
if r.Body != nil {
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid request body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
runNCCL := true
|
||||||
|
if body.RunNCCL != nil {
|
||||||
|
runNCCL = *body.RunNCCL
|
||||||
|
}
|
||||||
|
name := taskDisplayName("nvidia-benchmark", "", "")
|
||||||
|
if strings.TrimSpace(body.DisplayName) != "" {
|
||||||
|
name = body.DisplayName
|
||||||
|
}
|
||||||
|
tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
|
||||||
|
GPUIndices: body.GPUIndices,
|
||||||
|
ExcludeGPUIndices: body.ExcludeGPUIndices,
|
||||||
|
SizeMB: body.SizeMB,
|
||||||
|
BenchmarkProfile: body.Profile,
|
||||||
|
RunNCCL: runNCCL,
|
||||||
|
DisplayName: body.DisplayName,
|
||||||
|
}, name, h.opts.App, "benchmark-nvidia")
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, t := range tasks {
|
||||||
|
globalQueue.enqueue(t)
|
||||||
|
}
|
||||||
|
writeTaskRunResponse(w, tasks)
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
|
||||||
id := r.URL.Query().Get("job_id")
|
id := r.URL.Query().Get("job_id")
|
||||||
if id == "" {
|
if id == "" {
|
||||||
@@ -292,11 +608,13 @@ func (h *handler) handleAPIServicesAction(w http.ResponseWriter, r *http.Request
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
result, err := h.opts.App.ServiceActionResult(req.Name, action)
|
result, err := h.opts.App.ServiceActionResult(req.Name, action)
|
||||||
|
status := "ok"
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
status = "error"
|
||||||
return
|
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{"status": "ok", "output": result.Body})
|
// Always return 200 with output so the frontend can display the actual
|
||||||
|
// systemctl error message instead of a generic "exit status 1".
|
||||||
|
writeJSON(w, map[string]string{"status": status, "output": result.Body})
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Network ───────────────────────────────────────────────────────────────────
|
// ── Network ───────────────────────────────────────────────────────────────────
|
||||||
@@ -312,8 +630,10 @@ func (h *handler) handleAPINetworkStatus(w http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]any{
|
writeJSON(w, map[string]any{
|
||||||
"interfaces": ifaces,
|
"interfaces": ifaces,
|
||||||
"default_route": h.opts.App.DefaultRoute(),
|
"default_route": h.opts.App.DefaultRoute(),
|
||||||
|
"pending_change": h.hasPendingNetworkChange(),
|
||||||
|
"rollback_in": h.pendingNetworkRollbackIn(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -392,21 +712,76 @@ func (h *handler) handleAPIExportList(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeJSON(w, entries)
|
writeJSON(w, entries)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIExportUSBTargets(w http.ResponseWriter, _ *http.Request) {
|
||||||
archive, err := app.BuildSupportBundle(h.opts.ExportDir)
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
targets, err := h.opts.App.ListRemovableTargets()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
writeError(w, http.StatusInternalServerError, err.Error())
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
writeJSON(w, map[string]string{
|
if targets == nil {
|
||||||
"status": "ok",
|
targets = []platform.RemovableTarget{}
|
||||||
"path": archive,
|
}
|
||||||
"url": "/export/support.tar.gz",
|
writeJSON(w, targets)
|
||||||
})
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIExportUSBAudit(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var target platform.RemovableTarget
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&target); err != nil || target.Device == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "device is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
result, err := h.opts.App.ExportLatestAuditResult(target)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "ok", "message": result.Body})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var target platform.RemovableTarget
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&target); err != nil || target.Device == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "device is required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
result, err := h.opts.App.ExportSupportBundleResult(target)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "ok", "message": result.Body})
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── GPU presence ──────────────────────────────────────────────────────────────
|
// ── GPU presence ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
if h.opts.App == nil {
|
||||||
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
gpus, err := h.opts.App.ListNvidiaGPUs()
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if gpus == nil {
|
||||||
|
gpus = []platform.NvidiaGPU{}
|
||||||
|
}
|
||||||
|
writeJSON(w, gpus)
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
||||||
if h.opts.App == nil {
|
if h.opts.App == nil {
|
||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
@@ -420,6 +795,45 @@ func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── GPU tools ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
type toolEntry struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Available bool `json:"available"`
|
||||||
|
Vendor string `json:"vendor"` // "nvidia" | "amd"
|
||||||
|
}
|
||||||
|
_, nvidiaErr := os.Stat("/dev/nvidia0")
|
||||||
|
_, amdErr := os.Stat("/dev/kfd")
|
||||||
|
nvidiaUp := nvidiaErr == nil
|
||||||
|
amdUp := amdErr == nil
|
||||||
|
_, dcgmErr := exec.LookPath("dcgmi")
|
||||||
|
_, ncclStressErr := exec.LookPath("bee-nccl-gpu-stress")
|
||||||
|
_, johnErr := exec.LookPath("bee-john-gpu-stress")
|
||||||
|
_, beeBurnErr := exec.LookPath("bee-gpu-burn")
|
||||||
|
_, nvBandwidthErr := exec.LookPath("nvbandwidth")
|
||||||
|
profErr := lookPathAny("dcgmproftester", "dcgmproftester13", "dcgmproftester12", "dcgmproftester11")
|
||||||
|
writeJSON(w, []toolEntry{
|
||||||
|
{ID: "nvidia-compute", Available: nvidiaUp && profErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "nvidia-targeted-power", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "nvidia-pulse", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "nvidia-interconnect", Available: nvidiaUp && ncclStressErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "nvidia-bandwidth", Available: nvidiaUp && dcgmErr == nil && nvBandwidthErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "bee-gpu-burn", Available: nvidiaUp && beeBurnErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "john", Available: nvidiaUp && johnErr == nil, Vendor: "nvidia"},
|
||||||
|
{ID: "rvs", Available: amdUp, Vendor: "amd"},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func lookPathAny(names ...string) error {
|
||||||
|
for _, name := range names {
|
||||||
|
if _, err := exec.LookPath(name); err == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return exec.ErrNotFound
|
||||||
|
}
|
||||||
|
|
||||||
// ── System ────────────────────────────────────────────────────────────────────
|
// ── System ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -427,9 +841,9 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
inRAM := h.opts.App.IsLiveMediaInRAM()
|
status := h.opts.App.LiveBootSource()
|
||||||
w.Header().Set("Content-Type", "application/json")
|
w.Header().Set("Content-Type", "application/json")
|
||||||
_ = json.NewEncoder(w).Encode(map[string]bool{"in_ram": inRAM})
|
_ = json.NewEncoder(w).Encode(status)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
|
||||||
@@ -437,10 +851,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
|
|||||||
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
writeError(w, http.StatusServiceUnavailable, "app not configured")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
h.installMu.Lock()
|
if globalQueue.hasActiveTarget("install") {
|
||||||
installRunning := h.installJob != nil && !h.installJob.isDone()
|
|
||||||
h.installMu.Unlock()
|
|
||||||
if installRunning {
|
|
||||||
writeError(w, http.StatusConflict, "install to disk is already running")
|
writeError(w, http.StatusConflict, "install to disk is already running")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -461,7 +872,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
|
|||||||
|
|
||||||
var standardTools = []string{
|
var standardTools = []string{
|
||||||
"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
|
"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
|
||||||
"nvidia-smi", "memtester", "stress-ng", "nvtop",
|
"nvidia-smi", "dcgmi", "nv-hostengine", "memtester", "stress-ng", "nvtop",
|
||||||
"mstflint", "qrencode",
|
"mstflint", "qrencode",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -555,39 +966,43 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
|
|||||||
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
|
writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if globalQueue.hasActiveTarget("install") {
|
||||||
h.installMu.Lock()
|
writeError(w, http.StatusConflict, "install task is already pending or running")
|
||||||
if h.installJob != nil && !h.installJob.isDone() {
|
|
||||||
h.installMu.Unlock()
|
|
||||||
writeError(w, http.StatusConflict, "install already running")
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
j := &jobState{}
|
t := &Task{
|
||||||
h.installJob = j
|
ID: newJobID("install"),
|
||||||
h.installMu.Unlock()
|
Name: "Install to Disk",
|
||||||
|
Target: "install",
|
||||||
logFile := platform.InstallLogPath(req.Device)
|
Priority: 20,
|
||||||
go runCmdJob(j, exec.CommandContext(context.Background(), "bee-install", req.Device, logFile))
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
w.WriteHeader(http.StatusNoContent)
|
params: taskParams{
|
||||||
}
|
Device: req.Device,
|
||||||
|
},
|
||||||
func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request) {
|
|
||||||
h.installMu.Lock()
|
|
||||||
j := h.installJob
|
|
||||||
h.installMu.Unlock()
|
|
||||||
if j == nil {
|
|
||||||
if !sseStart(w) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sseWrite(w, "done", "")
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
streamJob(w, r, j)
|
globalQueue.enqueue(t)
|
||||||
|
writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Metrics SSE ───────────────────────────────────────────────────────────────
|
// ── Metrics SSE ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *handler) handleAPIMetricsLatest(w http.ResponseWriter, r *http.Request) {
|
||||||
|
sample, ok := h.latestMetric()
|
||||||
|
if !ok {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte("{}"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b, err := json.Marshal(sample)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write(b)
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
|
func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
|
||||||
if !sseStart(w) {
|
if !sseStart(w) {
|
||||||
return
|
return
|
||||||
@@ -599,10 +1014,9 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
|
|||||||
case <-r.Context().Done():
|
case <-r.Context().Done():
|
||||||
return
|
return
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
sample := platform.SampleLiveMetrics()
|
sample, ok := h.latestMetric()
|
||||||
h.feedRings(sample)
|
if !ok {
|
||||||
if h.metricsDB != nil {
|
continue
|
||||||
_ = h.metricsDB.Write(sample)
|
|
||||||
}
|
}
|
||||||
b, err := json.Marshal(sample)
|
b, err := json.Marshal(sample)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -630,13 +1044,7 @@ func (h *handler) feedRings(sample platform.LiveMetricSample) {
|
|||||||
h.ringMemLoad.push(sample.MemLoadPct)
|
h.ringMemLoad.push(sample.MemLoadPct)
|
||||||
|
|
||||||
h.ringsMu.Lock()
|
h.ringsMu.Lock()
|
||||||
for i, fan := range sample.Fans {
|
h.pushFanRings(sample.Fans)
|
||||||
for len(h.ringFans) <= i {
|
|
||||||
h.ringFans = append(h.ringFans, newMetricsRing(120))
|
|
||||||
h.fanNames = append(h.fanNames, fan.Name)
|
|
||||||
}
|
|
||||||
h.ringFans[i].push(float64(fan.RPM))
|
|
||||||
}
|
|
||||||
for _, gpu := range sample.GPUs {
|
for _, gpu := range sample.GPUs {
|
||||||
idx := gpu.GPUIndex
|
idx := gpu.GPUIndex
|
||||||
for len(h.gpuRings) <= idx {
|
for len(h.gpuRings) <= idx {
|
||||||
@@ -655,6 +1063,51 @@ func (h *handler) feedRings(sample platform.LiveMetricSample) {
|
|||||||
h.ringsMu.Unlock()
|
h.ringsMu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) pushFanRings(fans []platform.FanReading) {
|
||||||
|
if len(fans) == 0 && len(h.ringFans) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
fanValues := make(map[string]float64, len(fans))
|
||||||
|
for _, fan := range fans {
|
||||||
|
if fan.Name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fanValues[fan.Name] = fan.RPM
|
||||||
|
found := false
|
||||||
|
for i, name := range h.fanNames {
|
||||||
|
if name == fan.Name {
|
||||||
|
found = true
|
||||||
|
if i >= len(h.ringFans) {
|
||||||
|
h.ringFans = append(h.ringFans, newMetricsRing(120))
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !found {
|
||||||
|
h.fanNames = append(h.fanNames, fan.Name)
|
||||||
|
h.ringFans = append(h.ringFans, newMetricsRing(120))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i, ring := range h.ringFans {
|
||||||
|
if ring == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := ""
|
||||||
|
if i < len(h.fanNames) {
|
||||||
|
name = h.fanNames[i]
|
||||||
|
}
|
||||||
|
if rpm, ok := fanValues[name]; ok {
|
||||||
|
ring.push(rpm)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if last, ok := ring.latest(); ok {
|
||||||
|
ring.push(last)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ring.push(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
|
func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
|
||||||
if name == "" {
|
if name == "" {
|
||||||
return
|
return
|
||||||
@@ -733,7 +1186,10 @@ func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, erro
|
|||||||
return result, err
|
return result, err
|
||||||
}
|
}
|
||||||
|
|
||||||
pnc := &pendingNetChange{snapshot: snapshot}
|
pnc := &pendingNetChange{
|
||||||
|
snapshot: snapshot,
|
||||||
|
deadline: time.Now().Add(netRollbackTimeout),
|
||||||
|
}
|
||||||
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
pnc.timer = time.AfterFunc(netRollbackTimeout, func() {
|
||||||
_ = h.opts.App.RestoreNetworkSnapshot(snapshot)
|
_ = h.opts.App.RestoreNetworkSnapshot(snapshot)
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
@@ -750,6 +1206,25 @@ func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, erro
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (h *handler) hasPendingNetworkChange() bool {
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
defer h.pendingNetMu.Unlock()
|
||||||
|
return h.pendingNet != nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) pendingNetworkRollbackIn() int {
|
||||||
|
h.pendingNetMu.Lock()
|
||||||
|
defer h.pendingNetMu.Unlock()
|
||||||
|
if h.pendingNet == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
remaining := int(time.Until(h.pendingNet.deadline).Seconds())
|
||||||
|
if remaining < 1 {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return remaining
|
||||||
|
}
|
||||||
|
|
||||||
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
|
||||||
h.pendingNetMu.Lock()
|
h.pendingNetMu.Lock()
|
||||||
pnc := h.pendingNet
|
pnc := h.pendingNet
|
||||||
@@ -791,3 +1266,108 @@ func (h *handler) rollbackPendingNetworkChange() error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Display / Screen Resolution ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
type displayMode struct {
|
||||||
|
Output string `json:"output"`
|
||||||
|
Mode string `json:"mode"`
|
||||||
|
Current bool `json:"current"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type displayInfo struct {
|
||||||
|
Output string `json:"output"`
|
||||||
|
Modes []displayMode `json:"modes"`
|
||||||
|
Current string `json:"current"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var xrandrOutputRE = regexp.MustCompile(`^(\S+)\s+connected`)
|
||||||
|
var xrandrModeRE = regexp.MustCompile(`^\s{3}(\d+x\d+)\s`)
|
||||||
|
var xrandrCurrentRE = regexp.MustCompile(`\*`)
|
||||||
|
|
||||||
|
func parseXrandrOutput(out string) []displayInfo {
|
||||||
|
var infos []displayInfo
|
||||||
|
var cur *displayInfo
|
||||||
|
for _, line := range strings.Split(out, "\n") {
|
||||||
|
if m := xrandrOutputRE.FindStringSubmatch(line); m != nil {
|
||||||
|
if cur != nil {
|
||||||
|
infos = append(infos, *cur)
|
||||||
|
}
|
||||||
|
cur = &displayInfo{Output: m[1]}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if cur == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if m := xrandrModeRE.FindStringSubmatch(line); m != nil {
|
||||||
|
isCurrent := xrandrCurrentRE.MatchString(line)
|
||||||
|
mode := displayMode{Output: cur.Output, Mode: m[1], Current: isCurrent}
|
||||||
|
cur.Modes = append(cur.Modes, mode)
|
||||||
|
if isCurrent {
|
||||||
|
cur.Current = m[1]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if cur != nil {
|
||||||
|
infos = append(infos, *cur)
|
||||||
|
}
|
||||||
|
return infos
|
||||||
|
}
|
||||||
|
|
||||||
|
func xrandrCommand(args ...string) *exec.Cmd {
|
||||||
|
cmd := exec.Command("xrandr", args...)
|
||||||
|
env := append([]string{}, os.Environ()...)
|
||||||
|
hasDisplay := false
|
||||||
|
hasXAuthority := false
|
||||||
|
for _, kv := range env {
|
||||||
|
if strings.HasPrefix(kv, "DISPLAY=") && strings.TrimPrefix(kv, "DISPLAY=") != "" {
|
||||||
|
hasDisplay = true
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(kv, "XAUTHORITY=") && strings.TrimPrefix(kv, "XAUTHORITY=") != "" {
|
||||||
|
hasXAuthority = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasDisplay {
|
||||||
|
env = append(env, "DISPLAY=:0")
|
||||||
|
}
|
||||||
|
if !hasXAuthority {
|
||||||
|
env = append(env, "XAUTHORITY=/home/bee/.Xauthority")
|
||||||
|
}
|
||||||
|
cmd.Env = env
|
||||||
|
return cmd
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
out, err := xrandrCommand().Output()
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, parseXrandrOutput(string(out)))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req struct {
|
||||||
|
Output string `json:"output"`
|
||||||
|
Mode string `json:"mode"`
|
||||||
|
}
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Output == "" || req.Mode == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "output and mode are required")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Validate mode looks like WxH to prevent injection
|
||||||
|
if !regexp.MustCompile(`^\d+x\d+$`).MatchString(req.Mode) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid mode format")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Validate output name (no special chars)
|
||||||
|
if !regexp.MustCompile(`^[A-Za-z0-9_\-]+$`).MatchString(req.Output) {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid output name")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
writeJSON(w, map[string]string{"status": "ok", "output": req.Output, "mode": req.Mode})
|
||||||
|
}
|
||||||
|
|||||||
228
audit/internal/webui/api_test.go
Normal file
228
audit/internal/webui/api_test.go
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
|
||||||
|
t.Setenv("DISPLAY", "")
|
||||||
|
t.Setenv("XAUTHORITY", "")
|
||||||
|
|
||||||
|
cmd := xrandrCommand("--query")
|
||||||
|
|
||||||
|
var hasDisplay bool
|
||||||
|
var hasXAuthority bool
|
||||||
|
for _, kv := range cmd.Env {
|
||||||
|
if kv == "DISPLAY=:0" {
|
||||||
|
hasDisplay = true
|
||||||
|
}
|
||||||
|
if kv == "XAUTHORITY=/home/bee/.Xauthority" {
|
||||||
|
hasXAuthority = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasDisplay {
|
||||||
|
t.Fatalf("DISPLAY not injected: %v", cmd.Env)
|
||||||
|
}
|
||||||
|
if !hasXAuthority {
|
||||||
|
t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
originalTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = nil
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = originalTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h := &handler{opts: HandlerOptions{App: &app.App{}}}
|
||||||
|
req := httptest.NewRequest("POST", "/api/sat/cpu/run", strings.NewReader(`{"profile":"smoke"}`))
|
||||||
|
req.ContentLength = -1
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h.handleAPISATRun("cpu").ServeHTTP(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != 200 {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
defer globalQueue.mu.Unlock()
|
||||||
|
if len(globalQueue.tasks) != 1 {
|
||||||
|
t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
|
||||||
|
}
|
||||||
|
if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
|
||||||
|
t.Fatalf("burn profile=%q want smoke", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs verifies that the NVIDIA
// benchmark endpoint queues a single task for the explicitly selected GPU
// indices and honors an explicit "run_nccl": false in the request body.
func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
	// Isolate the global task queue for the duration of the test.
	globalQueue.mu.Lock()
	originalTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = originalTasks
		globalQueue.mu.Unlock()
	})
	// Stub GPU discovery: two same-model GPUs at indices 1 and 3.
	prevList := apiListNvidiaGPUs
	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
		return []platform.NvidiaGPU{
			{Index: 1, Name: "NVIDIA H100 PCIe"},
			{Index: 3, Name: "NVIDIA H100 PCIe"},
		}, nil
	}
	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
	rec := httptest.NewRecorder()

	h.handleAPIBenchmarkNvidiaRun(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	// Same-model GPUs collapse into one task targeting both indices.
	if len(globalQueue.tasks) != 1 {
		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
	}
	task := globalQueue.tasks[0]
	if task.Target != "nvidia-benchmark" {
		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
	}
	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
		t.Fatalf("gpu indices=%v want [1 3]", got)
	}
	if task.params.RunNCCL {
		t.Fatal("RunNCCL should reflect explicit false from request")
	}
}
|
||||||
|
|
||||||
|
// TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels verifies that when the
// selected GPUs span different models, the benchmark endpoint splits the run
// into one task per model and reports both task IDs in the response.
func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
	// Isolate the global task queue for the duration of the test.
	globalQueue.mu.Lock()
	originalTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = originalTasks
		globalQueue.mu.Unlock()
	})
	// Stub GPU discovery: two H100s (0, 1) and one H200 (2).
	prevList := apiListNvidiaGPUs
	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
		return []platform.NvidiaGPU{
			{Index: 0, Name: "NVIDIA H100 PCIe"},
			{Index: 1, Name: "NVIDIA H100 PCIe"},
			{Index: 2, Name: "NVIDIA H200 NVL"},
		}, nil
	}
	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
	rec := httptest.NewRecorder()

	h.handleAPIBenchmarkNvidiaRun(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	// The response should carry one task ID per model group.
	var resp taskRunResponse
	if err := json.Unmarshal(rec.Body.Bytes(), &resp); err != nil {
		t.Fatalf("decode response: %v", err)
	}
	if len(resp.TaskIDs) != 2 {
		t.Fatalf("task_ids=%v want 2 items", resp.TaskIDs)
	}
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	if len(globalQueue.tasks) != 2 {
		t.Fatalf("tasks=%d want 2", len(globalQueue.tasks))
	}
	// First task covers the H100 pair, second the lone H200.
	if got := globalQueue.tasks[0].params.GPUIndices; len(got) != 2 || got[0] != 0 || got[1] != 1 {
		t.Fatalf("task[0] gpu indices=%v want [0 1]", got)
	}
	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
		t.Fatalf("task[1] gpu indices=%v want [2]", got)
	}
}
|
||||||
|
|
||||||
|
// TestHandleAPISATRunSplitsMixedNvidiaTaskSet verifies that the SAT
// nvidia-targeted-power endpoint, like the benchmark endpoint, splits a GPU
// selection spanning mixed models into one queued task per model.
func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
	// Isolate the global task queue for the duration of the test.
	globalQueue.mu.Lock()
	originalTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = originalTasks
		globalQueue.mu.Unlock()
	})
	// Stub GPU discovery: two H100s (0, 1) and one H200 (2).
	prevList := apiListNvidiaGPUs
	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
		return []platform.NvidiaGPU{
			{Index: 0, Name: "NVIDIA H100 PCIe"},
			{Index: 1, Name: "NVIDIA H100 PCIe"},
			{Index: 2, Name: "NVIDIA H200 NVL"},
		}, nil
	}
	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	req := httptest.NewRequest("POST", "/api/sat/nvidia-targeted-power/run", strings.NewReader(`{"profile":"acceptance","gpu_indices":[0,1,2]}`))
	rec := httptest.NewRecorder()

	h.handleAPISATRun("nvidia-targeted-power").ServeHTTP(rec, req)

	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	if len(globalQueue.tasks) != 2 {
		t.Fatalf("tasks=%d want 2", len(globalQueue.tasks))
	}
	// First task covers the H100 pair, second the lone H200.
	if got := globalQueue.tasks[0].params.GPUIndices; len(got) != 2 || got[0] != 0 || got[1] != 1 {
		t.Fatalf("task[0] gpu indices=%v want [0 1]", got)
	}
	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
		t.Fatalf("task[1] gpu indices=%v want [2]", got)
	}
}
|
||||||
|
|
||||||
|
// TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples verifies that
// pushFanRings keys ring buffers by fan name and, when a fan is missing from
// a later reading, repeats its last known RPM instead of dropping the sample.
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
	h := &handler{}
	// First push registers both fans with initial readings.
	h.pushFanRings([]platform.FanReading{
		{Name: "FAN_A", RPM: 4200},
		{Name: "FAN_B", RPM: 5100},
	})
	// Second push omits FAN_A entirely; only FAN_B is sampled.
	h.pushFanRings([]platform.FanReading{
		{Name: "FAN_B", RPM: 5200},
	})

	// Registration order must be preserved: FAN_A first, FAN_B second.
	if len(h.fanNames) != 2 || h.fanNames[0] != "FAN_A" || h.fanNames[1] != "FAN_B" {
		t.Fatalf("fanNames=%v", h.fanNames)
	}
	aVals, _ := h.ringFans[0].snapshot()
	bVals, _ := h.ringFans[1].snapshot()
	// Both rings should have two samples after two pushes.
	if len(aVals) != 2 || len(bVals) != 2 {
		t.Fatalf("fan ring lengths: A=%d B=%d", len(aVals), len(bVals))
	}
	if aVals[1] != 4200 {
		t.Fatalf("FAN_A should carry forward last value, got %v", aVals)
	}
	if bVals[1] != 5200 {
		t.Fatalf("FAN_B should use latest sampled value, got %v", bVals)
	}
}
|
||||||
773
audit/internal/webui/charts_svg.go
Normal file
773
audit/internal/webui/charts_svg.go
Normal file
@@ -0,0 +1,773 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// chartTimelineSegment is one contiguous span of a chart's time range,
// flagged Active while a task was running and inactive (idle) otherwise.
type chartTimelineSegment struct {
	Start  time.Time
	End    time.Time
	Active bool
}

// chartScale describes a Y axis: its value range and tick positions.
type chartScale struct {
	Min   float64
	Max   float64
	Ticks []float64
}

// chartLayout holds the pixel geometry of a chart SVG: overall canvas size
// plus the rectangle of the plot area inside it.
type chartLayout struct {
	Width      int
	Height     int
	PlotLeft   int
	PlotRight  int
	PlotTop    int
	PlotBottom int
}

// metricChartSeries is one plotted line: display name, per-axis title,
// stroke color, and the sampled values.
type metricChartSeries struct {
	Name      string
	AxisTitle string
	Color     string
	Values    []float64
}

// metricChartPalette is the rotation of series line colors; series i uses
// metricChartPalette[i % len(metricChartPalette)].
var metricChartPalette = []string{
	"#5794f2",
	"#73bf69",
	"#f2cc0c",
	"#ff9830",
	"#f2495c",
	"#b877d9",
	"#56d2f7",
	"#8ab8ff",
	"#9adf8f",
	"#ffbe5c",
}

// gpuLabelCache caches GPU display labels keyed by GPU index, guarded by mu;
// loadedAt records when byIndex was last populated.
// NOTE(review): population/refresh logic lives elsewhere in this file.
var gpuLabelCache struct {
	mu       sync.Mutex
	loadedAt time.Time
	byIndex  map[int]string
}
|
||||||
|
|
||||||
|
// renderMetricChartSVG draws a single-Y-axis multi-series line chart as SVG
// bytes. labels and times supply the X axis (padded to the larger of the two
// lengths), datasets[i] is the series named names[i], yMin/yMax optionally
// pin the Y range, canvasHeight sets the SVG height, and timeline shades
// idle spans behind the plot. The returned error is always nil here; the
// signature keeps symmetry with other renderers.
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
	// Normalize to one point count covering both labels and times.
	pointCount := len(labels)
	if len(times) > pointCount {
		pointCount = len(times)
	}
	if pointCount == 0 {
		// Guarantee at least one point so downstream math has data to work with.
		pointCount = 1
		labels = []string{""}
		times = []time.Time{time.Time{}}
	}
	if len(labels) < pointCount {
		padded := make([]string, pointCount)
		copy(padded, labels)
		labels = padded
	}
	if len(times) < pointCount {
		times = synthesizeChartTimes(times, pointCount)
	}
	// NOTE(review): this writes through the caller's datasets slice when a
	// series is empty — confirm callers do not rely on the original values.
	for i := range datasets {
		if len(datasets[i]) == 0 {
			datasets[i] = make([]float64, pointCount)
		}
	}

	statsLabel := chartStatsLabel(datasets)

	// Build one legend entry per series name, cycling the color palette.
	legendItems := []metricChartSeries{}
	for i, name := range names {
		color := metricChartPalette[i%len(metricChartPalette)]
		values := make([]float64, pointCount)
		if i < len(datasets) {
			copy(values, coalesceDataset(datasets[i], pointCount))
		}
		legendItems = append(legendItems, metricChartSeries{
			Name:   name,
			Color:  color,
			Values: values,
		})
	}

	scale := singleAxisChartScale(datasets, yMin, yMax)
	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
	start, end := chartTimeBounds(times)

	// Paint order matters: shading and grids first, then border and axes,
	// data polylines on top, legend last.
	var b strings.Builder
	writeSVGOpen(&b, layout.Width, layout.Height)
	writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
	writeTimelineIdleSpans(&b, layout, start, end, timeline)
	writeVerticalGrid(&b, layout, times, pointCount, 8)
	writeHorizontalGrid(&b, layout, scale)
	writeTimelineBoundaries(&b, layout, start, end, timeline)
	writePlotBorder(&b, layout)
	writeSingleAxisY(&b, layout, scale)
	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
	for _, item := range legendItems {
		writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
	}
	writeLegend(&b, layout, legendItems)
	writeSVGClose(&b)
	return []byte(b.String()), nil
}
|
||||||
|
|
||||||
|
// renderGPUOverviewChartSVG renders the temperature/power/core-clock
// overview chart for GPU idx from live metric samples. The bool result
// reports whether the GPU had any data: (nil, false, nil) means no series
// existed for idx at all.
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) {
	temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
	power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
	coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
	if temp == nil && power == nil && coreClock == nil {
		return nil, false, nil
	}
	labels := sampleTimeLabels(samples)
	times := sampleTimes(samples)
	// drawGPUOverviewChartSVG requires exactly these three series, in this
	// order (one per dedicated axis).
	svg, err := drawGPUOverviewChartSVG(
		gpuDisplayLabel(idx)+" Overview",
		labels,
		times,
		[]metricChartSeries{
			{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
			{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
			{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
		},
		timeline,
	)
	if err != nil {
		return nil, false, err
	}
	return svg, true, nil
}
|
||||||
|
|
||||||
|
// drawGPUOverviewChartSVG renders a fixed-size chart with three value axes,
// one per series, each with its own independent scale and color. Exactly
// three series are required.
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
	if len(series) != 3 {
		return nil, fmt.Errorf("gpu overview requires 3 series, got %d", len(series))
	}
	// Fixed canvas and plot rectangle for the overview layout.
	const (
		width      = 1400
		height     = 840
		plotLeft   = 180
		plotRight  = 1220
		plotTop    = 96
		plotBottom = 660
	)
	// X positions of the three value axes: two left of the plot area, one
	// to the right of it.
	const (
		leftOuterAxis  = 72
		leftInnerAxis  = 132
		rightInnerAxis = 1268
	)
	layout := chartLayout{
		Width:      width,
		Height:     height,
		PlotLeft:   plotLeft,
		PlotRight:  plotRight,
		PlotTop:    plotTop,
		PlotBottom: plotBottom,
	}
	axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis}
	// Normalize labels/times to a common point count (same scheme as
	// renderMetricChartSVG).
	pointCount := len(labels)
	if len(times) > pointCount {
		pointCount = len(times)
	}
	if pointCount == 0 {
		pointCount = 1
		labels = []string{""}
		times = []time.Time{time.Time{}}
	}
	if len(labels) < pointCount {
		padded := make([]string, pointCount)
		copy(padded, labels)
		labels = padded
	}
	if len(times) < pointCount {
		times = synthesizeChartTimes(times, pointCount)
	}
	for i := range series {
		if len(series[i].Values) == 0 {
			series[i].Values = make([]float64, pointCount)
		}
	}

	// Each series gets its own nice-tick scale from its own value bounds.
	scales := make([]chartScale, len(series))
	for i := range series {
		min, max := chartSeriesBounds(series[i].Values)
		ticks := chartNiceTicks(min, max, 8)
		scales[i] = chartScale{
			Min:   ticks[0],
			Max:   ticks[len(ticks)-1],
			Ticks: ticks,
		}
	}
	start, end := chartTimeBounds(times)

	var b strings.Builder
	writeSVGOpen(&b, width, height)
	writeChartFrame(&b, title, "", width, height)
	writeTimelineIdleSpans(&b, layout, start, end, timeline)
	writeVerticalGrid(&b, layout, times, pointCount, 8)
	// Horizontal grid lines follow the first series' scale only.
	writeHorizontalGrid(&b, layout, scales[0])
	writeTimelineBoundaries(&b, layout, start, end, timeline)
	writePlotBorder(&b, layout)

	// Draw each axis line, title, tick marks and labels in the series color.
	// The first two axes place tick labels to the left of the axis line; the
	// third (right-hand) axis places them to the right.
	for i, axisLineX := range axisX {
		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
			axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
		fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
			axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
		for _, tick := range scales[i].Ticks {
			y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
			label := sanitizeChartText(chartYAxisNumber(tick))
			if i < 2 {
				fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
					axisLineX, y, axisLineX+6, y, series[i].Color)
				fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
					axisLineX-8, y, series[i].Color, label)
				continue
			}
			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
				axisLineX, y, axisLineX-6, y, series[i].Color)
			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
				axisLineX+8, y, series[i].Color, label)
		}
	}

	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
	for i := range series {
		writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
	}
	writeLegend(&b, layout, series)
	writeSVGClose(&b)
	return []byte(b.String()), nil
}
|
||||||
|
|
||||||
|
// metricsTimelineSegments derives the active/idle timeline overlay for the
// time range spanned by samples, using the current task queue history.
// Returns nil when there are no samples or no usable time bounds.
func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
	if len(samples) == 0 {
		return nil
	}
	times := sampleTimes(samples)
	start, end := chartTimeBounds(times)
	if start.IsZero() || end.IsZero() {
		return nil
	}
	return chartTimelineSegmentsForRange(start, end, now, snapshotTaskHistory())
}
|
||||||
|
|
||||||
|
func snapshotTaskHistory() []Task {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
defer globalQueue.mu.Unlock()
|
||||||
|
out := make([]Task, len(globalQueue.tasks))
|
||||||
|
for i, t := range globalQueue.tasks {
|
||||||
|
out[i] = *t
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartTimelineSegmentsForRange partitions [start, end] into alternating
// active/idle segments from task run intervals. A task contributes
// [StartedAt, DoneAt] — or [StartedAt, now] while still running; intervals
// are clamped to the chart range, merged where they overlap or touch, and
// the gaps between merged spans become inactive segments.
func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment {
	if start.IsZero() || end.IsZero() {
		return nil
	}
	if end.Before(start) {
		start, end = end, start
	}
	type interval struct {
		start time.Time
		end   time.Time
	}
	// Collect each task's run interval, clamped to [start, end].
	active := make([]interval, 0, len(tasks))
	for _, task := range tasks {
		if task.StartedAt == nil {
			// Never started: contributes no active time.
			continue
		}
		intervalStart := task.StartedAt.UTC()
		intervalEnd := now.UTC()
		if task.DoneAt != nil {
			intervalEnd = task.DoneAt.UTC()
		}
		if !intervalEnd.After(intervalStart) {
			continue
		}
		if intervalEnd.Before(start) || intervalStart.After(end) {
			// Entirely outside the chart range.
			continue
		}
		if intervalStart.Before(start) {
			intervalStart = start
		}
		if intervalEnd.After(end) {
			intervalEnd = end
		}
		active = append(active, interval{start: intervalStart, end: intervalEnd})
	}
	// Sort by start (then end) so overlapping intervals become adjacent.
	sort.Slice(active, func(i, j int) bool {
		if active[i].start.Equal(active[j].start) {
			return active[i].end.Before(active[j].end)
		}
		return active[i].start.Before(active[j].start)
	})
	// Merge overlapping/touching intervals into maximal active spans.
	merged := make([]interval, 0, len(active))
	for _, span := range active {
		if len(merged) == 0 {
			merged = append(merged, span)
			continue
		}
		last := &merged[len(merged)-1]
		if !span.start.After(last.end) {
			if span.end.After(last.end) {
				last.end = span.end
			}
			continue
		}
		merged = append(merged, span)
	}

	// Emit active segments with idle segments filling the gaps; when no task
	// overlapped the range, the result is one idle segment covering it all.
	segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
	cursor := start
	for _, span := range merged {
		if span.start.After(cursor) {
			segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
		}
		segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
		cursor = span.end
	}
	if cursor.Before(end) {
		segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
	}
	if len(segments) == 0 {
		segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
	}
	return segments
}
|
||||||
|
|
||||||
|
func sampleTimes(samples []platform.LiveMetricSample) []time.Time {
|
||||||
|
times := make([]time.Time, 0, len(samples))
|
||||||
|
for _, sample := range samples {
|
||||||
|
times = append(times, sample.Timestamp)
|
||||||
|
}
|
||||||
|
return times
|
||||||
|
}
|
||||||
|
|
||||||
|
func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
|
||||||
|
min, max := 0.0, 1.0
|
||||||
|
if yMin != nil && yMax != nil {
|
||||||
|
min, max = *yMin, *yMax
|
||||||
|
} else {
|
||||||
|
min, max = chartSeriesBounds(flattenDatasets(datasets))
|
||||||
|
if yMin != nil {
|
||||||
|
min = *yMin
|
||||||
|
}
|
||||||
|
if yMax != nil {
|
||||||
|
max = *yMax
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ticks := chartNiceTicks(min, max, 8)
|
||||||
|
return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
|
||||||
|
}
|
||||||
|
|
||||||
|
// flattenDatasets concatenates all series into one flat slice, preserving
// dataset order then element order.
func flattenDatasets(datasets [][]float64) []float64 {
	capacity := 0
	for i := range datasets {
		capacity += len(datasets[i])
	}
	flat := make([]float64, 0, capacity)
	for i := range datasets {
		for j := range datasets[i] {
			flat = append(flat, datasets[i][j])
		}
	}
	return flat
}
|
||||||
|
|
||||||
|
func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
|
||||||
|
legendRows := 0
|
||||||
|
if chartLegendVisible(seriesCount) && seriesCount > 0 {
|
||||||
|
cols := 4
|
||||||
|
if seriesCount < cols {
|
||||||
|
cols = seriesCount
|
||||||
|
}
|
||||||
|
legendRows = (seriesCount + cols - 1) / cols
|
||||||
|
}
|
||||||
|
legendHeight := 0
|
||||||
|
if legendRows > 0 {
|
||||||
|
legendHeight = legendRows*24 + 24
|
||||||
|
}
|
||||||
|
return chartLayout{
|
||||||
|
Width: 1400,
|
||||||
|
Height: canvasHeight,
|
||||||
|
PlotLeft: 96,
|
||||||
|
PlotRight: 1352,
|
||||||
|
PlotTop: 72,
|
||||||
|
PlotBottom: canvasHeight - 60 - legendHeight,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
|
||||||
|
if len(times) == 0 {
|
||||||
|
return time.Time{}, time.Time{}
|
||||||
|
}
|
||||||
|
start := times[0].UTC()
|
||||||
|
end := start
|
||||||
|
for _, ts := range times[1:] {
|
||||||
|
t := ts.UTC()
|
||||||
|
if t.Before(start) {
|
||||||
|
start = t
|
||||||
|
}
|
||||||
|
if t.After(end) {
|
||||||
|
end = t
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return start, end
|
||||||
|
}
|
||||||
|
|
||||||
|
func synthesizeChartTimes(times []time.Time, count int) []time.Time {
|
||||||
|
if count <= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if len(times) == count {
|
||||||
|
return times
|
||||||
|
}
|
||||||
|
if len(times) == 1 {
|
||||||
|
out := make([]time.Time, count)
|
||||||
|
for i := range out {
|
||||||
|
out[i] = times[0].Add(time.Duration(i) * time.Minute)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
base := time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
|
||||||
|
out := make([]time.Time, count)
|
||||||
|
for i := range out {
|
||||||
|
out[i] = base.Add(time.Duration(i) * time.Minute)
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSVGOpen(b *strings.Builder, width, height int) {
|
||||||
|
fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSVGClose(b *strings.Builder) {
|
||||||
|
b.WriteString("</svg>\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeChartFrame(b *strings.Builder, title, subtitle string, width, height int) {
|
||||||
|
fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
|
||||||
|
width/2, sanitizeChartText(title))
|
||||||
|
if strings.TrimSpace(subtitle) != "" {
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="50" text-anchor="middle" font-family="sans-serif" font-size="12" font-weight="600" fill="#64748b">%s</text>`+"\n",
|
||||||
|
width/2, sanitizeChartText(subtitle))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writePlotBorder(b *strings.Builder, layout chartLayout) {
|
||||||
|
fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) {
|
||||||
|
b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
|
||||||
|
for _, tick := range scale.Ticks {
|
||||||
|
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
|
||||||
|
layout.PlotLeft, y, layout.PlotRight, y)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) {
|
||||||
|
if pointCount <= 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
start, end := chartTimeBounds(times)
|
||||||
|
b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
|
||||||
|
for _, idx := range gpuChartLabelIndices(pointCount, target) {
|
||||||
|
ts := chartPointTime(times, idx)
|
||||||
|
x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
|
||||||
|
x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) {
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
|
||||||
|
for _, tick := range scale.Ticks {
|
||||||
|
y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
|
||||||
|
layout.PlotLeft, y, layout.PlotLeft-6, y)
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
|
||||||
|
layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) {
|
||||||
|
pointCount := len(labels)
|
||||||
|
if len(times) > pointCount {
|
||||||
|
pointCount = len(times)
|
||||||
|
}
|
||||||
|
b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
|
||||||
|
for _, idx := range gpuChartLabelIndices(pointCount, target) {
|
||||||
|
x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
label := ""
|
||||||
|
if idx < len(labels) {
|
||||||
|
label = labels[idx]
|
||||||
|
}
|
||||||
|
fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
|
||||||
|
(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeSeriesPolyline draws one data series as a polyline, plus a marker:
// a plain dot for a single-point series, otherwise an outlined dot and a
// small triangle above the series peak (ties resolve to the latest peak
// because of the >= comparison).
func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) {
	if len(values) == 0 {
		return
	}
	// Build the space-separated "x,y x,y ..." point list.
	var points strings.Builder
	for idx, value := range values {
		if idx > 0 {
			points.WriteByte(' ')
		}
		x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
		y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom)
		points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
		points.WriteByte(',')
		points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
	}
	fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
		points.String(), color)
	if len(values) == 1 {
		// A one-point polyline has no visible length, so draw a dot instead.
		x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
		y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
		fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
		return
	}
	// Locate the maximum value; >= keeps the last occurrence on ties.
	peakIdx := 0
	peakValue := values[0]
	for idx, value := range values[1:] {
		if value >= peakValue {
			peakIdx = idx + 1
			peakValue = value
		}
	}
	x := chartXForTime(chartPointTime(times, peakIdx), start, end, layout.PlotLeft, layout.PlotRight)
	y := chartYForValue(peakValue, scale, layout.PlotTop, layout.PlotBottom)
	// Peak marker: white-outlined dot plus a downward-pointing triangle
	// hovering just above it.
	fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="4.2" fill="%s" stroke="#ffffff" stroke-width="1.6"/>`+"\n", x, y, color)
	fmt.Fprintf(b, `<path d="M %.1f %.1f L %.1f %.1f L %.1f %.1f Z" fill="%s" opacity="0.9"/>`+"\n",
		x, y-10, x-5, y-18, x+5, y-18, color)
}
|
||||||
|
|
||||||
|
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
|
||||||
|
if !chartLegendVisible(len(series)) || len(series) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cols := 4
|
||||||
|
if len(series) < cols {
|
||||||
|
cols = len(series)
|
||||||
|
}
|
||||||
|
cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols)
|
||||||
|
baseY := layout.PlotBottom + 74
|
||||||
|
for i, item := range series {
|
||||||
|
row := i / cols
|
||||||
|
col := i % cols
|
||||||
|
x := float64(layout.PlotLeft) + cellWidth*float64(col) + 8
|
||||||
|
y := float64(baseY + row*24)
|
||||||
|
fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
|
||||||
|
x, y, x+28, y, item.Color)
|
||||||
|
fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
|
||||||
|
x+38, y+4, sanitizeChartText(item.Name))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
|
||||||
|
if len(segments) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
b.WriteString(`<g data-role="timeline-overlay">` + "\n")
|
||||||
|
for _, segment := range segments {
|
||||||
|
if segment.Active || !segment.End.After(segment.Start) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
x0 := chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
x1 := chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)
|
||||||
|
fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
|
||||||
|
x0, layout.PlotTop, math.Max(1, x1-x0), layout.PlotBottom-layout.PlotTop)
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
|
||||||
|
if len(segments) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
seen := map[int]bool{}
|
||||||
|
b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
|
||||||
|
for i, segment := range segments {
|
||||||
|
if i > 0 {
|
||||||
|
x := int(math.Round(chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)))
|
||||||
|
if !seen[x] {
|
||||||
|
seen[x] = true
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if i < len(segments)-1 {
|
||||||
|
x := int(math.Round(chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)))
|
||||||
|
if !seen[x] {
|
||||||
|
seen[x] = true
|
||||||
|
fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.WriteString(`</g>` + "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
|
||||||
|
if !end.After(start) {
|
||||||
|
return float64(left+right) / 2
|
||||||
|
}
|
||||||
|
if ts.Before(start) {
|
||||||
|
ts = start
|
||||||
|
}
|
||||||
|
if ts.After(end) {
|
||||||
|
ts = end
|
||||||
|
}
|
||||||
|
ratio := float64(ts.Sub(start)) / float64(end.Sub(start))
|
||||||
|
return float64(left) + ratio*float64(right-left)
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartPointTime(times []time.Time, idx int) time.Time {
|
||||||
|
if idx >= 0 && idx < len(times) && !times[idx].IsZero() {
|
||||||
|
return times[idx].UTC()
|
||||||
|
}
|
||||||
|
if len(times) > 0 && !times[0].IsZero() {
|
||||||
|
return times[0].UTC().Add(time.Duration(idx) * time.Minute)
|
||||||
|
}
|
||||||
|
return time.Now().UTC().Add(time.Duration(idx) * time.Minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 {
|
||||||
|
if scale.Max <= scale.Min {
|
||||||
|
return float64(plotTop+plotBottom) / 2
|
||||||
|
}
|
||||||
|
return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotBottom-plotTop)
|
||||||
|
}
|
||||||
|
|
||||||
|
// chartSeriesBounds returns a padded (min, max) display range for values.
// Empty input yields (0, 1). A flat series is padded by 10% of its magnitude
// (or by 1 when the magnitude is zero); a strictly positive lower bound is
// then padded outward by 20% of the span, floored at zero so positive-only
// data never shows a negative axis.
func chartSeriesBounds(values []float64) (float64, float64) {
	if len(values) == 0 {
		return 0, 1
	}
	lo, hi := values[0], values[0]
	for _, v := range values[1:] {
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}
	if lo == hi {
		if hi == 0 {
			return 0, 1
		}
		pad := math.Abs(hi) * 0.1
		if pad == 0 {
			pad = 1
		}
		lo, hi = lo-pad, hi+pad
	}
	if lo > 0 {
		pad := (hi - lo) * 0.2
		if pad == 0 {
			pad = hi * 0.1
		}
		lo -= pad
		if lo < 0 {
			lo = 0
		}
		hi += pad
	}
	return lo, hi
}
|
||||||
|
|
||||||
|
// chartNiceTicks returns "nice" axis tick values covering [min, max]: the
// step is a power of ten scaled by 1, 2, 5, or 10, chosen to yield roughly
// target ticks, and the range is widened outward to step multiples.
// A degenerate range (min == max) is expanded by 1. Target values below 1
// are clamped to 1; previously span/target divided by zero, producing an
// Inf/NaN step and an empty tick list.
func chartNiceTicks(min, max float64, target int) []float64 {
	if min == max {
		max = min + 1
	}
	if target < 1 {
		target = 1
	}
	span := max - min
	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
	for _, factor := range []float64{1, 2, 5, 10} {
		if span/(factor*step) <= float64(target)*1.5 {
			step = factor * step
			break
		}
	}
	low := math.Floor(min/step) * step
	high := math.Ceil(max/step) * step
	var ticks []float64
	// The step*0.001 slack keeps the final tick despite accumulated
	// floating-point error in the repeated addition.
	for value := low; value <= high+step*0.001; value += step {
		// Round to 1e-9 to cancel drift so labels format cleanly.
		ticks = append(ticks, math.Round(value*1e9)/1e9)
	}
	return ticks
}
|
||||||
|
|
||||||
|
func valueClamp(value float64, scale chartScale) float64 {
|
||||||
|
if value < scale.Min {
|
||||||
|
return scale.Min
|
||||||
|
}
|
||||||
|
if value > scale.Max {
|
||||||
|
return scale.Max
|
||||||
|
}
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
func chartStatsLabel(datasets [][]float64) string {
|
||||||
|
mn, avg, mx := globalStats(datasets)
|
||||||
|
if mx <= 0 && avg <= 0 && mn <= 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("min %s avg %s max %s",
|
||||||
|
chartLegendNumber(mn),
|
||||||
|
chartLegendNumber(avg),
|
||||||
|
chartLegendNumber(mx),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func gpuDisplayLabel(idx int) string {
|
||||||
|
if name := gpuModelNameByIndex(idx); name != "" {
|
||||||
|
return fmt.Sprintf("GPU %d — %s", idx, name)
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("GPU %d", idx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func gpuModelNameByIndex(idx int) string {
|
||||||
|
now := time.Now()
|
||||||
|
gpuLabelCache.mu.Lock()
|
||||||
|
if now.Sub(gpuLabelCache.loadedAt) > 30*time.Second || gpuLabelCache.byIndex == nil {
|
||||||
|
gpuLabelCache.loadedAt = now
|
||||||
|
gpuLabelCache.byIndex = loadGPUModelNames()
|
||||||
|
}
|
||||||
|
name := strings.TrimSpace(gpuLabelCache.byIndex[idx])
|
||||||
|
gpuLabelCache.mu.Unlock()
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadGPUModelNames() map[int]string {
|
||||||
|
out := map[int]string{}
|
||||||
|
gpus, err := platform.New().ListNvidiaGPUs()
|
||||||
|
if err != nil {
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
for _, gpu := range gpus {
|
||||||
|
name := strings.TrimSpace(gpu.Name)
|
||||||
|
if name != "" {
|
||||||
|
out[gpu.Index] = name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
@@ -9,13 +9,14 @@ import (
|
|||||||
|
|
||||||
// jobState holds the output lines and completion status of an async job.
|
// jobState holds the output lines and completion status of an async job.
|
||||||
type jobState struct {
|
type jobState struct {
|
||||||
lines []string
|
lines []string
|
||||||
done bool
|
done bool
|
||||||
err string
|
err string
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
subs []chan string
|
subs []chan string
|
||||||
cancel func() // optional cancel function; nil if job is not cancellable
|
cancel func() // optional cancel function; nil if job is not cancellable
|
||||||
logPath string
|
logPath string
|
||||||
|
serialPrefix string
|
||||||
}
|
}
|
||||||
|
|
||||||
// abort cancels the job if it has a cancel function and is not yet done.
|
// abort cancels the job if it has a cancel function and is not yet done.
|
||||||
@@ -36,6 +37,9 @@ func (j *jobState) append(line string) {
|
|||||||
if j.logPath != "" {
|
if j.logPath != "" {
|
||||||
appendJobLog(j.logPath, line)
|
appendJobLog(j.logPath, line)
|
||||||
}
|
}
|
||||||
|
if j.serialPrefix != "" {
|
||||||
|
taskSerialWriteLine(j.serialPrefix + line)
|
||||||
|
}
|
||||||
for _, ch := range j.subs {
|
for _, ch := range j.subs {
|
||||||
select {
|
select {
|
||||||
case ch <- line:
|
case ch <- line:
|
||||||
@@ -84,12 +88,12 @@ func (m *jobManager) create(id string) *jobState {
|
|||||||
j := &jobState{}
|
j := &jobState{}
|
||||||
m.jobs[id] = j
|
m.jobs[id] = j
|
||||||
// Schedule cleanup after 30 minutes
|
// Schedule cleanup after 30 minutes
|
||||||
go func() {
|
goRecoverOnce("job cleanup", func() {
|
||||||
time.Sleep(30 * time.Minute)
|
time.Sleep(30 * time.Minute)
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
delete(m.jobs, id)
|
delete(m.jobs, id)
|
||||||
m.mu.Unlock()
|
m.mu.Unlock()
|
||||||
}()
|
})
|
||||||
return j
|
return j
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -107,8 +111,11 @@ func (m *jobManager) get(id string) (*jobState, bool) {
|
|||||||
return j, ok
|
return j, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTaskJobState(logPath string) *jobState {
|
func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
|
||||||
j := &jobState{logPath: logPath}
|
j := &jobState{logPath: logPath}
|
||||||
|
if len(serialPrefix) > 0 {
|
||||||
|
j.serialPrefix = serialPrefix[0]
|
||||||
|
}
|
||||||
if logPath == "" {
|
if logPath == "" {
|
||||||
return j
|
return j
|
||||||
}
|
}
|
||||||
|
|||||||
242
audit/internal/webui/kmsg_watcher.go
Normal file
242
audit/internal/webui/kmsg_watcher.go
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/app"
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// kmsgWatcher reads /dev/kmsg and accumulates hardware error events.
// It supports multiple concurrent SAT tasks: a shared event window is open
// while any SAT task is running, and flushed when all tasks complete.
type kmsgWatcher struct {
	mu          sync.Mutex             // guards activeCount and window
	activeCount int                    // number of in-flight SAT tasks
	window      *kmsgWindow            // non-nil only while at least one SAT task is running
	statusDB    *app.ComponentStatusDB // destination for flushed events
}

// kmsgWindow accumulates deduplicated hardware-error events observed while
// one or more SAT tasks are running.
type kmsgWindow struct {
	targets   []string // SAT targets running concurrently
	startedAt time.Time
	seen      map[kmsgEventKey]bool // dedup set: at most one event per (id, category)
	events    []kmsgEvent
}

// kmsgEventKey identifies an event for deduplication within a window.
type kmsgEventKey struct {
	id       string // BDF or device name
	category string
}

// kmsgEvent is one kernel log line that matched a hardware-error pattern.
type kmsgEvent struct {
	timestamp time.Time
	raw       string   // message text with the kmsg header stripped
	ids       []string // BDF addresses or device names extracted
	category  string
}
|
||||||
|
|
||||||
|
// newKmsgWatcher creates a watcher that records flushed hardware-error
// events to statusDB. Call start to begin reading /dev/kmsg.
func newKmsgWatcher(statusDB *app.ComponentStatusDB) *kmsgWatcher {
	return &kmsgWatcher{statusDB: statusDB}
}
|
||||||
|
|
||||||
|
// start launches the background kmsg reading goroutine via goRecoverLoop,
// which presumably restarts run with a 5-second delay after a panic —
// confirm against goRecoverLoop's definition.
func (w *kmsgWatcher) start() {
	goRecoverLoop("kmsg watcher", 5*time.Second, w.run)
}
|
||||||
|
|
||||||
|
// run is the watcher's main loop: it opens /dev/kmsg, seeks to the end so
// only messages logged from now on are seen, and scans line by line. Lines
// matching a hardware-error pattern are recorded into the active window, if
// one is open. When /dev/kmsg cannot be opened it retries every 30s; after
// a scanner error or EOF it reopens the device after 2s.
func (w *kmsgWatcher) run() {
	for {
		f, err := os.Open("/dev/kmsg")
		if err != nil {
			slog.Warn("kmsg watcher unavailable", "err", err)
			time.Sleep(30 * time.Second)
			continue
		}
		// Best-effort seek to end so we only capture events from now forward.
		_, _ = f.Seek(0, io.SeekEnd)

		scanner := bufio.NewScanner(f)
		// Cap line length at 64 KiB; longer records are dropped by Scan.
		scanner.Buffer(make([]byte, 64*1024), 64*1024)
		for scanner.Scan() {
			line := scanner.Text()
			evt, ok := parseKmsgLine(line)
			if !ok {
				continue
			}
			w.mu.Lock()
			// Only accumulate while a SAT window is open; events outside
			// any window are intentionally discarded.
			if w.window != nil {
				w.recordEvent(evt)
			}
			w.mu.Unlock()
		}
		if err := scanner.Err(); err != nil {
			slog.Warn("kmsg watcher stopped", "err", err)
		}
		_ = f.Close()
		time.Sleep(2 * time.Second)
	}
}
|
||||||
|
|
||||||
|
// recordEvent appends evt to the active window, deduplicating by (id, category).
|
||||||
|
// Must be called with w.mu held.
|
||||||
|
func (w *kmsgWatcher) recordEvent(evt kmsgEvent) {
|
||||||
|
if len(evt.ids) == 0 {
|
||||||
|
key := kmsgEventKey{id: "", category: evt.category}
|
||||||
|
if !w.window.seen[key] {
|
||||||
|
w.window.seen[key] = true
|
||||||
|
w.window.events = append(w.window.events, evt)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, id := range evt.ids {
|
||||||
|
key := kmsgEventKey{id: id, category: evt.category}
|
||||||
|
if !w.window.seen[key] {
|
||||||
|
w.window.seen[key] = true
|
||||||
|
w.window.events = append(w.window.events, evt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NotifyTaskStarted increments the active task counter and opens a shared event window
|
||||||
|
// if this is the first task starting.
|
||||||
|
func (w *kmsgWatcher) NotifyTaskStarted(taskID, target string) {
|
||||||
|
w.mu.Lock()
|
||||||
|
defer w.mu.Unlock()
|
||||||
|
if w.activeCount == 0 {
|
||||||
|
w.window = &kmsgWindow{
|
||||||
|
startedAt: time.Now(),
|
||||||
|
seen: make(map[kmsgEventKey]bool),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.activeCount++
|
||||||
|
if w.window != nil {
|
||||||
|
w.window.targets = append(w.window.targets, target)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NotifyTaskFinished decrements the active task counter. When all tasks finish,
// it detaches the accumulated window under the lock and flushes its events to
// the status DB on a background goroutine, outside the mutex.
func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
	w.mu.Lock()
	w.activeCount--
	var window *kmsgWindow
	if w.activeCount <= 0 {
		// Clamp at zero in case of unbalanced finish notifications.
		w.activeCount = 0
		window = w.window
		w.window = nil
	}
	w.mu.Unlock()

	// Nothing to report when no window was closed or it captured no events.
	if window == nil || len(window.events) == 0 {
		return
	}
	goRecoverOnce("kmsg watcher flush", func() { w.flushWindow(window) })
}
|
||||||
|
|
||||||
|
// flushWindow maps the window's events to component keys and records one
// "Warning" status entry per unique key, tagged with the SAT targets that
// were running. Events without an id fall back to "cpu:all" (or "memory:all"
// for memory-category events); identified gpu/pcie events map to
// "pcie:<bdf>", storage events to "storage:<dev>", and unknown categories
// default to a pcie key. Only the first raw line seen per key is reported.
func (w *kmsgWatcher) flushWindow(window *kmsgWindow) {
	if w.statusDB == nil {
		return
	}
	source := "watchdog:kmsg"
	// Collect unique component keys from events.
	seen := map[string]string{} // componentKey → first raw line
	for _, evt := range window.events {
		if len(evt.ids) == 0 {
			// MCE or un-identified error.
			key := "cpu:all"
			if evt.category == "memory" {
				key = "memory:all"
			}
			if _, exists := seen[key]; !exists {
				seen[key] = evt.raw
			}
			continue
		}
		for _, id := range evt.ids {
			var key string
			switch evt.category {
			case "gpu", "pcie":
				key = "pcie:" + normalizeBDF(id)
			case "storage":
				key = "storage:" + id
			default:
				// Unknown categories are treated as PCIe devices.
				key = "pcie:" + normalizeBDF(id)
			}
			if _, exists := seen[key]; !exists {
				seen[key] = evt.raw
			}
		}
	}
	for key, detail := range seen {
		// Detail is capped at 120 bytes so status rows stay readable.
		detail = "kernel error during SAT (" + strings.Join(window.targets, ",") + "): " + truncate(detail, 120)
		w.statusDB.Record(key, source, "Warning", detail)
	}
}
|
||||||
|
|
||||||
|
// parseKmsgLine parses a single /dev/kmsg line and returns an event if it matches
|
||||||
|
// any pattern in platform.HardwareErrorPatterns.
|
||||||
|
// kmsg format: "<priority>,<sequence>,<timestamp_usec>,-;message text"
|
||||||
|
func parseKmsgLine(raw string) (kmsgEvent, bool) {
|
||||||
|
msg := raw
|
||||||
|
if idx := strings.Index(raw, ";"); idx >= 0 {
|
||||||
|
msg = strings.TrimSpace(raw[idx+1:])
|
||||||
|
}
|
||||||
|
if msg == "" {
|
||||||
|
return kmsgEvent{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, p := range platform.HardwareErrorPatterns {
|
||||||
|
m := p.Re.FindStringSubmatch(msg)
|
||||||
|
if m == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
evt := kmsgEvent{
|
||||||
|
timestamp: time.Now(),
|
||||||
|
raw: msg,
|
||||||
|
category: p.Category,
|
||||||
|
}
|
||||||
|
if p.BDFGroup > 0 && p.BDFGroup < len(m) {
|
||||||
|
evt.ids = append(evt.ids, normalizeBDF(m[p.BDFGroup]))
|
||||||
|
}
|
||||||
|
if p.DevGroup > 0 && p.DevGroup < len(m) {
|
||||||
|
evt.ids = append(evt.ids, m[p.DevGroup])
|
||||||
|
}
|
||||||
|
return evt, true
|
||||||
|
}
|
||||||
|
return kmsgEvent{}, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeBDF normalizes a PCIe BDF to the 4-part form "0000:c8:00.0":
// lowercased, whitespace-trimmed, and prefixed with the default domain
// "0000" when the input has exactly one colon (i.e. lacks a domain part).
func normalizeBDF(bdf string) string {
	cleaned := strings.ToLower(strings.TrimSpace(bdf))
	if strings.Count(cleaned, ":") != 1 {
		return cleaned
	}
	return "0000:" + cleaned
}
|
||||||
|
|
||||||
|
// truncate shortens s to at most max bytes, appending "..." when trimmed.
// Note the cut is byte-based, so a multi-byte rune at the boundary may be
// split — acceptable for the kernel-log ASCII this is applied to.
func truncate(s string, max int) string {
	if len(s) > max {
		return s[:max] + "..."
	}
	return s
}
|
||||||
|
|
||||||
|
// isSATTarget returns true for task targets that run hardware acceptance
// tests, i.e. runs whose kernel-log errors should be captured by the
// kmsg watcher.
func isSATTarget(target string) bool {
	satTargets := [...]string{
		"nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
		"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
		"nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress",
		"storage", "cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth",
		"amd-stress", "platform-stress",
	}
	for _, candidate := range satTargets {
		if candidate == target {
			return true
		}
	}
	return false
}
|
||||||
@@ -3,9 +3,12 @@ package webui
|
|||||||
import (
|
import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/csv"
|
"encoding/csv"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"bee/audit/internal/platform"
|
"bee/audit/internal/platform"
|
||||||
@@ -13,15 +16,24 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const metricsDBPath = "/appdata/bee/metrics.db"
|
const metricsDBPath = "/appdata/bee/metrics.db"
|
||||||
const metricsKeepDuration = 24 * time.Hour
|
|
||||||
|
|
||||||
// MetricsDB persists live metric samples to SQLite.
|
// MetricsDB persists live metric samples to SQLite.
|
||||||
type MetricsDB struct {
|
type MetricsDB struct {
|
||||||
db *sql.DB
|
db *sql.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *MetricsDB) Close() error {
|
||||||
|
if m == nil || m.db == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return m.db.Close()
|
||||||
|
}
|
||||||
|
|
||||||
// openMetricsDB opens (or creates) the metrics database at the given path.
|
// openMetricsDB opens (or creates) the metrics database at the given path.
|
||||||
func openMetricsDB(path string) (*MetricsDB, error) {
|
func openMetricsDB(path string) (*MetricsDB, error) {
|
||||||
|
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
|
db, err := sql.Open("sqlite", path+"?_journal=WAL&_busy_timeout=5000")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -50,6 +62,8 @@ CREATE TABLE IF NOT EXISTS gpu_metrics (
|
|||||||
usage_pct REAL,
|
usage_pct REAL,
|
||||||
mem_usage_pct REAL,
|
mem_usage_pct REAL,
|
||||||
power_w REAL,
|
power_w REAL,
|
||||||
|
clock_mhz REAL,
|
||||||
|
mem_clock_mhz REAL,
|
||||||
PRIMARY KEY (ts, gpu_index)
|
PRIMARY KEY (ts, gpu_index)
|
||||||
);
|
);
|
||||||
CREATE TABLE IF NOT EXISTS fan_metrics (
|
CREATE TABLE IF NOT EXISTS fan_metrics (
|
||||||
@@ -66,6 +80,38 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
|
|||||||
PRIMARY KEY (ts, name)
|
PRIMARY KEY (ts, name)
|
||||||
);
|
);
|
||||||
`)
|
`)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
|
||||||
|
}
|
||||||
|
|
||||||
|
func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
|
||||||
|
rows, err := db.Query("PRAGMA table_info(" + table + ")")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
for rows.Next() {
|
||||||
|
var cid int
|
||||||
|
var name, ctype string
|
||||||
|
var notNull, pk int
|
||||||
|
var dflt sql.NullString
|
||||||
|
if err := rows.Scan(&cid, &name, &ctype, ¬Null, &dflt, &pk); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if strings.EqualFold(name, column) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := rows.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
_, err = db.Exec("ALTER TABLE " + table + " ADD COLUMN " + column + " " + definition)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -87,8 +133,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
|
|||||||
}
|
}
|
||||||
for _, g := range s.GPUs {
|
for _, g := range s.GPUs {
|
||||||
_, err = tx.Exec(
|
_, err = tx.Exec(
|
||||||
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`,
|
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz) VALUES(?,?,?,?,?,?,?,?)`,
|
||||||
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW,
|
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, g.ClockMHz, g.MemClockMHz,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -116,18 +162,42 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// LoadRecent returns up to n samples in chronological order (oldest first).
|
// LoadRecent returns up to n samples in chronological order (oldest first).
|
||||||
// It reconstructs LiveMetricSample from the normalized tables.
|
|
||||||
func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
||||||
rows, err := m.db.Query(
|
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
|
||||||
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n,
|
}
|
||||||
|
|
||||||
|
// LoadAll returns all persisted samples in chronological order (oldest first).
|
||||||
|
func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
|
||||||
|
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadBetween returns samples in chronological order within the given time window.
|
||||||
|
func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSample, error) {
|
||||||
|
if m == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if start.IsZero() || end.IsZero() {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if end.Before(start) {
|
||||||
|
start, end = end, start
|
||||||
|
}
|
||||||
|
return m.loadSamples(
|
||||||
|
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
|
||||||
|
start.Unix(), end.Unix(),
|
||||||
)
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
|
||||||
|
func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
|
||||||
|
rows, err := m.db.Query(query, args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer rows.Close()
|
defer rows.Close()
|
||||||
|
|
||||||
type sysRow struct {
|
type sysRow struct {
|
||||||
ts int64
|
ts int64
|
||||||
cpu, mem, pwr float64
|
cpu, mem, pwr float64
|
||||||
}
|
}
|
||||||
var sysRows []sysRow
|
var sysRows []sysRow
|
||||||
@@ -141,20 +211,18 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
if len(sysRows) == 0 {
|
if len(sysRows) == 0 {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
// Reverse to chronological order
|
|
||||||
for i, j := 0, len(sysRows)-1; i < j; i, j = i+1, j-1 {
|
|
||||||
sysRows[i], sysRows[j] = sysRows[j], sysRows[i]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Collect min/max ts for range query
|
// Collect min/max ts for range query
|
||||||
minTS := sysRows[0].ts
|
minTS := sysRows[0].ts
|
||||||
maxTS := sysRows[len(sysRows)-1].ts
|
maxTS := sysRows[len(sysRows)-1].ts
|
||||||
|
|
||||||
// Load GPU rows in range
|
// Load GPU rows in range
|
||||||
type gpuKey struct{ ts int64; idx int }
|
type gpuKey struct {
|
||||||
|
ts int64
|
||||||
|
idx int
|
||||||
|
}
|
||||||
gpuData := map[gpuKey]platform.GPUMetricRow{}
|
gpuData := map[gpuKey]platform.GPUMetricRow{}
|
||||||
gRows, err := m.db.Query(
|
gRows, err := m.db.Query(
|
||||||
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
|
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,IFNULL(clock_mhz,0),IFNULL(mem_clock_mhz,0) FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
|
||||||
minTS, maxTS,
|
minTS, maxTS,
|
||||||
)
|
)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -162,14 +230,17 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
for gRows.Next() {
|
for gRows.Next() {
|
||||||
var ts int64
|
var ts int64
|
||||||
var g platform.GPUMetricRow
|
var g platform.GPUMetricRow
|
||||||
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil {
|
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW, &g.ClockMHz, &g.MemClockMHz); err == nil {
|
||||||
gpuData[gpuKey{ts, g.GPUIndex}] = g
|
gpuData[gpuKey{ts, g.GPUIndex}] = g
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load fan rows in range
|
// Load fan rows in range
|
||||||
type fanKey struct{ ts int64; name string }
|
type fanKey struct {
|
||||||
|
ts int64
|
||||||
|
name string
|
||||||
|
}
|
||||||
fanData := map[fanKey]float64{}
|
fanData := map[fanKey]float64{}
|
||||||
fRows, err := m.db.Query(
|
fRows, err := m.db.Query(
|
||||||
`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
||||||
@@ -187,7 +258,10 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Load temp rows in range
|
// Load temp rows in range
|
||||||
type tempKey struct{ ts int64; name string }
|
type tempKey struct {
|
||||||
|
ts int64
|
||||||
|
name string
|
||||||
|
}
|
||||||
tempData := map[tempKey]platform.TempReading{}
|
tempData := map[tempKey]platform.TempReading{}
|
||||||
tRows, err := m.db.Query(
|
tRows, err := m.db.Query(
|
||||||
`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=?`, minTS, maxTS,
|
||||||
@@ -203,7 +277,9 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect unique GPU indices and fan names from loaded data (preserve order)
|
// Collect unique GPU indices and fan/temp names from loaded data.
|
||||||
|
// Sort each list so that sample reconstruction is deterministic regardless
|
||||||
|
// of Go's non-deterministic map iteration order.
|
||||||
seenGPU := map[int]bool{}
|
seenGPU := map[int]bool{}
|
||||||
var gpuIndices []int
|
var gpuIndices []int
|
||||||
for k := range gpuData {
|
for k := range gpuData {
|
||||||
@@ -212,6 +288,8 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
gpuIndices = append(gpuIndices, k.idx)
|
gpuIndices = append(gpuIndices, k.idx)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Ints(gpuIndices)
|
||||||
|
|
||||||
seenFan := map[string]bool{}
|
seenFan := map[string]bool{}
|
||||||
var fanNames []string
|
var fanNames []string
|
||||||
for k := range fanData {
|
for k := range fanData {
|
||||||
@@ -220,6 +298,8 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
fanNames = append(fanNames, k.name)
|
fanNames = append(fanNames, k.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Strings(fanNames)
|
||||||
|
|
||||||
seenTemp := map[string]bool{}
|
seenTemp := map[string]bool{}
|
||||||
var tempNames []string
|
var tempNames []string
|
||||||
for k := range tempData {
|
for k := range tempData {
|
||||||
@@ -228,6 +308,7 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
tempNames = append(tempNames, k.name)
|
tempNames = append(tempNames, k.name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
sort.Strings(tempNames)
|
||||||
|
|
||||||
samples := make([]platform.LiveMetricSample, len(sysRows))
|
samples := make([]platform.LiveMetricSample, len(sysRows))
|
||||||
for i, r := range sysRows {
|
for i, r := range sysRows {
|
||||||
@@ -257,19 +338,12 @@ func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
|
|||||||
return samples, nil
|
return samples, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prune deletes samples older than keepDuration.
|
|
||||||
func (m *MetricsDB) Prune(keepDuration time.Duration) {
|
|
||||||
cutoff := time.Now().Add(-keepDuration).Unix()
|
|
||||||
for _, table := range []string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"} {
|
|
||||||
_, _ = m.db.Exec(fmt.Sprintf("DELETE FROM %s WHERE ts < ?", table), cutoff)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ExportCSV writes all sys+gpu data as CSV to w.
|
// ExportCSV writes all sys+gpu data as CSV to w.
|
||||||
func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
||||||
rows, err := m.db.Query(`
|
rows, err := m.db.Query(`
|
||||||
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
|
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
|
||||||
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w
|
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w,
|
||||||
|
g.clock_mhz, g.mem_clock_mhz
|
||||||
FROM sys_metrics s
|
FROM sys_metrics s
|
||||||
LEFT JOIN gpu_metrics g ON g.ts = s.ts
|
LEFT JOIN gpu_metrics g ON g.ts = s.ts
|
||||||
ORDER BY s.ts, g.gpu_index
|
ORDER BY s.ts, g.gpu_index
|
||||||
@@ -280,13 +354,13 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
|||||||
defer rows.Close()
|
defer rows.Close()
|
||||||
|
|
||||||
cw := csv.NewWriter(w)
|
cw := csv.NewWriter(w)
|
||||||
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"})
|
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w", "gpu_clock_mhz", "gpu_mem_clock_mhz"})
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var ts int64
|
var ts int64
|
||||||
var cpu, mem, pwr float64
|
var cpu, mem, pwr float64
|
||||||
var gpuIdx sql.NullInt64
|
var gpuIdx sql.NullInt64
|
||||||
var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64
|
var gpuTemp, gpuUse, gpuMem, gpuPow, gpuClock, gpuMemClock sql.NullFloat64
|
||||||
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil {
|
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow, &gpuClock, &gpuMemClock); err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
row := []string{
|
row := []string{
|
||||||
@@ -302,9 +376,11 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
|||||||
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
|
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
|
||||||
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
|
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
|
||||||
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
|
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(gpuClock.Float64, 'f', 1, 64),
|
||||||
|
strconv.FormatFloat(gpuMemClock.Float64, 'f', 1, 64),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
row = append(row, "", "", "", "", "")
|
row = append(row, "", "", "", "", "", "", "")
|
||||||
}
|
}
|
||||||
_ = cw.Write(row)
|
_ = cw.Write(row)
|
||||||
}
|
}
|
||||||
@@ -312,9 +388,6 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
|
|||||||
return cw.Error()
|
return cw.Error()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close closes the database.
|
|
||||||
func (m *MetricsDB) Close() { _ = m.db.Close() }
|
|
||||||
|
|
||||||
func nullFloat(v float64) sql.NullFloat64 {
|
func nullFloat(v float64) sql.NullFloat64 {
|
||||||
return sql.NullFloat64{Float64: v, Valid: true}
|
return sql.NullFloat64{Float64: v, Valid: true}
|
||||||
}
|
}
|
||||||
|
|||||||
174
audit/internal/webui/metricsdb_test.go
Normal file
174
audit/internal/webui/metricsdb_test.go
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
_ "modernc.org/sqlite"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
|
||||||
|
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("openMetricsDB: %v", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
|
||||||
|
base := time.Unix(1_700_000_000, 0).UTC()
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
err := db.Write(platform.LiveMetricSample{
|
||||||
|
Timestamp: base.Add(time.Duration(i) * time.Second),
|
||||||
|
CPULoadPct: float64(10 + i),
|
||||||
|
MemLoadPct: float64(20 + i),
|
||||||
|
PowerW: float64(300 + i),
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, PowerW: float64(100 + i)},
|
||||||
|
{GPUIndex: 2, PowerW: float64(200 + i)},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Write(%d): %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
all, err := db.LoadAll()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadAll: %v", err)
|
||||||
|
}
|
||||||
|
if len(all) != 3 {
|
||||||
|
t.Fatalf("LoadAll len=%d want 3", len(all))
|
||||||
|
}
|
||||||
|
for i, sample := range all {
|
||||||
|
if len(sample.GPUs) != 2 {
|
||||||
|
t.Fatalf("LoadAll sample %d GPUs=%v want 2 rows", i, sample.GPUs)
|
||||||
|
}
|
||||||
|
if sample.GPUs[0].GPUIndex != 0 || sample.GPUs[0].PowerW != float64(100+i) {
|
||||||
|
t.Fatalf("LoadAll sample %d GPU0=%+v", i, sample.GPUs[0])
|
||||||
|
}
|
||||||
|
if sample.GPUs[1].GPUIndex != 2 || sample.GPUs[1].PowerW != float64(200+i) {
|
||||||
|
t.Fatalf("LoadAll sample %d GPU1=%+v", i, sample.GPUs[1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
recent, err := db.LoadRecent(2)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadRecent: %v", err)
|
||||||
|
}
|
||||||
|
if len(recent) != 2 {
|
||||||
|
t.Fatalf("LoadRecent len=%d want 2", len(recent))
|
||||||
|
}
|
||||||
|
if !recent[0].Timestamp.Before(recent[1].Timestamp) {
|
||||||
|
t.Fatalf("LoadRecent timestamps not ascending: %v >= %v", recent[0].Timestamp, recent[1].Timestamp)
|
||||||
|
}
|
||||||
|
for i, sample := range recent {
|
||||||
|
if len(sample.GPUs) != 2 {
|
||||||
|
t.Fatalf("LoadRecent sample %d GPUs=%v want 2 rows", i, sample.GPUs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsDBMigratesLegacyGPUSchema(t *testing.T) {
|
||||||
|
path := filepath.Join(t.TempDir(), "metrics.db")
|
||||||
|
raw, err := sql.Open("sqlite", path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("sql.Open: %v", err)
|
||||||
|
}
|
||||||
|
_, err = raw.Exec(`
|
||||||
|
CREATE TABLE gpu_metrics (
|
||||||
|
ts INTEGER NOT NULL,
|
||||||
|
gpu_index INTEGER NOT NULL,
|
||||||
|
temp_c REAL,
|
||||||
|
usage_pct REAL,
|
||||||
|
mem_usage_pct REAL,
|
||||||
|
power_w REAL,
|
||||||
|
PRIMARY KEY (ts, gpu_index)
|
||||||
|
);
|
||||||
|
CREATE TABLE sys_metrics (
|
||||||
|
ts INTEGER NOT NULL,
|
||||||
|
cpu_load_pct REAL,
|
||||||
|
mem_load_pct REAL,
|
||||||
|
power_w REAL,
|
||||||
|
PRIMARY KEY (ts)
|
||||||
|
);
|
||||||
|
CREATE TABLE fan_metrics (
|
||||||
|
ts INTEGER NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
rpm REAL,
|
||||||
|
PRIMARY KEY (ts, name)
|
||||||
|
);
|
||||||
|
CREATE TABLE temp_metrics (
|
||||||
|
ts INTEGER NOT NULL,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
grp TEXT NOT NULL,
|
||||||
|
celsius REAL,
|
||||||
|
PRIMARY KEY (ts, name)
|
||||||
|
);
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("create legacy schema: %v", err)
|
||||||
|
}
|
||||||
|
_ = raw.Close()
|
||||||
|
|
||||||
|
db, err := openMetricsDB(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("openMetricsDB: %v", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
|
||||||
|
now := time.Unix(1_700_000_100, 0).UTC()
|
||||||
|
err = db.Write(platform.LiveMetricSample{
|
||||||
|
Timestamp: now,
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2600},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Write: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
samples, err := db.LoadAll()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadAll: %v", err)
|
||||||
|
}
|
||||||
|
if len(samples) != 1 || len(samples[0].GPUs) != 1 {
|
||||||
|
t.Fatalf("samples=%+v", samples)
|
||||||
|
}
|
||||||
|
if got := samples[0].GPUs[0].ClockMHz; got != 1410 {
|
||||||
|
t.Fatalf("ClockMHz=%v want 1410", got)
|
||||||
|
}
|
||||||
|
if got := samples[0].GPUs[0].MemClockMHz; got != 2600 {
|
||||||
|
t.Fatalf("MemClockMHz=%v want 2600", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMetricsDBLoadBetweenFiltersWindow(t *testing.T) {
|
||||||
|
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("openMetricsDB: %v", err)
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
|
||||||
|
base := time.Unix(1_700_000_000, 0).UTC()
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
if err := db.Write(platform.LiveMetricSample{
|
||||||
|
Timestamp: base.Add(time.Duration(i) * time.Minute),
|
||||||
|
CPULoadPct: float64(i),
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("Write(%d): %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
got, err := db.LoadBetween(base.Add(1*time.Minute), base.Add(3*time.Minute))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadBetween: %v", err)
|
||||||
|
}
|
||||||
|
if len(got) != 3 {
|
||||||
|
t.Fatalf("LoadBetween len=%d want 3", len(got))
|
||||||
|
}
|
||||||
|
if !got[0].Timestamp.Equal(base.Add(1*time.Minute)) || !got[2].Timestamp.Equal(base.Add(3*time.Minute)) {
|
||||||
|
t.Fatalf("window=%v..%v", got[0].Timestamp, got[2].Timestamp)
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
41
audit/internal/webui/serial_console.go
Normal file
41
audit/internal/webui/serial_console.go
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var taskSerialWriteLine = writeTaskSerialLine
|
||||||
|
|
||||||
|
func writeTaskSerialLine(line string) {
|
||||||
|
line = strings.TrimSpace(line)
|
||||||
|
if line == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
payload := fmt.Sprintf("%s %s\n", time.Now().UTC().Format("2006-01-02 15:04:05Z"), line)
|
||||||
|
for _, path := range []string{"/dev/ttyS0", "/dev/ttyS1", "/dev/console"} {
|
||||||
|
f, err := os.OpenFile(path, os.O_WRONLY|os.O_APPEND, 0)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_, _ = f.WriteString(payload)
|
||||||
|
_ = f.Close()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskSerialPrefix(t *Task) string {
|
||||||
|
if t == nil {
|
||||||
|
return "[task] "
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("[task %s %s] ", t.ID, t.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskSerialEvent(t *Task, event string) {
|
||||||
|
if t == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
taskSerialWriteLine(fmt.Sprintf("%s%s", taskSerialPrefix(t), strings.TrimSpace(event)))
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +1,16 @@
|
|||||||
package webui
|
package webui
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestChartLegendNumber(t *testing.T) {
|
func TestChartLegendNumber(t *testing.T) {
|
||||||
@@ -31,6 +35,429 @@ func TestChartLegendNumber(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRecoverMiddlewareReturns500OnPanic(t *testing.T) {
|
||||||
|
handler := recoverMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
panic("boom")
|
||||||
|
}))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/panic", nil)
|
||||||
|
|
||||||
|
handler.ServeHTTP(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != http.StatusInternalServerError {
|
||||||
|
t.Fatalf("status=%d want %d", rec.Code, http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
if !strings.Contains(rec.Body.String(), "internal server error") {
|
||||||
|
t.Fatalf("body=%q", rec.Body.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
|
||||||
|
handler := recoverMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if !sseStart(w) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if !sseWrite(w, "tick", "ok") {
|
||||||
|
t.Fatal("expected sse write to succeed")
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/stream", nil)
|
||||||
|
|
||||||
|
handler.ServeHTTP(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
if got := rec.Header().Get("Content-Type"); got != "text/event-stream" {
|
||||||
|
t.Fatalf("content-type=%q", got)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, "event: tick\n") || !strings.Contains(body, "data: ok\n\n") {
|
||||||
|
t.Fatalf("body=%q", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
|
||||||
|
samples := []platform.LiveMetricSample{
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-3 * time.Minute),
|
||||||
|
CPULoadPct: 10,
|
||||||
|
MemLoadPct: 20,
|
||||||
|
PowerW: 300,
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, UsagePct: 90, MemUsagePct: 5, PowerW: 120, TempC: 50},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-2 * time.Minute),
|
||||||
|
CPULoadPct: 30,
|
||||||
|
MemLoadPct: 40,
|
||||||
|
PowerW: 320,
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, UsagePct: 95, MemUsagePct: 7, PowerW: 125, TempC: 51},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||||
|
CPULoadPct: 50,
|
||||||
|
MemLoadPct: 60,
|
||||||
|
PowerW: 340,
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, UsagePct: 97, MemUsagePct: 9, PowerW: 130, TempC: 52},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("chartDataFromSamples returned ok=false")
|
||||||
|
}
|
||||||
|
if title != "GPU Power" {
|
||||||
|
t.Fatalf("title=%q", title)
|
||||||
|
}
|
||||||
|
if len(names) != 1 || names[0] != "GPU 0" {
|
||||||
|
t.Fatalf("names=%v", names)
|
||||||
|
}
|
||||||
|
if len(labels) != len(samples) {
|
||||||
|
t.Fatalf("labels len=%d want %d", len(labels), len(samples))
|
||||||
|
}
|
||||||
|
if len(datasets) != 1 || len(datasets[0]) != len(samples) {
|
||||||
|
t.Fatalf("datasets shape=%v", datasets)
|
||||||
|
}
|
||||||
|
if got := datasets[0][0]; got != 120 {
|
||||||
|
t.Fatalf("datasets[0][0]=%v want 120", got)
|
||||||
|
}
|
||||||
|
if got := datasets[0][2]; got != 130 {
|
||||||
|
t.Fatalf("datasets[0][2]=%v want 130", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
|
||||||
|
samples := []platform.LiveMetricSample{
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-2 * time.Minute),
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 7, PowerW: 170},
|
||||||
|
{GPUIndex: 2, PowerW: 120},
|
||||||
|
{GPUIndex: 0, PowerW: 100},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, PowerW: 101},
|
||||||
|
{GPUIndex: 7, PowerW: 171},
|
||||||
|
{GPUIndex: 2, PowerW: 121},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("chartDataFromSamples returned ok=false")
|
||||||
|
}
|
||||||
|
if title != "GPU Power" {
|
||||||
|
t.Fatalf("title=%q", title)
|
||||||
|
}
|
||||||
|
wantNames := []string{"GPU 0", "GPU 2", "GPU 7"}
|
||||||
|
if len(names) != len(wantNames) {
|
||||||
|
t.Fatalf("names len=%d want %d: %v", len(names), len(wantNames), names)
|
||||||
|
}
|
||||||
|
for i := range wantNames {
|
||||||
|
if names[i] != wantNames[i] {
|
||||||
|
t.Fatalf("names[%d]=%q want %q; full=%v", i, names[i], wantNames[i], names)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if got := datasets[0]; len(got) != 2 || got[0] != 100 || got[1] != 101 {
|
||||||
|
t.Fatalf("GPU 0 dataset=%v want [100 101]", got)
|
||||||
|
}
|
||||||
|
if got := datasets[1]; len(got) != 2 || got[0] != 120 || got[1] != 121 {
|
||||||
|
t.Fatalf("GPU 2 dataset=%v want [120 121]", got)
|
||||||
|
}
|
||||||
|
if got := datasets[2]; len(got) != 2 || got[0] != 170 || got[1] != 171 {
|
||||||
|
t.Fatalf("GPU 7 dataset=%v want [170 171]", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
|
||||||
|
samples := []platform.LiveMetricSample{
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-2 * time.Minute),
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, ClockMHz: 1400},
|
||||||
|
{GPUIndex: 3, ClockMHz: 1500},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Timestamp: time.Now().Add(-1 * time.Minute),
|
||||||
|
GPUs: []platform.GPUMetricRow{
|
||||||
|
{GPUIndex: 0, ClockMHz: 1410},
|
||||||
|
{GPUIndex: 3, ClockMHz: 1510},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("gpu-all-clock returned ok=false")
|
||||||
|
}
|
||||||
|
if title != "GPU Core Clock" {
|
||||||
|
t.Fatalf("title=%q", title)
|
||||||
|
}
|
||||||
|
if len(names) != 2 || names[0] != "GPU 0" || names[1] != "GPU 3" {
|
||||||
|
t.Fatalf("names=%v", names)
|
||||||
|
}
|
||||||
|
if got := datasets[1][1]; got != 1510 {
|
||||||
|
t.Fatalf("GPU 3 core clock=%v want 1510", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) {
|
||||||
|
got := normalizePowerSeries([]float64{0, 480, 0, 0, 510, 0})
|
||||||
|
want := []float64{0, 480, 480, 480, 510, 510}
|
||||||
|
if len(got) != len(want) {
|
||||||
|
t.Fatalf("len=%d want %d", len(got), len(want))
|
||||||
|
}
|
||||||
|
for i := range want {
|
||||||
|
if got[i] != want[i] {
|
||||||
|
t.Fatalf("got[%d]=%v want %v", i, got[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderMetricsUsesBufferedChartRefresh(t *testing.T) {
|
||||||
|
body := renderMetrics()
|
||||||
|
if !strings.Contains(body, "const probe = new Image();") {
|
||||||
|
t.Fatalf("metrics page should preload chart images before swap: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, "el.dataset.loading === '1'") {
|
||||||
|
t.Fatalf("metrics page should avoid overlapping chart reloads: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `id="gpu-metrics-section" style="display:none`) {
|
||||||
|
t.Fatalf("metrics page should keep gpu charts in a hidden dedicated section until GPUs are detected: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `id="gpu-chart-toggle"`) {
|
||||||
|
t.Fatalf("metrics page should render GPU chart mode toggle: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `/api/metrics/chart/gpu-all-clock.svg`) {
|
||||||
|
t.Fatalf("metrics page should include GPU core clock chart: %s", body)
|
||||||
|
}
|
||||||
|
if strings.Contains(body, `/api/metrics/chart/gpu-all-memclock.svg`) {
|
||||||
|
t.Fatalf("metrics page should not include GPU memory clock chart: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `renderGPUOverviewCards(indices, names)`) {
|
||||||
|
t.Fatalf("metrics page should build per-GPU chart cards dynamically: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartLegendVisible(t *testing.T) {
|
||||||
|
if !chartLegendVisible(8) {
|
||||||
|
t.Fatal("legend should stay visible for charts with up to 8 series")
|
||||||
|
}
|
||||||
|
if chartLegendVisible(9) {
|
||||||
|
t.Fatal("legend should be hidden for charts with more than 8 series")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartYAxisNumber(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
in float64
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{in: 999, want: "999"},
|
||||||
|
{in: 1000, want: "1к"},
|
||||||
|
{in: 1370, want: "1,4к"},
|
||||||
|
{in: 1500, want: "1,5к"},
|
||||||
|
{in: 1700, want: "1,7к"},
|
||||||
|
{in: 2000, want: "2к"},
|
||||||
|
{in: 9999, want: "10к"},
|
||||||
|
{in: 10200, want: "10к"},
|
||||||
|
{in: -1500, want: "-1,5к"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := chartYAxisNumber(tc.in); got != tc.want {
|
||||||
|
t.Fatalf("chartYAxisNumber(%v)=%q want %q", tc.in, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartCanvasHeight(t *testing.T) {
|
||||||
|
if got := chartCanvasHeight(4); got != 360 {
|
||||||
|
t.Fatalf("chartCanvasHeight(4)=%d want 360", got)
|
||||||
|
}
|
||||||
|
if got := chartCanvasHeight(12); got != 288 {
|
||||||
|
t.Fatalf("chartCanvasHeight(12)=%d want 288", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps(t *testing.T) {
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
end := start.Add(10 * time.Minute)
|
||||||
|
taskWindow := func(offsetStart, offsetEnd time.Duration) Task {
|
||||||
|
s := start.Add(offsetStart)
|
||||||
|
e := start.Add(offsetEnd)
|
||||||
|
return Task{
|
||||||
|
Name: "task",
|
||||||
|
Status: TaskDone,
|
||||||
|
StartedAt: &s,
|
||||||
|
DoneAt: &e,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
segments := chartTimelineSegmentsForRange(start, end, end, []Task{
|
||||||
|
taskWindow(1*time.Minute, 3*time.Minute),
|
||||||
|
taskWindow(2*time.Minute, 5*time.Minute),
|
||||||
|
taskWindow(7*time.Minute, 8*time.Minute),
|
||||||
|
})
|
||||||
|
if len(segments) != 5 {
|
||||||
|
t.Fatalf("segments=%d want 5: %#v", len(segments), segments)
|
||||||
|
}
|
||||||
|
wantActive := []bool{false, true, false, true, false}
|
||||||
|
wantMinutes := [][2]int{{0, 1}, {1, 5}, {5, 7}, {7, 8}, {8, 10}}
|
||||||
|
for i, segment := range segments {
|
||||||
|
if segment.Active != wantActive[i] {
|
||||||
|
t.Fatalf("segment[%d].Active=%v want %v", i, segment.Active, wantActive[i])
|
||||||
|
}
|
||||||
|
if got := int(segment.Start.Sub(start).Minutes()); got != wantMinutes[i][0] {
|
||||||
|
t.Fatalf("segment[%d] start=%d want %d", i, got, wantMinutes[i][0])
|
||||||
|
}
|
||||||
|
if got := int(segment.End.Sub(start).Minutes()); got != wantMinutes[i][1] {
|
||||||
|
t.Fatalf("segment[%d] end=%d want %d", i, got, wantMinutes[i][1])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderMetricChartSVGIncludesTimelineOverlay(t *testing.T) {
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
labels := []string{"12:00", "12:01", "12:02"}
|
||||||
|
times := []time.Time{start, start.Add(time.Minute), start.Add(2 * time.Minute)}
|
||||||
|
svg, err := renderMetricChartSVG(
|
||||||
|
"System Power",
|
||||||
|
labels,
|
||||||
|
times,
|
||||||
|
[][]float64{{300, 320, 310}},
|
||||||
|
[]string{"Power W"},
|
||||||
|
floatPtr(0),
|
||||||
|
floatPtr(400),
|
||||||
|
360,
|
||||||
|
[]chartTimelineSegment{
|
||||||
|
{Start: start, End: start.Add(time.Minute), Active: false},
|
||||||
|
{Start: start.Add(time.Minute), End: start.Add(2 * time.Minute), Active: true},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
body := string(svg)
|
||||||
|
if !strings.Contains(body, `data-role="timeline-overlay"`) {
|
||||||
|
t.Fatalf("svg missing timeline overlay: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `opacity="0.10"`) {
|
||||||
|
t.Fatalf("svg missing idle overlay opacity: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `System Power`) {
|
||||||
|
t.Fatalf("svg missing chart title: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
db, err := openMetricsDB(filepath.Join(dir, "metrics.db"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = db.db.Close() })
|
||||||
|
|
||||||
|
start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
|
||||||
|
for i, sample := range []platform.LiveMetricSample{
|
||||||
|
{Timestamp: start, PowerW: 300},
|
||||||
|
{Timestamp: start.Add(time.Minute), PowerW: 320},
|
||||||
|
{Timestamp: start.Add(2 * time.Minute), PowerW: 310},
|
||||||
|
} {
|
||||||
|
if err := db.Write(sample); err != nil {
|
||||||
|
t.Fatalf("write sample %d: %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
prevTasks := globalQueue.tasks
|
||||||
|
s := start.Add(30 * time.Second)
|
||||||
|
e := start.Add(90 * time.Second)
|
||||||
|
globalQueue.tasks = []*Task{{Name: "Burn", Status: TaskDone, StartedAt: &s, DoneAt: &e}}
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = prevTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
h := &handler{opts: HandlerOptions{ExportDir: dir}, metricsDB: db}
|
||||||
|
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/api/metrics/chart/server-power.svg", nil)
|
||||||
|
h.handleMetricsChartSVG(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `data-role="timeline-overlay"`) {
|
||||||
|
t.Fatalf("custom svg response missing timeline overlay: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `stroke-linecap="round"`) {
|
||||||
|
t.Fatalf("custom svg response missing custom polyline styling: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
|
||||||
|
got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
|
||||||
|
want := []float64{4200, 4200, 4200, 4300, 4300}
|
||||||
|
if len(got) != len(want) {
|
||||||
|
t.Fatalf("len=%d want %d", len(got), len(want))
|
||||||
|
}
|
||||||
|
for i := range want {
|
||||||
|
if got[i] != want[i] {
|
||||||
|
t.Fatalf("got[%d]=%v want %v", i, got[i], want[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
|
||||||
|
r1 := newMetricsRing(4)
|
||||||
|
r2 := newMetricsRing(4)
|
||||||
|
r1.push(1000)
|
||||||
|
r1.push(1100)
|
||||||
|
r2.push(1200)
|
||||||
|
r2.push(1300)
|
||||||
|
|
||||||
|
datasets, names, labels := snapshotFanRings([]*metricsRing{r1, r2}, []string{"FAN_A", "FAN_B"})
|
||||||
|
if len(datasets) != 2 {
|
||||||
|
t.Fatalf("datasets=%d want 2", len(datasets))
|
||||||
|
}
|
||||||
|
if len(names) != 2 || names[0] != "FAN_A RPM" || names[1] != "FAN_B RPM" {
|
||||||
|
t.Fatalf("names=%v", names)
|
||||||
|
}
|
||||||
|
if len(labels) != 2 {
|
||||||
|
t.Fatalf("labels=%v want 2 entries", labels)
|
||||||
|
}
|
||||||
|
if labels[0] == "" || labels[1] == "" {
|
||||||
|
t.Fatalf("labels should contain timeline values, got %v", labels)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRenderNetworkInlineSyncsPendingState(t *testing.T) {
|
||||||
|
body := renderNetworkInline()
|
||||||
|
if !strings.Contains(body, "d.pending_change") {
|
||||||
|
t.Fatalf("network UI should read pending network state from API: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, "setInterval(loadNetwork, 5000)") {
|
||||||
|
t.Fatalf("network UI should periodically refresh network state: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, "showNetPending(NET_ROLLBACK_SECS)") {
|
||||||
|
t.Fatalf("network UI should show pending confirmation immediately on apply: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestRootRendersDashboard(t *testing.T) {
|
func TestRootRendersDashboard(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
@@ -43,9 +470,10 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
handler := NewHandler(HandlerOptions{
|
handler := NewHandler(HandlerOptions{
|
||||||
Title: "Bee Hardware Audit",
|
Title: "Bee Hardware Audit",
|
||||||
AuditPath: path,
|
BuildLabel: "1.2.3",
|
||||||
ExportDir: exportDir,
|
AuditPath: path,
|
||||||
|
ExportDir: exportDir,
|
||||||
})
|
})
|
||||||
|
|
||||||
first := httptest.NewRecorder()
|
first := httptest.NewRecorder()
|
||||||
@@ -60,6 +488,11 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
if !strings.Contains(first.Body.String(), `/viewer`) {
|
if !strings.Contains(first.Body.String(), `/viewer`) {
|
||||||
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
t.Fatalf("first body missing viewer link: %s", first.Body.String())
|
||||||
}
|
}
|
||||||
|
versionIdx := strings.Index(first.Body.String(), `Version 1.2.3`)
|
||||||
|
navIdx := strings.Index(first.Body.String(), `href="/"`)
|
||||||
|
if versionIdx == -1 || navIdx == -1 || versionIdx > navIdx {
|
||||||
|
t.Fatalf("version should render near top of sidebar before nav links: %s", first.Body.String())
|
||||||
|
}
|
||||||
if got := first.Header().Get("Cache-Control"); got != "no-store" {
|
if got := first.Header().Get("Cache-Control"); got != "no-store" {
|
||||||
t.Fatalf("first cache-control=%q", got)
|
t.Fatalf("first cache-control=%q", got)
|
||||||
}
|
}
|
||||||
@@ -78,6 +511,45 @@ func TestRootRendersDashboard(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
exportDir := filepath.Join(dir, "export")
|
||||||
|
if err := os.MkdirAll(exportDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
handler := NewHandler(HandlerOptions{
|
||||||
|
Title: "Bee Hardware Audit",
|
||||||
|
AuditPath: filepath.Join(dir, "missing-audit.json"),
|
||||||
|
ExportDir: exportDir,
|
||||||
|
})
|
||||||
|
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `onclick="auditModalRun()">Run audit</button>`) {
|
||||||
|
t.Fatalf("dashboard missing run audit button: %s", body)
|
||||||
|
}
|
||||||
|
if strings.Contains(body, `No audit data`) {
|
||||||
|
t.Fatalf("dashboard still shows empty audit badge: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadyIsOKWhenAuditPathIsUnset(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/ready", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(rec.Body.String()) != "ready" {
|
||||||
|
t.Fatalf("body=%q want ready", rec.Body.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
@@ -100,6 +572,325 @@ func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTasksPageRendersOpenLinksAndPaginationControls(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `Open a task to view its saved logs and charts.`) {
|
||||||
|
t.Fatalf("tasks page missing task report hint: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `_taskPageSize = 50`) {
|
||||||
|
t.Fatalf("tasks page missing pagination size config: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `Previous</button>`) || !strings.Contains(body, `Next</button>`) {
|
||||||
|
t.Fatalf("tasks page missing pagination controls: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tools", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `Restart GPU Drivers`) {
|
||||||
|
t.Fatalf("tools page missing restart gpu drivers button: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `restartGPUDrivers()`) {
|
||||||
|
t.Fatalf("tools page missing restartGPUDrivers action: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `id="boot-source-text"`) {
|
||||||
|
t.Fatalf("tools page missing boot source field: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `Export to USB`) {
|
||||||
|
t.Fatalf("tools page missing export to usb section: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `Support Bundle</button>`) {
|
||||||
|
t.Fatalf("tools page missing support bundle usb button: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
for _, needle := range []string{
|
||||||
|
`href="/benchmark"`,
|
||||||
|
`id="benchmark-gpu-list"`,
|
||||||
|
`/api/gpu/nvidia`,
|
||||||
|
`/api/benchmark/nvidia/run`,
|
||||||
|
`benchmark-run-nccl`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(body, needle) {
|
||||||
|
t.Fatalf("benchmark page missing %q: %s", needle, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestBenchmarkPageRendersSavedResultsTable seeds the export directory with a
// single saved benchmark run (result.json) and verifies that GET /benchmark
// renders a results table containing the run's timestamp, per-GPU rows, and
// composite scores.
func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
	dir := t.TempDir()
	exportDir := filepath.Join(dir, "export")
	// Layout mirrors what the benchmark runner writes:
	// <export>/bee-benchmark/gpu-benchmark-<timestamp>/result.json.
	runDir := filepath.Join(exportDir, "bee-benchmark", "gpu-benchmark-20260406-120000")
	if err := os.MkdirAll(runDir, 0755); err != nil {
		t.Fatal(err)
	}
	// Two GPUs with distinct composite scores so both rows can be asserted on.
	result := platform.NvidiaBenchmarkResult{
		GeneratedAt:      time.Date(2026, time.April, 6, 12, 0, 0, 0, time.UTC),
		BenchmarkProfile: "standard",
		OverallStatus:    "OK",
		GPUs: []platform.BenchmarkGPUResult{
			{
				Index: 0,
				Name:  "NVIDIA H100 PCIe",
				Scores: platform.BenchmarkScorecard{
					CompositeScore: 1176.25,
				},
			},
			{
				Index: 1,
				Name:  "NVIDIA H100 PCIe",
				Scores: platform.BenchmarkScorecard{
					CompositeScore: 1168.50,
				},
			},
		},
	}
	raw, err := json.Marshal(result)
	if err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(runDir, "result.json"), raw, 0644); err != nil {
		t.Fatal(err)
	}

	handler := NewHandler(HandlerOptions{ExportDir: exportDir})
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d", rec.Code)
	}
	body := rec.Body.String()
	// The page renders timestamps in local time, so format the expected value
	// the same way rather than asserting on the UTC string.
	wantTime := result.GeneratedAt.Local().Format("2006-01-02 15:04:05")
	for _, needle := range []string{
		`Benchmark Results`,
		`Composite score by saved benchmark run and GPU.`,
		`NVIDIA H100 PCIe / GPU 0`,
		`NVIDIA H100 PCIe / GPU 1`,
		`#1`,
		wantTime,
		`1176.25`,
		`1168.50`,
	} {
		if !strings.Contains(body, needle) {
			t.Fatalf("benchmark page missing %q: %s", needle, body)
		}
	}
}
|
||||||
|
|
||||||
|
func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
for _, needle := range []string{
|
||||||
|
`NVIDIA GPU Targeted Stress`,
|
||||||
|
`nvidia-targeted-stress`,
|
||||||
|
`controlled NVIDIA DCGM load`,
|
||||||
|
`<code>dcgmi diag targeted_stress</code>`,
|
||||||
|
`NVIDIA GPU Selection`,
|
||||||
|
`id="sat-gpu-list"`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(body, needle) {
|
||||||
|
t.Fatalf("validate page missing %q: %s", needle, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
|
||||||
|
handler := NewHandler(HandlerOptions{})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
for _, needle := range []string{
|
||||||
|
`NVIDIA Max Compute Load`,
|
||||||
|
`dcgmproftester`,
|
||||||
|
`targeted_stress remain in <a href="/validate">Validate</a>`,
|
||||||
|
`NVIDIA Interconnect Test (NCCL all_reduce_perf)`,
|
||||||
|
`id="burn-gpu-list"`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(body, needle) {
|
||||||
|
t.Fatalf("burn page missing %q: %s", needle, body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTaskDetailPageRendersSavedReport writes a saved report.html fragment,
// installs a finished task pointing at it in the global queue, and verifies
// the task detail page embeds the saved fragment and the back link.
func TestTaskDetailPageRendersSavedReport(t *testing.T) {
	dir := t.TempDir()
	exportDir := filepath.Join(dir, "export")
	// Mirrors the on-disk layout used for finished tasks:
	// <export>/tasks/<id>_<target>_<suffix>/report.html.
	reportDir := filepath.Join(exportDir, "tasks", "task-1_cpu_sat_done")
	if err := os.MkdirAll(reportDir, 0755); err != nil {
		t.Fatal(err)
	}
	reportPath := filepath.Join(reportDir, "report.html")
	if err := os.WriteFile(reportPath, []byte(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">saved report</div></div>`), 0644); err != nil {
		t.Fatal(err)
	}

	// Swap in a single done task; the original queue contents are restored in
	// Cleanup so other tests are unaffected.
	globalQueue.mu.Lock()
	origTasks := globalQueue.tasks
	globalQueue.tasks = []*Task{{
		ID:             "task-1",
		Name:           "CPU SAT",
		Target:         "cpu",
		Status:         TaskDone,
		CreatedAt:      time.Now(),
		ArtifactsDir:   reportDir,
		ReportHTMLPath: reportPath,
	}}
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = origTasks
		globalQueue.mu.Unlock()
	})

	handler := NewHandler(HandlerOptions{Title: "Bee Hardware Audit", ExportDir: exportDir})

	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks/task-1", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d", rec.Code)
	}
	body := rec.Body.String()
	if !strings.Contains(body, `saved report`) {
		t.Fatalf("task detail page missing saved report: %s", body)
	}
	if !strings.Contains(body, `Back to Tasks`) {
		t.Fatalf("task detail page missing back link: %s", body)
	}
}
|
||||||
|
|
||||||
|
func TestTaskDetailPageRendersCancelForRunningTask(t *testing.T) {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
origTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = []*Task{{
|
||||||
|
ID: "task-live-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
}}
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = origTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
handler := NewHandler(HandlerOptions{Title: "Bee Hardware Audit"})
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks/task-live-1", nil))
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d", rec.Code)
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, `Cancel</button>`) {
|
||||||
|
t.Fatalf("task detail page missing cancel button: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `function cancelTaskDetail(id)`) {
|
||||||
|
t.Fatalf("task detail page missing cancel handler: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `/api/tasks/' + id + '/cancel`) {
|
||||||
|
t.Fatalf("task detail page missing cancel endpoint: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `id="task-live-charts"`) {
|
||||||
|
t.Fatalf("task detail page missing live charts container: %s", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, `/api/tasks/' + taskId + '/charts`) {
|
||||||
|
t.Fatalf("task detail page missing live charts index endpoint: %s", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestTaskChartSVGUsesTaskTimeWindow writes three power samples into a
// temporary metrics DB and checks that the per-task chart endpoint clamps its
// statistics to the task's start/done window: the 100 W sample taken before
// the task started must be excluded, so the in-window 200 W sample sets the
// minimum.
func TestTaskChartSVGUsesTaskTimeWindow(t *testing.T) {
	dir := t.TempDir()
	metricsPath := filepath.Join(dir, "metrics.db")
	// Point task-report chart rendering at the temporary DB for this test.
	prevMetricsPath := taskReportMetricsDBPath
	taskReportMetricsDBPath = metricsPath
	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })

	db, err := openMetricsDB(metricsPath)
	if err != nil {
		t.Fatalf("openMetricsDB: %v", err)
	}
	// Three samples one minute apart; only the last two fall inside the
	// task window constructed below.
	base := time.Now().UTC()
	samples := []platform.LiveMetricSample{
		{Timestamp: base.Add(-3 * time.Minute), PowerW: 100},
		{Timestamp: base.Add(-2 * time.Minute), PowerW: 200},
		{Timestamp: base.Add(-1 * time.Minute), PowerW: 300},
	}
	for _, sample := range samples {
		if err := db.Write(sample); err != nil {
			t.Fatalf("Write: %v", err)
		}
	}
	_ = db.Close()

	// Window covers the 200 W and 300 W samples with 5 s of slack on each end.
	started := base.Add(-2*time.Minute - 5*time.Second)
	done := base.Add(-1*time.Minute + 5*time.Second)
	globalQueue.mu.Lock()
	origTasks := globalQueue.tasks
	globalQueue.tasks = []*Task{{
		ID:        "task-chart-1",
		Name:      "Power Window",
		Target:    "cpu",
		Status:    TaskDone,
		CreatedAt: started.Add(-10 * time.Second),
		StartedAt: &started,
		DoneAt:    &done,
	}}
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = origTasks
		globalQueue.mu.Unlock()
	})

	handler := NewHandler(HandlerOptions{Title: "Bee Hardware Audit"})
	req := httptest.NewRequest(http.MethodGet, "/api/tasks/task-chart-1/chart/server-power.svg", nil)
	// The handler reads the task id via Request.PathValue, so set it directly
	// since the request bypasses the mux.
	req.SetPathValue("id", "task-chart-1")
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, req)
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	body := rec.Body.String()
	if !strings.Contains(body, "System Power") {
		t.Fatalf("task chart missing expected title: %s", body)
	}
	if !strings.Contains(body, "min 200") {
		t.Fatalf("task chart stats should start from in-window sample: %s", body)
	}
	if strings.Contains(body, "min 100") {
		t.Fatalf("task chart should not include pre-task sample in stats: %s", body)
	}
}
|
||||||
|
|
||||||
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
func TestViewerRendersLatestSnapshot(t *testing.T) {
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
path := filepath.Join(dir, "audit.json")
|
path := filepath.Join(dir, "audit.json")
|
||||||
@@ -174,6 +965,17 @@ func TestSupportBundleEndpointReturnsArchive(t *testing.T) {
|
|||||||
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.log"), []byte("audit log"), 0644); err != nil {
|
if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.log"), []byte("audit log"), 0644); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
archive, err := os.CreateTemp(os.TempDir(), "bee-support-server-test-*.tar.gz")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() { _ = os.Remove(archive.Name()) })
|
||||||
|
if _, err := archive.WriteString("support-bundle"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := archive.Close(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
handler := NewHandler(HandlerOptions{ExportDir: exportDir})
|
handler := NewHandler(HandlerOptions{ExportDir: exportDir})
|
||||||
rec := httptest.NewRecorder()
|
rec := httptest.NewRecorder()
|
||||||
@@ -210,3 +1012,98 @@ func TestRuntimeHealthEndpointReturnsJSON(t *testing.T) {
|
|||||||
t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body)
|
t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestDashboardRendersRuntimeHealthTable seeds the audit snapshot,
// runtime-health JSON, and component-status JSON files, then verifies the
// dashboard (GET /) renders the Runtime Health table with rows for the
// environment checks, per-component SAT status, and their issue details.
func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "audit.json")
	exportDir := filepath.Join(dir, "export")
	if err := os.MkdirAll(exportDir, 0755); err != nil {
		t.Fatal(err)
	}
	// Minimal audit snapshot so the dashboard has hardware context to render.
	if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z","hardware":{"board":{"serial_number":"SERIAL-1"}}}`), 0644); err != nil {
		t.Fatal(err)
	}
	// Runtime health with a mix of ready/not-ready checks, a missing tool,
	// and an inactive service, so every table section has content to assert.
	health := `{
"status":"PARTIAL",
"checked_at":"2026-03-16T10:00:00Z",
"export_dir":"/tmp/export",
"driver_ready":true,
"cuda_ready":false,
"network_status":"PARTIAL",
"issues":[
{"code":"dhcp_partial","description":"At least one interface did not obtain IPv4 connectivity."},
{"code":"cuda_runtime_not_ready","description":"CUDA runtime is not ready for GPU SAT."}
],
"tools":[
{"name":"dmidecode","ok":true},
{"name":"nvidia-smi","ok":false}
],
"services":[
{"name":"bee-web","status":"active"},
{"name":"bee-nvidia","status":"inactive"}
]
}`
	if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
		t.Fatal(err)
	}
	// Component status covering all four component classes (CPU, memory,
	// storage, GPU) with their latest SAT verdicts and history entries.
	componentStatus := `[
{
"component_key":"cpu:all",
"status":"Warning",
"error_summary":"cpu SAT: FAILED",
"history":[{"at":"2026-03-16T10:00:00Z","status":"Warning","source":"sat:cpu","detail":"cpu SAT: FAILED"}]
},
{
"component_key":"memory:all",
"status":"OK",
"history":[{"at":"2026-03-16T10:01:00Z","status":"OK","source":"sat:memory","detail":"memory SAT: OK"}]
},
{
"component_key":"storage:nvme0n1",
"status":"Critical",
"error_summary":"storage SAT: FAILED",
"history":[{"at":"2026-03-16T10:02:00Z","status":"Critical","source":"sat:storage","detail":"storage SAT: FAILED"}]
},
{
"component_key":"pcie:gpu:nvidia",
"status":"Warning",
"error_summary":"nvidia SAT: FAILED",
"history":[{"at":"2026-03-16T10:03:00Z","status":"Warning","source":"sat:nvidia","detail":"nvidia SAT: FAILED"}]
}
]`
	if err := os.WriteFile(filepath.Join(exportDir, "component-status.json"), []byte(componentStatus), 0644); err != nil {
		t.Fatal(err)
	}

	handler := NewHandler(HandlerOptions{AuditPath: path, ExportDir: exportDir})
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	body := rec.Body.String()
	for _, needle := range []string{
		`Runtime Health`,
		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
		`Export Directory`,
		`Network`,
		`NVIDIA/AMD Driver`,
		`CUDA / ROCm`,
		`Required Utilities`,
		`Bee Services`,
		`<td>CPU</td>`,
		`<td>Memory</td>`,
		`<td>Storage</td>`,
		`<td>GPU</td>`,
		`CUDA runtime is not ready for GPU SAT.`,
		`Missing: nvidia-smi`,
		`bee-nvidia=inactive`,
		`cpu SAT: FAILED`,
		`storage SAT: FAILED`,
		`sat:nvidia`,
	} {
		if !strings.Contains(body, needle) {
			t.Fatalf("dashboard missing %q: %s", needle, body)
		}
	}
}
|
||||||
|
|||||||
42
audit/internal/webui/stability.go
Normal file
42
audit/internal/webui/stability.go
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
|
"runtime/debug"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func goRecoverLoop(name string, restartDelay time.Duration, fn func()) {
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
if !runRecoverable(name, fn) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if restartDelay > 0 {
|
||||||
|
time.Sleep(restartDelay)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func goRecoverOnce(name string, fn func()) {
|
||||||
|
go func() {
|
||||||
|
_ = runRecoverable(name, fn)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// runRecoverable invokes fn, converting any panic into a logged error with the
// component name and stack trace. It reports whether fn panicked.
func runRecoverable(name string, fn func()) (panicked bool) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		panicked = true
		slog.Error("recovered panic",
			"component", name,
			"panic", fmt.Sprint(r),
			"stack", string(debug.Stack()),
		)
	}()
	fn()
	return
}
|
||||||
267
audit/internal/webui/task_page.go
Normal file
267
audit/internal/webui/task_page.go
Normal file
@@ -0,0 +1,267 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
	"encoding/json"
	"fmt"
	"html"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"bee/audit/internal/platform"
)
|
||||||
|
|
||||||
|
func (h *handler) handleTaskPage(w http.ResponseWriter, r *http.Request) {
|
||||||
|
id := r.PathValue("id")
|
||||||
|
task, ok := globalQueue.findByID(id)
|
||||||
|
if !ok {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
snapshot := *task
|
||||||
|
body := renderTaskDetailPage(h.opts, snapshot)
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||||
|
_, _ = w.Write([]byte(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPITaskChartsIndex(w http.ResponseWriter, r *http.Request) {
|
||||||
|
task, samples, _, _, ok := h.taskSamplesForRequest(r)
|
||||||
|
if !ok {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
type taskChartIndexEntry struct {
|
||||||
|
Title string `json:"title"`
|
||||||
|
File string `json:"file"`
|
||||||
|
}
|
||||||
|
entries := make([]taskChartIndexEntry, 0)
|
||||||
|
for _, spec := range taskChartSpecsForSamples(samples) {
|
||||||
|
title, _, ok := renderTaskChartSVG(spec.Path, samples, taskTimelineForTask(task))
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
entries = append(entries, taskChartIndexEntry{Title: title, File: spec.File})
|
||||||
|
}
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||||
|
_ = json.NewEncoder(w).Encode(entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) handleAPITaskChartSVG(w http.ResponseWriter, r *http.Request) {
|
||||||
|
task, samples, _, _, ok := h.taskSamplesForRequest(r)
|
||||||
|
if !ok {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
file := strings.TrimPrefix(r.URL.Path, "/api/tasks/"+task.ID+"/chart/")
|
||||||
|
path, ok := taskChartPathFromFile(file)
|
||||||
|
if !ok {
|
||||||
|
http.NotFound(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
title, buf, hasData := renderTaskChartSVG(path, samples, taskTimelineForTask(task))
|
||||||
|
if !hasData || len(buf) == 0 || strings.TrimSpace(title) == "" {
|
||||||
|
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "image/svg+xml")
|
||||||
|
w.Header().Set("Cache-Control", "no-store")
|
||||||
|
_, _ = w.Write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
// renderTaskDetailPage builds the full HTML page for one task: header bar
// (back link, optional cancel button), the saved report fragment or a fallback
// summary card, and — for live tasks — the live-chart and live-log cards plus
// the inline JavaScript that drives them via SSE and the chart API.
func renderTaskDetailPage(opts HandlerOptions, task Task) string {
	// Fall back to the task ID when no human-readable name was set.
	title := task.Name
	if strings.TrimSpace(title) == "" {
		title = task.ID
	}
	var body strings.Builder
	body.WriteString(`<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">`)
	body.WriteString(`<a class="btn btn-secondary btn-sm" href="/tasks">Back to Tasks</a>`)
	// Cancel is only meaningful while the task can still be stopped.
	if task.Status == TaskRunning || task.Status == TaskPending {
		body.WriteString(`<button class="btn btn-danger btn-sm" onclick="cancelTaskDetail('` + html.EscapeString(task.ID) + `')">Cancel</button>`)
	}
	body.WriteString(`<span style="font-size:12px;color:var(--muted)">Artifacts are saved in the task folder under <code>./tasks</code>.</span>`)
	body.WriteString(`</div>`)

	// Prefer the saved report fragment; otherwise render a minimal summary
	// card with the status badge and any error message.
	if report := loadTaskReportFragment(task); report != "" {
		body.WriteString(report)
	} else {
		body.WriteString(`<div class="card"><div class="card-head">Task Summary</div><div class="card-body">`)
		body.WriteString(`<div style="font-size:18px;font-weight:700">` + html.EscapeString(title) + `</div>`)
		body.WriteString(`<div style="margin-top:8px">` + renderTaskStatusBadge(task.Status) + `</div>`)
		if strings.TrimSpace(task.ErrMsg) != "" {
			body.WriteString(`<div style="margin-top:8px;color:var(--crit-fg)">` + html.EscapeString(task.ErrMsg) + `</div>`)
		}
		body.WriteString(`</div></div>`)
	}

	// Live charts are shown only while the task is actually producing samples.
	if task.Status == TaskRunning {
		body.WriteString(`<div class="card"><div class="card-head">Live Charts</div><div class="card-body">`)
		body.WriteString(`<div id="task-live-charts" style="display:flex;flex-direction:column;gap:16px;color:var(--muted);font-size:13px">Loading charts...</div>`)
		body.WriteString(`</div></div>`)
	}

	// Live logs + the client-side script (SSE log stream, cancel action, and
	// 2 s chart polling) apply to both pending and running tasks.
	if task.Status == TaskRunning || task.Status == TaskPending {
		body.WriteString(`<div class="card"><div class="card-head">Live Logs</div><div class="card-body">`)
		body.WriteString(`<div id="task-live-log" class="terminal" style="max-height:none;white-space:pre-wrap">Connecting...</div>`)
		body.WriteString(`</div></div>`)
		body.WriteString(`<script>
function cancelTaskDetail(id) {
fetch('/api/tasks/' + id + '/cancel', {method:'POST'}).then(function(){
var term = document.getElementById('task-live-log');
if (term) {
term.textContent += '\nCancel requested.\n';
term.scrollTop = term.scrollHeight;
}
});
}
function renderTaskLiveCharts(taskId, charts) {
const host = document.getElementById('task-live-charts');
if (!host) return;
if (!Array.isArray(charts) || charts.length === 0) {
host.innerHTML = 'Waiting for metric samples...';
return;
}
const seen = {};
charts.forEach(function(chart) {
seen[chart.file] = true;
let img = host.querySelector('img[data-chart-file="' + chart.file + '"]');
if (img) {
const card = img.closest('.card');
if (card) {
const title = card.querySelector('.card-head');
if (title) title.textContent = chart.title;
}
return;
}
const card = document.createElement('div');
card.className = 'card';
card.style.margin = '0';
card.innerHTML = '<div class="card-head"></div><div class="card-body" style="padding:12px"></div>';
card.querySelector('.card-head').textContent = chart.title;
const body = card.querySelector('.card-body');
img = document.createElement('img');
img.setAttribute('data-task-chart', '1');
img.setAttribute('data-chart-file', chart.file);
img.setAttribute('data-base-src', '/api/tasks/' + taskId + '/chart/' + chart.file);
img.src = '/api/tasks/' + taskId + '/chart/' + chart.file + '?t=' + Date.now();
img.style.width = '100%';
img.style.display = 'block';
img.style.borderRadius = '6px';
img.alt = chart.title;
body.appendChild(img);
host.appendChild(card);
});
Array.from(host.querySelectorAll('img[data-task-chart="1"]')).forEach(function(img) {
const file = img.getAttribute('data-chart-file') || '';
if (seen[file]) return;
const card = img.closest('.card');
if (card) card.remove();
});
}
function loadTaskLiveCharts(taskId) {
fetch('/api/tasks/' + taskId + '/charts').then(function(r){ return r.json(); }).then(function(charts){
renderTaskLiveCharts(taskId, charts);
}).catch(function(){
const host = document.getElementById('task-live-charts');
if (host) host.innerHTML = 'Task charts are unavailable.';
});
}
function refreshTaskLiveCharts() {
document.querySelectorAll('img[data-task-chart="1"]').forEach(function(img){
const base = img.dataset.baseSrc;
if (!base) return;
img.src = base + '?t=' + Date.now();
});
}
var _taskDetailES = new EventSource('/api/tasks/` + html.EscapeString(task.ID) + `/stream');
var _taskDetailTerm = document.getElementById('task-live-log');
var _taskChartTimer = null;
var _taskChartsFrozen = false;
_taskDetailES.onopen = function(){ _taskDetailTerm.textContent = ''; };
_taskDetailES.onmessage = function(e){ _taskDetailTerm.textContent += e.data + "\n"; _taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight; };
_taskDetailES.addEventListener('done', function(e){
if (_taskChartTimer) clearInterval(_taskChartTimer);
_taskDetailES.close();
_taskDetailES = null;
_taskChartsFrozen = true;
_taskDetailTerm.textContent += (e.data ? '\nTask finished with error.\n' : '\nTask finished.\n');
_taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight;
refreshTaskLiveCharts();
});
_taskDetailES.onerror = function(){
if (_taskChartTimer) clearInterval(_taskChartTimer);
if (_taskDetailES) {
_taskDetailES.close();
_taskDetailES = null;
}
};
loadTaskLiveCharts('` + html.EscapeString(task.ID) + `');
_taskChartTimer = setInterval(function(){
if (_taskChartsFrozen) return;
loadTaskLiveCharts('` + html.EscapeString(task.ID) + `');
refreshTaskLiveCharts();
}, 2000);
</script>`)
	}

	return layoutHead(opts.Title+" — "+title) +
		layoutNav("tasks", opts.BuildLabel) +
		`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
		body.String() +
		`</div></div></body></html>`
}
|
||||||
|
|
||||||
|
func loadTaskReportFragment(task Task) string {
|
||||||
|
if strings.TrimSpace(task.ReportHTMLPath) == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(task.ReportHTMLPath)
|
||||||
|
if err != nil || len(data) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return string(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskArtifactDownloadLink(task Task, absPath string) string {
|
||||||
|
if strings.TrimSpace(absPath) == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return fmt.Sprintf(`/export/file?path=%s`, absPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *handler) taskSamplesForRequest(r *http.Request) (Task, []platform.LiveMetricSample, time.Time, time.Time, bool) {
|
||||||
|
id := r.PathValue("id")
|
||||||
|
taskPtr, ok := globalQueue.findByID(id)
|
||||||
|
if !ok {
|
||||||
|
return Task{}, nil, time.Time{}, time.Time{}, false
|
||||||
|
}
|
||||||
|
task := *taskPtr
|
||||||
|
start, end := taskTimeWindow(&task)
|
||||||
|
samples, err := loadTaskMetricSamples(start, end)
|
||||||
|
if err != nil {
|
||||||
|
return task, nil, start, end, true
|
||||||
|
}
|
||||||
|
return task, samples, start, end, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskTimelineForTask(task Task) []chartTimelineSegment {
|
||||||
|
start, end := taskTimeWindow(&task)
|
||||||
|
return []chartTimelineSegment{{Start: start, End: end, Active: true}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskChartPathFromFile(file string) (string, bool) {
|
||||||
|
file = strings.TrimSpace(file)
|
||||||
|
for _, spec := range taskDashboardChartSpecs {
|
||||||
|
if spec.File == file {
|
||||||
|
return spec.Path, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(file, "gpu-") && strings.HasSuffix(file, "-overview.svg") {
|
||||||
|
id := strings.TrimSuffix(strings.TrimPrefix(file, "gpu-"), "-overview.svg")
|
||||||
|
return "gpu/" + id + "-overview", true
|
||||||
|
}
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
343
audit/internal/webui/task_report.go
Normal file
343
audit/internal/webui/task_report.go
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
package webui
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"html"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"bee/audit/internal/platform"
|
||||||
|
)
|
||||||
|
|
||||||
|
// taskReportMetricsDBPath points at the metrics database consulted when
// building task reports; tests override it (defaults to the production
// metricsDBPath).
var taskReportMetricsDBPath = metricsDBPath

// taskReport is the JSON summary persisted as report.json in a finished
// task's artifacts directory.
type taskReport struct {
	ID          string            `json:"id"`
	Name        string            `json:"name"`
	Target      string            `json:"target"`
	Status      string            `json:"status"`
	CreatedAt   time.Time         `json:"created_at"`
	StartedAt   *time.Time        `json:"started_at,omitempty"`
	DoneAt      *time.Time        `json:"done_at,omitempty"`
	DurationSec int               `json:"duration_sec,omitempty"`
	Error       string            `json:"error,omitempty"`
	LogFile     string            `json:"log_file,omitempty"`
	Charts      []taskReportChart `json:"charts,omitempty"`
	GeneratedAt time.Time         `json:"generated_at"`
}

// taskReportChart names one chart image saved alongside the report.
type taskReportChart struct {
	Title string `json:"title"`
	File  string `json:"file"`
}

// taskChartSpec pairs a renderer chart path with the file name it is exposed
// under in task reports and the chart API.
type taskChartSpec struct {
	Path string
	File string
}

// taskDashboardChartSpecs lists the fixed server- and GPU-aggregate charts
// included in every task report; per-GPU overview charts are appended
// dynamically by taskChartSpecsForSamples.
var taskDashboardChartSpecs = []taskChartSpec{
	{Path: "server-load", File: "server-load.svg"},
	{Path: "server-temp-cpu", File: "server-temp-cpu.svg"},
	{Path: "server-temp-ambient", File: "server-temp-ambient.svg"},
	{Path: "server-power", File: "server-power.svg"},
	{Path: "server-fans", File: "server-fans.svg"},
	{Path: "gpu-all-load", File: "gpu-all-load.svg"},
	{Path: "gpu-all-memload", File: "gpu-all-memload.svg"},
	{Path: "gpu-all-clock", File: "gpu-all-clock.svg"},
	{Path: "gpu-all-power", File: "gpu-all-power.svg"},
	{Path: "gpu-all-temp", File: "gpu-all-temp.svg"},
}
|
||||||
|
|
||||||
|
func taskChartSpecsForSamples(samples []platform.LiveMetricSample) []taskChartSpec {
|
||||||
|
specs := make([]taskChartSpec, 0, len(taskDashboardChartSpecs)+len(taskGPUIndices(samples)))
|
||||||
|
specs = append(specs, taskDashboardChartSpecs...)
|
||||||
|
for _, idx := range taskGPUIndices(samples) {
|
||||||
|
specs = append(specs, taskChartSpec{
|
||||||
|
Path: fmt.Sprintf("gpu/%d-overview", idx),
|
||||||
|
File: fmt.Sprintf("gpu-%d-overview.svg", idx),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return specs
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeTaskReportArtifacts renders the final report for a task into its
// artifacts directory: chart SVGs, report.json, and an HTML fragment
// (report.html) embedding the charts and the task log. It is a no-op when t
// is nil or no artifacts directory has been assigned.
func writeTaskReportArtifacts(t *Task) error {
	if t == nil {
		return nil
	}
	ensureTaskReportPaths(t)
	if strings.TrimSpace(t.ArtifactsDir) == "" {
		// No destination directory; nothing to write.
		return nil
	}
	if err := os.MkdirAll(t.ArtifactsDir, 0755); err != nil {
		return err
	}

	// Render charts for the metric samples captured during the task window.
	// Sample-loading errors are deliberately dropped: the report is still
	// useful without charts.
	start, end := taskTimeWindow(t)
	samples, _ := loadTaskMetricSamples(start, end)
	charts, inlineCharts := writeTaskCharts(t.ArtifactsDir, start, end, samples)

	// Best effort: a missing or unreadable log yields an empty log section.
	logText := ""
	if data, err := os.ReadFile(t.LogPath); err == nil {
		logText = string(data)
	}

	report := taskReport{
		ID:          t.ID,
		Name:        t.Name,
		Target:      t.Target,
		Status:      t.Status,
		CreatedAt:   t.CreatedAt,
		StartedAt:   t.StartedAt,
		DoneAt:      t.DoneAt,
		DurationSec: taskElapsedSec(t, reportDoneTime(t)),
		Error:       t.ErrMsg,
		LogFile:     filepath.Base(t.LogPath),
		Charts:      charts,
		GeneratedAt: time.Now().UTC(),
	}
	if err := writeJSONFile(t.ReportJSONPath, report); err != nil {
		return err
	}
	return os.WriteFile(t.ReportHTMLPath, []byte(renderTaskReportFragment(report, inlineCharts, logText)), 0644)
}
|
||||||
|
|
||||||
|
func reportDoneTime(t *Task) time.Time {
|
||||||
|
if t != nil && t.DoneAt != nil && !t.DoneAt.IsZero() {
|
||||||
|
return *t.DoneAt
|
||||||
|
}
|
||||||
|
return time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
func taskTimeWindow(t *Task) (time.Time, time.Time) {
|
||||||
|
if t == nil {
|
||||||
|
now := time.Now().UTC()
|
||||||
|
return now, now
|
||||||
|
}
|
||||||
|
start := t.CreatedAt.UTC()
|
||||||
|
if t.StartedAt != nil && !t.StartedAt.IsZero() {
|
||||||
|
start = t.StartedAt.UTC()
|
||||||
|
}
|
||||||
|
end := time.Now().UTC()
|
||||||
|
if t.DoneAt != nil && !t.DoneAt.IsZero() {
|
||||||
|
end = t.DoneAt.UTC()
|
||||||
|
}
|
||||||
|
if end.Before(start) {
|
||||||
|
end = start
|
||||||
|
}
|
||||||
|
return start, end
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadTaskMetricSamples opens the metrics database (path indirected via
// taskReportMetricsDBPath so tests can substitute a temp DB) and returns the
// samples recorded between start and end.
func loadTaskMetricSamples(start, end time.Time) ([]platform.LiveMetricSample, error) {
	db, err := openMetricsDB(taskReportMetricsDBPath)
	if err != nil {
		return nil, err
	}
	defer db.Close()
	return db.LoadBetween(start, end)
}
|
||||||
|
|
||||||
|
func writeTaskCharts(dir string, start, end time.Time, samples []platform.LiveMetricSample) ([]taskReportChart, map[string]string) {
|
||||||
|
if len(samples) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
timeline := []chartTimelineSegment{{Start: start, End: end, Active: true}}
|
||||||
|
var charts []taskReportChart
|
||||||
|
inline := make(map[string]string)
|
||||||
|
for _, spec := range taskChartSpecsForSamples(samples) {
|
||||||
|
title, svg, ok := renderTaskChartSVG(spec.Path, samples, timeline)
|
||||||
|
if !ok || len(svg) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
path := filepath.Join(dir, spec.File)
|
||||||
|
if err := os.WriteFile(path, svg, 0644); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
charts = append(charts, taskReportChart{Title: title, File: spec.File})
|
||||||
|
inline[spec.File] = string(svg)
|
||||||
|
}
|
||||||
|
return charts, inline
|
||||||
|
}
|
||||||
|
|
||||||
|
// renderTaskChartSVG renders the chart identified by path (e.g.
// "server-load" or "gpu/0-overview") from samples into an SVG. It returns the
// chart title, the SVG bytes, and ok=false when the path is unknown, the
// chart has no data, or rendering fails.
func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) (string, []byte, bool) {
	// Per-GPU overview charts ("gpu/<idx>-overview") use a dedicated renderer.
	if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
		buf, hasData, err := renderGPUOverviewChartSVG(idx, samples, timeline)
		if err != nil || !hasData {
			return "", nil, false
		}
		return gpuDisplayLabel(idx) + " Overview", buf, true
	}
	// All other chart paths go through the generic metric chart pipeline.
	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
	if !ok {
		return "", nil, false
	}
	buf, err := renderMetricChartSVG(
		title,
		labels,
		sampleTimes(samples),
		datasets,
		names,
		yMin,
		yMax,
		chartCanvasHeightForPath(path, len(names)),
		timeline,
	)
	if err != nil {
		return "", nil, false
	}
	return title, buf, true
}
|
||||||
|
|
||||||
|
func taskGPUIndices(samples []platform.LiveMetricSample) []int {
|
||||||
|
seen := map[int]bool{}
|
||||||
|
var out []int
|
||||||
|
for _, s := range samples {
|
||||||
|
for _, g := range s.GPUs {
|
||||||
|
if seen[g.GPUIndex] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[g.GPUIndex] = true
|
||||||
|
out = append(out, g.GPUIndex)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Ints(out)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
func writeJSONFile(path string, v any) error {
|
||||||
|
data, err := json.MarshalIndent(v, "", " ")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return os.WriteFile(path, data, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// renderTaskReportFragment builds the HTML fragment for a task report: a
// summary card, an optional benchmark-results card, one card per chart
// (using the pre-rendered inline SVG in charts keyed by filename), and the
// task log. All user-controlled text is HTML-escaped.
func renderTaskReportFragment(report taskReport, charts map[string]string, logText string) string {
	var b strings.Builder
	b.WriteString(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">`)
	b.WriteString(`<div class="grid2">`)
	b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Task</div><div style="font-size:16px;font-weight:700">` + html.EscapeString(report.Name) + `</div>`)
	b.WriteString(`<div style="font-size:13px;color:var(--muted)">` + html.EscapeString(report.Target) + `</div></div>`)
	b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Status</div><div>` + renderTaskStatusBadge(report.Status) + `</div>`)
	if strings.TrimSpace(report.Error) != "" {
		// Show the failure reason directly under the status badge.
		b.WriteString(`<div style="margin-top:8px;font-size:13px;color:var(--crit-fg)">` + html.EscapeString(report.Error) + `</div>`)
	}
	b.WriteString(`</div></div>`)
	b.WriteString(`<div style="margin-top:14px;font-size:13px;color:var(--muted)">`)
	b.WriteString(`Started: ` + formatTaskTime(report.StartedAt, report.CreatedAt) + ` | Finished: ` + formatTaskTime(report.DoneAt, time.Time{}) + ` | Duration: ` + formatTaskDuration(report.DurationSec))
	b.WriteString(`</div></div></div>`)
	// Benchmark tasks get an extra results card parsed from the log text.
	if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
		b.WriteString(benchmarkCard)
	}

	if len(report.Charts) > 0 {
		for _, chart := range report.Charts {
			// The SVG markup in charts is trusted (generated locally), so it
			// is embedded unescaped.
			b.WriteString(`<div class="card"><div class="card-head">` + html.EscapeString(chart.Title) + `</div><div class="card-body" style="padding:12px">`)
			b.WriteString(charts[chart.File])
			b.WriteString(`</div></div>`)
		}
	} else {
		b.WriteString(`<div class="alert alert-info">No metric samples were captured during this task window.</div>`)
	}

	b.WriteString(`<div class="card"><div class="card-head">Logs</div><div class="card-body">`)
	b.WriteString(`<div class="terminal" style="max-height:none;white-space:pre-wrap">` + html.EscapeString(strings.TrimSpace(logText)) + `</div>`)
	b.WriteString(`</div></div>`)
	return b.String()
}
|
||||||
|
|
||||||
|
// renderTaskBenchmarkResultsCard returns the "Benchmark Results" HTML card
// for nvidia-benchmark tasks, locating the result file via the archive path
// printed in the task log. Returns "" (no card) for other targets or when no
// result file with runs can be loaded.
func renderTaskBenchmarkResultsCard(target, logText string) string {
	if strings.TrimSpace(target) != "nvidia-benchmark" {
		return ""
	}
	resultPath := taskBenchmarkResultPath(logText)
	if strings.TrimSpace(resultPath) == "" {
		return ""
	}
	columns, runs := loadBenchmarkHistoryFromPaths([]string{resultPath})
	if len(runs) == 0 {
		return ""
	}
	return renderBenchmarkResultsCardFromRuns(
		"Benchmark Results",
		"Composite score for this benchmark task.",
		"No benchmark results were saved for this task.",
		columns,
		runs,
	)
}
|
||||||
|
|
||||||
|
func taskBenchmarkResultPath(logText string) string {
|
||||||
|
archivePath := taskArchivePathFromLog(logText)
|
||||||
|
if archivePath == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
runDir := strings.TrimSuffix(archivePath, ".tar.gz")
|
||||||
|
if runDir == archivePath {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return filepath.Join(runDir, "result.json")
|
||||||
|
}
|
||||||
|
|
||||||
|
// taskArchivePathFromLog scans the task log from the end for the most recent
// "Archive:" line and returns the .tar.gz path it names, or "" when no such
// line exists.
func taskArchivePathFromLog(logText string) string {
	lines := strings.Split(logText, "\n")
	for i := len(lines) - 1; i >= 0; i-- {
		line := strings.TrimSpace(lines[i])
		if !strings.HasPrefix(line, "Archive:") {
			continue
		}
		candidate := strings.TrimSpace(strings.TrimPrefix(line, "Archive:"))
		// Some log variants write "Archive: Archive written to <path>".
		const verbose = "Archive written to "
		if strings.HasPrefix(candidate, verbose) {
			candidate = strings.TrimSpace(strings.TrimPrefix(candidate, verbose))
		}
		if strings.HasSuffix(candidate, ".tar.gz") {
			return candidate
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
func renderTaskStatusBadge(status string) string {
|
||||||
|
className := map[string]string{
|
||||||
|
TaskRunning: "badge-ok",
|
||||||
|
TaskPending: "badge-unknown",
|
||||||
|
TaskDone: "badge-ok",
|
||||||
|
TaskFailed: "badge-err",
|
||||||
|
TaskCancelled: "badge-unknown",
|
||||||
|
}[status]
|
||||||
|
if className == "" {
|
||||||
|
className = "badge-unknown"
|
||||||
|
}
|
||||||
|
label := strings.TrimSpace(status)
|
||||||
|
if label == "" {
|
||||||
|
label = "unknown"
|
||||||
|
}
|
||||||
|
return `<span class="badge ` + className + `">` + html.EscapeString(label) + `</span>`
|
||||||
|
}
|
||||||
|
|
||||||
|
func formatTaskTime(ts *time.Time, fallback time.Time) string {
|
||||||
|
if ts != nil && !ts.IsZero() {
|
||||||
|
return ts.Local().Format("2006-01-02 15:04:05")
|
||||||
|
}
|
||||||
|
if !fallback.IsZero() {
|
||||||
|
return fallback.Local().Format("2006-01-02 15:04:05")
|
||||||
|
}
|
||||||
|
return "n/a"
|
||||||
|
}
|
||||||
|
|
||||||
|
// formatTaskDuration renders a second count as a compact human-readable
// duration ("45s", "2m 05s", "1h 02m 05s"); non-positive values yield "n/a".
func formatTaskDuration(sec int) string {
	switch {
	case sec <= 0:
		return "n/a"
	case sec < 60:
		return fmt.Sprintf("%ds", sec)
	case sec < 3600:
		return fmt.Sprintf("%dm %02ds", sec/60, sec%60)
	default:
		return fmt.Sprintf("%dh %02dm %02ds", sec/3600, (sec%3600)/60, sec%60)
	}
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -2,12 +2,18 @@ package webui
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"bee/audit/internal/app"
|
"bee/audit/internal/app"
|
||||||
|
"bee/audit/internal/platform"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
||||||
@@ -22,21 +28,34 @@ func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
started := time.Now().Add(-time.Minute)
|
started := time.Now().Add(-time.Minute)
|
||||||
task := &Task{
|
|
||||||
ID: "task-1",
|
// A task that was pending (not yet started) must be re-queued on restart.
|
||||||
|
pendingTask := &Task{
|
||||||
|
ID: "task-pending",
|
||||||
Name: "Memory Burn-in",
|
Name: "Memory Burn-in",
|
||||||
Target: "memory-stress",
|
Target: "memory-stress",
|
||||||
Priority: 2,
|
Priority: 2,
|
||||||
Status: TaskRunning,
|
Status: TaskPending,
|
||||||
CreatedAt: time.Now().Add(-2 * time.Minute),
|
CreatedAt: time.Now().Add(-2 * time.Minute),
|
||||||
StartedAt: &started,
|
params: taskParams{Duration: 300, BurnProfile: "smoke"},
|
||||||
params: taskParams{
|
}
|
||||||
Duration: 300,
|
// A task that was running when bee-web crashed must NOT be re-queued —
|
||||||
BurnProfile: "smoke",
|
// its child processes (e.g. gpu-burn-worker) survive the restart in
|
||||||
},
|
// their own process groups and can't be cancelled retroactively.
|
||||||
|
runningTask := &Task{
|
||||||
|
ID: "task-running",
|
||||||
|
Name: "NVIDIA GPU Stress",
|
||||||
|
Target: "nvidia-stress",
|
||||||
|
Priority: 1,
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now().Add(-3 * time.Minute),
|
||||||
|
StartedAt: &started,
|
||||||
|
params: taskParams{Duration: 86400},
|
||||||
|
}
|
||||||
|
for _, task := range []*Task{pendingTask, runningTask} {
|
||||||
|
q.tasks = append(q.tasks, task)
|
||||||
|
q.assignTaskLogPathLocked(task)
|
||||||
}
|
}
|
||||||
q.tasks = append(q.tasks, task)
|
|
||||||
q.assignTaskLogPathLocked(task)
|
|
||||||
q.persistLocked()
|
q.persistLocked()
|
||||||
|
|
||||||
recovered := &taskQueue{
|
recovered := &taskQueue{
|
||||||
@@ -46,18 +65,47 @@ func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
recovered.loadLocked()
|
recovered.loadLocked()
|
||||||
|
|
||||||
if len(recovered.tasks) != 1 {
|
if len(recovered.tasks) != 2 {
|
||||||
t.Fatalf("tasks=%d want 1", len(recovered.tasks))
|
t.Fatalf("tasks=%d want 2", len(recovered.tasks))
|
||||||
}
|
}
|
||||||
got := recovered.tasks[0]
|
|
||||||
if got.Status != TaskPending {
|
byID := map[string]*Task{}
|
||||||
t.Fatalf("status=%q want %q", got.Status, TaskPending)
|
for i := range recovered.tasks {
|
||||||
|
byID[recovered.tasks[i].ID] = recovered.tasks[i]
|
||||||
}
|
}
|
||||||
if got.params.Duration != 300 || got.params.BurnProfile != "smoke" {
|
|
||||||
t.Fatalf("params=%+v", got.params)
|
// Pending task must be re-queued as pending with params intact.
|
||||||
|
p := byID["task-pending"]
|
||||||
|
if p == nil {
|
||||||
|
t.Fatal("task-pending not found")
|
||||||
}
|
}
|
||||||
if got.LogPath == "" {
|
if p.Status != TaskPending {
|
||||||
t.Fatal("expected log path")
|
t.Fatalf("pending task: status=%q want %q", p.Status, TaskPending)
|
||||||
|
}
|
||||||
|
if p.StartedAt != nil {
|
||||||
|
t.Fatalf("pending task: started_at=%v want nil", p.StartedAt)
|
||||||
|
}
|
||||||
|
if p.params.Duration != 300 || p.params.BurnProfile != "smoke" {
|
||||||
|
t.Fatalf("pending task: params=%+v", p.params)
|
||||||
|
}
|
||||||
|
if p.LogPath == "" {
|
||||||
|
t.Fatal("pending task: expected log path")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Running task must be marked failed, not re-queued, to prevent
|
||||||
|
// launching duplicate workers (e.g. a second set of gpu-burn-workers).
|
||||||
|
r := byID["task-running"]
|
||||||
|
if r == nil {
|
||||||
|
t.Fatal("task-running not found")
|
||||||
|
}
|
||||||
|
if r.Status != TaskFailed {
|
||||||
|
t.Fatalf("running task: status=%q want %q", r.Status, TaskFailed)
|
||||||
|
}
|
||||||
|
if r.ErrMsg == "" {
|
||||||
|
t.Fatal("running task: expected non-empty error message")
|
||||||
|
}
|
||||||
|
if r.DoneAt == nil {
|
||||||
|
t.Fatal("running task: expected done_at to be set")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,15 +126,363 @@ func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTaskQueueSnapshotSortsNewestFirst(t *testing.T) {
|
||||||
|
now := time.Date(2026, 4, 2, 12, 0, 0, 0, time.UTC)
|
||||||
|
q := &taskQueue{
|
||||||
|
tasks: []*Task{
|
||||||
|
{
|
||||||
|
ID: "old-running",
|
||||||
|
Name: "Old Running",
|
||||||
|
Status: TaskRunning,
|
||||||
|
Priority: 10,
|
||||||
|
CreatedAt: now.Add(-3 * time.Minute),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "new-done",
|
||||||
|
Name: "New Done",
|
||||||
|
Status: TaskDone,
|
||||||
|
Priority: 0,
|
||||||
|
CreatedAt: now.Add(-1 * time.Minute),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: "mid-pending",
|
||||||
|
Name: "Mid Pending",
|
||||||
|
Status: TaskPending,
|
||||||
|
Priority: 1,
|
||||||
|
CreatedAt: now.Add(-2 * time.Minute),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
got := q.snapshot()
|
||||||
|
if len(got) != 3 {
|
||||||
|
t.Fatalf("snapshot len=%d want 3", len(got))
|
||||||
|
}
|
||||||
|
if got[0].ID != "new-done" || got[1].ID != "mid-pending" || got[2].ID != "old-running" {
|
||||||
|
t.Fatalf("snapshot order=%q,%q,%q", got[0].ID, got[1].ID, got[2].ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewJobIDUsesTASKPrefixAndZeroPadding(t *testing.T) {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
origTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = nil
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
origCounter := jobCounter.Load()
|
||||||
|
jobCounter.Store(0)
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = origTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
jobCounter.Store(origCounter)
|
||||||
|
})
|
||||||
|
|
||||||
|
if got := newJobID("ignored"); got != "TASK-000" {
|
||||||
|
t.Fatalf("id=%q want TASK-000", got)
|
||||||
|
}
|
||||||
|
if got := newJobID("ignored"); got != "TASK-001" {
|
||||||
|
t.Fatalf("id=%q want TASK-001", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTaskArtifactsDirStartsWithTaskNumber(t *testing.T) {
|
||||||
|
root := t.TempDir()
|
||||||
|
task := &Task{
|
||||||
|
ID: "TASK-007",
|
||||||
|
Name: "NVIDIA Benchmark",
|
||||||
|
}
|
||||||
|
got := filepath.Base(taskArtifactsDir(root, task, TaskDone))
|
||||||
|
if !strings.HasPrefix(got, "007_") {
|
||||||
|
t.Fatalf("artifacts dir=%q want prefix 007_", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleAPITasksStreamReplaysPersistedLogWithoutLiveJob(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
logPath := filepath.Join(dir, "task.log")
|
||||||
|
if err := os.WriteFile(logPath, []byte("line1\nline2\n"), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
origTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = []*Task{{
|
||||||
|
ID: "done-1",
|
||||||
|
Name: "Done Task",
|
||||||
|
Status: TaskDone,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
LogPath: logPath,
|
||||||
|
}}
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = origTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/api/tasks/done-1/stream", nil)
|
||||||
|
req.SetPathValue("id", "done-1")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
h := &handler{}
|
||||||
|
h.handleAPITasksStream(rec, req)
|
||||||
|
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
body := rec.Body.String()
|
||||||
|
if !strings.Contains(body, "data: line1\n\n") || !strings.Contains(body, "data: line2\n\n") {
|
||||||
|
t.Fatalf("body=%q", body)
|
||||||
|
}
|
||||||
|
if !strings.Contains(body, "event: done\n") {
|
||||||
|
t.Fatalf("missing done event: %q", body)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHandleAPITasksStreamPendingTaskStartsSSEImmediately(t *testing.T) {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
origTasks := globalQueue.tasks
|
||||||
|
globalQueue.tasks = []*Task{{
|
||||||
|
ID: "pending-1",
|
||||||
|
Name: "Pending Task",
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
}}
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
t.Cleanup(func() {
|
||||||
|
globalQueue.mu.Lock()
|
||||||
|
globalQueue.tasks = origTasks
|
||||||
|
globalQueue.mu.Unlock()
|
||||||
|
})
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/api/tasks/pending-1/stream", nil).WithContext(ctx)
|
||||||
|
req.SetPathValue("id", "pending-1")
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
h := &handler{}
|
||||||
|
h.handleAPITasksStream(rec, req)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
deadline := time.Now().Add(2 * time.Second)
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
if strings.Contains(rec.Body.String(), "Task is queued. Waiting for worker...") {
|
||||||
|
cancel()
|
||||||
|
<-done
|
||||||
|
if rec.Code != http.StatusOK {
|
||||||
|
t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
time.Sleep(20 * time.Millisecond)
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
<-done
|
||||||
|
t.Fatalf("stream did not emit queued status promptly, body=%q", rec.Body.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFinalizeTaskRunCreatesReportFolderAndArtifacts(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
metricsPath := filepath.Join(dir, "metrics.db")
|
||||||
|
prevMetricsPath := taskReportMetricsDBPath
|
||||||
|
taskReportMetricsDBPath = metricsPath
|
||||||
|
t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
|
||||||
|
|
||||||
|
db, err := openMetricsDB(metricsPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("openMetricsDB: %v", err)
|
||||||
|
}
|
||||||
|
base := time.Now().UTC().Add(-45 * time.Second)
|
||||||
|
if err := db.Write(platform.LiveMetricSample{
|
||||||
|
Timestamp: base,
|
||||||
|
CPULoadPct: 42,
|
||||||
|
MemLoadPct: 35,
|
||||||
|
PowerW: 510,
|
||||||
|
}); err != nil {
|
||||||
|
t.Fatalf("Write: %v", err)
|
||||||
|
}
|
||||||
|
_ = db.Close()
|
||||||
|
|
||||||
|
q := &taskQueue{
|
||||||
|
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||||
|
logsDir: filepath.Join(dir, "tasks"),
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(q.logsDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
started := time.Now().UTC().Add(-90 * time.Second)
|
||||||
|
task := &Task{
|
||||||
|
ID: "task-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: started.Add(-10 * time.Second),
|
||||||
|
StartedAt: &started,
|
||||||
|
}
|
||||||
|
q.assignTaskLogPathLocked(task)
|
||||||
|
appendJobLog(task.LogPath, "line-1")
|
||||||
|
|
||||||
|
job := newTaskJobState(task.LogPath)
|
||||||
|
job.finish("")
|
||||||
|
q.finalizeTaskRun(task, job)
|
||||||
|
|
||||||
|
if task.Status != TaskDone {
|
||||||
|
t.Fatalf("status=%q want %q", task.Status, TaskDone)
|
||||||
|
}
|
||||||
|
if !strings.Contains(filepath.Base(task.ArtifactsDir), "_done") {
|
||||||
|
t.Fatalf("artifacts dir=%q", task.ArtifactsDir)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(task.ReportJSONPath); err != nil {
|
||||||
|
t.Fatalf("report json: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := os.Stat(task.ReportHTMLPath); err != nil {
|
||||||
|
t.Fatalf("report html: %v", err)
|
||||||
|
}
|
||||||
|
var report taskReport
|
||||||
|
data, err := os.ReadFile(task.ReportJSONPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ReadFile(report.json): %v", err)
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(data, &report); err != nil {
|
||||||
|
t.Fatalf("Unmarshal(report.json): %v", err)
|
||||||
|
}
|
||||||
|
if report.ID != task.ID || report.Status != TaskDone {
|
||||||
|
t.Fatalf("report=%+v", report)
|
||||||
|
}
|
||||||
|
if len(report.Charts) == 0 {
|
||||||
|
t.Fatalf("expected charts in report, got none")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
metricsPath := filepath.Join(dir, "metrics.db")
|
||||||
|
prevMetricsPath := taskReportMetricsDBPath
|
||||||
|
taskReportMetricsDBPath = metricsPath
|
||||||
|
t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
|
||||||
|
|
||||||
|
benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
|
||||||
|
if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
result := platform.NvidiaBenchmarkResult{
|
||||||
|
GeneratedAt: time.Date(2026, time.April, 6, 12, 0, 0, 0, time.UTC),
|
||||||
|
BenchmarkProfile: "standard",
|
||||||
|
OverallStatus: "OK",
|
||||||
|
GPUs: []platform.BenchmarkGPUResult{
|
||||||
|
{
|
||||||
|
Index: 0,
|
||||||
|
Name: "NVIDIA H100 PCIe",
|
||||||
|
Scores: platform.BenchmarkScorecard{
|
||||||
|
CompositeScore: 1176.25,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
raw, err := json.Marshal(result)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(benchmarkDir, "result.json"), raw, 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
artifactsDir := filepath.Join(dir, "tasks", "task-bench_done")
|
||||||
|
if err := os.MkdirAll(artifactsDir, 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
task := &Task{
|
||||||
|
ID: "task-bench",
|
||||||
|
Name: "NVIDIA Benchmark",
|
||||||
|
Target: "nvidia-benchmark",
|
||||||
|
Status: TaskDone,
|
||||||
|
CreatedAt: time.Now().UTC().Add(-time.Minute),
|
||||||
|
ArtifactsDir: artifactsDir,
|
||||||
|
}
|
||||||
|
ensureTaskReportPaths(task)
|
||||||
|
logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
|
||||||
|
if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := writeTaskReportArtifacts(task); err != nil {
|
||||||
|
t.Fatalf("writeTaskReportArtifacts: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := os.ReadFile(task.ReportHTMLPath)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("ReadFile(report.html): %v", err)
|
||||||
|
}
|
||||||
|
html := string(body)
|
||||||
|
for _, needle := range []string{
|
||||||
|
`Benchmark Results`,
|
||||||
|
`Composite score for this benchmark task.`,
|
||||||
|
`NVIDIA H100 PCIe / GPU 0`,
|
||||||
|
`1176.25`,
|
||||||
|
} {
|
||||||
|
if !strings.Contains(html, needle) {
|
||||||
|
t.Fatalf("report missing %q: %s", needle, html)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTaskLifecycleMirrorsToSerialConsole(t *testing.T) {
|
||||||
|
var lines []string
|
||||||
|
prev := taskSerialWriteLine
|
||||||
|
taskSerialWriteLine = func(line string) { lines = append(lines, line) }
|
||||||
|
t.Cleanup(func() { taskSerialWriteLine = prev })
|
||||||
|
|
||||||
|
dir := t.TempDir()
|
||||||
|
q := &taskQueue{
|
||||||
|
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||||
|
logsDir: filepath.Join(dir, "tasks"),
|
||||||
|
trigger: make(chan struct{}, 1),
|
||||||
|
}
|
||||||
|
task := &Task{
|
||||||
|
ID: "task-serial-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskPending,
|
||||||
|
CreatedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
|
||||||
|
q.enqueue(task)
|
||||||
|
started := time.Now().UTC()
|
||||||
|
task.Status = TaskRunning
|
||||||
|
task.StartedAt = &started
|
||||||
|
job := newTaskJobState(task.LogPath, taskSerialPrefix(task))
|
||||||
|
job.append("Starting CPU SAT...")
|
||||||
|
job.append("CPU stress duration: 60s")
|
||||||
|
job.finish("")
|
||||||
|
q.finalizeTaskRun(task, job)
|
||||||
|
|
||||||
|
joined := strings.Join(lines, "\n")
|
||||||
|
for _, needle := range []string{
|
||||||
|
"queued",
|
||||||
|
"Starting CPU SAT...",
|
||||||
|
"CPU stress duration: 60s",
|
||||||
|
"finished with status=done",
|
||||||
|
} {
|
||||||
|
if !strings.Contains(joined, needle) {
|
||||||
|
t.Fatalf("serial mirror missing %q in %q", needle, joined)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestResolveBurnPreset(t *testing.T) {
|
func TestResolveBurnPreset(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
profile string
|
profile string
|
||||||
want burnPreset
|
want burnPreset
|
||||||
}{
|
}{
|
||||||
{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
{profile: "smoke", want: burnPreset{DurationSec: 5 * 60}},
|
||||||
{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
|
{profile: "acceptance", want: burnPreset{DurationSec: 60 * 60}},
|
||||||
{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
|
{profile: "overnight", want: burnPreset{DurationSec: 8 * 60 * 60}},
|
||||||
{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
|
{profile: "", want: burnPreset{DurationSec: 5 * 60}},
|
||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
if got := resolveBurnPreset(tc.profile); got != tc.want {
|
if got := resolveBurnPreset(tc.profile); got != tc.want {
|
||||||
@@ -95,9 +491,24 @@ func TestResolveBurnPreset(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRunTaskHonorsCancel(t *testing.T) {
|
func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
|
||||||
t.Parallel()
|
tests := []struct {
|
||||||
|
loader string
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{loader: "", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
|
||||||
|
{loader: "builtin", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
|
||||||
|
{loader: "john", want: "NVIDIA GPU Stress (John/OpenCL)"},
|
||||||
|
{loader: "nccl", want: "NVIDIA GPU Stress (NCCL)"},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
if got := taskDisplayName("nvidia-stress", "acceptance", tc.loader); got != tc.want {
|
||||||
|
t.Fatalf("taskDisplayName(loader=%q)=%q want %q", tc.loader, got, tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunTaskHonorsCancel(t *testing.T) {
|
||||||
blocked := make(chan struct{})
|
blocked := make(chan struct{})
|
||||||
released := make(chan struct{})
|
released := make(chan struct{})
|
||||||
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
|
aRun := func(_ any, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
@@ -154,3 +565,180 @@ func TestRunTaskHonorsCancel(t *testing.T) {
|
|||||||
t.Fatal("runTask did not return after cancel")
|
t.Fatal("runTask did not return after cancel")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
|
||||||
|
var gotDuration int
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{App: &app.App{}},
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "cpu-burn-1",
|
||||||
|
Name: "CPU Burn-in",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{BurnProfile: "smoke"},
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
|
||||||
|
orig := runCPUAcceptancePackCtx
|
||||||
|
runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
|
||||||
|
gotDuration = durationSec
|
||||||
|
return "/tmp/cpu-burn.tar.gz", nil
|
||||||
|
}
|
||||||
|
defer func() { runCPUAcceptancePackCtx = orig }()
|
||||||
|
|
||||||
|
q.runTask(tk, j, context.Background())
|
||||||
|
|
||||||
|
if gotDuration != 5*60 {
|
||||||
|
t.Fatalf("duration=%d want %d", gotDuration, 5*60)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{ExportDir: dir},
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "support-bundle-1",
|
||||||
|
Name: "Support Bundle",
|
||||||
|
Target: "support-bundle",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
|
||||||
|
var gotExportDir string
|
||||||
|
orig := buildSupportBundle
|
||||||
|
buildSupportBundle = func(exportDir string) (string, error) {
|
||||||
|
gotExportDir = exportDir
|
||||||
|
return filepath.Join(exportDir, "bundle.tar.gz"), nil
|
||||||
|
}
|
||||||
|
defer func() { buildSupportBundle = orig }()
|
||||||
|
|
||||||
|
q.runTask(tk, j, context.Background())
|
||||||
|
|
||||||
|
if gotExportDir != dir {
|
||||||
|
t.Fatalf("exportDir=%q want %q", gotExportDir, dir)
|
||||||
|
}
|
||||||
|
if j.err != "" {
|
||||||
|
t.Fatalf("unexpected error: %q", j.err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(strings.Join(j.lines, "\n"), "Archive: "+filepath.Join(dir, "bundle.tar.gz")) {
|
||||||
|
t.Fatalf("lines=%v", j.lines)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTaskElapsedSecClampsInvalidStartedAt(t *testing.T) {
|
||||||
|
now := time.Date(2026, 4, 1, 19, 10, 0, 0, time.UTC)
|
||||||
|
created := time.Date(2026, 4, 1, 19, 4, 5, 0, time.UTC)
|
||||||
|
started := time.Time{}
|
||||||
|
task := &Task{
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: created,
|
||||||
|
StartedAt: &started,
|
||||||
|
}
|
||||||
|
if got := taskElapsedSec(task, now); got != 0 {
|
||||||
|
t.Fatalf("taskElapsedSec(zero start)=%d want 0", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
stale := created.Add(-24 * time.Hour)
|
||||||
|
task.StartedAt = &stale
|
||||||
|
if got := taskElapsedSec(task, now); got != int(now.Sub(created).Seconds()) {
|
||||||
|
t.Fatalf("taskElapsedSec(stale start)=%d want %d", got, int(now.Sub(created).Seconds()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{},
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "install-1",
|
||||||
|
Name: "Install to Disk",
|
||||||
|
Target: "install",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
params: taskParams{Device: "/dev/sda"},
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
|
||||||
|
var gotDevice string
|
||||||
|
var gotLogPath string
|
||||||
|
orig := installCommand
|
||||||
|
installCommand = func(ctx context.Context, device string, logPath string) *exec.Cmd {
|
||||||
|
gotDevice = device
|
||||||
|
gotLogPath = logPath
|
||||||
|
return exec.CommandContext(ctx, "sh", "-c", "printf 'line1\nline2\n'")
|
||||||
|
}
|
||||||
|
defer func() { installCommand = orig }()
|
||||||
|
|
||||||
|
q.runTask(tk, j, context.Background())
|
||||||
|
|
||||||
|
if gotDevice != "/dev/sda" {
|
||||||
|
t.Fatalf("device=%q want /dev/sda", gotDevice)
|
||||||
|
}
|
||||||
|
if gotLogPath == "" {
|
||||||
|
t.Fatal("expected install log path")
|
||||||
|
}
|
||||||
|
logs := strings.Join(j.lines, "\n")
|
||||||
|
if !strings.Contains(logs, "Install log: ") {
|
||||||
|
t.Fatalf("missing install log line: %v", j.lines)
|
||||||
|
}
|
||||||
|
if !strings.Contains(logs, "line1") || !strings.Contains(logs, "line2") {
|
||||||
|
t.Fatalf("missing streamed output: %v", j.lines)
|
||||||
|
}
|
||||||
|
if j.err != "" {
|
||||||
|
t.Fatalf("unexpected error: %q", j.err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
q := &taskQueue{
|
||||||
|
opts: &HandlerOptions{App: &app.App{}},
|
||||||
|
statePath: filepath.Join(dir, "tasks-state.json"),
|
||||||
|
logsDir: filepath.Join(dir, "tasks"),
|
||||||
|
kmsgWatcher: newKmsgWatcher(nil),
|
||||||
|
}
|
||||||
|
tk := &Task{
|
||||||
|
ID: "cpu-panic-1",
|
||||||
|
Name: "CPU SAT",
|
||||||
|
Target: "cpu",
|
||||||
|
Status: TaskRunning,
|
||||||
|
CreatedAt: time.Now(),
|
||||||
|
}
|
||||||
|
j := &jobState{}
|
||||||
|
|
||||||
|
orig := runCPUAcceptancePackCtx
|
||||||
|
runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, _ int, _ func(string)) (string, error) {
|
||||||
|
panic("boom")
|
||||||
|
}
|
||||||
|
defer func() { runCPUAcceptancePackCtx = orig }()
|
||||||
|
|
||||||
|
q.executeTask(tk, j, context.Background())
|
||||||
|
|
||||||
|
if tk.Status != TaskFailed {
|
||||||
|
t.Fatalf("status=%q want %q", tk.Status, TaskFailed)
|
||||||
|
}
|
||||||
|
if tk.DoneAt == nil {
|
||||||
|
t.Fatal("expected done_at to be set")
|
||||||
|
}
|
||||||
|
if !strings.Contains(tk.ErrMsg, "task panic: boom") {
|
||||||
|
t.Fatalf("task error=%q", tk.ErrMsg)
|
||||||
|
}
|
||||||
|
if !strings.Contains(j.err, "task panic: boom") {
|
||||||
|
t.Fatalf("job error=%q", j.err)
|
||||||
|
}
|
||||||
|
q.kmsgWatcher.mu.Lock()
|
||||||
|
activeCount := q.kmsgWatcher.activeCount
|
||||||
|
window := q.kmsgWatcher.window
|
||||||
|
q.kmsgWatcher.mu.Unlock()
|
||||||
|
if activeCount != 0 {
|
||||||
|
t.Fatalf("activeCount=%d want 0", activeCount)
|
||||||
|
}
|
||||||
|
if window != nil {
|
||||||
|
t.Fatalf("expected kmsg window to be cleared, got %+v", window)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
16
audit/scripts/resolve-version.sh
Executable file
16
audit/scripts/resolve-version.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
tag="$(git describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
|
||||||
|
|
||||||
|
case "${tag}" in
|
||||||
|
v*)
|
||||||
|
printf '%s\n' "${tag#v}"
|
||||||
|
;;
|
||||||
|
"")
|
||||||
|
printf 'dev\n'
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
printf '%s\n' "${tag}"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
2
bible
2
bible
Submodule bible updated: 456c1f022c...1d89a4918e
@@ -9,6 +9,34 @@ All live metrics charts in the web UI are server-side SVG images served by Go
|
|||||||
and polled by the browser every 2 seconds via `<img src="...?t=now">`.
|
and polled by the browser every 2 seconds via `<img src="...?t=now">`.
|
||||||
There is no client-side canvas or JS chart library.
|
There is no client-side canvas or JS chart library.
|
||||||
|
|
||||||
|
## Rule: live charts must be visually uniform
|
||||||
|
|
||||||
|
Live charts are a single UI family, not a set of one-off widgets. New charts and
|
||||||
|
changes to existing charts must keep the same rendering model and presentation
|
||||||
|
rules unless there is an explicit architectural decision to diverge.
|
||||||
|
|
||||||
|
Default expectations:
|
||||||
|
|
||||||
|
- same server-side SVG pipeline for all live metrics charts
|
||||||
|
- same refresh behaviour and failure handling in the browser
|
||||||
|
- same canvas size class and card layout
|
||||||
|
- same legend placement policy across charts
|
||||||
|
- same axis, title, and summary conventions
|
||||||
|
- no chart-specific visual exceptions added as a quick fix
|
||||||
|
|
||||||
|
Current default for live charts:
|
||||||
|
|
||||||
|
- legend below the plot area when a chart has 8 series or fewer
|
||||||
|
- legend hidden when a chart has more than 8 series
|
||||||
|
- 10 equal Y-axis steps across the chart height
|
||||||
|
- 1400 x 360 SVG canvas with legend
|
||||||
|
- 1400 x 288 SVG canvas without legend
|
||||||
|
- full-width card rendering in a single-column stack
|
||||||
|
|
||||||
|
If one chart needs a different layout or legend behaviour, treat that as a
|
||||||
|
design-level decision affecting the whole chart family, not as a local tweak to
|
||||||
|
just one endpoint.
|
||||||
|
|
||||||
### Why go-analyze/charts
|
### Why go-analyze/charts
|
||||||
|
|
||||||
- Pure Go, no CGO — builds cleanly inside the live-build container
|
- Pure Go, no CGO — builds cleanly inside the live-build container
|
||||||
@@ -29,7 +57,8 @@ self-contained SVG renderer used **only** for completed SAT run reports
|
|||||||
| `GET /api/metrics/chart/server.svg` | CPU temp, CPU load %, mem load %, power W, fan RPMs |
|
| `GET /api/metrics/chart/server.svg` | CPU temp, CPU load %, mem load %, power W, fan RPMs |
|
||||||
| `GET /api/metrics/chart/gpu/{idx}.svg` | GPU temp °C, load %, mem %, power W |
|
| `GET /api/metrics/chart/gpu/{idx}.svg` | GPU temp °C, load %, mem %, power W |
|
||||||
|
|
||||||
Charts are 1400 × 280 px SVG. The page renders them at `width: 100%` in a
|
Charts are 1400 × 360 px SVG when the legend is shown, and 1400 × 288 px when
|
||||||
|
the legend is hidden. The page renders them at `width: 100%` in a
|
||||||
single-column layout so they always fill the viewport width.
|
single-column layout so they always fill the viewport width.
|
||||||
|
|
||||||
### Ring buffers
|
### Ring buffers
|
||||||
|
|||||||
@@ -60,6 +60,8 @@ Rules:
|
|||||||
- Chromium opens `http://localhost/` — the full interactive web UI
|
- Chromium opens `http://localhost/` — the full interactive web UI
|
||||||
- SSH is independent from the desktop path
|
- SSH is independent from the desktop path
|
||||||
- serial console support is enabled for VM boot debugging
|
- serial console support is enabled for VM boot debugging
|
||||||
|
- Default boot keeps the server-safe graphics path (`nomodeset` + forced `fbdev`) for IPMI/BMC consoles
|
||||||
|
- Higher-resolution mode selection is expected only when booting through an explicit `bee.display=kms` menu entry, which disables the forced `fbdev` Xorg config before `lightdm`
|
||||||
|
|
||||||
## ISO build sequence
|
## ISO build sequence
|
||||||
|
|
||||||
@@ -81,9 +83,9 @@ build-in-container.sh [--authorized-keys /path/to/keys]
|
|||||||
7. `build-cublas.sh`:
|
7. `build-cublas.sh`:
|
||||||
a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
|
a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
|
||||||
b. verify packages against repo `Packages.gz`
|
b. verify packages against repo `Packages.gz`
|
||||||
c. extract headers for `bee-gpu-stress` build
|
c. extract headers for `bee-gpu-burn` worker build
|
||||||
d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
|
d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
|
||||||
8. build `bee-gpu-stress` against extracted cuBLASLt/cudart headers
|
8. build `bee-gpu-burn` worker against extracted cuBLASLt/cudart headers
|
||||||
9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
|
9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
|
||||||
10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
|
10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
|
||||||
11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
|
11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
|
||||||
@@ -104,7 +106,7 @@ Build host notes:
|
|||||||
1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
|
1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
|
||||||
2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
|
2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
|
||||||
- NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
|
- NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
|
||||||
- `bee-gpu-stress` must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
|
- `bee-gpu-burn` worker must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
|
||||||
- The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
|
- The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
|
||||||
- The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
|
- The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
|
||||||
- The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
|
- The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
|
||||||
@@ -153,18 +155,17 @@ Current validation state:
|
|||||||
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.
|
||||||
|
|
||||||
Acceptance flows:
|
Acceptance flows:
|
||||||
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + mixed-precision `bee-gpu-stress`
|
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-burn`
|
||||||
|
- NVIDIA GPU burn-in can use either `bee-gpu-burn` or `bee-john-gpu-stress` (John the Ripper jumbo via OpenCL)
|
||||||
- `bee sat memory` → `memtester` archive
|
- `bee sat memory` → `memtester` archive
|
||||||
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
- `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
|
||||||
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
|
- SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
|
||||||
- `bee-gpu-stress` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
|
- `bee-gpu-burn` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
|
||||||
- Ampere: `fp16` + `fp32`/TF32 tensor-core load
|
- Ampere: `fp16` + `fp32`/TF32 tensor-core load
|
||||||
- Ada / Hopper: add `fp8`
|
- Ada / Hopper: add `fp8`
|
||||||
- Blackwell+: add `fp4`
|
- Blackwell+: add `fp4`
|
||||||
- PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
|
- PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
|
||||||
- Runtime overrides:
|
- Runtime overrides:
|
||||||
- `BEE_GPU_STRESS_SECONDS`
|
|
||||||
- `BEE_GPU_STRESS_SIZE_MB`
|
|
||||||
- `BEE_MEMTESTER_SIZE_MB`
|
- `BEE_MEMTESTER_SIZE_MB`
|
||||||
- `BEE_MEMTESTER_PASSES`
|
- `BEE_MEMTESTER_PASSES`
|
||||||
|
|
||||||
@@ -179,6 +180,6 @@ Web UI: Acceptance Tests page → Run Test button
|
|||||||
```
|
```
|
||||||
|
|
||||||
**Critical invariants:**
|
**Critical invariants:**
|
||||||
- `bee-gpu-stress` uses `exec.CommandContext` — killed on job context cancel.
|
- `bee-gpu-burn` / `bee-john-gpu-stress` use `exec.CommandContext` — killed on job context cancel.
|
||||||
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
|
- Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
|
||||||
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
|
- SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
- Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
|
||||||
- Machine-readable health summary derived from collector verdicts
|
- Machine-readable health summary derived from collector verdicts
|
||||||
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
|
- Operator-triggered acceptance tests for NVIDIA, memory, and storage
|
||||||
- NVIDIA SAT includes both diagnostic collection and mixed-precision GPU stress via `bee-gpu-stress`
|
- NVIDIA SAT includes diagnostic collection plus a lightweight in-image GPU stress step via `bee-gpu-burn`
|
||||||
- `bee-gpu-stress` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
|
- `bee-gpu-burn` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
|
||||||
- Automatic boot audit with operator-facing local console and SSH access
|
- Automatic boot audit with operator-facing local console and SSH access
|
||||||
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
- NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
|
||||||
- SSH access (OpenSSH) always available for inspection and debugging
|
- SSH access (OpenSSH) always available for inspection and debugging
|
||||||
@@ -70,7 +70,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
| SSH | OpenSSH server |
|
| SSH | OpenSSH server |
|
||||||
| NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
|
| NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
|
||||||
| NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
|
| NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
|
||||||
| GPU stress backend | `bee-gpu-stress` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
|
| GPU stress backend | `bee-gpu-burn` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
|
||||||
| Builder | Debian 12 host/VM or Debian 12 container image |
|
| Builder | Debian 12 host/VM or Debian 12 container image |
|
||||||
|
|
||||||
## Operator UX
|
## Operator UX
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ Use the official proprietary NVIDIA `.run` installer for both kernel modules and
|
|||||||
- Kernel modules and nvidia-smi come from a single verified source.
|
- Kernel modules and nvidia-smi come from a single verified source.
|
||||||
- NVIDIA publishes `.sha256sum` alongside each installer — download and verify before use.
|
- NVIDIA publishes `.sha256sum` alongside each installer — download and verify before use.
|
||||||
- Driver version pinned in `iso/builder/VERSIONS` as `NVIDIA_DRIVER_VERSION`.
|
- Driver version pinned in `iso/builder/VERSIONS` as `NVIDIA_DRIVER_VERSION`.
|
||||||
|
- DCGM must track the CUDA user-mode driver major version exposed by `nvidia-smi`.
|
||||||
|
- For NVIDIA driver branch `590` with CUDA `13.x`, use DCGM 4 package family `datacenter-gpu-manager-4-cuda13`; legacy `datacenter-gpu-manager` 3.x does not provide a working path for this stack.
|
||||||
- Build process: download `.run`, extract, compile `kernel/` sources against `linux-lts-dev`.
|
- Build process: download `.run`, extract, compile `kernel/` sources against `linux-lts-dev`.
|
||||||
- Modules cached in `dist/nvidia-<version>-<kver>/` — rebuild only on version or kernel change.
|
- Modules cached in `dist/nvidia-<version>-<kver>/` — rebuild only on version or kernel change.
|
||||||
- ISO size increases by ~50MB for .ko files + nvidia-smi.
|
- ISO size increases by ~50MB for .ko files + nvidia-smi.
|
||||||
|
|||||||
224
bible-local/decisions/2026-04-01-memtest-build-strategy.md
Normal file
224
bible-local/decisions/2026-04-01-memtest-build-strategy.md
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
# Decision: Treat memtest as explicit ISO content, not as trusted live-build magic
|
||||||
|
|
||||||
|
**Date:** 2026-04-01
|
||||||
|
**Status:** resolved
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
We have already iterated on `memtest` multiple times and kept cycling between the same ideas.
|
||||||
|
The commit history shows several distinct attempts:
|
||||||
|
|
||||||
|
- `f91bce8` — fixed Bookworm memtest file names to `memtest86+x64.bin` / `memtest86+x64.efi`
|
||||||
|
- `5857805` — added a binary hook to copy memtest files from the build tree into the ISO root
|
||||||
|
- `f96b149` — added fallback extraction from the cached `.deb` when `chroot/boot/` stayed empty
|
||||||
|
- `d43a9ae` — removed the custom hook and switched back to live-build built-in memtest integration
|
||||||
|
- `60cb8f8` — restored explicit memtest menu entries and added ISO validation
|
||||||
|
- `3dbc218` / `3869788` — added archived build logs and better memtest diagnostics
|
||||||
|
|
||||||
|
Current evidence from the archived `easy-bee-nvidia-v3.14-amd64` logs dated 2026-04-01:
|
||||||
|
|
||||||
|
- `lb binary_memtest` does run and installs `memtest86+`
|
||||||
|
- but the final ISO still does **not** contain `boot/memtest86+x64.bin`
|
||||||
|
- the final ISO also does **not** contain memtest menu entries in `boot/grub/grub.cfg` or `isolinux/live.cfg`
|
||||||
|
|
||||||
|
So the assumption "live-build built-in memtest integration is enough on this stack" is currently false for this project until proven otherwise by a real built ISO.
|
||||||
|
|
||||||
|
Additional evidence from the archived `easy-bee-nvidia-v3.17-dirty-amd64` logs dated 2026-04-01:
|
||||||
|
|
||||||
|
- the build now completes successfully because memtest is non-blocking by default
|
||||||
|
- `lb binary_memtest` still runs and installs `memtest86+`
|
||||||
|
- the project-owned hook `config/hooks/normal/9100-memtest.hook.binary` does execute
|
||||||
|
- but it executes too early for its current target paths:
|
||||||
|
- `binary/boot/grub/grub.cfg` is still missing at hook time
|
||||||
|
- `binary/isolinux/live.cfg` is still missing at hook time
|
||||||
|
- memtest binaries are also still absent in `binary/boot/`
|
||||||
|
- later in the build, live-build does create intermediate bootloader configs with memtest lines in the workdir
|
||||||
|
- but the final ISO still lacks memtest binaries and still lacks memtest lines in extracted ISO `boot/grub/grub.cfg` and `isolinux/live.cfg`
|
||||||
|
|
||||||
|
So the assumption "the current normal binary hook path is late enough to patch final memtest artifacts" is also false.
|
||||||
|
|
||||||
|
Correction after inspecting the real `easy-bee-nvidia-v3.20-5-g76a9100-amd64.iso`
|
||||||
|
artifact dated 2026-04-01:
|
||||||
|
|
||||||
|
- the final ISO does contain `boot/memtest86+x64.bin`
|
||||||
|
- the final ISO does contain `boot/memtest86+x64.efi`
|
||||||
|
- the final ISO does contain memtest menu entries in both `boot/grub/grub.cfg`
|
||||||
|
and `isolinux/live.cfg`
|
||||||
|
- so `v3.20-5-g76a9100` was **not** another real memtest regression in the
|
||||||
|
shipped ISO
|
||||||
|
- the regression was in the build-time validator/debug path in `build.sh`
|
||||||
|
|
||||||
|
Root cause of the false alarm:
|
||||||
|
|
||||||
|
- `build.sh` treated "ISO reader command exists" as equivalent to "ISO reader
|
||||||
|
successfully listed/extracted members"
|
||||||
|
- `iso_list_files` / `iso_extract_file` failures were collapsed into the same
|
||||||
|
observable output as "memtest content missing"
|
||||||
|
- this made a reader failure look identical to a missing memtest payload
|
||||||
|
- as a result, we re-entered the same memtest investigation loop even though
|
||||||
|
the real ISO was already correct
|
||||||
|
|
||||||
|
Additional correction from the subsequent `v3.21` build logs dated 2026-04-01:
|
||||||
|
|
||||||
|
- once ISO reading was fixed, the post-build debug correctly showed the raw ISO
|
||||||
|
still carried live-build's default memtest layout (`live/memtest.bin`,
|
||||||
|
`live/memtest.efi`, `boot/grub/memtest.cfg`, `isolinux/memtest.cfg`)
|
||||||
|
- that mismatch is expected to trigger project recovery, because `bee` requires
|
||||||
|
`boot/memtest86+x64.bin` / `boot/memtest86+x64.efi` plus matching menu paths
|
||||||
|
- however, `build.sh` exited before recovery because `set -e` treated a direct
|
||||||
|
`iso_memtest_present` return code of `1` as fatal
|
||||||
|
- so the next repeated loop was caused by shell control flow, not by proof that
|
||||||
|
the recovery design itself was wrong
|
||||||
|
|
||||||
|
## Known Failed Attempts
|
||||||
|
|
||||||
|
These approaches were already tried and should not be repeated blindly:
|
||||||
|
|
||||||
|
1. Built-in live-build memtest only.
|
||||||
|
Reason it failed:
|
||||||
|
- `lb binary_memtest` runs, but the final ISO still misses memtest binaries and menu entries.
|
||||||
|
|
||||||
|
2. Fixing only the memtest file names for Debian Bookworm.
|
||||||
|
Reason it failed:
|
||||||
|
- correct file names alone do not make the files appear in the final ISO.
|
||||||
|
|
||||||
|
3. Copying memtest from `chroot/boot/` into `binary/boot/` via a binary hook.
|
||||||
|
Reason it failed:
|
||||||
|
- in this stack `chroot/boot/` is often empty for memtest payloads at the relevant time.
|
||||||
|
|
||||||
|
4. Fallback extraction from cached `memtest86+` `.deb`.
|
||||||
|
Reason it failed:
|
||||||
|
- this was explored already and was not enough to stabilize the final ISO path end-to-end.
|
||||||
|
|
||||||
|
5. Restoring explicit memtest menu entries in source bootloader templates only.
|
||||||
|
Reason it failed:
|
||||||
|
- memtest lines in source templates or intermediate workdir configs do not guarantee the final ISO contains them.
|
||||||
|
|
||||||
|
6. Patching `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` from the current `config/hooks/normal/9100-memtest.hook.binary`.
|
||||||
|
Reason it failed:
|
||||||
|
- the hook runs before those files exist, so the hook cannot patch them there.
|
||||||
|
|
||||||
|
## What This Means
|
||||||
|
|
||||||
|
When revisiting memtest later, start from the constraints above rather than retrying the same patterns:
|
||||||
|
|
||||||
|
- do not assume the built-in memtest stage is sufficient
|
||||||
|
- do not assume `chroot/boot/` will contain memtest payloads
|
||||||
|
- do not assume source bootloader templates are the last writer of final ISO configs
|
||||||
|
- do not assume the current normal binary hook timing is late enough for final patching
|
||||||
|
|
||||||
|
Any future memtest fix must explicitly identify:
|
||||||
|
|
||||||
|
- where the memtest binaries are reliably available at build time
|
||||||
|
- which exact build stage writes the final bootloader configs that land in the ISO
|
||||||
|
- and a post-build proof from a real ISO, not only from intermediate workdir files
|
||||||
|
- whether the ISO inspection step itself succeeded, rather than merely whether
|
||||||
|
the validator printed a memtest warning
|
||||||
|
- whether a non-zero probe is intentionally handled inside an `if` / `case`
|
||||||
|
context rather than accidentally tripping `set -e`
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
For `bee`, memtest must be treated as an explicit ISO artifact with explicit post-build validation.
|
||||||
|
|
||||||
|
Project rules from now on:
|
||||||
|
|
||||||
|
- Do **not** trust `--memtest memtest86+` by itself.
|
||||||
|
- A memtest implementation is considered valid only if the produced ISO actually contains:
|
||||||
|
- `boot/memtest86+x64.bin`
|
||||||
|
- `boot/memtest86+x64.efi`
|
||||||
|
- a GRUB menu entry
|
||||||
|
- an isolinux menu entry
|
||||||
|
- If live-build built-in integration does not produce those artifacts, use an explicit project-owned mechanism such as:
|
||||||
|
- a binary hook copying files into `binary/boot/`
|
||||||
|
- extraction from the cached `memtest86+` `.deb`
|
||||||
|
- another deterministic build-time copy step
|
||||||
|
- Do **not** remove such explicit logic later unless a fresh real ISO build proves that built-in integration alone produces all required files and menu entries.
|
||||||
|
|
||||||
|
Current implementation direction:
|
||||||
|
|
||||||
|
- keep the live-build memtest stage enabled if it helps package acquisition
|
||||||
|
- do not rely on the current early `binary_hooks` timing for final patching
|
||||||
|
- prefer a post-`lb build` recovery step in `build.sh` that:
|
||||||
|
- patches the fully materialized `LB_DIR/binary` tree
|
||||||
|
- injects memtest binaries there
|
||||||
|
- ensures final bootloader entries there
|
||||||
|
- reruns late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) after the patch
|
||||||
|
- also treat ISO validation tooling as part of the critical path:
|
||||||
|
- install a stable ISO reader in the builder image
|
||||||
|
- fail with an explicit reader error if ISO listing/extraction fails
|
||||||
|
- do not treat reader failure as evidence that memtest is missing
|
||||||
|
- do not call a probe that may return "needs recovery" as a bare command under
|
||||||
|
`set -e`; wrap it in explicit control flow
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
- Future memtest changes must begin by reading this ADR and the commits listed above.
|
||||||
|
- Future memtest changes must also begin by reading the failed-attempt list above.
|
||||||
|
- We should stop re-introducing "prefer built-in live-build memtest" as a default assumption without new evidence.
|
||||||
|
- Memtest validation in `build.sh` is not optional; it is the acceptance gate that prevents another silent regression.
|
||||||
|
- But validation output is only trustworthy if ISO reading itself succeeded. A
|
||||||
|
"missing memtest" warning without a successful ISO read is not evidence.
|
||||||
|
- If we change memtest strategy again, we must update this ADR with the exact build evidence that justified the change.
|
||||||
|
|
||||||
|
## Working Solution (confirmed 2026-04-01, commits 76a9100 → 2baf3be)
|
||||||
|
|
||||||
|
This approach was confirmed working in ISO `easy-bee-nvidia-v3.20-5-g76a9100-amd64.iso`
|
||||||
|
and validated again in subsequent builds. The final ISO contains all required memtest artifacts.
|
||||||
|
|
||||||
|
### Components
|
||||||
|
|
||||||
|
**1. Binary hook `config/hooks/normal/9100-memtest.hook.binary`**
|
||||||
|
|
||||||
|
Runs inside the live-build binary phase. Does not patch bootloader files at hook time —
|
||||||
|
those files may not exist yet. Instead:
|
||||||
|
|
||||||
|
- Tries to copy `memtest86+x64.bin` / `memtest86+x64.efi` from `chroot/boot/` first.
|
||||||
|
- Falls back to extracting from the cached `.deb` (via `dpkg-deb -x`) if `chroot/boot/` is empty.
|
||||||
|
- Appends GRUB and isolinux menu entries only if the respective cfg files already exist at hook time.
|
||||||
|
If they do not exist, the hook warns and continues (does not fail).
|
||||||
|
|
||||||
|
Controlled by `BEE_REQUIRE_MEMTEST=1` env var to turn warnings into hard errors when needed.
|
||||||
|
|
||||||
|
**2. Post-`lb build` recovery step in `build.sh`**
|
||||||
|
|
||||||
|
After `lb build` completes, `build.sh` checks whether the fully materialized `binary/` tree
|
||||||
|
contains all required memtest artifacts. If not:
|
||||||
|
|
||||||
|
- Copies/extracts memtest binaries into `binary/boot/`.
|
||||||
|
- Patches `binary/boot/grub/grub.cfg` and `binary/isolinux/live.cfg` directly.
|
||||||
|
- Reruns the late binary stages (`binary_checksums`, `binary_iso`, `binary_zsync`) to rebuild
|
||||||
|
the ISO with the patched tree.
|
||||||
|
|
||||||
|
This is the deterministic safety net: even if the hook runs at the wrong time, the recovery
|
||||||
|
step handles the final `binary/` tree after live-build has written all bootloader configs.
|
||||||
|
|
||||||
|
**3. ISO validation hardening**
|
||||||
|
|
||||||
|
The memtest probe in `build.sh` is wrapped in explicit `if` / `case` control flow, not called
|
||||||
|
as a bare command under `set -e`. A non-zero probe return (needs recovery) is intentional and
|
||||||
|
handled — it does not abort the build prematurely.
|
||||||
|
|
||||||
|
ISO reading (`xorriso -indev -ls` / extraction) is treated as a separate prerequisite.
|
||||||
|
If the reader fails, the validator reports a reader error explicitly, not a memtest warning.
|
||||||
|
This prevents the false-negative loop that burned 2026-04-01 v3.14–v3.19.
|
||||||
|
|
||||||
|
### Why this works when earlier attempts did not
|
||||||
|
|
||||||
|
The earlier patterns all shared a single flaw: they assumed a single build-time point
|
||||||
|
(hook or source template) would be the last writer of bootloader configs and memtest payloads.
|
||||||
|
In live-build on Debian Bookworm that assumption is false — live-build continues writing
|
||||||
|
bootloader files after custom hooks run, and `chroot/boot/` does not reliably hold memtest payloads.
|
||||||
|
|
||||||
|
The recovery step sidesteps the ordering problem entirely: it acts on the fully materialized
|
||||||
|
`binary/` tree after `lb build` finishes, then rebuilds the ISO from that patched tree.
|
||||||
|
There is no ordering dependency to get wrong.
|
||||||
|
|
||||||
|
### Do not revert
|
||||||
|
|
||||||
|
Do not remove the recovery step or the hook without a fresh real ISO build proving
|
||||||
|
live-build alone produces all four required artifacts:
|
||||||
|
- `boot/memtest86+x64.bin`
|
||||||
|
- `boot/memtest86+x64.efi`
|
||||||
|
- memtest entry in `boot/grub/grub.cfg`
|
||||||
|
- memtest entry in `isolinux/live.cfg`
|
||||||
@@ -5,3 +5,4 @@ One file per decision, named `YYYY-MM-DD-short-topic.md`.
|
|||||||
| Date | Decision | Status |
|
| Date | Decision | Status |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
| 2026-03-05 | Use NVIDIA proprietary driver | active |
|
| 2026-03-05 | Use NVIDIA proprietary driver | active |
|
||||||
|
| 2026-04-01 | Treat memtest as explicit ISO content | active |
|
||||||
|
|||||||
@@ -13,9 +13,50 @@ Use one of:
|
|||||||
|
|
||||||
This applies to:
|
This applies to:
|
||||||
- `iso/builder/config/package-lists/*.list.chroot`
|
- `iso/builder/config/package-lists/*.list.chroot`
|
||||||
- Any package referenced in `grub.cfg`, hooks, or overlay scripts (e.g. file paths like `/boot/memtest86+x64.bin`)
|
- Any package referenced in bootloader configs, hooks, or overlay scripts
|
||||||
|
|
||||||
## Example of what goes wrong without this
|
## Memtest rule
|
||||||
|
|
||||||
`memtest86+` in Debian bookworm installs `/boot/memtest86+x64.bin`, not `/boot/memtest86+.bin`.
|
Do not assume live-build's built-in memtest integration is sufficient for `bee`.
|
||||||
Guessing the filename caused a broken GRUB entry that only surfaced at boot time, after a full rebuild.
|
We already tried that path and regressed again on 2026-04-01: `lb binary_memtest`
|
||||||
|
ran, but the final ISO still lacked memtest binaries and menu entries.
|
||||||
|
|
||||||
|
For this project, memtest is accepted only when the produced ISO actually
|
||||||
|
contains all of the following:
|
||||||
|
|
||||||
|
- `boot/memtest86+x64.bin`
|
||||||
|
- `boot/memtest86+x64.efi`
|
||||||
|
- a memtest entry in `boot/grub/grub.cfg`
|
||||||
|
- a memtest entry in `isolinux/live.cfg`
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
|
||||||
|
- Keep explicit post-build memtest validation in `build.sh`.
|
||||||
|
- Treat ISO reader success as a separate prerequisite from memtest content.
|
||||||
|
If the reader cannot list or extract from the ISO, that is a validator
|
||||||
|
failure, not proof that memtest is missing.
|
||||||
|
- If built-in integration does not produce the artifacts above, use a
|
||||||
|
deterministic project-owned copy/extract step instead of hoping live-build
|
||||||
|
will "start working".
|
||||||
|
- Do not switch back to built-in-only memtest without fresh build evidence from
|
||||||
|
a real ISO.
|
||||||
|
- If you reference memtest files manually, verify the exact package file list
|
||||||
|
first for the target Debian release.
|
||||||
|
|
||||||
|
Known bad loops for this repository:
|
||||||
|
|
||||||
|
- Do not retry built-in-only memtest without new evidence. We already proved
|
||||||
|
that `lb binary_memtest` can run while the final ISO still has no memtest.
|
||||||
|
- Do not assume fixing memtest file names is enough. Correct names did not fix
|
||||||
|
the final artifact path.
|
||||||
|
- Do not assume `chroot/boot/` contains memtest payloads at the time hooks run.
|
||||||
|
- Do not assume source `grub.cfg` / `live.cfg.in` are the final writers of ISO
|
||||||
|
bootloader configs.
|
||||||
|
- Do not assume the current `config/hooks/normal/9100-memtest.hook.binary`
|
||||||
|
timing is late enough to patch final `binary/boot/grub/grub.cfg` or
|
||||||
|
`binary/isolinux/live.cfg`; logs from 2026-04-01 showed those files were not
|
||||||
|
present yet when the hook executed.
|
||||||
|
- Do not treat a validator warning as ground truth until you have confirmed the
|
||||||
|
ISO reader actually succeeded. On 2026-04-01 we misdiagnosed another memtest
|
||||||
|
regression because the final ISO was correct but the validator produced a
|
||||||
|
false negative.
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ sh iso/builder/build-in-container.sh --cache-dir /path/to/cache
|
|||||||
- The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
|
- The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
|
||||||
- The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
|
- The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
|
||||||
- Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
|
- Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
|
||||||
|
- The NVIDIA variant installs DCGM 4 packages matched to the CUDA user-mode driver major version. For driver branch `590` / CUDA `13.x`, the package family is `datacenter-gpu-manager-4-cuda13` rather than legacy `datacenter-gpu-manager`.
|
||||||
- Override the container platform only if you know why:
|
- Override the container platform only if you know why:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|||||||
@@ -17,12 +17,23 @@ RUN apt-get update -qq && apt-get install -y \
|
|||||||
wget \
|
wget \
|
||||||
curl \
|
curl \
|
||||||
tar \
|
tar \
|
||||||
|
libarchive-tools \
|
||||||
xz-utils \
|
xz-utils \
|
||||||
rsync \
|
rsync \
|
||||||
build-essential \
|
build-essential \
|
||||||
gcc \
|
gcc \
|
||||||
make \
|
make \
|
||||||
perl \
|
perl \
|
||||||
|
pkg-config \
|
||||||
|
yasm \
|
||||||
|
libssl-dev \
|
||||||
|
zlib1g-dev \
|
||||||
|
libbz2-dev \
|
||||||
|
libgmp-dev \
|
||||||
|
libpcap-dev \
|
||||||
|
libsqlite3-dev \
|
||||||
|
libcurl4-openssl-dev \
|
||||||
|
ocl-icd-opencl-dev \
|
||||||
linux-headers-amd64 \
|
linux-headers-amd64 \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ NCCL_TESTS_VERSION=2.13.10
|
|||||||
NVCC_VERSION=12.8
|
NVCC_VERSION=12.8
|
||||||
CUBLAS_VERSION=13.0.2.14-1
|
CUBLAS_VERSION=13.0.2.14-1
|
||||||
CUDA_USERSPACE_VERSION=13.0.96-1
|
CUDA_USERSPACE_VERSION=13.0.96-1
|
||||||
DCGM_VERSION=3.3.9
|
DCGM_VERSION=4.5.3-1
|
||||||
|
JOHN_JUMBO_COMMIT=67fcf9fe5a
|
||||||
ROCM_VERSION=6.3.4
|
ROCM_VERSION=6.3.4
|
||||||
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
||||||
ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
|
ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
|
||||||
|
|||||||
@@ -29,10 +29,10 @@ lb config noauto \
|
|||||||
--security true \
|
--security true \
|
||||||
--linux-flavours "amd64" \
|
--linux-flavours "amd64" \
|
||||||
--linux-packages "${LB_LINUX_PACKAGES}" \
|
--linux-packages "${LB_LINUX_PACKAGES}" \
|
||||||
--memtest none \
|
--memtest memtest86+ \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
|
||||||
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
--chroot-squashfs-compression-type zstd \
|
--chroot-squashfs-compression-type zstd \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
@@ -29,8 +29,14 @@ typedef void *CUfunction;
|
|||||||
typedef void *CUstream;
|
typedef void *CUstream;
|
||||||
|
|
||||||
#define CU_SUCCESS 0
|
#define CU_SUCCESS 0
|
||||||
|
#define CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16
|
||||||
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
|
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
|
||||||
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
|
#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
|
||||||
|
#define MAX_STRESS_STREAMS 16
|
||||||
|
#define MAX_CUBLAS_PROFILES 5
|
||||||
|
#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
|
||||||
|
#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
|
||||||
|
#define STRESS_LAUNCH_DEPTH 8
|
||||||
|
|
||||||
static const char *ptx_source =
|
static const char *ptx_source =
|
||||||
".version 6.0\n"
|
".version 6.0\n"
|
||||||
@@ -97,6 +103,9 @@ typedef CUresult (*cuLaunchKernel_fn)(CUfunction,
|
|||||||
CUstream,
|
CUstream,
|
||||||
void **,
|
void **,
|
||||||
void **);
|
void **);
|
||||||
|
typedef CUresult (*cuMemGetInfo_fn)(size_t *, size_t *);
|
||||||
|
typedef CUresult (*cuStreamCreate_fn)(CUstream *, unsigned int);
|
||||||
|
typedef CUresult (*cuStreamDestroy_fn)(CUstream);
|
||||||
typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
|
typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
|
||||||
typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);
|
typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);
|
||||||
|
|
||||||
@@ -118,6 +127,9 @@ struct cuda_api {
|
|||||||
cuModuleLoadDataEx_fn cuModuleLoadDataEx;
|
cuModuleLoadDataEx_fn cuModuleLoadDataEx;
|
||||||
cuModuleGetFunction_fn cuModuleGetFunction;
|
cuModuleGetFunction_fn cuModuleGetFunction;
|
||||||
cuLaunchKernel_fn cuLaunchKernel;
|
cuLaunchKernel_fn cuLaunchKernel;
|
||||||
|
cuMemGetInfo_fn cuMemGetInfo;
|
||||||
|
cuStreamCreate_fn cuStreamCreate;
|
||||||
|
cuStreamDestroy_fn cuStreamDestroy;
|
||||||
cuGetErrorName_fn cuGetErrorName;
|
cuGetErrorName_fn cuGetErrorName;
|
||||||
cuGetErrorString_fn cuGetErrorString;
|
cuGetErrorString_fn cuGetErrorString;
|
||||||
};
|
};
|
||||||
@@ -128,9 +140,10 @@ struct stress_report {
|
|||||||
int cc_major;
|
int cc_major;
|
||||||
int cc_minor;
|
int cc_minor;
|
||||||
int buffer_mb;
|
int buffer_mb;
|
||||||
|
int stream_count;
|
||||||
unsigned long iterations;
|
unsigned long iterations;
|
||||||
uint64_t checksum;
|
uint64_t checksum;
|
||||||
char details[1024];
|
char details[16384];
|
||||||
};
|
};
|
||||||
|
|
||||||
static int load_symbol(void *lib, const char *name, void **out) {
|
static int load_symbol(void *lib, const char *name, void **out) {
|
||||||
@@ -144,7 +157,7 @@ static int load_cuda(struct cuda_api *api) {
|
|||||||
if (!api->lib) {
|
if (!api->lib) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return
|
if (!(
|
||||||
load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
|
load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
|
||||||
load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
|
load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
|
||||||
load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
|
load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
|
||||||
@@ -160,7 +173,17 @@ static int load_cuda(struct cuda_api *api) {
|
|||||||
load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
|
load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
|
||||||
load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
|
load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
|
||||||
load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
|
load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
|
||||||
load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel);
|
load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel))) {
|
||||||
|
dlclose(api->lib);
|
||||||
|
memset(api, 0, sizeof(*api));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
load_symbol(api->lib, "cuMemGetInfo_v2", (void **)&api->cuMemGetInfo);
|
||||||
|
load_symbol(api->lib, "cuStreamCreate", (void **)&api->cuStreamCreate);
|
||||||
|
if (!load_symbol(api->lib, "cuStreamDestroy_v2", (void **)&api->cuStreamDestroy)) {
|
||||||
|
load_symbol(api->lib, "cuStreamDestroy", (void **)&api->cuStreamDestroy);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
|
static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
|
||||||
@@ -193,14 +216,12 @@ static double now_seconds(void) {
|
|||||||
return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
|
return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_CUBLASLT_HEADERS
|
|
||||||
static size_t round_down_size(size_t value, size_t multiple) {
|
static size_t round_down_size(size_t value, size_t multiple) {
|
||||||
if (multiple == 0 || value < multiple) {
|
if (multiple == 0 || value < multiple) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
return value - (value % multiple);
|
return value - (value % multiple);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *major, int *minor) {
|
static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *major, int *minor) {
|
||||||
int cc_major = 0;
|
int cc_major = 0;
|
||||||
@@ -220,6 +241,75 @@ static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *maj
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int query_multiprocessor_count(struct cuda_api *api, CUdevice dev, int *count) {
|
||||||
|
int mp_count = 0;
|
||||||
|
if (!check_rc(api,
|
||||||
|
"cuDeviceGetAttribute(multiprocessors)",
|
||||||
|
api->cuDeviceGetAttribute(&mp_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev))) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*count = mp_count;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t clamp_budget_to_free_memory(struct cuda_api *api, size_t requested_bytes) {
|
||||||
|
size_t free_bytes = 0;
|
||||||
|
size_t total_bytes = 0;
|
||||||
|
size_t max_bytes = requested_bytes;
|
||||||
|
|
||||||
|
if (!api->cuMemGetInfo) {
|
||||||
|
return requested_bytes;
|
||||||
|
}
|
||||||
|
if (api->cuMemGetInfo(&free_bytes, &total_bytes) != CU_SUCCESS || free_bytes == 0) {
|
||||||
|
return requested_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
max_bytes = (free_bytes * 9u) / 10u;
|
||||||
|
if (max_bytes < (size_t)4u * 1024u * 1024u) {
|
||||||
|
max_bytes = (size_t)4u * 1024u * 1024u;
|
||||||
|
}
|
||||||
|
if (requested_bytes > max_bytes) {
|
||||||
|
return max_bytes;
|
||||||
|
}
|
||||||
|
return requested_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int choose_stream_count(int mp_count, int planned_profiles, size_t total_budget, int have_streams) {
|
||||||
|
int stream_count = 1;
|
||||||
|
if (!have_streams || mp_count <= 0 || planned_profiles <= 0) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
stream_count = mp_count / 8;
|
||||||
|
if (stream_count < 2) {
|
||||||
|
stream_count = 2;
|
||||||
|
}
|
||||||
|
if (stream_count > MAX_STRESS_STREAMS) {
|
||||||
|
stream_count = MAX_STRESS_STREAMS;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (stream_count > 1) {
|
||||||
|
size_t per_stream_budget = total_budget / ((size_t)planned_profiles * (size_t)stream_count);
|
||||||
|
if (per_stream_budget >= MIN_STREAM_BUDGET_BYTES) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
stream_count--;
|
||||||
|
}
|
||||||
|
return stream_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
|
||||||
|
if (!api->cuStreamDestroy) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
if (streams[i]) {
|
||||||
|
api->cuStreamDestroy(streams[i]);
|
||||||
|
streams[i] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if HAVE_CUBLASLT_HEADERS
|
#if HAVE_CUBLASLT_HEADERS
|
||||||
static void append_detail(char *buf, size_t cap, const char *fmt, ...) {
|
static void append_detail(char *buf, size_t cap, const char *fmt, ...) {
|
||||||
size_t len = strlen(buf);
|
size_t len = strlen(buf);
|
||||||
@@ -242,12 +332,19 @@ static int run_ptx_fallback(struct cuda_api *api,
|
|||||||
int size_mb,
|
int size_mb,
|
||||||
struct stress_report *report) {
|
struct stress_report *report) {
|
||||||
CUcontext ctx = NULL;
|
CUcontext ctx = NULL;
|
||||||
CUdeviceptr device_mem = 0;
|
|
||||||
CUmodule module = NULL;
|
CUmodule module = NULL;
|
||||||
CUfunction kernel = NULL;
|
CUfunction kernel = NULL;
|
||||||
uint32_t sample[256];
|
uint32_t sample[256];
|
||||||
uint32_t words = 0;
|
CUdeviceptr device_mem[MAX_STRESS_STREAMS] = {0};
|
||||||
|
CUstream streams[MAX_STRESS_STREAMS] = {0};
|
||||||
|
uint32_t words[MAX_STRESS_STREAMS] = {0};
|
||||||
|
uint32_t rounds[MAX_STRESS_STREAMS] = {0};
|
||||||
|
void *params[MAX_STRESS_STREAMS][3];
|
||||||
|
size_t bytes_per_stream[MAX_STRESS_STREAMS] = {0};
|
||||||
unsigned long iterations = 0;
|
unsigned long iterations = 0;
|
||||||
|
int mp_count = 0;
|
||||||
|
int stream_count = 1;
|
||||||
|
int launches_per_wave = 0;
|
||||||
|
|
||||||
memset(report, 0, sizeof(*report));
|
memset(report, 0, sizeof(*report));
|
||||||
snprintf(report->backend, sizeof(report->backend), "driver-ptx");
|
snprintf(report->backend, sizeof(report->backend), "driver-ptx");
|
||||||
@@ -260,64 +357,109 @@ static int run_ptx_fallback(struct cuda_api *api,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t bytes = (size_t)size_mb * 1024u * 1024u;
|
size_t requested_bytes = (size_t)size_mb * 1024u * 1024u;
|
||||||
if (bytes < 4u * 1024u * 1024u) {
|
if (requested_bytes < MIN_PROFILE_BUDGET_BYTES) {
|
||||||
bytes = 4u * 1024u * 1024u;
|
requested_bytes = MIN_PROFILE_BUDGET_BYTES;
|
||||||
}
|
}
|
||||||
if (bytes > (size_t)1024u * 1024u * 1024u) {
|
size_t total_bytes = clamp_budget_to_free_memory(api, requested_bytes);
|
||||||
bytes = (size_t)1024u * 1024u * 1024u;
|
if (total_bytes < MIN_PROFILE_BUDGET_BYTES) {
|
||||||
|
total_bytes = MIN_PROFILE_BUDGET_BYTES;
|
||||||
}
|
}
|
||||||
words = (uint32_t)(bytes / sizeof(uint32_t));
|
report->buffer_mb = (int)(total_bytes / (1024u * 1024u));
|
||||||
|
|
||||||
if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem, bytes))) {
|
if (query_multiprocessor_count(api, dev, &mp_count) &&
|
||||||
api->cuCtxDestroy(ctx);
|
api->cuStreamCreate &&
|
||||||
return 0;
|
api->cuStreamDestroy) {
|
||||||
|
stream_count = choose_stream_count(mp_count, 1, total_bytes, 1);
|
||||||
}
|
}
|
||||||
if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem, 0, bytes))) {
|
if (stream_count > 1) {
|
||||||
api->cuMemFree(device_mem);
|
int created = 0;
|
||||||
api->cuCtxDestroy(ctx);
|
for (; created < stream_count; created++) {
|
||||||
return 0;
|
if (!check_rc(api, "cuStreamCreate", api->cuStreamCreate(&streams[created], 0))) {
|
||||||
|
destroy_streams(api, streams, created);
|
||||||
|
stream_count = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
report->stream_count = stream_count;
|
||||||
|
|
||||||
|
for (int lane = 0; lane < stream_count; lane++) {
|
||||||
|
size_t slice = total_bytes / (size_t)stream_count;
|
||||||
|
if (lane == stream_count - 1) {
|
||||||
|
slice = total_bytes - ((size_t)lane * (total_bytes / (size_t)stream_count));
|
||||||
|
}
|
||||||
|
slice = round_down_size(slice, sizeof(uint32_t));
|
||||||
|
if (slice < MIN_PROFILE_BUDGET_BYTES) {
|
||||||
|
slice = MIN_PROFILE_BUDGET_BYTES;
|
||||||
|
}
|
||||||
|
bytes_per_stream[lane] = slice;
|
||||||
|
words[lane] = (uint32_t)(slice / sizeof(uint32_t));
|
||||||
|
|
||||||
|
if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem[lane], slice))) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem[lane], 0, slice))) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
rounds[lane] = 2048;
|
||||||
|
params[lane][0] = &device_mem[lane];
|
||||||
|
params[lane][1] = &words[lane];
|
||||||
|
params[lane][2] = &rounds[lane];
|
||||||
|
}
|
||||||
|
|
||||||
if (!check_rc(api,
|
if (!check_rc(api,
|
||||||
"cuModuleLoadDataEx",
|
"cuModuleLoadDataEx",
|
||||||
api->cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
|
api->cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
|
||||||
api->cuMemFree(device_mem);
|
goto fail;
|
||||||
api->cuCtxDestroy(ctx);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
if (!check_rc(api, "cuModuleGetFunction", api->cuModuleGetFunction(&kernel, module, "burn"))) {
|
if (!check_rc(api, "cuModuleGetFunction", api->cuModuleGetFunction(&kernel, module, "burn"))) {
|
||||||
api->cuMemFree(device_mem);
|
goto fail;
|
||||||
api->cuCtxDestroy(ctx);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int threads = 256;
|
unsigned int threads = 256;
|
||||||
unsigned int blocks = (unsigned int)((words + threads - 1) / threads);
|
|
||||||
uint32_t rounds = 1024;
|
|
||||||
void *params[] = {&device_mem, &words, &rounds};
|
|
||||||
|
|
||||||
double start = now_seconds();
|
double start = now_seconds();
|
||||||
double deadline = start + (double)seconds;
|
double deadline = start + (double)seconds;
|
||||||
while (now_seconds() < deadline) {
|
while (now_seconds() < deadline) {
|
||||||
if (!check_rc(api,
|
launches_per_wave = 0;
|
||||||
"cuLaunchKernel",
|
for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
|
||||||
api->cuLaunchKernel(kernel, blocks, 1, 1, threads, 1, 1, 0, NULL, params, NULL))) {
|
int launched_this_batch = 0;
|
||||||
api->cuMemFree(device_mem);
|
for (int lane = 0; lane < stream_count; lane++) {
|
||||||
api->cuCtxDestroy(ctx);
|
unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
|
||||||
return 0;
|
if (!check_rc(api,
|
||||||
|
"cuLaunchKernel",
|
||||||
|
api->cuLaunchKernel(kernel,
|
||||||
|
blocks,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
threads,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
streams[lane],
|
||||||
|
params[lane],
|
||||||
|
NULL))) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
launches_per_wave++;
|
||||||
|
launched_this_batch++;
|
||||||
|
}
|
||||||
|
if (launched_this_batch <= 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
iterations++;
|
if (launches_per_wave <= 0) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
iterations += (unsigned long)launches_per_wave;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
|
if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem[0], sizeof(sample)))) {
|
||||||
api->cuMemFree(device_mem);
|
goto fail;
|
||||||
api->cuCtxDestroy(ctx);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem, sizeof(sample)))) {
|
|
||||||
api->cuMemFree(device_mem);
|
|
||||||
api->cuCtxDestroy(ctx);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
|
for (size_t i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
|
||||||
@@ -326,12 +468,34 @@ static int run_ptx_fallback(struct cuda_api *api,
|
|||||||
report->iterations = iterations;
|
report->iterations = iterations;
|
||||||
snprintf(report->details,
|
snprintf(report->details,
|
||||||
sizeof(report->details),
|
sizeof(report->details),
|
||||||
"profile_int32_fallback=OK iterations=%lu\n",
|
"fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
|
||||||
|
size_mb,
|
||||||
|
report->buffer_mb,
|
||||||
|
report->stream_count,
|
||||||
|
STRESS_LAUNCH_DEPTH,
|
||||||
|
bytes_per_stream[0] / (1024u * 1024u),
|
||||||
iterations);
|
iterations);
|
||||||
|
|
||||||
api->cuMemFree(device_mem);
|
for (int lane = 0; lane < stream_count; lane++) {
|
||||||
|
if (device_mem[lane]) {
|
||||||
|
api->cuMemFree(device_mem[lane]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
destroy_streams(api, streams, stream_count);
|
||||||
api->cuCtxDestroy(ctx);
|
api->cuCtxDestroy(ctx);
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
for (int lane = 0; lane < MAX_STRESS_STREAMS; lane++) {
|
||||||
|
if (device_mem[lane]) {
|
||||||
|
api->cuMemFree(device_mem[lane]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
destroy_streams(api, streams, MAX_STRESS_STREAMS);
|
||||||
|
if (ctx) {
|
||||||
|
api->cuCtxDestroy(ctx);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_CUBLASLT_HEADERS
|
#if HAVE_CUBLASLT_HEADERS
|
||||||
@@ -418,6 +582,7 @@ struct profile_desc {
|
|||||||
|
|
||||||
struct prepared_profile {
|
struct prepared_profile {
|
||||||
struct profile_desc desc;
|
struct profile_desc desc;
|
||||||
|
CUstream stream;
|
||||||
cublasLtMatmulDesc_t op_desc;
|
cublasLtMatmulDesc_t op_desc;
|
||||||
cublasLtMatrixLayout_t a_layout;
|
cublasLtMatrixLayout_t a_layout;
|
||||||
cublasLtMatrixLayout_t b_layout;
|
cublasLtMatrixLayout_t b_layout;
|
||||||
@@ -617,8 +782,8 @@ static uint64_t choose_square_dim(size_t budget_bytes, size_t bytes_per_cell, in
|
|||||||
if (dim < (uint64_t)multiple) {
|
if (dim < (uint64_t)multiple) {
|
||||||
dim = (uint64_t)multiple;
|
dim = (uint64_t)multiple;
|
||||||
}
|
}
|
||||||
if (dim > 8192u) {
|
if (dim > 65536u) {
|
||||||
dim = 8192u;
|
dim = 65536u;
|
||||||
}
|
}
|
||||||
return dim;
|
return dim;
|
||||||
}
|
}
|
||||||
@@ -704,10 +869,12 @@ static int prepare_profile(struct cublaslt_api *cublas,
|
|||||||
cublasLtHandle_t handle,
|
cublasLtHandle_t handle,
|
||||||
struct cuda_api *cuda,
|
struct cuda_api *cuda,
|
||||||
const struct profile_desc *desc,
|
const struct profile_desc *desc,
|
||||||
|
CUstream stream,
|
||||||
size_t profile_budget_bytes,
|
size_t profile_budget_bytes,
|
||||||
struct prepared_profile *out) {
|
struct prepared_profile *out) {
|
||||||
memset(out, 0, sizeof(*out));
|
memset(out, 0, sizeof(*out));
|
||||||
out->desc = *desc;
|
out->desc = *desc;
|
||||||
|
out->stream = stream;
|
||||||
|
|
||||||
size_t bytes_per_cell = 0;
|
size_t bytes_per_cell = 0;
|
||||||
bytes_per_cell += bytes_for_elements(desc->a_type, 1);
|
bytes_per_cell += bytes_for_elements(desc->a_type, 1);
|
||||||
@@ -935,7 +1102,7 @@ static int run_cublas_profile(cublasLtHandle_t handle,
|
|||||||
&profile->heuristic.algo,
|
&profile->heuristic.algo,
|
||||||
(void *)(uintptr_t)profile->workspace_dev,
|
(void *)(uintptr_t)profile->workspace_dev,
|
||||||
profile->workspace_size,
|
profile->workspace_size,
|
||||||
(cudaStream_t)0));
|
profile->stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int run_cublaslt_stress(struct cuda_api *cuda,
|
static int run_cublaslt_stress(struct cuda_api *cuda,
|
||||||
@@ -947,13 +1114,22 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
int size_mb,
|
int size_mb,
|
||||||
struct stress_report *report) {
|
struct stress_report *report) {
|
||||||
struct cublaslt_api cublas;
|
struct cublaslt_api cublas;
|
||||||
struct prepared_profile prepared[sizeof(k_profiles) / sizeof(k_profiles[0])];
|
struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
|
||||||
cublasLtHandle_t handle = NULL;
|
cublasLtHandle_t handle = NULL;
|
||||||
CUcontext ctx = NULL;
|
CUcontext ctx = NULL;
|
||||||
|
CUstream streams[MAX_STRESS_STREAMS] = {0};
|
||||||
uint16_t sample[256];
|
uint16_t sample[256];
|
||||||
int cc = cc_major * 10 + cc_minor;
|
int cc = cc_major * 10 + cc_minor;
|
||||||
int planned = 0;
|
int planned = 0;
|
||||||
int active = 0;
|
int active = 0;
|
||||||
|
int mp_count = 0;
|
||||||
|
int stream_count = 1;
|
||||||
|
int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
|
||||||
|
int prepared_count = 0;
|
||||||
|
int wave_launches = 0;
|
||||||
|
size_t requested_budget = 0;
|
||||||
|
size_t total_budget = 0;
|
||||||
|
size_t per_profile_budget = 0;
|
||||||
|
|
||||||
memset(report, 0, sizeof(*report));
|
memset(report, 0, sizeof(*report));
|
||||||
snprintf(report->backend, sizeof(report->backend), "cublasLt");
|
snprintf(report->backend, sizeof(report->backend), "cublasLt");
|
||||||
@@ -986,16 +1162,46 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t total_budget = (size_t)size_mb * 1024u * 1024u;
|
requested_budget = (size_t)size_mb * 1024u * 1024u;
|
||||||
if (total_budget < (size_t)planned * 4u * 1024u * 1024u) {
|
if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
|
||||||
total_budget = (size_t)planned * 4u * 1024u * 1024u;
|
requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
|
||||||
}
|
}
|
||||||
size_t per_profile_budget = total_budget / (size_t)planned;
|
total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
|
||||||
if (per_profile_budget < 4u * 1024u * 1024u) {
|
if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
|
||||||
per_profile_budget = 4u * 1024u * 1024u;
|
total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
|
||||||
}
|
}
|
||||||
|
if (query_multiprocessor_count(cuda, dev, &mp_count) &&
|
||||||
|
cuda->cuStreamCreate &&
|
||||||
|
cuda->cuStreamDestroy) {
|
||||||
|
stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
|
||||||
|
}
|
||||||
|
if (stream_count > 1) {
|
||||||
|
int created = 0;
|
||||||
|
for (; created < stream_count; created++) {
|
||||||
|
if (!check_rc(cuda, "cuStreamCreate", cuda->cuStreamCreate(&streams[created], 0))) {
|
||||||
|
destroy_streams(cuda, streams, created);
|
||||||
|
stream_count = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
report->stream_count = stream_count;
|
||||||
|
per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
|
||||||
|
if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
|
||||||
|
per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
|
||||||
|
}
|
||||||
|
report->buffer_mb = (int)(total_budget / (1024u * 1024u));
|
||||||
|
append_detail(report->details,
|
||||||
|
sizeof(report->details),
|
||||||
|
"requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
|
||||||
|
size_mb,
|
||||||
|
report->buffer_mb,
|
||||||
|
report->stream_count,
|
||||||
|
STRESS_LAUNCH_DEPTH,
|
||||||
|
mp_count,
|
||||||
|
per_profile_budget / (1024u * 1024u));
|
||||||
|
|
||||||
for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
|
for (int i = 0; i < profile_count; i++) {
|
||||||
const struct profile_desc *desc = &k_profiles[i];
|
const struct profile_desc *desc = &k_profiles[i];
|
||||||
if (!(desc->enabled && cc >= desc->min_cc)) {
|
if (!(desc->enabled && cc >= desc->min_cc)) {
|
||||||
append_detail(report->details,
|
append_detail(report->details,
|
||||||
@@ -1005,63 +1211,87 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
desc->min_cc);
|
desc->min_cc);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (prepare_profile(&cublas, handle, cuda, desc, per_profile_budget, &prepared[i])) {
|
for (int lane = 0; lane < stream_count; lane++) {
|
||||||
active++;
|
CUstream stream = streams[lane];
|
||||||
append_detail(report->details,
|
if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
|
||||||
sizeof(report->details),
|
break;
|
||||||
"%s=READY dim=%llux%llux%llu block=%s\n",
|
}
|
||||||
desc->name,
|
if (prepare_profile(&cublas, handle, cuda, desc, stream, per_profile_budget, &prepared[prepared_count])) {
|
||||||
(unsigned long long)prepared[i].m,
|
active++;
|
||||||
(unsigned long long)prepared[i].n,
|
append_detail(report->details,
|
||||||
(unsigned long long)prepared[i].k,
|
sizeof(report->details),
|
||||||
desc->block_label);
|
"%s[%d]=READY dim=%llux%llux%llu block=%s stream=%d\n",
|
||||||
} else {
|
desc->name,
|
||||||
append_detail(report->details, sizeof(report->details), "%s=SKIPPED unsupported\n", desc->name);
|
lane,
|
||||||
|
(unsigned long long)prepared[prepared_count].m,
|
||||||
|
(unsigned long long)prepared[prepared_count].n,
|
||||||
|
(unsigned long long)prepared[prepared_count].k,
|
||||||
|
desc->block_label,
|
||||||
|
lane);
|
||||||
|
prepared_count++;
|
||||||
|
} else {
|
||||||
|
append_detail(report->details,
|
||||||
|
sizeof(report->details),
|
||||||
|
"%s[%d]=SKIPPED unsupported\n",
|
||||||
|
desc->name,
|
||||||
|
lane);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (active <= 0) {
|
if (active <= 0) {
|
||||||
cublas.cublasLtDestroy(handle);
|
cublas.cublasLtDestroy(handle);
|
||||||
|
destroy_streams(cuda, streams, stream_count);
|
||||||
cuda->cuCtxDestroy(ctx);
|
cuda->cuCtxDestroy(ctx);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
double deadline = now_seconds() + (double)seconds;
|
double deadline = now_seconds() + (double)seconds;
|
||||||
while (now_seconds() < deadline) {
|
while (now_seconds() < deadline) {
|
||||||
for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
|
wave_launches = 0;
|
||||||
if (!prepared[i].ready) {
|
for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
|
||||||
continue;
|
int launched_this_batch = 0;
|
||||||
}
|
for (int i = 0; i < prepared_count; i++) {
|
||||||
if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
|
if (!prepared[i].ready) {
|
||||||
append_detail(report->details,
|
continue;
|
||||||
sizeof(report->details),
|
|
||||||
"%s=FAILED runtime\n",
|
|
||||||
prepared[i].desc.name);
|
|
||||||
for (size_t j = 0; j < sizeof(prepared) / sizeof(prepared[0]); j++) {
|
|
||||||
destroy_profile(&cublas, cuda, &prepared[j]);
|
|
||||||
}
|
}
|
||||||
cublas.cublasLtDestroy(handle);
|
if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
|
||||||
cuda->cuCtxDestroy(ctx);
|
append_detail(report->details,
|
||||||
return 0;
|
sizeof(report->details),
|
||||||
|
"%s=FAILED runtime\n",
|
||||||
|
prepared[i].desc.name);
|
||||||
|
for (int j = 0; j < prepared_count; j++) {
|
||||||
|
destroy_profile(&cublas, cuda, &prepared[j]);
|
||||||
|
}
|
||||||
|
cublas.cublasLtDestroy(handle);
|
||||||
|
destroy_streams(cuda, streams, stream_count);
|
||||||
|
cuda->cuCtxDestroy(ctx);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
prepared[i].iterations++;
|
||||||
|
report->iterations++;
|
||||||
|
wave_launches++;
|
||||||
|
launched_this_batch++;
|
||||||
}
|
}
|
||||||
prepared[i].iterations++;
|
if (launched_this_batch <= 0) {
|
||||||
report->iterations++;
|
|
||||||
if (now_seconds() >= deadline) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (wave_launches <= 0) {
|
||||||
|
break;
|
||||||
if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
|
}
|
||||||
for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
|
if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
|
||||||
destroy_profile(&cublas, cuda, &prepared[i]);
|
for (int i = 0; i < prepared_count; i++) {
|
||||||
|
destroy_profile(&cublas, cuda, &prepared[i]);
|
||||||
|
}
|
||||||
|
cublas.cublasLtDestroy(handle);
|
||||||
|
destroy_streams(cuda, streams, stream_count);
|
||||||
|
cuda->cuCtxDestroy(ctx);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
cublas.cublasLtDestroy(handle);
|
|
||||||
cuda->cuCtxDestroy(ctx);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
|
for (int i = 0; i < prepared_count; i++) {
|
||||||
if (!prepared[i].ready) {
|
if (!prepared[i].ready) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1072,7 +1302,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
prepared[i].iterations);
|
prepared[i].iterations);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
|
for (int i = 0; i < prepared_count; i++) {
|
||||||
if (prepared[i].ready) {
|
if (prepared[i].ready) {
|
||||||
if (check_rc(cuda, "cuMemcpyDtoH", cuda->cuMemcpyDtoH(sample, prepared[i].d_dev, sizeof(sample)))) {
|
if (check_rc(cuda, "cuMemcpyDtoH", cuda->cuMemcpyDtoH(sample, prepared[i].d_dev, sizeof(sample)))) {
|
||||||
for (size_t j = 0; j < sizeof(sample) / sizeof(sample[0]); j++) {
|
for (size_t j = 0; j < sizeof(sample) / sizeof(sample[0]); j++) {
|
||||||
@@ -1083,10 +1313,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
|
for (int i = 0; i < prepared_count; i++) {
|
||||||
destroy_profile(&cublas, cuda, &prepared[i]);
|
destroy_profile(&cublas, cuda, &prepared[i]);
|
||||||
}
|
}
|
||||||
cublas.cublasLtDestroy(handle);
|
cublas.cublasLtDestroy(handle);
|
||||||
|
destroy_streams(cuda, streams, stream_count);
|
||||||
cuda->cuCtxDestroy(ctx);
|
cuda->cuCtxDestroy(ctx);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -1095,13 +1326,16 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
|
|||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int seconds = 5;
|
int seconds = 5;
|
||||||
int size_mb = 64;
|
int size_mb = 64;
|
||||||
|
int device_index = 0;
|
||||||
for (int i = 1; i < argc; i++) {
|
for (int i = 1; i < argc; i++) {
|
||||||
if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
|
if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
|
||||||
seconds = atoi(argv[++i]);
|
seconds = atoi(argv[++i]);
|
||||||
} else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
|
} else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
|
||||||
size_mb = atoi(argv[++i]);
|
size_mb = atoi(argv[++i]);
|
||||||
|
} else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
|
||||||
|
device_index = atoi(argv[++i]);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "usage: %s [--seconds N] [--size-mb N]\n", argv[0]);
|
fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1111,6 +1345,9 @@ int main(int argc, char **argv) {
|
|||||||
if (size_mb <= 0) {
|
if (size_mb <= 0) {
|
||||||
size_mb = 64;
|
size_mb = 64;
|
||||||
}
|
}
|
||||||
|
if (device_index < 0) {
|
||||||
|
device_index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
struct cuda_api cuda;
|
struct cuda_api cuda;
|
||||||
if (!load_cuda(&cuda)) {
|
if (!load_cuda(&cuda)) {
|
||||||
@@ -1133,8 +1370,13 @@ int main(int argc, char **argv) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (device_index >= count) {
|
||||||
|
fprintf(stderr, "device index %d out of range (found %d CUDA device(s))\n", device_index, count);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
CUdevice dev = 0;
|
CUdevice dev = 0;
|
||||||
if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, 0))) {
|
if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, device_index))) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1162,10 +1404,12 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
printf("device=%s\n", report.device);
|
printf("device=%s\n", report.device);
|
||||||
|
printf("device_index=%d\n", device_index);
|
||||||
printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
|
printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
|
||||||
printf("backend=%s\n", report.backend);
|
printf("backend=%s\n", report.backend);
|
||||||
printf("duration_s=%d\n", seconds);
|
printf("duration_s=%d\n", seconds);
|
||||||
printf("buffer_mb=%d\n", report.buffer_mb);
|
printf("buffer_mb=%d\n", report.buffer_mb);
|
||||||
|
printf("streams=%d\n", report.stream_count);
|
||||||
printf("iterations=%lu\n", report.iterations);
|
printf("iterations=%lu\n", report.iterations);
|
||||||
printf("checksum=%llu\n", (unsigned long long)report.checksum);
|
printf("checksum=%llu\n", (unsigned long long)report.checksum);
|
||||||
if (report.details[0] != '\0') {
|
if (report.details[0] != '\0') {
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-stress.
|
# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-burn worker.
|
||||||
#
|
#
|
||||||
# Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
|
# Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
|
||||||
# verifies them against Packages.gz, and extracts the small subset we need:
|
# verifies them against Packages.gz, and extracts the small subset we need:
|
||||||
# - headers for compiling bee-gpu-stress against cuBLASLt
|
# - headers for compiling bee-gpu-burn worker against cuBLASLt
|
||||||
# - runtime libs for libcublas, libcublasLt, libcudart inside the ISO
|
# - runtime libs for libcublas, libcublasLt, libcudart inside the ISO
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
|
|||||||
AUTH_KEYS=""
|
AUTH_KEYS=""
|
||||||
REBUILD_IMAGE=0
|
REBUILD_IMAGE=0
|
||||||
CLEAN_CACHE=0
|
CLEAN_CACHE=0
|
||||||
|
VARIANT="all"
|
||||||
|
|
||||||
. "${BUILDER_DIR}/VERSIONS"
|
. "${BUILDER_DIR}/VERSIONS"
|
||||||
|
|
||||||
@@ -34,14 +35,23 @@ while [ $# -gt 0 ]; do
|
|||||||
REBUILD_IMAGE=1
|
REBUILD_IMAGE=1
|
||||||
shift
|
shift
|
||||||
;;
|
;;
|
||||||
|
--variant)
|
||||||
|
VARIANT="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
echo "unknown arg: $1" >&2
|
echo "unknown arg: $1" >&2
|
||||||
echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys]" >&2
|
echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|nvidia-legacy|amd|nogpu|all]" >&2
|
||||||
exit 1
|
exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
case "$VARIANT" in
|
||||||
|
nvidia|nvidia-legacy|amd|nogpu|all) ;;
|
||||||
|
*) echo "unknown variant: $VARIANT (expected nvidia, nvidia-legacy, amd, nogpu, or all)" >&2; exit 1 ;;
|
||||||
|
esac
|
||||||
|
|
||||||
if [ "$CLEAN_CACHE" = "1" ]; then
|
if [ "$CLEAN_CACHE" = "1" ]; then
|
||||||
echo "=== cleaning build cache: ${CACHE_DIR} ==="
|
echo "=== cleaning build cache: ${CACHE_DIR} ==="
|
||||||
rm -rf "${CACHE_DIR:?}/go-build" \
|
rm -rf "${CACHE_DIR:?}/go-build" \
|
||||||
@@ -49,8 +59,15 @@ if [ "$CLEAN_CACHE" = "1" ]; then
|
|||||||
"${CACHE_DIR:?}/tmp" \
|
"${CACHE_DIR:?}/tmp" \
|
||||||
"${CACHE_DIR:?}/bee" \
|
"${CACHE_DIR:?}/bee" \
|
||||||
"${CACHE_DIR:?}/lb-packages"
|
"${CACHE_DIR:?}/lb-packages"
|
||||||
echo "=== cleaning live-build work dir: ${REPO_ROOT}/dist/live-build-work ==="
|
echo "=== cleaning live-build work dirs ==="
|
||||||
rm -rf "${REPO_ROOT}/dist/live-build-work"
|
rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia-legacy"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/live-build-work-amd"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/live-build-work-nogpu"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia-legacy"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/overlay-stage-amd"
|
||||||
|
rm -rf "${REPO_ROOT}/dist/overlay-stage-nogpu"
|
||||||
echo "=== caches cleared, proceeding with build ==="
|
echo "=== caches cleared, proceeding with build ==="
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -108,34 +125,79 @@ else
|
|||||||
echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
|
echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
|
||||||
fi
|
fi
|
||||||
|
|
||||||
set -- \
|
# Build base docker run args (without --authorized-keys)
|
||||||
run --rm --privileged \
|
build_run_args() {
|
||||||
--platform "${BUILDER_PLATFORM}" \
|
_variant="$1"
|
||||||
-v "${REPO_ROOT}:/work" \
|
_auth_arg=""
|
||||||
-v "${CACHE_DIR}:/cache" \
|
if [ -n "$AUTH_KEYS" ]; then
|
||||||
-e BEE_CONTAINER_BUILD=1 \
|
_auth_arg="--authorized-keys /tmp/bee-authkeys/${AUTH_KEYS_BASE}"
|
||||||
-e GOCACHE=/cache/go-build \
|
fi
|
||||||
-e GOMODCACHE=/cache/go-mod \
|
echo "run --rm --privileged \
|
||||||
-e TMPDIR=/cache/tmp \
|
--platform ${BUILDER_PLATFORM} \
|
||||||
-e BEE_CACHE_DIR=/cache/bee \
|
-v ${REPO_ROOT}:/work \
|
||||||
-w /work \
|
-v ${CACHE_DIR}:/cache \
|
||||||
"${IMAGE_REF}" \
|
${AUTH_KEYS:+-v ${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro} \
|
||||||
sh /work/iso/builder/build.sh
|
|
||||||
|
|
||||||
if [ -n "$AUTH_KEYS" ]; then
|
|
||||||
set -- run --rm --privileged \
|
|
||||||
--platform "${BUILDER_PLATFORM}" \
|
|
||||||
-v "${REPO_ROOT}:/work" \
|
|
||||||
-v "${CACHE_DIR}:/cache" \
|
|
||||||
-v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
|
|
||||||
-e BEE_CONTAINER_BUILD=1 \
|
-e BEE_CONTAINER_BUILD=1 \
|
||||||
-e GOCACHE=/cache/go-build \
|
-e GOCACHE=/cache/go-build \
|
||||||
-e GOMODCACHE=/cache/go-mod \
|
-e GOMODCACHE=/cache/go-mod \
|
||||||
-e TMPDIR=/cache/tmp \
|
-e TMPDIR=/cache/tmp \
|
||||||
-e BEE_CACHE_DIR=/cache/bee \
|
-e BEE_CACHE_DIR=/cache/bee \
|
||||||
-w /work \
|
-w /work \
|
||||||
"${IMAGE_REF}" \
|
${IMAGE_REF} \
|
||||||
sh /work/iso/builder/build.sh --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
|
sh /work/iso/builder/build.sh --variant ${_variant} ${_auth_arg}"
|
||||||
fi
|
}
|
||||||
|
|
||||||
"$CONTAINER_TOOL" "$@"
|
run_variant() {
|
||||||
|
_v="$1"
|
||||||
|
echo "=== building variant: ${_v} ==="
|
||||||
|
if [ -n "$AUTH_KEYS" ]; then
|
||||||
|
"$CONTAINER_TOOL" run --rm --privileged \
|
||||||
|
--platform "${BUILDER_PLATFORM}" \
|
||||||
|
-v "${REPO_ROOT}:/work" \
|
||||||
|
-v "${CACHE_DIR}:/cache" \
|
||||||
|
-v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
|
||||||
|
-e BEE_CONTAINER_BUILD=1 \
|
||||||
|
-e GOCACHE=/cache/go-build \
|
||||||
|
-e GOMODCACHE=/cache/go-mod \
|
||||||
|
-e TMPDIR=/cache/tmp \
|
||||||
|
-e BEE_CACHE_DIR=/cache/bee \
|
||||||
|
-w /work \
|
||||||
|
"${IMAGE_REF}" \
|
||||||
|
sh /work/iso/builder/build.sh --variant "${_v}" \
|
||||||
|
--authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
|
||||||
|
else
|
||||||
|
"$CONTAINER_TOOL" run --rm --privileged \
|
||||||
|
--platform "${BUILDER_PLATFORM}" \
|
||||||
|
-v "${REPO_ROOT}:/work" \
|
||||||
|
-v "${CACHE_DIR}:/cache" \
|
||||||
|
-e BEE_CONTAINER_BUILD=1 \
|
||||||
|
-e GOCACHE=/cache/go-build \
|
||||||
|
-e GOMODCACHE=/cache/go-mod \
|
||||||
|
-e TMPDIR=/cache/tmp \
|
||||||
|
-e BEE_CACHE_DIR=/cache/bee \
|
||||||
|
-w /work \
|
||||||
|
"${IMAGE_REF}" \
|
||||||
|
sh /work/iso/builder/build.sh --variant "${_v}"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
case "$VARIANT" in
|
||||||
|
nvidia)
|
||||||
|
run_variant nvidia
|
||||||
|
;;
|
||||||
|
nvidia-legacy)
|
||||||
|
run_variant nvidia-legacy
|
||||||
|
;;
|
||||||
|
amd)
|
||||||
|
run_variant amd
|
||||||
|
;;
|
||||||
|
nogpu)
|
||||||
|
run_variant nogpu
|
||||||
|
;;
|
||||||
|
all)
|
||||||
|
run_variant nvidia
|
||||||
|
run_variant nvidia-legacy
|
||||||
|
run_variant amd
|
||||||
|
run_variant nogpu
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|||||||
55
iso/builder/build-john.sh
Normal file
55
iso/builder/build-john.sh
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# build-john.sh — build John the Ripper jumbo with OpenCL support for the LiveCD.
|
||||||
|
#
|
||||||
|
# Downloads a pinned source snapshot from the official openwall/john repository,
|
||||||
|
# builds it inside the builder container, and caches the resulting run/ tree.
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
JOHN_COMMIT="$1"
|
||||||
|
DIST_DIR="$2"
|
||||||
|
|
||||||
|
[ -n "$JOHN_COMMIT" ] || { echo "usage: $0 <john-commit> <dist-dir>"; exit 1; }
|
||||||
|
[ -n "$DIST_DIR" ] || { echo "usage: $0 <john-commit> <dist-dir>"; exit 1; }
|
||||||
|
|
||||||
|
echo "=== John the Ripper jumbo ${JOHN_COMMIT} ==="
|
||||||
|
|
||||||
|
CACHE_DIR="${DIST_DIR}/john-${JOHN_COMMIT}"
|
||||||
|
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
||||||
|
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/john-downloads"
|
||||||
|
SRC_TAR="${DOWNLOAD_CACHE_DIR}/john-${JOHN_COMMIT}.tar.gz"
|
||||||
|
SRC_URL="https://github.com/openwall/john/archive/${JOHN_COMMIT}.tar.gz"
|
||||||
|
|
||||||
|
if [ -x "${CACHE_DIR}/run/john" ] && [ -f "${CACHE_DIR}/run/john.conf" ]; then
|
||||||
|
echo "=== john cached, skipping build ==="
|
||||||
|
echo "run dir: ${CACHE_DIR}/run"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "${DOWNLOAD_CACHE_DIR}"
|
||||||
|
if [ ! -f "${SRC_TAR}" ]; then
|
||||||
|
echo "=== downloading john source snapshot ==="
|
||||||
|
wget --show-progress -O "${SRC_TAR}" "${SRC_URL}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
BUILD_TMP=$(mktemp -d)
|
||||||
|
trap 'rm -rf "${BUILD_TMP}"' EXIT INT TERM
|
||||||
|
|
||||||
|
cd "${BUILD_TMP}"
|
||||||
|
tar xf "${SRC_TAR}"
|
||||||
|
SRC_DIR=$(find . -maxdepth 1 -type d -name 'john-*' | head -1)
|
||||||
|
[ -n "${SRC_DIR}" ] || { echo "ERROR: john source directory not found"; exit 1; }
|
||||||
|
|
||||||
|
cd "${SRC_DIR}/src"
|
||||||
|
echo "=== configuring john ==="
|
||||||
|
./configure
|
||||||
|
echo "=== building john ==="
|
||||||
|
make clean >/dev/null 2>&1 || true
|
||||||
|
make -j"$(nproc)"
|
||||||
|
|
||||||
|
mkdir -p "${CACHE_DIR}"
|
||||||
|
cp -a "../run" "${CACHE_DIR}/run"
|
||||||
|
chmod +x "${CACHE_DIR}/run/john"
|
||||||
|
|
||||||
|
echo "=== john build complete ==="
|
||||||
|
echo "run dir: ${CACHE_DIR}/run"
|
||||||
@@ -9,6 +9,7 @@
|
|||||||
#
|
#
|
||||||
# Output layout:
|
# Output layout:
|
||||||
# $CACHE_DIR/bin/all_reduce_perf
|
# $CACHE_DIR/bin/all_reduce_perf
|
||||||
|
# $CACHE_DIR/lib/libcudart.so* copied from the nvcc toolchain used to build nccl-tests
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
@@ -30,7 +31,7 @@ CACHE_DIR="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
|
|||||||
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
||||||
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"
|
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"
|
||||||
|
|
||||||
if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ]; then
|
if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ] && [ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' 2>/dev/null | wc -l)" -gt 0 ]; then
|
||||||
echo "=== nccl-tests cached, skipping build ==="
|
echo "=== nccl-tests cached, skipping build ==="
|
||||||
echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
|
echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
|
||||||
exit 0
|
exit 0
|
||||||
@@ -52,6 +53,23 @@ echo "nvcc: $NVCC"
|
|||||||
CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
|
CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
|
||||||
echo "CUDA_HOME: $CUDA_HOME"
|
echo "CUDA_HOME: $CUDA_HOME"
|
||||||
|
|
||||||
|
find_cudart_dir() {
|
||||||
|
for dir in \
|
||||||
|
"${CUDA_HOME}/targets/x86_64-linux/lib" \
|
||||||
|
"${CUDA_HOME}/targets/x86_64-linux/lib/stubs" \
|
||||||
|
"${CUDA_HOME}/lib64" \
|
||||||
|
"${CUDA_HOME}/lib"; do
|
||||||
|
if [ -d "$dir" ] && find "$dir" -maxdepth 1 -name 'libcudart.so*' -type f | grep -q .; then
|
||||||
|
printf '%s\n' "$dir"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
CUDART_DIR="$(find_cudart_dir)" || { echo "ERROR: libcudart.so* not found under ${CUDA_HOME}"; exit 1; }
|
||||||
|
echo "cudart dir: $CUDART_DIR"
|
||||||
|
|
||||||
# Download libnccl-dev for nccl.h
|
# Download libnccl-dev for nccl.h
|
||||||
REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian${DEBIAN_VERSION}/x86_64"
|
REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian${DEBIAN_VERSION}/x86_64"
|
||||||
DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
|
DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
|
||||||
@@ -136,6 +154,11 @@ mkdir -p "${CACHE_DIR}/bin"
|
|||||||
cp "./build/all_reduce_perf" "${CACHE_DIR}/bin/all_reduce_perf"
|
cp "./build/all_reduce_perf" "${CACHE_DIR}/bin/all_reduce_perf"
|
||||||
chmod +x "${CACHE_DIR}/bin/all_reduce_perf"
|
chmod +x "${CACHE_DIR}/bin/all_reduce_perf"
|
||||||
|
|
||||||
|
mkdir -p "${CACHE_DIR}/lib"
|
||||||
|
find "${CUDART_DIR}" -maxdepth 1 -name 'libcudart.so*' -type f -exec cp -a {} "${CACHE_DIR}/lib/" \;
|
||||||
|
[ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' -type f | wc -l)" -gt 0 ] || { echo "ERROR: libcudart runtime copy failed"; exit 1; }
|
||||||
|
|
||||||
echo "=== nccl-tests build complete ==="
|
echo "=== nccl-tests build complete ==="
|
||||||
echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
|
echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
|
||||||
ls -lh "${CACHE_DIR}/bin/all_reduce_perf"
|
ls -lh "${CACHE_DIR}/bin/all_reduce_perf"
|
||||||
|
ls -lh "${CACHE_DIR}/lib/"libcudart.so* 2>/dev/null || true
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# build-nvidia-module.sh — compile NVIDIA proprietary driver modules for Debian 12
|
# build-nvidia-module.sh — compile NVIDIA kernel modules for Debian 12
|
||||||
#
|
#
|
||||||
# Downloads the official NVIDIA .run installer, extracts kernel modules and
|
# Downloads the official NVIDIA .run installer, extracts kernel modules and
|
||||||
# userspace tools (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA.
|
# userspace tools (nvidia-smi, libnvidia-ml). Supports both:
|
||||||
|
# - open -> kernel-open/ sources from the .run installer
|
||||||
|
# - proprietary -> traditional proprietary kernel sources from the .run installer
|
||||||
#
|
#
|
||||||
# Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
|
# Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
|
||||||
# are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
|
# are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
|
||||||
@@ -10,17 +12,26 @@
|
|||||||
# Output layout:
|
# Output layout:
|
||||||
# $CACHE_DIR/modules/ — nvidia*.ko files
|
# $CACHE_DIR/modules/ — nvidia*.ko files
|
||||||
# $CACHE_DIR/bin/ — nvidia-smi, nvidia-debugdump
|
# $CACHE_DIR/bin/ — nvidia-smi, nvidia-debugdump
|
||||||
# $CACHE_DIR/lib/ — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
|
# $CACHE_DIR/lib/ — libnvidia-ml.so*, libcuda.so*, OpenCL-related libs
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
NVIDIA_VERSION="$1"
|
NVIDIA_VERSION="$1"
|
||||||
DIST_DIR="$2"
|
DIST_DIR="$2"
|
||||||
DEBIAN_KERNEL_ABI="$3"
|
DEBIAN_KERNEL_ABI="$3"
|
||||||
|
NVIDIA_FLAVOR="${4:-open}"
|
||||||
|
|
||||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
|
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
|
||||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
|
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
|
||||||
[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>"; exit 1; }
|
[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi> [open|proprietary]"; exit 1; }
|
||||||
|
|
||||||
|
case "$NVIDIA_FLAVOR" in
|
||||||
|
open|proprietary) ;;
|
||||||
|
*)
|
||||||
|
echo "unsupported NVIDIA flavor: $NVIDIA_FLAVOR (expected open or proprietary)" >&2
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
KVER="${DEBIAN_KERNEL_ABI}-amd64"
|
KVER="${DEBIAN_KERNEL_ABI}-amd64"
|
||||||
# On Debian, kernel headers are split into two packages:
|
# On Debian, kernel headers are split into two packages:
|
||||||
@@ -31,7 +42,22 @@ KVER="${DEBIAN_KERNEL_ABI}-amd64"
|
|||||||
KDIR_ARCH="/usr/src/linux-headers-${KVER}"
|
KDIR_ARCH="/usr/src/linux-headers-${KVER}"
|
||||||
KDIR_COMMON="/usr/src/linux-headers-${DEBIAN_KERNEL_ABI}-common"
|
KDIR_COMMON="/usr/src/linux-headers-${DEBIAN_KERNEL_ABI}-common"
|
||||||
|
|
||||||
echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="
|
echo "=== NVIDIA ${NVIDIA_VERSION} (${NVIDIA_FLAVOR}) for kernel ${KVER} ==="
|
||||||
|
|
||||||
|
CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_FLAVOR}-${NVIDIA_VERSION}-${KVER}"
|
||||||
|
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
||||||
|
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
||||||
|
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
||||||
|
CACHE_LAYOUT_VERSION="3"
|
||||||
|
CACHE_LAYOUT_MARKER="${CACHE_DIR}/.cache-layout-v${CACHE_LAYOUT_VERSION}"
|
||||||
|
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
|
||||||
|
&& [ -f "$CACHE_LAYOUT_MARKER" ] \
|
||||||
|
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
|
||||||
|
echo "=== NVIDIA cached, skipping build ==="
|
||||||
|
echo "cache: $CACHE_DIR"
|
||||||
|
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then
|
if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then
|
||||||
echo "=== installing linux-headers-${KVER} ==="
|
echo "=== installing linux-headers-${KVER} ==="
|
||||||
@@ -42,18 +68,6 @@ fi
|
|||||||
echo "kernel headers (arch): $KDIR_ARCH"
|
echo "kernel headers (arch): $KDIR_ARCH"
|
||||||
echo "kernel headers (common): $KDIR_COMMON"
|
echo "kernel headers (common): $KDIR_COMMON"
|
||||||
|
|
||||||
CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
|
|
||||||
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
|
||||||
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
|
||||||
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
|
||||||
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
|
|
||||||
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
|
|
||||||
echo "=== NVIDIA cached, skipping build ==="
|
|
||||||
echo "cache: $CACHE_DIR"
|
|
||||||
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Download official NVIDIA .run installer with sha256 verification
|
# Download official NVIDIA .run installer with sha256 verification
|
||||||
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
||||||
mkdir -p "$DOWNLOAD_CACHE_DIR" "$EXTRACT_CACHE_DIR"
|
mkdir -p "$DOWNLOAD_CACHE_DIR" "$EXTRACT_CACHE_DIR"
|
||||||
@@ -87,12 +101,18 @@ EXTRACT_DIR="${EXTRACT_CACHE_DIR}/nvidia-extract-${NVIDIA_VERSION}"
|
|||||||
rm -rf "$EXTRACT_DIR"
|
rm -rf "$EXTRACT_DIR"
|
||||||
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR"
|
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR"
|
||||||
|
|
||||||
# Find kernel source directory (proprietary: kernel/, open: kernel-open/)
|
# Find kernel source directory for the selected flavor.
|
||||||
KERNEL_SRC=""
|
KERNEL_SRC=""
|
||||||
for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do
|
if [ "$NVIDIA_FLAVOR" = "open" ]; then
|
||||||
[ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
|
for d in "$EXTRACT_DIR/kernel-open" "$EXTRACT_DIR/kernel-open/"*; do
|
||||||
done
|
[ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
|
||||||
[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found in:"; ls "$EXTRACT_DIR/"; exit 1; }
|
done
|
||||||
|
else
|
||||||
|
for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do
|
||||||
|
[ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found for flavor ${NVIDIA_FLAVOR} in:"; ls "$EXTRACT_DIR/"; exit 1; }
|
||||||
echo "kernel source: $KERNEL_SRC"
|
echo "kernel source: $KERNEL_SRC"
|
||||||
|
|
||||||
# Build kernel modules
|
# Build kernel modules
|
||||||
@@ -130,17 +150,30 @@ else
|
|||||||
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Copy ALL userspace library files.
|
# Copy NVIDIA userspace libraries broadly instead of whitelisting a few names.
|
||||||
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
|
# Newer driver branches add extra runtime deps (for example OpenCL/compiler side
|
||||||
# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
|
# libraries). If we only copy a narrow allowlist, clinfo/John can see nvidia.icd
|
||||||
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
|
# but still fail with "no OpenCL platforms" because one dependent .so is absent.
|
||||||
count=0
|
copied_libs=0
|
||||||
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
|
for f in $(find "$EXTRACT_DIR" -maxdepth 1 \( -name 'libnvidia*.so.*' -o -name 'libcuda.so.*' \) -type f 2>/dev/null | sort); do
|
||||||
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
|
cp "$f" "$CACHE_DIR/lib/"
|
||||||
done
|
copied_libs=$((copied_libs+1))
|
||||||
if [ "$count" -eq 0 ]; then
|
done
|
||||||
echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR"
|
|
||||||
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true
|
if [ "$copied_libs" -eq 0 ]; then
|
||||||
|
echo "ERROR: no NVIDIA userspace libraries found in $EXTRACT_DIR"
|
||||||
|
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -40 || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
for lib in \
|
||||||
|
libnvidia-ml \
|
||||||
|
libcuda \
|
||||||
|
libnvidia-ptxjitcompiler \
|
||||||
|
libnvidia-opencl; do
|
||||||
|
if ! ls "$CACHE_DIR/lib/${lib}.so."* >/dev/null 2>&1; then
|
||||||
|
echo "ERROR: required ${lib}.so.* not found in extracted userspace libs"
|
||||||
|
ls "$CACHE_DIR/lib/" | sort >&2 || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
@@ -149,16 +182,17 @@ done
|
|||||||
ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
|
ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
|
||||||
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
||||||
|
|
||||||
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
|
# Create soname symlinks for every copied versioned library.
|
||||||
for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
|
for versioned in "$CACHE_DIR"/lib/*.so.*; do
|
||||||
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
|
[ -f "$versioned" ] || continue
|
||||||
[ -n "$versioned" ] || continue
|
|
||||||
base=$(basename "$versioned")
|
base=$(basename "$versioned")
|
||||||
ln -sf "$base" "$CACHE_DIR/lib/${lib}.so.1"
|
stem=${base%%.so.*}
|
||||||
ln -sf "${lib}.so.1" "$CACHE_DIR/lib/${lib}.so" 2>/dev/null || true
|
ln -sf "$base" "$CACHE_DIR/lib/${stem}.so.1"
|
||||||
echo "${lib}: .so.1 -> $base"
|
ln -sf "${stem}.so.1" "$CACHE_DIR/lib/${stem}.so" 2>/dev/null || true
|
||||||
done
|
done
|
||||||
|
|
||||||
|
touch "$CACHE_LAYOUT_MARKER"
|
||||||
|
|
||||||
echo "=== NVIDIA build complete ==="
|
echo "=== NVIDIA build complete ==="
|
||||||
echo "cache: $CACHE_DIR"
|
echo "cache: $CACHE_DIR"
|
||||||
echo "modules: $ko_count .ko files"
|
echo "modules: $ko_count .ko files"
|
||||||
|
|||||||
1064
iso/builder/build.sh
1064
iso/builder/build.sh
File diff suppressed because it is too large
Load Diff
@@ -7,26 +7,29 @@ echo " █████╗ ███████║███████╗ ╚
|
|||||||
echo " ██╔══╝ ██╔══██║╚════██║ ╚██╔╝ ╚════╝██╔══██╗██╔══╝ ██╔══╝"
|
echo " ██╔══╝ ██╔══██║╚════██║ ╚██╔╝ ╚════╝██╔══██╗██╔══╝ ██╔══╝"
|
||||||
echo " ███████╗██║ ██║███████║ ██║ ██████╔╝███████╗███████╗"
|
echo " ███████╗██║ ██║███████║ ██║ ██████╔╝███████╗███████╗"
|
||||||
echo " ╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝"
|
echo " ╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝"
|
||||||
|
echo " Hardware Audit LiveCD"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
menuentry "EASY-BEE" {
|
menuentry "EASY-BEE" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (load to RAM)" {
|
submenu "EASY-BEE (advanced options) -->" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
menuentry "EASY-BEE — GSP=off" {
|
||||||
initrd @INITRD_LIVE@
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
}
|
initrd @INITRD_LIVE@
|
||||||
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (NVIDIA GSP=off)" {
|
menuentry "EASY-BEE — KMS (no nomodeset)" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
}
|
}
|
||||||
|
|
||||||
menuentry "EASY-BEE (fail-safe)" {
|
menuentry "EASY-BEE — fail-safe" {
|
||||||
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
|
linux @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
|
||||||
initrd @INITRD_LIVE@
|
initrd @INITRD_LIVE@
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ "${grub_platform}" = "efi" ]; then
|
if [ "${grub_platform}" = "efi" ]; then
|
||||||
|
|||||||
@@ -5,6 +5,12 @@ label live-@FLAVOUR@-normal
|
|||||||
initrd @INITRD@
|
initrd @INITRD@
|
||||||
append @APPEND_LIVE@ bee.nvidia.mode=normal
|
append @APPEND_LIVE@ bee.nvidia.mode=normal
|
||||||
|
|
||||||
|
label live-@FLAVOUR@-kms
|
||||||
|
menu label EASY-BEE (^graphics/KMS)
|
||||||
|
linux @LINUX@
|
||||||
|
initrd @INITRD@
|
||||||
|
append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal
|
||||||
|
|
||||||
label live-@FLAVOUR@-toram
|
label live-@FLAVOUR@-toram
|
||||||
menu label EASY-BEE (^load to RAM)
|
menu label EASY-BEE (^load to RAM)
|
||||||
linux @LINUX@
|
linux @LINUX@
|
||||||
@@ -17,8 +23,18 @@ label live-@FLAVOUR@-gsp-off
|
|||||||
initrd @INITRD@
|
initrd @INITRD@
|
||||||
append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
|
append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
|
||||||
|
|
||||||
|
label live-@FLAVOUR@-kms-gsp-off
|
||||||
|
menu label EASY-BEE (g^raphics/KMS, GSP=off)
|
||||||
|
linux @LINUX@
|
||||||
|
initrd @INITRD@
|
||||||
|
append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off
|
||||||
|
|
||||||
label live-@FLAVOUR@-failsafe
|
label live-@FLAVOUR@-failsafe
|
||||||
menu label EASY-BEE (^fail-safe)
|
menu label EASY-BEE (^fail-safe)
|
||||||
linux @LINUX@
|
linux @LINUX@
|
||||||
initrd @INITRD@
|
initrd @INITRD@
|
||||||
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
|
||||||
|
|
||||||
|
label memtest
|
||||||
|
menu label ^Memory Test (memtest86+)
|
||||||
|
linux /boot/memtest86+x64.bin
|
||||||
|
|||||||
@@ -5,6 +5,9 @@ set -e
|
|||||||
|
|
||||||
echo "=== bee chroot setup ==="
|
echo "=== bee chroot setup ==="
|
||||||
|
|
||||||
|
GPU_VENDOR=$(cat /etc/bee-gpu-vendor 2>/dev/null || echo nvidia)
|
||||||
|
echo "=== GPU vendor: ${GPU_VENDOR} ==="
|
||||||
|
|
||||||
ensure_bee_console_user() {
|
ensure_bee_console_user() {
|
||||||
if id bee >/dev/null 2>&1; then
|
if id bee >/dev/null 2>&1; then
|
||||||
usermod -d /home/bee -s /bin/bash bee 2>/dev/null || true
|
usermod -d /home/bee -s /bin/bash bee 2>/dev/null || true
|
||||||
@@ -21,14 +24,14 @@ ensure_bee_console_user() {
|
|||||||
|
|
||||||
ensure_bee_console_user
|
ensure_bee_console_user
|
||||||
|
|
||||||
# Enable bee services
|
# Enable common bee services
|
||||||
systemctl enable nvidia-dcgm.service 2>/dev/null || true
|
|
||||||
systemctl enable bee-network.service
|
systemctl enable bee-network.service
|
||||||
systemctl enable bee-nvidia.service
|
|
||||||
systemctl enable bee-preflight.service
|
systemctl enable bee-preflight.service
|
||||||
systemctl enable bee-audit.service
|
systemctl enable bee-audit.service
|
||||||
systemctl enable bee-web.service
|
systemctl enable bee-web.service
|
||||||
systemctl enable bee-sshsetup.service
|
systemctl enable bee-sshsetup.service
|
||||||
|
systemctl enable bee-selfheal.timer
|
||||||
|
systemctl enable bee-boot-status.service
|
||||||
systemctl enable ssh.service
|
systemctl enable ssh.service
|
||||||
systemctl enable lightdm.service 2>/dev/null || true
|
systemctl enable lightdm.service 2>/dev/null || true
|
||||||
systemctl enable qemu-guest-agent.service 2>/dev/null || true
|
systemctl enable qemu-guest-agent.service 2>/dev/null || true
|
||||||
@@ -36,25 +39,39 @@ systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
|
|||||||
systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
|
systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
|
||||||
systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
|
systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
|
||||||
|
|
||||||
|
# Enable GPU-vendor specific services
|
||||||
|
if [ "$GPU_VENDOR" = "nvidia" ]; then
|
||||||
|
systemctl enable nvidia-dcgm.service 2>/dev/null || true
|
||||||
|
systemctl enable bee-nvidia.service
|
||||||
|
elif [ "$GPU_VENDOR" = "amd" ]; then
|
||||||
|
# ROCm symlinks (packages install to /opt/rocm-*/bin/)
|
||||||
|
for tool in rocm-smi rocm-bandwidth-test rvs; do
|
||||||
|
if [ ! -e /usr/local/bin/${tool} ]; then
|
||||||
|
bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
|
||||||
|
[ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
# nogpu: no GPU services needed
|
||||||
|
|
||||||
# Ensure scripts are executable
|
# Ensure scripts are executable
|
||||||
chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
|
|
||||||
chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee 2>/dev/null || true
|
chmod +x /usr/local/bin/bee 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-selfheal 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-boot-status 2>/dev/null || true
|
||||||
|
if [ "$GPU_VENDOR" = "nvidia" ]; then
|
||||||
|
chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-john-gpu-stress 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-nccl-gpu-stress 2>/dev/null || true
|
||||||
|
fi
|
||||||
|
|
||||||
# Reload udev rules
|
# Reload udev rules
|
||||||
udevadm control --reload-rules 2>/dev/null || true
|
udevadm control --reload-rules 2>/dev/null || true
|
||||||
|
|
||||||
# rocm symlinks (packages install to /opt/rocm-*/bin/)
|
|
||||||
for tool in rocm-smi rocm-bandwidth-test rvs; do
|
|
||||||
if [ ! -e /usr/local/bin/${tool} ]; then
|
|
||||||
bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
|
|
||||||
[ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Create export directory
|
# Create export directory
|
||||||
mkdir -p /appdata/bee/export
|
mkdir -p /appdata/bee/export
|
||||||
|
|
||||||
@@ -62,4 +79,4 @@ if [ -f /etc/sudoers.d/bee ]; then
|
|||||||
chmod 0440 /etc/sudoers.d/bee
|
chmod 0440 /etc/sudoers.d/bee
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "=== bee chroot setup complete ==="
|
echo "=== bee chroot setup complete (${GPU_VENDOR}) ==="
|
||||||
|
|||||||
76
iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
Executable file
76
iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
Executable file
@@ -0,0 +1,76 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
# 9001-wallpaper.hook.chroot — generate /usr/share/bee/wallpaper.png inside chroot
|
||||||
|
set -e
|
||||||
|
echo "=== generating bee wallpaper ==="
|
||||||
|
mkdir -p /usr/share/bee
|
||||||
|
|
||||||
|
python3 - <<'PYEOF'
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
import os
|
||||||
|
|
||||||
|
W, H = 1920, 1080
|
||||||
|
|
||||||
|
LOGO = """\
|
||||||
|
\u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2557 \u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557
|
||||||
|
\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u255a\u2588\u2588\u2557 \u2588\u2588\u2554\u255d \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d
|
||||||
|
\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u255a\u2588\u2588\u2588\u2588\u2554\u255d \u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255d\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2557
|
||||||
|
\u2588\u2588\u2554\u2550\u2550\u255d \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2551\u255a\u2550\u2550\u2550\u2550\u2588\u2588\u2551 \u255a\u2588\u2588\u2554\u255d \u255a\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u255d \u2588\u2588\u2554\u2550\u2550\u255d
|
||||||
|
\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2551 \u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255d\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557
|
||||||
|
\u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d\u255a\u2550\u255d \u255a\u2550\u255d\u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d \u255a\u2550\u255d \u255a\u2550\u2550\u2550\u2550\u2550\u255d \u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d\u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d
|
||||||
|
Hardware Audit LiveCD"""
|
||||||
|
|
||||||
|
# Find a monospace font that supports box-drawing characters
|
||||||
|
FONT_CANDIDATES = [
|
||||||
|
'/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
|
||||||
|
'/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
|
||||||
|
'/usr/share/fonts/truetype/freefont/FreeMono.ttf',
|
||||||
|
'/usr/share/fonts/truetype/noto/NotoMono-Regular.ttf',
|
||||||
|
]
|
||||||
|
|
||||||
|
font_path = None
|
||||||
|
for p in FONT_CANDIDATES:
|
||||||
|
if os.path.exists(p):
|
||||||
|
font_path = p
|
||||||
|
break
|
||||||
|
|
||||||
|
SIZE = 22
|
||||||
|
if font_path:
|
||||||
|
font_logo = ImageFont.truetype(font_path, SIZE)
|
||||||
|
font_sub = ImageFont.truetype(font_path, SIZE)
|
||||||
|
else:
|
||||||
|
font_logo = ImageFont.load_default()
|
||||||
|
font_sub = font_logo
|
||||||
|
|
||||||
|
img = Image.new('RGB', (W, H), (0, 0, 0))
|
||||||
|
draw = ImageDraw.Draw(img)
|
||||||
|
|
||||||
|
# Measure logo block line by line to avoid font ascender offset
|
||||||
|
lines = LOGO.split('\n')
|
||||||
|
logo_lines = lines[:6]
|
||||||
|
sub_line = lines[6] if len(lines) > 6 else ''
|
||||||
|
|
||||||
|
line_h = SIZE + 2
|
||||||
|
block_h = len(logo_lines) * line_h + 8 + (SIZE if sub_line else 0)
|
||||||
|
|
||||||
|
# Width: measure the widest logo line
|
||||||
|
max_w = 0
|
||||||
|
for line in logo_lines:
|
||||||
|
bb = draw.textbbox((0, 0), line, font=font_logo)
|
||||||
|
max_w = max(max_w, bb[2] - bb[0])
|
||||||
|
|
||||||
|
x = (W - max_w) // 2
|
||||||
|
y = (H - block_h) // 2
|
||||||
|
|
||||||
|
cy = y
|
||||||
|
for line in logo_lines:
|
||||||
|
draw.text((x, cy), line, font=font_logo, fill=(0xf6, 0xc9, 0x0e))
|
||||||
|
cy += line_h
|
||||||
|
cy += 8
|
||||||
|
if sub_line:
|
||||||
|
draw.text((x, cy), sub_line, font=font_sub, fill=(0x80, 0x68, 0x18))
|
||||||
|
|
||||||
|
img.save('/usr/share/bee/wallpaper.png', optimize=True)
|
||||||
|
print('wallpaper written: /usr/share/bee/wallpaper.png')
|
||||||
|
PYEOF
|
||||||
|
|
||||||
|
echo "=== wallpaper done ==="
|
||||||
@@ -1,13 +1,139 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
# Copy memtest86+ binaries from chroot /boot into the ISO boot directory
|
# Ensure memtest is present in the final ISO even if live-build's built-in
|
||||||
# so GRUB can chainload them directly (they must be on the ISO filesystem,
|
# memtest stage does not copy the binaries or expose menu entries.
|
||||||
# not inside the squashfs).
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
for f in memtest86+x64.bin memtest86+x64.efi memtest86+ia32.bin memtest86+ia32.efi; do
|
: "${BEE_REQUIRE_MEMTEST:=0}"
|
||||||
src="chroot/boot/${f}"
|
|
||||||
if [ -f "${src}" ]; then
|
MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
|
||||||
cp "${src}" "binary/boot/${f}"
|
BINARY_BOOT_DIR="binary/boot"
|
||||||
echo "memtest: copied ${f} to binary/boot/"
|
GRUB_CFG="binary/boot/grub/grub.cfg"
|
||||||
|
ISOLINUX_CFG="binary/isolinux/live.cfg"
|
||||||
|
|
||||||
|
log() {
|
||||||
|
echo "memtest hook: $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
fail_or_warn() {
|
||||||
|
msg="$1"
|
||||||
|
if [ "${BEE_REQUIRE_MEMTEST}" = "1" ]; then
|
||||||
|
log "ERROR: ${msg}"
|
||||||
|
exit 1
|
||||||
fi
|
fi
|
||||||
done
|
log "WARNING: ${msg}"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
copy_memtest_file() {
|
||||||
|
src="$1"
|
||||||
|
base="$(basename "$src")"
|
||||||
|
dst="${BINARY_BOOT_DIR}/${base}"
|
||||||
|
|
||||||
|
[ -f "$src" ] || return 1
|
||||||
|
mkdir -p "${BINARY_BOOT_DIR}"
|
||||||
|
cp "$src" "$dst"
|
||||||
|
log "copied ${base} from ${src}"
|
||||||
|
}
|
||||||
|
|
||||||
|
extract_memtest_from_deb() {
|
||||||
|
deb="$1"
|
||||||
|
tmpdir="$(mktemp -d)"
|
||||||
|
|
||||||
|
log "extracting memtest payload from ${deb}"
|
||||||
|
dpkg-deb -x "$deb" "$tmpdir"
|
||||||
|
for f in ${MEMTEST_FILES}; do
|
||||||
|
if [ -f "${tmpdir}/boot/${f}" ]; then
|
||||||
|
copy_memtest_file "${tmpdir}/boot/${f}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
rm -rf "$tmpdir"
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure_memtest_binaries() {
|
||||||
|
missing=0
|
||||||
|
for f in ${MEMTEST_FILES}; do
|
||||||
|
[ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
|
||||||
|
done
|
||||||
|
[ "$missing" -eq 1 ] || return 0
|
||||||
|
|
||||||
|
for root in chroot/boot /boot; do
|
||||||
|
for f in ${MEMTEST_FILES}; do
|
||||||
|
[ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
missing=0
|
||||||
|
for f in ${MEMTEST_FILES}; do
|
||||||
|
[ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
|
||||||
|
done
|
||||||
|
[ "$missing" -eq 1 ] || return 0
|
||||||
|
|
||||||
|
for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
|
||||||
|
[ -d "$root" ] || continue
|
||||||
|
deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
|
||||||
|
[ -n "$deb" ] || continue
|
||||||
|
extract_memtest_from_deb "$deb"
|
||||||
|
break
|
||||||
|
done
|
||||||
|
|
||||||
|
missing=0
|
||||||
|
for f in ${MEMTEST_FILES}; do
|
||||||
|
if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
|
||||||
|
fail_or_warn "missing ${BINARY_BOOT_DIR}/${f}"
|
||||||
|
missing=1
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
[ "$missing" -eq 0 ] || return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure_grub_entry() {
|
||||||
|
[ -f "$GRUB_CFG" ] || {
|
||||||
|
fail_or_warn "missing ${GRUB_CFG}"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
grep -q '### BEE MEMTEST ###' "$GRUB_CFG" && return 0
|
||||||
|
|
||||||
|
cat >> "$GRUB_CFG" <<'EOF'
|
||||||
|
|
||||||
|
### BEE MEMTEST ###
|
||||||
|
if [ "${grub_platform}" = "efi" ]; then
|
||||||
|
menuentry "Memory Test (memtest86+)" {
|
||||||
|
chainloader /boot/memtest86+x64.efi
|
||||||
|
}
|
||||||
|
else
|
||||||
|
menuentry "Memory Test (memtest86+)" {
|
||||||
|
linux16 /boot/memtest86+x64.bin
|
||||||
|
}
|
||||||
|
fi
|
||||||
|
### /BEE MEMTEST ###
|
||||||
|
EOF
|
||||||
|
|
||||||
|
log "appended memtest entry to ${GRUB_CFG}"
|
||||||
|
}
|
||||||
|
|
||||||
|
ensure_isolinux_entry() {
|
||||||
|
[ -f "$ISOLINUX_CFG" ] || {
|
||||||
|
fail_or_warn "missing ${ISOLINUX_CFG}"
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
grep -q '### BEE MEMTEST ###' "$ISOLINUX_CFG" && return 0
|
||||||
|
|
||||||
|
cat >> "$ISOLINUX_CFG" <<'EOF'
|
||||||
|
|
||||||
|
# ### BEE MEMTEST ###
|
||||||
|
label memtest
|
||||||
|
menu label ^Memory Test (memtest86+)
|
||||||
|
linux /boot/memtest86+x64.bin
|
||||||
|
# ### /BEE MEMTEST ###
|
||||||
|
EOF
|
||||||
|
|
||||||
|
log "appended memtest entry to ${ISOLINUX_CFG}"
|
||||||
|
}
|
||||||
|
|
||||||
|
log "ensuring memtest binaries and menu entries in binary image"
|
||||||
|
ensure_memtest_binaries
|
||||||
|
ensure_grub_entry
|
||||||
|
ensure_isolinux_entry
|
||||||
|
log "memtest assets ready"
|
||||||
|
|||||||
12
iso/builder/config/package-lists/bee-amd.list.chroot
Normal file
12
iso/builder/config/package-lists/bee-amd.list.chroot
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# AMD GPU firmware
|
||||||
|
firmware-amd-graphics
|
||||||
|
|
||||||
|
# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
|
||||||
|
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
||||||
|
rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
|
||||||
|
rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
|
||||||
|
rocblas=%%ROCBLAS_VERSION%%
|
||||||
|
rocrand=%%ROCRAND_VERSION%%
|
||||||
|
hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
|
||||||
|
hipblaslt=%%HIPBLASLT_VERSION%%
|
||||||
|
comgr=%%COMGR_VERSION%%
|
||||||
1
iso/builder/config/package-lists/bee-nogpu.list.chroot
Normal file
1
iso/builder/config/package-lists/bee-nogpu.list.chroot
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# No GPU variant — no NVIDIA, no AMD/ROCm packages
|
||||||
12
iso/builder/config/package-lists/bee-nvidia.list.chroot
Normal file
12
iso/builder/config/package-lists/bee-nvidia.list.chroot
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# NVIDIA DCGM (Data Center GPU Manager).
|
||||||
|
# Validate uses dcgmi diagnostics; Burn uses dcgmproftester as the official
|
||||||
|
# NVIDIA max-compute recipe. The smoketest/runtime contract treats
|
||||||
|
# dcgmproftester as required in the LiveCD.
|
||||||
|
# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with
|
||||||
|
# CUDA 13 userspace, so install the CUDA 13 build plus proprietary components
|
||||||
|
# explicitly.
|
||||||
|
datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
|
||||||
|
datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
|
||||||
|
datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
|
||||||
|
ocl-icd-libopencl1
|
||||||
|
clinfo
|
||||||
@@ -21,8 +21,15 @@ openssh-server
|
|||||||
# Disk installer
|
# Disk installer
|
||||||
squashfs-tools
|
squashfs-tools
|
||||||
parted
|
parted
|
||||||
|
# Keep GRUB install tools without selecting a single active platform package.
|
||||||
|
# grub-pc and grub-efi-amd64 conflict with each other, but grub2-common
|
||||||
|
# provides grub-install/update-grub and the *-bin packages provide BIOS/UEFI modules.
|
||||||
|
grub2-common
|
||||||
grub-pc-bin
|
grub-pc-bin
|
||||||
grub-efi-amd64-bin
|
grub-efi-amd64-bin
|
||||||
|
grub-efi-amd64-signed
|
||||||
|
shim-signed
|
||||||
|
efibootmgr
|
||||||
|
|
||||||
# Filesystem support for USB export targets
|
# Filesystem support for USB export targets
|
||||||
exfatprogs
|
exfatprogs
|
||||||
@@ -39,11 +46,11 @@ vim-tiny
|
|||||||
mc
|
mc
|
||||||
htop
|
htop
|
||||||
nvtop
|
nvtop
|
||||||
|
btop
|
||||||
sudo
|
sudo
|
||||||
zstd
|
zstd
|
||||||
mstflint
|
mstflint
|
||||||
memtester
|
memtester
|
||||||
memtest86+
|
|
||||||
stress-ng
|
stress-ng
|
||||||
stressapptest
|
stressapptest
|
||||||
|
|
||||||
@@ -53,9 +60,15 @@ qrencode
|
|||||||
# Local desktop (openbox + chromium kiosk)
|
# Local desktop (openbox + chromium kiosk)
|
||||||
openbox
|
openbox
|
||||||
tint2
|
tint2
|
||||||
|
feh
|
||||||
|
python3-pil
|
||||||
xorg
|
xorg
|
||||||
xterm
|
xterm
|
||||||
chromium
|
chromium
|
||||||
|
mousepad
|
||||||
|
pcmanfm
|
||||||
|
ristretto
|
||||||
|
mupdf
|
||||||
xserver-xorg-video-fbdev
|
xserver-xorg-video-fbdev
|
||||||
xserver-xorg-video-vesa
|
xserver-xorg-video-vesa
|
||||||
lightdm
|
lightdm
|
||||||
@@ -64,26 +77,11 @@ lightdm
|
|||||||
firmware-linux-free
|
firmware-linux-free
|
||||||
firmware-linux-nonfree
|
firmware-linux-nonfree
|
||||||
firmware-misc-nonfree
|
firmware-misc-nonfree
|
||||||
firmware-amd-graphics
|
|
||||||
firmware-realtek
|
firmware-realtek
|
||||||
firmware-intel-sound
|
|
||||||
firmware-bnx2
|
firmware-bnx2
|
||||||
firmware-bnx2x
|
firmware-bnx2x
|
||||||
firmware-cavium
|
firmware-cavium
|
||||||
firmware-qlogic
|
firmware-qlogic
|
||||||
|
|
||||||
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
|
|
||||||
datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
|
||||||
|
|
||||||
# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
|
|
||||||
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
|
||||||
rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
|
|
||||||
rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
|
|
||||||
rocblas=%%ROCBLAS_VERSION%%
|
|
||||||
rocrand=%%ROCRAND_VERSION%%
|
|
||||||
hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
|
|
||||||
hipblaslt=%%HIPBLASLT_VERSION%%
|
|
||||||
comgr=%%COMGR_VERSION%%
|
|
||||||
|
|
||||||
# glibc compat helpers (for any external binaries that need it)
|
# glibc compat helpers (for any external binaries that need it)
|
||||||
libc6
|
libc6
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
|
|||||||
# --- PATH & binaries ---
|
# --- PATH & binaries ---
|
||||||
echo "-- PATH & binaries --"
|
echo "-- PATH & binaries --"
|
||||||
for tool in dmidecode smartctl nvme ipmitool lspci bee; do
|
for tool in dmidecode smartctl nvme ipmitool lspci bee; do
|
||||||
if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
|
if p=$(PATH="/usr/local/bin:/usr/sbin:/sbin:$PATH" command -v "$tool" 2>/dev/null); then
|
||||||
ok "$tool found: $p"
|
ok "$tool found: $p"
|
||||||
else
|
else
|
||||||
fail "$tool: NOT FOUND"
|
fail "$tool: NOT FOUND"
|
||||||
@@ -52,6 +52,45 @@ else
|
|||||||
fail "nvidia-smi: NOT FOUND"
|
fail "nvidia-smi: NOT FOUND"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if p=$(PATH="/usr/local/bin:$PATH" command -v dcgmi 2>/dev/null); then
|
||||||
|
ok "dcgmi found: $p"
|
||||||
|
else
|
||||||
|
fail "dcgmi: NOT FOUND"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if p=$(PATH="/usr/local/bin:$PATH" command -v nv-hostengine 2>/dev/null); then
|
||||||
|
ok "nv-hostengine found: $p"
|
||||||
|
else
|
||||||
|
fail "nv-hostengine: NOT FOUND"
|
||||||
|
fi
|
||||||
|
|
||||||
|
DCGM_PROFTESTER=""
|
||||||
|
for tool in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
|
||||||
|
if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
|
||||||
|
DCGM_PROFTESTER="$p"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
if [ -n "$DCGM_PROFTESTER" ]; then
|
||||||
|
ok "dcgmproftester found: $DCGM_PROFTESTER"
|
||||||
|
else
|
||||||
|
fail "dcgmproftester: NOT FOUND"
|
||||||
|
fi
|
||||||
|
|
||||||
|
for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf; do
|
||||||
|
if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
|
||||||
|
ok "$tool found: $p"
|
||||||
|
else
|
||||||
|
fail "$tool: NOT FOUND"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if p=$(PATH="/usr/local/bin:$PATH" command -v nvbandwidth 2>/dev/null); then
|
||||||
|
ok "nvbandwidth found: $p"
|
||||||
|
else
|
||||||
|
warn "nvbandwidth: NOT FOUND"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- NVIDIA modules --"
|
echo "-- NVIDIA modules --"
|
||||||
KO_DIR="/usr/local/lib/nvidia"
|
KO_DIR="/usr/local/lib/nvidia"
|
||||||
@@ -109,6 +148,40 @@ else
|
|||||||
fail "nvidia-smi: not found in PATH"
|
fail "nvidia-smi: not found in PATH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "-- OpenCL / John --"
|
||||||
|
if [ -f /etc/OpenCL/vendors/nvidia.icd ]; then
|
||||||
|
ok "OpenCL ICD present: /etc/OpenCL/vendors/nvidia.icd"
|
||||||
|
else
|
||||||
|
fail "OpenCL ICD missing: /etc/OpenCL/vendors/nvidia.icd"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ldconfig -p 2>/dev/null | grep -q "libnvidia-opencl.so.1"; then
|
||||||
|
ok "libnvidia-opencl.so.1 present in linker cache"
|
||||||
|
else
|
||||||
|
fail "libnvidia-opencl.so.1 missing from linker cache"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v clinfo >/dev/null 2>&1; then
|
||||||
|
if clinfo -l 2>/dev/null | grep -q "Platform"; then
|
||||||
|
ok "clinfo: OpenCL platform detected"
|
||||||
|
else
|
||||||
|
fail "clinfo: no OpenCL platform detected"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
fail "clinfo: not found in PATH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v john >/dev/null 2>&1; then
|
||||||
|
if john --list=opencl-devices 2>/dev/null | grep -q "Device #"; then
|
||||||
|
ok "john: OpenCL devices detected"
|
||||||
|
else
|
||||||
|
fail "john: no OpenCL devices detected"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
fail "john: not found in PATH"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- lib symlinks --"
|
echo "-- lib symlinks --"
|
||||||
for lib in libnvidia-ml libcuda; do
|
for lib in libnvidia-ml libcuda; do
|
||||||
@@ -129,6 +202,12 @@ for svc in bee-nvidia bee-network bee-preflight bee-audit bee-web; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if systemctl is-active --quiet bee-selfheal.timer 2>/dev/null; then
|
||||||
|
ok "timer active: bee-selfheal.timer"
|
||||||
|
else
|
||||||
|
fail "timer NOT active: bee-selfheal.timer"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- runtime health --"
|
echo "-- runtime health --"
|
||||||
if [ -f /appdata/bee/export/runtime-health.json ] && [ -s /appdata/bee/export/runtime-health.json ]; then
|
if [ -f /appdata/bee/export/runtime-health.json ] && [ -s /appdata/bee/export/runtime-health.json ]; then
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
export PATH="$PATH:/usr/local/bin:/opt/rocm/bin:/opt/rocm/sbin"
|
export PATH="$PATH:/usr/local/bin:/usr/sbin:/sbin:/opt/rocm/bin:/opt/rocm/sbin"
|
||||||
|
|
||||||
# Print web UI URLs on the local console at login.
|
# Print web UI URLs on the local console at login.
|
||||||
if [ -z "${SSH_CONNECTION:-}" ] \
|
if [ -z "${SSH_CONNECTION:-}" ] \
|
||||||
|
|||||||
@@ -1,14 +1,13 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bee: run hardware audit
|
Description=Bee: hardware audit
|
||||||
After=bee-network.service bee-nvidia.service bee-preflight.service
|
After=bee-preflight.service bee-network.service bee-nvidia.service
|
||||||
Before=bee-web.service
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
|
RemainAfterExit=yes
|
||||||
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /usr/local/bin/bee audit --runtime auto --output file:/appdata/bee/export/bee-audit.json
|
||||||
StandardOutput=journal
|
StandardOutput=journal
|
||||||
StandardError=journal
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
18
iso/overlay/etc/systemd/system/bee-boot-status.service
Normal file
18
iso/overlay/etc/systemd/system/bee-boot-status.service
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Bee: boot status display
|
||||||
|
After=systemd-user-sessions.service
|
||||||
|
Before=getty@tty1.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
RemainAfterExit=no
|
||||||
|
ExecStart=/usr/local/bin/bee-boot-status
|
||||||
|
TTYPath=/dev/tty1
|
||||||
|
StandardInput=tty
|
||||||
|
StandardOutput=tty
|
||||||
|
StandardError=tty
|
||||||
|
TTYReset=yes
|
||||||
|
TTYVHangup=yes
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
9
iso/overlay/etc/systemd/system/bee-selfheal.service
Normal file
9
iso/overlay/etc/systemd/system/bee-selfheal.service
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Bee: periodic runtime self-heal
|
||||||
|
After=bee-web.service bee-audit.service bee-preflight.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-selfheal.log /usr/local/bin/bee-selfheal
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
11
iso/overlay/etc/systemd/system/bee-selfheal.timer
Normal file
11
iso/overlay/etc/systemd/system/bee-selfheal.timer
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Bee: run self-heal checks periodically
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnBootSec=45sec
|
||||||
|
OnUnitActiveSec=60sec
|
||||||
|
AccuracySec=15sec
|
||||||
|
Unit=bee-selfheal.service
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
@@ -1,16 +1,18 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Bee: hardware audit web viewer
|
Description=Bee: hardware audit web viewer
|
||||||
After=bee-network.service
|
StartLimitIntervalSec=0
|
||||||
Wants=bee-audit.service
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=2
|
RestartSec=3
|
||||||
StandardOutput=journal
|
StandardOutput=journal
|
||||||
StandardError=journal
|
StandardError=journal
|
||||||
LimitMEMLOCK=infinity
|
LimitMEMLOCK=infinity
|
||||||
|
# Keep the web server responsive during GPU/CPU stress (children inherit nice+10
|
||||||
|
# via Setpriority in runCmdJob, but the bee-web parent stays at 0).
|
||||||
|
Nice=0
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
@@ -0,0 +1,2 @@
|
|||||||
|
[Unit]
|
||||||
|
After=bee-boot-status.service
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
[Unit]
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
ExecStartPre=/usr/local/bin/bee-display-mode
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user