Compare commits

..

15 Commits
v4.4 ... v5.6

50 changed files with 6689 additions and 785 deletions

View File

@@ -1,9 +1,10 @@
LISTEN ?= :8080 LISTEN ?= :8080
AUDIT_PATH ?= AUDIT_PATH ?=
EXPORT_DIR ?= $(CURDIR)/.tmp/export
VERSION ?= $(shell sh ./scripts/resolve-version.sh) VERSION ?= $(shell sh ./scripts/resolve-version.sh)
GO_LDFLAGS := -X main.Version=$(VERSION) GO_LDFLAGS := -X main.Version=$(VERSION)
RUN_ARGS := web --listen $(LISTEN) RUN_ARGS := web --listen $(LISTEN) --export-dir $(EXPORT_DIR)
ifneq ($(AUDIT_PATH),) ifneq ($(AUDIT_PATH),)
RUN_ARGS += --audit-path $(AUDIT_PATH) RUN_ARGS += --audit-path $(AUDIT_PATH)
endif endif
@@ -11,6 +12,7 @@ endif
.PHONY: run build test .PHONY: run build test
run: run:
mkdir -p $(EXPORT_DIR)
go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS) go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
build: build:

View File

@@ -7,6 +7,8 @@ import (
"io" "io"
"log/slog" "log/slog"
"os" "os"
"runtime/debug"
"strconv"
"strings" "strings"
"bee/audit/internal/app" "bee/audit/internal/app"
@@ -29,10 +31,19 @@ func main() {
os.Exit(run(os.Args[1:], os.Stdout, os.Stderr)) os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
} }
func run(args []string, stdout, stderr io.Writer) int { func run(args []string, stdout, stderr io.Writer) (exitCode int) {
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
Level: slog.LevelInfo, Level: slog.LevelInfo,
}))) })))
defer func() {
if rec := recover(); rec != nil {
slog.Error("fatal panic",
"panic", fmt.Sprint(rec),
"stack", string(debug.Stack()),
)
exitCode = 1
}
}()
if len(args) == 0 { if len(args) == 0 {
printRootUsage(stderr) printRootUsage(stderr)
@@ -58,6 +69,8 @@ func run(args []string, stdout, stderr io.Writer) int {
return runWeb(args[1:], stdout, stderr) return runWeb(args[1:], stdout, stderr)
case "sat": case "sat":
return runSAT(args[1:], stdout, stderr) return runSAT(args[1:], stdout, stderr)
case "benchmark":
return runBenchmark(args[1:], stdout, stderr)
case "version", "--version", "-version": case "version", "--version", "-version":
fmt.Fprintln(stdout, Version) fmt.Fprintln(stdout, Version)
return 0 return 0
@@ -74,8 +87,9 @@ func printRootUsage(w io.Writer) {
bee preflight --output stdout|file:<path> bee preflight --output stdout|file:<path>
bee export --target <device> bee export --target <device>
bee support-bundle --output stdout|file:<path> bee support-bundle --output stdout|file:<path>
bee web --listen :80 --audit-path `+app.DefaultAuditJSONPath+` bee web --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
bee sat nvidia|memory|storage|cpu [--duration <seconds>] bee sat nvidia|memory|storage|cpu [--duration <seconds>]
bee benchmark nvidia [--profile standard|stability|overnight]
bee version bee version
bee help [command]`) bee help [command]`)
} }
@@ -94,6 +108,8 @@ func runHelp(args []string, stdout, stderr io.Writer) int {
return runWeb([]string{"--help"}, stdout, stdout) return runWeb([]string{"--help"}, stdout, stdout)
case "sat": case "sat":
return runSAT([]string{"--help"}, stdout, stderr) return runSAT([]string{"--help"}, stdout, stderr)
case "benchmark":
return runBenchmark([]string{"--help"}, stdout, stderr)
case "version": case "version":
fmt.Fprintln(stdout, "usage: bee version") fmt.Fprintln(stdout, "usage: bee version")
return 0 return 0
@@ -280,7 +296,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("web", flag.ContinueOnError) fs := flag.NewFlagSet("web", flag.ContinueOnError)
fs.SetOutput(stderr) fs.SetOutput(stderr)
listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80") listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
auditPath := fs.String("audit-path", app.DefaultAuditJSONPath, "path to the latest audit JSON snapshot") auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot")
exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles") exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
title := fs.String("title", "Bee Hardware Audit", "page title") title := fs.String("title", "Bee Hardware Audit", "page title")
fs.Usage = func() { fs.Usage = func() {
@@ -383,3 +399,85 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
slog.Info("sat archive written", "target", target, "path", archive) slog.Info("sat archive written", "target", target, "path", archive)
return 0 return 0
} }
// runBenchmark handles "bee benchmark <target>". Only the "nvidia" target is
// supported; flags select the profile, the GPU set (include/exclude lists),
// the per-GPU buffer size, and whether the multi-GPU NCCL interconnect
// benchmark runs.
//
// Exit codes: 0 on success or help, 1 on benchmark failure, 2 on usage errors.
func runBenchmark(args []string, stdout, stderr io.Writer) int {
	const usage = "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]"
	if len(args) == 0 {
		fmt.Fprintln(stderr, usage)
		return 2
	}
	if args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
		fmt.Fprintln(stdout, usage)
		return 0
	}
	target := args[0]
	if target != "nvidia" {
		fmt.Fprintf(stderr, "bee benchmark: unknown target %q\n", target)
		fmt.Fprintln(stderr, usage)
		return 2
	}
	fs := flag.NewFlagSet("benchmark", flag.ContinueOnError)
	fs.SetOutput(stderr)
	profile := fs.String("profile", platform.NvidiaBenchmarkProfileStandard, "benchmark profile: standard, stability, overnight")
	devices := fs.String("devices", "", "comma-separated GPU indices to include")
	exclude := fs.String("exclude", "", "comma-separated GPU indices to exclude")
	sizeMB := fs.Int("size-mb", 0, "per-GPU benchmark buffer size in MB (0 = auto)")
	skipNCCL := fs.Bool("skip-nccl", false, "skip multi-GPU NCCL interconnect benchmark")
	if err := fs.Parse(args[1:]); err != nil {
		if err == flag.ErrHelp {
			return 0
		}
		return 2
	}
	if fs.NArg() != 0 {
		fmt.Fprintln(stderr, "bee benchmark: unexpected arguments")
		return 2
	}
	includeIndices, err := parseBenchmarkIndexCSV(*devices)
	if err != nil {
		fmt.Fprintf(stderr, "bee benchmark: invalid --devices: %v\n", err)
		return 2
	}
	excludeIndices, err := parseBenchmarkIndexCSV(*exclude)
	if err != nil {
		fmt.Fprintf(stderr, "bee benchmark: invalid --exclude: %v\n", err)
		return 2
	}
	application := app.New(platform.New())
	// Write progress to the injected stderr writer (not os.Stderr directly)
	// so output stays capturable in tests, matching run/runWeb/runSAT.
	logLine := func(s string) { fmt.Fprintln(stderr, s) }
	// Empty baseDir lets the app layer pick its default benchmark directory.
	archive, err := application.RunNvidiaBenchmark("", platform.NvidiaBenchmarkOptions{
		Profile:           *profile,
		SizeMB:            *sizeMB,
		GPUIndices:        includeIndices,
		ExcludeGPUIndices: excludeIndices,
		RunNCCL:           !*skipNCCL,
	}, logLine)
	if err != nil {
		slog.Error("run benchmark", "target", target, "err", err)
		return 1
	}
	slog.Info("benchmark archive written", "target", target, "path", archive)
	return 0
}
// parseBenchmarkIndexCSV parses a comma-separated list of non-negative GPU
// indices (e.g. "0,1,3"). Blank tokens are skipped, and an empty or
// whitespace-only input yields a nil slice. Any non-numeric or negative
// token produces an error naming the offending token.
func parseBenchmarkIndexCSV(raw string) ([]int, error) {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return nil, nil
	}
	var out []int
	for _, token := range strings.Split(trimmed, ",") {
		token = strings.TrimSpace(token)
		if token == "" {
			continue
		}
		n, err := strconv.Atoi(token)
		if err != nil || n < 0 {
			return nil, fmt.Errorf("bad gpu index %q", token)
		}
		out = append(out, n)
	}
	return out, nil
}

View File

@@ -19,17 +19,18 @@ import (
) )
var ( var (
DefaultExportDir = "/appdata/bee/export" DefaultExportDir = "/appdata/bee/export"
DefaultAuditJSONPath = DefaultExportDir + "/bee-audit.json" DefaultAuditJSONPath = DefaultExportDir + "/bee-audit.json"
DefaultAuditLogPath = DefaultExportDir + "/bee-audit.log" DefaultAuditLogPath = DefaultExportDir + "/bee-audit.log"
DefaultWebLogPath = DefaultExportDir + "/bee-web.log" DefaultWebLogPath = DefaultExportDir + "/bee-web.log"
DefaultNetworkLogPath = DefaultExportDir + "/bee-network.log" DefaultNetworkLogPath = DefaultExportDir + "/bee-network.log"
DefaultNvidiaLogPath = DefaultExportDir + "/bee-nvidia.log" DefaultNvidiaLogPath = DefaultExportDir + "/bee-nvidia.log"
DefaultSSHLogPath = DefaultExportDir + "/bee-sshsetup.log" DefaultSSHLogPath = DefaultExportDir + "/bee-sshsetup.log"
DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json" DefaultRuntimeJSONPath = DefaultExportDir + "/runtime-health.json"
DefaultRuntimeLogPath = DefaultExportDir + "/runtime-health.log" DefaultRuntimeLogPath = DefaultExportDir + "/runtime-health.log"
DefaultTechDumpDir = DefaultExportDir + "/techdump" DefaultTechDumpDir = DefaultExportDir + "/techdump"
DefaultSATBaseDir = DefaultExportDir + "/bee-sat" DefaultSATBaseDir = DefaultExportDir + "/bee-sat"
DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
) )
type App struct { type App struct {
@@ -114,6 +115,12 @@ func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
type satRunner interface { type satRunner interface {
RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
@@ -195,10 +202,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
return "stdout", err return "stdout", err
case strings.HasPrefix(output, "file:"): case strings.HasPrefix(output, "file:"):
path := strings.TrimPrefix(output, "file:") path := strings.TrimPrefix(output, "file:")
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err
}
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err return "", err
} }
return path, nil return path, nil
@@ -223,10 +227,7 @@ func (a *App) RunRuntimePreflight(output string) (string, error) {
return "stdout", err return "stdout", err
case strings.HasPrefix(output, "file:"): case strings.HasPrefix(output, "file:"):
path := strings.TrimPrefix(output, "file:") path := strings.TrimPrefix(output, "file:")
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { if err := atomicWriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err
}
if err := os.WriteFile(path, append(data, '\n'), 0644); err != nil {
return "", err return "", err
} }
return path, nil return path, nil
@@ -532,10 +533,56 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
return ActionResult{Title: "NVIDIA DCGM", Body: body}, err return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
} }
// RunNvidiaTargetedStressValidatePack runs the dcgmi diag targeted_stress
// validation pack via the SAT runner, defaulting baseDir to
// DefaultSATBaseDir when blank, and returns the archive path.
func (a *App) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	return a.sat.RunNvidiaTargetedStressValidatePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) { func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc) return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
} }
// RunNvidiaBenchmark runs the NVIDIA benchmark pack with a background
// context; see RunNvidiaBenchmarkCtx for the cancellable variant.
func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
	return a.RunNvidiaBenchmarkCtx(context.Background(), baseDir, opts, logFunc)
}
// RunNvidiaBenchmarkCtx runs the NVIDIA benchmark pack via the SAT runner,
// defaulting baseDir to DefaultBenchmarkBaseDir when blank, and returns the
// resulting archive path.
func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultBenchmarkBaseDir
	}
	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
}
// RunNvidiaOfficialComputePack runs the max-compute-load pack (dcgmproftester
// per the SAT summary labels) via the SAT runner, defaulting baseDir to
// DefaultSATBaseDir when blank, and returns the archive path.
func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
// RunNvidiaTargetedPowerPack runs the dcgmi diag targeted_power pack via the
// SAT runner, defaulting baseDir to DefaultSATBaseDir when blank, and
// returns the archive path.
func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	return a.sat.RunNvidiaTargetedPowerPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
// RunNvidiaPulseTestPack runs the dcgmi diag pulse_test pack via the SAT
// runner, defaulting baseDir to DefaultSATBaseDir when blank, and returns
// the archive path.
func (a *App) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	return a.sat.RunNvidiaPulseTestPack(ctx, baseDir, durationSec, gpuIndices, logFunc)
}
// RunNvidiaBandwidthPack runs the NVBandwidth pack via the SAT runner,
// defaulting baseDir to DefaultSATBaseDir when blank, and returns the
// archive path.
func (a *App) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
	if strings.TrimSpace(baseDir) == "" {
		baseDir = DefaultSATBaseDir
	}
	return a.sat.RunNvidiaBandwidthPack(ctx, baseDir, gpuIndices, logFunc)
}
func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) { func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" { if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir baseDir = DefaultSATBaseDir
@@ -886,6 +933,12 @@ func latestSATSummaries() []string {
prefix string prefix string
}{ }{
{label: "NVIDIA SAT", prefix: "gpu-nvidia-"}, {label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
{label: "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)", prefix: "gpu-nvidia-targeted-stress-"},
{label: "NVIDIA Max Compute Load (dcgmproftester)", prefix: "gpu-nvidia-compute-"},
{label: "NVIDIA Targeted Power (dcgmi diag targeted_power)", prefix: "gpu-nvidia-targeted-power-"},
{label: "NVIDIA Pulse Test (dcgmi diag pulse_test)", prefix: "gpu-nvidia-pulse-"},
{label: "NVIDIA Interconnect Test (NCCL all_reduce_perf)", prefix: "gpu-nvidia-nccl-"},
{label: "NVIDIA Bandwidth Test (NVBandwidth)", prefix: "gpu-nvidia-bandwidth-"},
{label: "Memory SAT", prefix: "memory-"}, {label: "Memory SAT", prefix: "memory-"},
{label: "Storage SAT", prefix: "storage-"}, {label: "Storage SAT", prefix: "storage-"},
{label: "CPU SAT", prefix: "cpu-"}, {label: "CPU SAT", prefix: "cpu-"},

View File

@@ -120,15 +120,21 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
} }
type fakeSAT struct { type fakeSAT struct {
runNvidiaFn func(string) (string, error) runNvidiaFn func(string) (string, error)
runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error) runNvidiaBenchmarkFn func(string, platform.NvidiaBenchmarkOptions) (string, error)
runMemoryFn func(string) (string, error) runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error)
runStorageFn func(string) (string, error) runNvidiaComputeFn func(string, int, []int) (string, error)
runCPUFn func(string, int) (string, error) runNvidiaPowerFn func(string, int, []int) (string, error)
detectVendorFn func() string runNvidiaPulseFn func(string, int, []int) (string, error)
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error) runNvidiaBandwidthFn func(string, []int) (string, error)
runAMDPackFn func(string) (string, error) runNvidiaTargetedStressFn func(string, int, []int) (string, error)
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error) runMemoryFn func(string) (string, error)
runStorageFn func(string) (string, error)
runCPUFn func(string, int) (string, error)
detectVendorFn func() string
listAMDGPUsFn func() ([]platform.AMDGPUInfo, error)
runAMDPackFn func(string) (string, error)
listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
} }
func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) { func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
@@ -139,6 +145,48 @@ func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir s
return f.runNvidiaFn(baseDir) return f.runNvidiaFn(baseDir)
} }
// RunNvidiaBenchmark delegates to runNvidiaBenchmarkFn when set, otherwise
// falls back to the generic runNvidiaFn stub.
func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
	if f.runNvidiaBenchmarkFn != nil {
		return f.runNvidiaBenchmarkFn(baseDir, opts)
	}
	return f.runNvidiaFn(baseDir)
}

// RunNvidiaTargetedStressValidatePack delegates to runNvidiaTargetedStressFn
// when set, otherwise falls back to runNvidiaFn.
func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
	if f.runNvidiaTargetedStressFn != nil {
		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
	}
	return f.runNvidiaFn(baseDir)
}

// RunNvidiaOfficialComputePack delegates to runNvidiaComputeFn when set,
// otherwise falls back to runNvidiaFn.
func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
	if f.runNvidiaComputeFn != nil {
		return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
	}
	return f.runNvidiaFn(baseDir)
}

// RunNvidiaTargetedPowerPack delegates to runNvidiaPowerFn when set,
// otherwise falls back to runNvidiaFn.
func (f fakeSAT) RunNvidiaTargetedPowerPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
	if f.runNvidiaPowerFn != nil {
		return f.runNvidiaPowerFn(baseDir, durationSec, gpuIndices)
	}
	return f.runNvidiaFn(baseDir)
}

// RunNvidiaPulseTestPack delegates to runNvidiaPulseFn when set, otherwise
// falls back to runNvidiaFn.
func (f fakeSAT) RunNvidiaPulseTestPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
	if f.runNvidiaPulseFn != nil {
		return f.runNvidiaPulseFn(baseDir, durationSec, gpuIndices)
	}
	return f.runNvidiaFn(baseDir)
}

// RunNvidiaBandwidthPack delegates to runNvidiaBandwidthFn when set,
// otherwise falls back to runNvidiaFn.
func (f fakeSAT) RunNvidiaBandwidthPack(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
	if f.runNvidiaBandwidthFn != nil {
		return f.runNvidiaBandwidthFn(baseDir, gpuIndices)
	}
	return f.runNvidiaFn(baseDir)
}
func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) { func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
if f.runNvidiaStressFn != nil { if f.runNvidiaStressFn != nil {
return f.runNvidiaStressFn(baseDir, opts) return f.runNvidiaStressFn(baseDir, opts)
@@ -754,6 +802,26 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
} }
} }
for _, want := range []string{
"/system/ip-link.txt",
"/system/ip-link-stats.txt",
"/system/ethtool-info.txt",
"/system/ethtool-link.txt",
"/system/ethtool-module.txt",
"/system/mstflint-query.txt",
} {
var found bool
for _, name := range names {
if contains(name, want) {
found = true
break
}
}
if !found {
t.Fatalf("support bundle missing %s, names=%v", want, names)
}
}
var foundRaw bool var foundRaw bool
for _, name := range names { for _, name := range names {
if contains(name, "/export/bee-sat/memory-run/verbose.log") { if contains(name, "/export/bee-sat/memory-run/verbose.log") {

View File

@@ -0,0 +1,48 @@
package app
import (
"fmt"
"os"
"path/filepath"
)
func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
}
tmpPath := path + ".tmp"
f, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
if err != nil {
return fmt.Errorf("open temp %s: %w", tmpPath, err)
}
success := false
defer func() {
_ = f.Close()
if !success {
_ = os.Remove(tmpPath)
}
}()
if _, err := f.Write(data); err != nil {
return fmt.Errorf("write temp %s: %w", tmpPath, err)
}
if err := f.Sync(); err != nil {
return fmt.Errorf("sync temp %s: %w", tmpPath, err)
}
if err := f.Close(); err != nil {
return fmt.Errorf("close temp %s: %w", tmpPath, err)
}
if err := os.Rename(tmpPath, path); err != nil {
return fmt.Errorf("rename %s -> %s: %w", tmpPath, path, err)
}
if dir, err := os.Open(filepath.Dir(path)); err == nil {
_ = dir.Sync()
_ = dir.Close()
}
success = true
return nil
}

View File

@@ -0,0 +1,71 @@
package app
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"bee/audit/internal/schema"
)
// TestAtomicWriteFileReplacesTargetWithoutLeavingTmp verifies that a
// successful atomic write replaces existing content and that the
// "<path>.tmp" scratch name is not left behind.
func TestAtomicWriteFileReplacesTargetWithoutLeavingTmp(t *testing.T) {
	target := filepath.Join(t.TempDir(), "bee-audit.json")
	if err := os.WriteFile(target, []byte("old\n"), 0644); err != nil {
		t.Fatalf("seed file: %v", err)
	}
	if err := atomicWriteFile(target, []byte("new\n"), 0644); err != nil {
		t.Fatalf("atomicWriteFile: %v", err)
	}
	got, err := os.ReadFile(target)
	if err != nil {
		t.Fatalf("read final: %v", err)
	}
	if want := "new\n"; string(got) != want {
		t.Fatalf("final content=%q want %q", string(got), want)
	}
	if _, err := os.Stat(target + ".tmp"); !os.IsNotExist(err) {
		t.Fatalf("tmp file should be absent after success, err=%v", err)
	}
}
// TestRunRuntimePreflightWritesAtomically checks that "file:" output lands
// at the requested path as valid JSON and leaves no ".tmp" scratch file.
func TestRunRuntimePreflightWritesAtomically(t *testing.T) {
	dest := filepath.Join(t.TempDir(), "runtime-health.json")
	application := &App{
		runtime: fakeRuntime{
			collectFn: func(exportDir string) (schema.RuntimeHealth, error) {
				health := schema.RuntimeHealth{
					Status:      "OK",
					ExportDir:   exportDir,
					DriverReady: true,
					CUDAReady:   true,
				}
				return health, nil
			},
		},
	}
	written, err := application.RunRuntimePreflight("file:" + dest)
	if err != nil {
		t.Fatalf("RunRuntimePreflight: %v", err)
	}
	if written != dest {
		t.Fatalf("path=%q want %q", written, dest)
	}
	if _, err := os.Stat(dest + ".tmp"); !os.IsNotExist(err) {
		t.Fatalf("tmp file should be absent after success, err=%v", err)
	}
	payload, err := os.ReadFile(dest)
	if err != nil {
		t.Fatalf("read runtime file: %v", err)
	}
	var decoded schema.RuntimeHealth
	if err := json.Unmarshal(payload, &decoded); err != nil {
		t.Fatalf("json unmarshal: %v", err)
	}
	if decoded.Status != "OK" {
		t.Fatalf("status=%q want OK", decoded.Status)
	}
}

View File

@@ -21,12 +21,12 @@ type ComponentStatusDB struct {
// ComponentStatusRecord holds the current and historical health of one hardware component. // ComponentStatusRecord holds the current and historical health of one hardware component.
type ComponentStatusRecord struct { type ComponentStatusRecord struct {
ComponentKey string `json:"component_key"` ComponentKey string `json:"component_key"`
Status string `json:"status"` // "OK", "Warning", "Critical", "Unknown" Status string `json:"status"` // "OK", "Warning", "Critical", "Unknown"
LastCheckedAt time.Time `json:"last_checked_at"` LastCheckedAt time.Time `json:"last_checked_at"`
LastChangedAt time.Time `json:"last_changed_at"` LastChangedAt time.Time `json:"last_changed_at"`
ErrorSummary string `json:"error_summary,omitempty"` ErrorSummary string `json:"error_summary,omitempty"`
History []ComponentStatusEntry `json:"history"` History []ComponentStatusEntry `json:"history"`
} }
// ComponentStatusEntry is one observation written to a component's history. // ComponentStatusEntry is one observation written to a component's history.
@@ -179,7 +179,9 @@ func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) {
// Map SAT target to component keys. // Map SAT target to component keys.
switch target { switch target {
case "nvidia", "amd", "nvidia-stress", "amd-stress", "amd-mem", "amd-bandwidth": case "nvidia", "nvidia-targeted-stress", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
"nvidia-interconnect", "nvidia-bandwidth", "amd", "nvidia-stress",
"amd-stress", "amd-mem", "amd-bandwidth":
db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall) db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall)
case "memory", "memory-stress", "sat-stress": case "memory", "memory-stress", "sat-stress":
db.Record("memory:all", source, dbStatus, target+" SAT: "+overall) db.Record("memory:all", source, dbStatus, target+" SAT: "+overall)

View File

@@ -19,6 +19,8 @@ var supportBundleServices = []string{
"bee-network.service", "bee-network.service",
"bee-nvidia.service", "bee-nvidia.service",
"bee-preflight.service", "bee-preflight.service",
"bee-selfheal.service",
"bee-selfheal.timer",
"bee-sshsetup.service", "bee-sshsetup.service",
} }
@@ -32,6 +34,8 @@ var supportBundleCommands = []struct {
{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}}, {name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}}, {name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}}, {name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
{name: "system/ip-route.txt", cmd: []string{"ip", "route"}}, {name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
{name: "system/mount.txt", cmd: []string{"mount"}}, {name: "system/mount.txt", cmd: []string{"mount"}},
{name: "system/df-h.txt", cmd: []string{"df", "-h"}}, {name: "system/df-h.txt", cmd: []string{"df", "-h"}},
@@ -47,6 +51,83 @@ for d in /sys/bus/pci/devices/*/; do
printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)" printf " %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
done done
done done
`}},
{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -i "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
if ! command -v ethtool >/dev/null 2>&1; then
echo "ethtool not found"
exit 0
fi
found=0
for path in /sys/class/net/*; do
[ -e "$path" ] || continue
iface=$(basename "$path")
[ "$iface" = "lo" ] && continue
found=1
echo "=== $iface ==="
ethtool -m "$iface" 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no interfaces found"
fi
`}},
{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
if ! command -v mstflint >/dev/null 2>&1; then
echo "mstflint not found"
exit 0
fi
found=0
for path in /sys/bus/pci/devices/*; do
[ -e "$path/vendor" ] || continue
vendor=$(cat "$path/vendor" 2>/dev/null)
[ "$vendor" = "0x15b3" ] || continue
bdf=$(basename "$path")
found=1
echo "=== $bdf ==="
mstflint -d "$bdf" q 2>&1 || true
echo
done
if [ "$found" -eq 0 ]; then
echo "no Mellanox/NVIDIA networking devices found"
fi
`}}, `}},
} }

View File

@@ -2,18 +2,21 @@ package collector
import ( import (
"bee/audit/internal/schema" "bee/audit/internal/schema"
"context"
"log/slog" "log/slog"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"time"
) )
const mellanoxVendorID = 0x15b3 const mellanoxVendorID = 0x15b3
const nicProbeTimeout = 2 * time.Second
var ( var (
mstflintQuery = func(bdf string) (string, error) { mstflintQuery = func(bdf string) (string, error) {
out, err := exec.Command("mstflint", "-d", bdf, "q").Output() out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
if err != nil { if err != nil {
return "", err return "", err
} }
@@ -21,7 +24,7 @@ var (
} }
ethtoolInfoQuery = func(iface string) (string, error) { ethtoolInfoQuery = func(iface string) (string, error) {
out, err := exec.Command("ethtool", "-i", iface).Output() out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
if err != nil { if err != nil {
return "", err return "", err
} }
@@ -29,6 +32,14 @@ var (
} }
netIfacesByBDF = listNetIfacesByBDF netIfacesByBDF = listNetIfacesByBDF
readNetCarrierFile = func(iface string) (string, error) {
path := filepath.Join("/sys/class/net", iface, "carrier")
raw, err := os.ReadFile(path)
if err != nil {
return "", err
}
return strings.TrimSpace(string(raw)), nil
}
) )
// enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with // enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
@@ -162,3 +173,17 @@ func listNetIfacesByBDF(bdf string) []string {
} }
return ifaces return ifaces
} }
func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
return exec.CommandContext(ctx, name, args...).Output()
}
// interfaceHasCarrier reports whether the interface's sysfs carrier flag
// reads "1" (link up). Any read error is treated as no carrier.
func interfaceHasCarrier(iface string) bool {
	value, err := readNetCarrierFile(iface)
	if err != nil {
		return false
	}
	// Trim defensively even though the default reader already trims; a
	// test stub may return raw file content.
	return strings.TrimSpace(value) == "1"
}

View File

@@ -12,7 +12,7 @@ import (
var ( var (
ethtoolModuleQuery = func(iface string) (string, error) { ethtoolModuleQuery = func(iface string) (string, error) {
out, err := raidToolQuery("ethtool", "-m", iface) out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
if err != nil { if err != nil {
return "", err return "", err
} }
@@ -58,10 +58,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
} }
} }
if out, err := ethtoolModuleQuery(iface); err == nil { if interfaceHasCarrier(iface) {
if injectSFPDOMTelemetry(&devs[i], out) { if out, err := ethtoolModuleQuery(iface); err == nil {
enriched++ if injectSFPDOMTelemetry(&devs[i], out) {
continue enriched++
continue
}
} }
} }
if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil { if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {

View File

@@ -57,6 +57,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
origReadMAC := readNetAddressFile origReadMAC := readNetAddressFile
origEth := ethtoolInfoQuery origEth := ethtoolInfoQuery
origModule := ethtoolModuleQuery origModule := ethtoolModuleQuery
origCarrier := readNetCarrierFile
t.Cleanup(func() { t.Cleanup(func() {
queryPCILSPCIDetail = origDetail queryPCILSPCIDetail = origDetail
readPCIVPDFile = origVPD readPCIVPDFile = origVPD
@@ -64,6 +65,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
readNetAddressFile = origReadMAC readNetAddressFile = origReadMAC
ethtoolInfoQuery = origEth ethtoolInfoQuery = origEth
ethtoolModuleQuery = origModule ethtoolModuleQuery = origModule
readNetCarrierFile = origCarrier
}) })
queryPCILSPCIDetail = func(bdf string) (string, error) { queryPCILSPCIDetail = func(bdf string) (string, error) {
@@ -82,6 +84,7 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
} }
return "aa:bb:cc:dd:ee:ff", nil return "aa:bb:cc:dd:ee:ff", nil
} }
readNetCarrierFile = func(string) (string, error) { return "1", nil }
ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") } ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") } ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
@@ -101,6 +104,42 @@ func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) {
} }
} }
// Without link carrier the enrichment path must still collect MAC addresses
// but must never invoke "ethtool -m" (optics queries can stall on dark ports).
func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) {
	savedIfaces := netIfacesByBDF
	savedMAC := readNetAddressFile
	savedInfo := ethtoolInfoQuery
	savedModule := ethtoolModuleQuery
	savedCarrier := readNetCarrierFile
	t.Cleanup(func() {
		netIfacesByBDF = savedIfaces
		readNetAddressFile = savedMAC
		ethtoolInfoQuery = savedInfo
		ethtoolModuleQuery = savedModule
		readNetCarrierFile = savedCarrier
	})

	netIfacesByBDF = func(string) []string { return []string{"eth0"} }
	readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
	readNetCarrierFile = func(string) (string, error) { return "0", nil }
	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
	ethtoolModuleQuery = func(string) (string, error) {
		t.Fatal("ethtool -m should not be called without carrier")
		return "", nil
	}

	class := "EthernetController"
	bdf := "0000:18:00.0"
	input := []schema.HardwarePCIeDevice{{
		DeviceClass: &class,
		BDF:         &bdf,
	}}
	got := enrichPCIeWithNICTelemetry(input)
	if len(got[0].MacAddresses) != 1 || got[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
		t.Fatalf("mac_addresses=%v", got[0].MacAddresses)
	}
}
func TestDBMValue(t *testing.T) { func TestDBMValue(t *testing.T) {
tests := []struct { tests := []struct {
in string in string

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,141 @@
package platform
import (
"fmt"
"strings"
"time"
)
// renderBenchmarkReport renders the human-readable plain-text report for a
// completed NVIDIA benchmark run. result.json remains the canonical
// machine-readable artifact; this report mirrors it for operators with an
// executive summary, per-GPU scorecards, interconnect results, methodology,
// and the list of raw files produced by the run.
func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
	var b strings.Builder
	b.WriteString("Bee NVIDIA Benchmark Report\n")
	b.WriteString("===========================\n\n")
	// Convert to UTC before formatting: the trailing "UTC" is literal label
	// text in the layout, so printing a non-UTC timestamp with that label
	// would mislabel the time. The rendered output is unchanged when
	// GeneratedAt is already UTC.
	fmt.Fprintf(&b, "Generated: %s UTC\n", result.GeneratedAt.UTC().Format("2006-01-02 15:04:05"))
	fmt.Fprintf(&b, "Host: %s\n", result.Hostname)
	fmt.Fprintf(&b, "Profile: %s\n", result.BenchmarkProfile)
	fmt.Fprintf(&b, "Overall status: %s\n", result.OverallStatus)
	fmt.Fprintf(&b, "Selected GPUs: %s\n", joinIndexList(result.SelectedGPUIndices))
	fmt.Fprintf(&b, "Normalization: %s\n\n", result.Normalization.Status)

	// Summary and warnings sections appear only when they have content.
	if len(result.Findings) > 0 {
		b.WriteString("Executive Summary\n")
		b.WriteString("-----------------\n")
		for _, finding := range result.Findings {
			fmt.Fprintf(&b, "- %s\n", finding)
		}
		b.WriteString("\n")
	}
	if len(result.Warnings) > 0 {
		b.WriteString("Warnings\n")
		b.WriteString("--------\n")
		for _, warning := range result.Warnings {
			fmt.Fprintf(&b, "- %s\n", warning)
		}
		b.WriteString("\n")
	}

	b.WriteString("Per GPU Scorecard\n")
	b.WriteString("-----------------\n")
	for _, gpu := range result.GPUs {
		fmt.Fprintf(&b, "GPU %d %s\n", gpu.Index, gpu.Name)
		fmt.Fprintf(&b, " Status: %s\n", gpu.Status)
		fmt.Fprintf(&b, " Composite score: %.2f\n", gpu.Scores.CompositeScore)
		fmt.Fprintf(&b, " Compute score: %.2f\n", gpu.Scores.ComputeScore)
		fmt.Fprintf(&b, " Power sustain: %.1f\n", gpu.Scores.PowerSustainScore)
		fmt.Fprintf(&b, " Thermal sustain: %.1f\n", gpu.Scores.ThermalSustainScore)
		fmt.Fprintf(&b, " Stability: %.1f\n", gpu.Scores.StabilityScore)
		// Interconnect score is optional per GPU; zero means not measured.
		if gpu.Scores.InterconnectScore > 0 {
			fmt.Fprintf(&b, " Interconnect: %.1f\n", gpu.Scores.InterconnectScore)
		}
		if len(gpu.DegradationReasons) > 0 {
			fmt.Fprintf(&b, " Degradation reasons: %s\n", strings.Join(gpu.DegradationReasons, ", "))
		}
		fmt.Fprintf(&b, " Avg power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.AvgPowerW, gpu.Steady.AvgTempC, gpu.Steady.AvgGraphicsClockMHz)
		fmt.Fprintf(&b, " P95 power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.P95PowerW, gpu.Steady.P95TempC, gpu.Steady.P95GraphicsClockMHz)
		if len(gpu.PrecisionResults) > 0 {
			b.WriteString(" Precision results:\n")
			for _, precision := range gpu.PrecisionResults {
				if precision.Supported {
					fmt.Fprintf(&b, " - %s: %.2f TOPS lanes=%d iterations=%d\n", precision.Name, precision.TeraOpsPerSec, precision.Lanes, precision.Iterations)
				} else {
					fmt.Fprintf(&b, " - %s: unsupported (%s)\n", precision.Name, precision.Notes)
				}
			}
		}
		fmt.Fprintf(&b, " Throttle counters (us): sw_power=%d sw_thermal=%d sync_boost=%d hw_thermal=%d hw_power_brake=%d\n",
			gpu.Throttle.SWPowerCapUS,
			gpu.Throttle.SWThermalSlowdownUS,
			gpu.Throttle.SyncBoostUS,
			gpu.Throttle.HWThermalSlowdownUS,
			gpu.Throttle.HWPowerBrakeSlowdownUS,
		)
		if len(gpu.Notes) > 0 {
			b.WriteString(" Notes:\n")
			for _, note := range gpu.Notes {
				fmt.Fprintf(&b, " - %s\n", note)
			}
		}
		b.WriteString("\n")
	}

	if result.Interconnect != nil {
		b.WriteString("Interconnect\n")
		b.WriteString("------------\n")
		fmt.Fprintf(&b, "Status: %s\n", result.Interconnect.Status)
		// Bandwidth lines only make sense when the NCCL phase actually ran.
		if result.Interconnect.Supported {
			fmt.Fprintf(&b, "Avg algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.AvgBusBWGBps)
			fmt.Fprintf(&b, "Max algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.MaxAlgBWGBps, result.Interconnect.MaxBusBWGBps)
		}
		for _, note := range result.Interconnect.Notes {
			fmt.Fprintf(&b, "- %s\n", note)
		}
		b.WriteString("\n")
	}

	b.WriteString("Methodology\n")
	b.WriteString("-----------\n")
	fmt.Fprintf(&b, "- Profile %s uses standardized baseline, warmup, steady-state, interconnect, and cooldown phases.\n", result.BenchmarkProfile)
	b.WriteString("- Single-GPU compute score comes from bee-gpu-burn cuBLASLt output when available.\n")
	b.WriteString("- Thermal and power limitations are inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
	b.WriteString("- result.json is the canonical machine-readable source for this benchmark run.\n\n")
	b.WriteString("Raw Files\n")
	b.WriteString("---------\n")
	b.WriteString("- result.json\n")
	b.WriteString("- report.txt\n")
	b.WriteString("- summary.txt\n")
	b.WriteString("- verbose.log\n")
	b.WriteString("- gpu-*-baseline-metrics.csv/html/term.txt\n")
	b.WriteString("- gpu-*-warmup.log\n")
	b.WriteString("- gpu-*-steady.log\n")
	b.WriteString("- gpu-*-steady-metrics.csv/html/term.txt\n")
	b.WriteString("- gpu-*-cooldown-metrics.csv/html/term.txt\n")
	// The NCCL log exists only when an interconnect phase was attempted.
	if result.Interconnect != nil {
		b.WriteString("- nccl-all-reduce.log\n")
	}
	return b.String()
}
// renderBenchmarkSummary renders a flat key=value summary of the run, one
// datum per line, suitable for grep/shell consumption (summary.txt).
func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
	var sb strings.Builder
	fmt.Fprintf(&sb, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
	fmt.Fprintf(&sb, "benchmark_profile=%s\n", result.BenchmarkProfile)
	fmt.Fprintf(&sb, "overall_status=%s\n", result.OverallStatus)
	fmt.Fprintf(&sb, "gpu_count=%d\n", len(result.GPUs))
	fmt.Fprintf(&sb, "normalization_status=%s\n", result.Normalization.Status)

	// Track the highest composite score; seed from the first GPU so that
	// negative scores are still reported faithfully.
	var best float64
	for i, gpu := range result.GPUs {
		score := gpu.Scores.CompositeScore
		fmt.Fprintf(&sb, "gpu_%d_status=%s\n", gpu.Index, gpu.Status)
		fmt.Fprintf(&sb, "gpu_%d_composite_score=%.2f\n", gpu.Index, score)
		if i == 0 || score > best {
			best = score
		}
	}
	fmt.Fprintf(&sb, "best_composite_score=%.2f\n", best)

	if result.Interconnect != nil {
		fmt.Fprintf(&sb, "interconnect_status=%s\n", result.Interconnect.Status)
		fmt.Fprintf(&sb, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps)
	}
	return sb.String()
}

View File

@@ -0,0 +1,147 @@
package platform
import (
"strings"
"testing"
)
// Each named profile must map to its fixed phase-duration spec; an empty
// profile name must resolve to the standard profile.
func TestResolveBenchmarkProfile(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name    string
		profile string
		want    benchmarkProfileSpec
	}{
		{
			name:    "default",
			profile: "",
			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
		},
		{
			name:    "stability",
			profile: "stability",
			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
		},
		{
			name:    "overnight",
			profile: "overnight",
			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
		},
	}
	for _, tt := range tests {
		tt := tt // capture for parallel-safe subtests on pre-1.22 toolchains
		t.Run(tt.name, func(t *testing.T) {
			if got := resolveBenchmarkProfile(tt.profile); got != tt.want {
				t.Fatalf("profile=%q got %+v want %+v", tt.profile, got, tt.want)
			}
		})
	}
}
// Normalization must honor an explicit RunNCCL=false instead of forcing the
// interconnect phase back on, while still canonicalizing the profile name.
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
	t.Parallel()

	got := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
		Profile: "stability",
		RunNCCL: false,
	})
	if got.Profile != NvidiaBenchmarkProfileStability {
		t.Fatalf("profile=%q want %q", got.Profile, NvidiaBenchmarkProfileStability)
	}
	if got.RunNCCL {
		t.Fatalf("RunNCCL should stay false when explicitly disabled")
	}
}
// Feed a representative bee-gpu-burn log through the parser and check the
// backend, compute capability, and the two precision profiles it reports.
func TestParseBenchmarkBurnLog(t *testing.T) {
	t.Parallel()

	lines := []string{
		"loader=bee-gpu-burn",
		"[gpu 0] device=NVIDIA H100",
		"[gpu 0] compute_capability=9.0",
		"[gpu 0] backend=cublasLt",
		"[gpu 0] duration_s=10",
		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
		"[gpu 0] fp16_tensor_iterations=200",
		"[gpu 0] fp8_e4m3_iterations=50",
		"[gpu 0] status=OK",
	}
	parsed := parseBenchmarkBurnLog(strings.Join(lines, "\n"))

	if parsed.Backend != "cublasLt" {
		t.Fatalf("backend=%q want cublasLt", parsed.Backend)
	}
	if parsed.ComputeCapability != "9.0" {
		t.Fatalf("compute capability=%q want 9.0", parsed.ComputeCapability)
	}
	if len(parsed.Profiles) != 2 {
		t.Fatalf("profiles=%d want 2", len(parsed.Profiles))
	}
	if parsed.Profiles[0].TeraOpsPerSec <= 0 {
		t.Fatalf("profile[0] teraops=%f want >0", parsed.Profiles[0].TeraOpsPerSec)
	}
	if parsed.Profiles[1].Category != "fp8" {
		t.Fatalf("profile[1] category=%q want fp8", parsed.Profiles[1].Category)
	}
}
// The rendered text report must surface the executive summary, the finding
// text, the composite score, and supported precision throughput lines.
func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
	t.Parallel()

	result := NvidiaBenchmarkResult{
		BenchmarkVersion:   benchmarkVersion,
		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
		OverallStatus:      "PARTIAL",
		SelectedGPUIndices: []int{0},
		Normalization: BenchmarkNormalization{
			Status: "partial",
		},
		Findings: []string{"GPU 0 spent measurable time under SW power cap."},
		GPUs: []BenchmarkGPUResult{
			{
				Index:  0,
				Name:   "NVIDIA H100",
				Status: "OK",
				Steady: BenchmarkTelemetrySummary{
					AvgPowerW:           680,
					AvgTempC:            79,
					AvgGraphicsClockMHz: 1725,
					P95PowerW:           700,
					P95TempC:            82,
					P95GraphicsClockMHz: 1800,
				},
				Scores: BenchmarkScorecard{
					ComputeScore:        1200,
					PowerSustainScore:   96,
					ThermalSustainScore: 88,
					StabilityScore:      92,
					CompositeScore:      1176,
				},
				PrecisionResults: []BenchmarkPrecisionResult{
					{Name: "fp16_tensor", Supported: true, TeraOpsPerSec: 700},
				},
				Throttle: BenchmarkThrottleCounters{
					SWPowerCapUS: 1000000,
				},
				DegradationReasons: []string{"power_capped"},
			},
		},
	}

	report := renderBenchmarkReport(result)
	needles := []string{
		"Executive Summary",
		"GPU 0 spent measurable time under SW power cap.",
		"Composite score: 1176.00",
		"fp16_tensor: 700.00 TOPS",
	}
	for _, needle := range needles {
		if !strings.Contains(report, needle) {
			t.Fatalf("report missing %q\n%s", needle, report)
		}
	}
}

View File

@@ -0,0 +1,132 @@
package platform
import "time"
// Benchmark profile names accepted by the NVIDIA benchmark runner. Each
// resolves to a fixed phase-duration spec (see resolveBenchmarkProfile).
const (
	NvidiaBenchmarkProfileStandard  = "standard"
	NvidiaBenchmarkProfileStability = "stability"
	NvidiaBenchmarkProfileOvernight = "overnight"
)

// NvidiaBenchmarkOptions configures one NVIDIA benchmark run.
type NvidiaBenchmarkOptions struct {
	Profile           string // profile name; empty resolves to the standard profile
	SizeMB            int    // workload size in MB; exact consumer not visible here — confirm semantics against the runner
	GPUIndices        []int  // explicit GPU selection; presumably empty means all GPUs — verify against caller
	ExcludeGPUIndices []int  // GPUs to leave out of the selection
	RunNCCL           bool   // whether to run the NCCL interconnect phase (an explicit false is preserved by normalization)
}

// NvidiaBenchmarkResult is the top-level, JSON-serializable outcome of a
// benchmark run. Its JSON encoding is the canonical machine-readable
// artifact (result.json); report.txt and summary.txt are rendered from it.
type NvidiaBenchmarkResult struct {
	BenchmarkVersion   string                       `json:"benchmark_version"`
	GeneratedAt        time.Time                    `json:"generated_at"`
	Hostname           string                       `json:"hostname,omitempty"`
	BenchmarkProfile   string                       `json:"benchmark_profile"`
	OverallStatus      string                       `json:"overall_status"`
	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
	Findings           []string                     `json:"findings,omitempty"`
	Warnings           []string                     `json:"warnings,omitempty"`
	Normalization      BenchmarkNormalization       `json:"normalization"`
	GPUs               []BenchmarkGPUResult         `json:"gpus"`
	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"` // nil when no interconnect phase was attempted
}
// BenchmarkNormalization summarizes the pre-run normalization step across
// the selected GPUs (overall status plus per-GPU detail).
type BenchmarkNormalization struct {
	Status string                      `json:"status"`
	Notes  []string                    `json:"notes,omitempty"`
	GPUs   []BenchmarkNormalizationGPU `json:"gpus,omitempty"`
}

// BenchmarkNormalizationGPU records the normalization outcome for a single
// GPU: persistence mode and the requested clock locks with their statuses.
type BenchmarkNormalizationGPU struct {
	Index                 int      `json:"index"`
	PersistenceMode       string   `json:"persistence_mode,omitempty"`
	GPUClockLockMHz       float64  `json:"gpu_clock_lock_mhz,omitempty"`
	GPUClockLockStatus    string   `json:"gpu_clock_lock_status,omitempty"`
	MemoryClockLockMHz    float64  `json:"memory_clock_lock_mhz,omitempty"`
	MemoryClockLockStatus string   `json:"memory_clock_lock_status,omitempty"`
	Notes                 []string `json:"notes,omitempty"`
}

// BenchmarkGPUResult is the complete per-GPU outcome: identity, clock/power
// configuration, per-phase telemetry summaries, throttle counters, precision
// throughput results, derived scores, and any degradation reasons.
type BenchmarkGPUResult struct {
	Index                  int                        `json:"index"`
	UUID                   string                     `json:"uuid,omitempty"`
	Name                   string                     `json:"name,omitempty"`
	BusID                  string                     `json:"bus_id,omitempty"`
	VBIOS                  string                     `json:"vbios,omitempty"`
	ComputeCapability      string                     `json:"compute_capability,omitempty"`
	Backend                string                     `json:"backend,omitempty"` // e.g. "cublasLt" as parsed from the bee-gpu-burn log
	Status                 string                     `json:"status"`
	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
	LockedGraphicsClockMHz float64                    `json:"locked_graphics_clock_mhz,omitempty"`
	LockedMemoryClockMHz   float64                    `json:"locked_memory_clock_mhz,omitempty"`
	Baseline               BenchmarkTelemetrySummary  `json:"baseline"`
	Steady                 BenchmarkTelemetrySummary  `json:"steady"`
	Cooldown               BenchmarkTelemetrySummary  `json:"cooldown"`
	Throttle               BenchmarkThrottleCounters  `json:"throttle_counters"`
	PrecisionResults       []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
	Scores                 BenchmarkScorecard         `json:"scores"`
	DegradationReasons     []string                   `json:"degradation_reasons,omitempty"`
	Notes                  []string                   `json:"notes,omitempty"`
}
// BenchmarkTelemetrySummary aggregates GPU telemetry samples over one phase
// (baseline, steady, or cooldown). The "CV"/"Drift" fields appear, from
// their names, to be coefficient-of-variation and relative-drift percentages
// — confirm against the sampling code.
type BenchmarkTelemetrySummary struct {
	DurationSec         float64 `json:"duration_sec"`
	Samples             int     `json:"samples"`
	AvgTempC            float64 `json:"avg_temp_c"`
	P95TempC            float64 `json:"p95_temp_c"`
	AvgPowerW           float64 `json:"avg_power_w"`
	P95PowerW           float64 `json:"p95_power_w"`
	AvgGraphicsClockMHz float64 `json:"avg_graphics_clock_mhz"`
	P95GraphicsClockMHz float64 `json:"p95_graphics_clock_mhz"`
	AvgMemoryClockMHz   float64 `json:"avg_memory_clock_mhz"`
	P95MemoryClockMHz   float64 `json:"p95_memory_clock_mhz"`
	AvgUsagePct         float64 `json:"avg_usage_pct"`
	AvgMemUsagePct      float64 `json:"avg_mem_usage_pct"`
	ClockCVPct          float64 `json:"clock_cv_pct"`
	PowerCVPct          float64 `json:"power_cv_pct"`
	TempCVPct           float64 `json:"temp_cv_pct"`
	ClockDriftPct       float64 `json:"clock_drift_pct"`
}

// BenchmarkThrottleCounters holds per-reason throttle durations in
// microseconds (the report labels them "Throttle counters (us)").
type BenchmarkThrottleCounters struct {
	SWPowerCapUS           uint64 `json:"sw_power_cap_us"`
	SWThermalSlowdownUS    uint64 `json:"sw_thermal_slowdown_us"`
	SyncBoostUS            uint64 `json:"sync_boost_us"`
	HWThermalSlowdownUS    uint64 `json:"hw_thermal_slowdown_us"`
	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
}

// BenchmarkPrecisionResult reports one precision kernel's result. When
// Supported is false, Notes carries the reason and the numeric fields are
// left at zero (and omitted from JSON).
type BenchmarkPrecisionResult struct {
	Name          string  `json:"name"`
	Category      string  `json:"category"` // e.g. "fp8" for fp8_e4m3 profiles
	Supported     bool    `json:"supported"`
	Lanes         int     `json:"lanes,omitempty"`
	M             uint64  `json:"m,omitempty"`
	N             uint64  `json:"n,omitempty"`
	K             uint64  `json:"k,omitempty"`
	Iterations    uint64  `json:"iterations,omitempty"`
	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
	Notes         string  `json:"notes,omitempty"`
}

// BenchmarkScorecard holds the derived per-GPU scores; CompositeScore is the
// overall figure of merit reported as "best_composite_score" in summaries.
type BenchmarkScorecard struct {
	ComputeScore        float64 `json:"compute_score"`
	PowerSustainScore   float64 `json:"power_sustain_score"`
	ThermalSustainScore float64 `json:"thermal_sustain_score"`
	StabilityScore      float64 `json:"stability_score"`
	InterconnectScore   float64 `json:"interconnect_score"`
	CompositeScore      float64 `json:"composite_score"`
}

// BenchmarkInterconnectResult summarizes the NCCL all-reduce interconnect
// phase; bandwidth fields are in GB/s and are meaningful only when
// Supported is true.
type BenchmarkInterconnectResult struct {
	Status             string   `json:"status"`
	Attempted          bool     `json:"attempted"`
	Supported          bool     `json:"supported"`
	SelectedGPUIndices []int    `json:"selected_gpu_indices,omitempty"`
	AvgAlgBWGBps       float64  `json:"avg_algbw_gbps,omitempty"`
	MaxAlgBWGBps       float64  `json:"max_algbw_gbps,omitempty"`
	AvgBusBWGBps       float64  `json:"avg_busbw_gbps,omitempty"`
	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
	Notes              []string `json:"notes,omitempty"`
}

View File

@@ -20,12 +20,13 @@ type GPUMetricRow struct {
MemUsagePct float64 `json:"mem_usage_pct"` MemUsagePct float64 `json:"mem_usage_pct"`
PowerW float64 `json:"power_w"` PowerW float64 `json:"power_w"`
ClockMHz float64 `json:"clock_mhz"` ClockMHz float64 `json:"clock_mhz"`
MemClockMHz float64 `json:"mem_clock_mhz"`
} }
// sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU. // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) { func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
args := []string{ args := []string{
"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics", "--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics,clocks.current.memory",
"--format=csv,noheader,nounits", "--format=csv,noheader,nounits",
} }
if len(gpuIndices) > 0 { if len(gpuIndices) > 0 {
@@ -46,7 +47,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
continue continue
} }
parts := strings.Split(line, ", ") parts := strings.Split(line, ", ")
if len(parts) < 6 { if len(parts) < 7 {
continue continue
} }
idx, _ := strconv.Atoi(strings.TrimSpace(parts[0])) idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
@@ -57,6 +58,7 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
MemUsagePct: parseGPUFloat(parts[3]), MemUsagePct: parseGPUFloat(parts[3]),
PowerW: parseGPUFloat(parts[4]), PowerW: parseGPUFloat(parts[4]),
ClockMHz: parseGPUFloat(parts[5]), ClockMHz: parseGPUFloat(parts[5]),
MemClockMHz: parseGPUFloat(parts[6]),
}) })
} }
return rows, nil return rows, nil
@@ -139,10 +141,10 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
// WriteGPUMetricsCSV writes collected rows as a CSV file. // WriteGPUMetricsCSV writes collected rows as a CSV file.
func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error { func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
var b bytes.Buffer var b bytes.Buffer
b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,power_w,clock_mhz\n") b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
for _, r := range rows { for _, r := range rows {
fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.0f\n", fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.PowerW, r.ClockMHz) r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
} }
return os.WriteFile(path, b.Bytes(), 0644) return os.WriteFile(path, b.Bytes(), 0644)
} }
@@ -197,7 +199,7 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
const PW = plotX2 - plotX1 const PW = plotX2 - plotX1
const PH = plotY2 - plotY1 const PH = plotY2 - plotY1
// Outer axes // Outer axes
const tempAxisX = 60 // temp axis line const tempAxisX = 60 // temp axis line
const clockAxisX = 900 // clock axis line const clockAxisX = 900 // clock axis line
colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"} colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}

View File

@@ -120,10 +120,45 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err)) log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
} }
log("Verifying live medium now served from RAM...")
status := s.LiveBootSource()
if err := verifyInstallToRAMStatus(status); err != nil {
return err
}
log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
log("Done. Installation media can be safely disconnected.") log("Done. Installation media can be safely disconnected.")
return nil return nil
} }
// verifyInstallToRAMStatus checks that the live medium is now served from
// RAM; otherwise it returns an error naming the medium that still backs it.
func verifyInstallToRAMStatus(status LiveBootSource) error {
	if !status.InRAM {
		return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
	}
	return nil
}
// describeLiveBootSource formats a LiveBootSource for log/error messages,
// e.g. "RAM", "USB (/dev/sdb1)", or the bare source when the kind is unknown.
func describeLiveBootSource(status LiveBootSource) string {
	// Prefer the device node, then the mount source, then a placeholder.
	medium := strings.TrimSpace(status.Device)
	if medium == "" {
		medium = strings.TrimSpace(status.Source)
	}
	if medium == "" {
		medium = "unknown source"
	}
	switch strings.TrimSpace(status.Kind) {
	case "ram":
		return "RAM"
	case "usb":
		return "USB (" + medium + ")"
	case "cdrom":
		return "CD-ROM (" + medium + ")"
	case "disk":
		return "disk (" + medium + ")"
	}
	return medium
}
func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error { func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
in, err := os.Open(src) in, err := os.Open(src)
if err != nil { if err != nil {

View File

@@ -3,6 +3,8 @@ package platform
import "testing" import "testing"
func TestInferLiveBootKind(t *testing.T) { func TestInferLiveBootKind(t *testing.T) {
t.Parallel()
tests := []struct { tests := []struct {
name string name string
fsType string fsType string
@@ -18,6 +20,7 @@ func TestInferLiveBootKind(t *testing.T) {
{name: "unknown", source: "overlay", want: "unknown"}, {name: "unknown", source: "overlay", want: "unknown"},
} }
for _, tc := range tests { for _, tc := range tests {
tc := tc
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport) got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
if got != tc.want { if got != tc.want {
@@ -26,3 +29,29 @@ func TestInferLiveBootKind(t *testing.T) {
}) })
} }
} }
// RAM-backed media must verify cleanly; USB-backed media must fail with a
// message naming the device.
func TestVerifyInstallToRAMStatus(t *testing.T) {
	t.Parallel()

	if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
		t.Fatalf("expected success for RAM-backed status, got %v", err)
	}

	err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
	if err == nil {
		t.Fatal("expected verification failure when media is still on USB")
	}
	const want = "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)"
	if got := err.Error(); got != want {
		t.Fatalf("error=%q", got)
	}
}
// Spot-check the two interesting description paths: the RAM kind and the
// unknown-kind fallback to the raw source string.
func TestDescribeLiveBootSource(t *testing.T) {
	t.Parallel()

	ram := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"})
	if ram != "RAM" {
		t.Fatalf("got %q want RAM", ram)
	}

	fallback := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"})
	if fallback != "/run/live/medium" {
		t.Fatalf("got %q want /run/live/medium", fallback)
	}
}

View File

@@ -135,12 +135,15 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
case "nvidia": case "nvidia":
tools = append(tools, s.CheckTools([]string{ tools = append(tools, s.CheckTools([]string{
"nvidia-smi", "nvidia-smi",
"dcgmi",
"nv-hostengine",
"nvidia-bug-report.sh", "nvidia-bug-report.sh",
"bee-gpu-burn", "bee-gpu-burn",
"bee-john-gpu-stress", "bee-john-gpu-stress",
"bee-nccl-gpu-stress", "bee-nccl-gpu-stress",
"all_reduce_perf", "all_reduce_perf",
})...) })...)
tools = append(tools, resolvedToolStatus("dcgmproftester", dcgmProfTesterCandidates...))
case "amd": case "amd":
tool := ToolStatus{Name: "rocm-smi"} tool := ToolStatus{Name: "rocm-smi"}
if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 { if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
@@ -155,6 +158,16 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
return tools return tools
} }
// resolvedToolStatus probes the candidate binary names in order and returns
// an OK ToolStatus (labeled display) for the first one found on PATH; if
// none resolve, the returned status reports the tool as unavailable.
func resolvedToolStatus(display string, candidates ...string) ToolStatus {
	status := ToolStatus{Name: display}
	for _, name := range candidates {
		if path, err := exec.LookPath(name); err == nil {
			status.Path = path
			status.OK = true
			break
		}
	}
	return status
}
func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) { func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
lsmodText := commandText("lsmod") lsmodText := commandText("lsmod")

View File

@@ -12,19 +12,20 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"syscall"
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"syscall"
"time" "time"
) )
var ( var (
satExecCommand = exec.Command satExecCommand = exec.Command
satLookPath = exec.LookPath satLookPath = exec.LookPath
satGlob = filepath.Glob satGlob = filepath.Glob
satStat = os.Stat satStat = os.Stat
satFreeMemBytes = freeMemBytes
rocmSMIExecutableGlobs = []string{ rocmSMIExecutableGlobs = []string{
"/opt/rocm/bin/rocm-smi", "/opt/rocm/bin/rocm-smi",
@@ -38,6 +39,12 @@ var (
"/opt/rocm/bin/rvs", "/opt/rocm/bin/rvs",
"/opt/rocm-*/bin/rvs", "/opt/rocm-*/bin/rvs",
} }
dcgmProfTesterCandidates = []string{
"dcgmproftester",
"dcgmproftester13",
"dcgmproftester12",
"dcgmproftester11",
}
) )
// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil). // streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
@@ -76,15 +83,15 @@ func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {
// NvidiaGPU holds basic GPU info from nvidia-smi. // NvidiaGPU holds basic GPU info from nvidia-smi.
type NvidiaGPU struct { type NvidiaGPU struct {
Index int Index int `json:"index"`
Name string Name string `json:"name"`
MemoryMB int MemoryMB int `json:"memory_mb"`
} }
// AMDGPUInfo holds basic info about an AMD GPU from rocm-smi. // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
type AMDGPUInfo struct { type AMDGPUInfo struct {
Index int Index int `json:"index"`
Name string Name string `json:"name"`
} }
// DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise. // DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
@@ -256,6 +263,9 @@ func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
MemoryMB: memMB, MemoryMB: memMB,
}) })
} }
sort.Slice(gpus, func(i, j int) bool {
return gpus[i].Index < gpus[j].Index
})
return gpus, nil return gpus, nil
} }
@@ -277,6 +287,80 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
}, logFunc) }, logFunc)
} }
// RunNvidiaOfficialComputePack runs the dcgmproftester compute workload on
// the selected GPUs (all detected GPUs when gpuIndices is empty), bracketed
// by nvidia-smi state captures. Artifacts are written under baseDir by
// runAcceptancePackCtx; progress lines go to logFunc.
func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	gpus, err := resolveDCGMGPUIndices(gpuIndices)
	if err != nil {
		return "", err
	}
	duration := strconv.Itoa(normalizeNvidiaBurnDuration(durationSec))
	profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", duration)
	if err != nil {
		return "", err
	}
	jobs := []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{name: "02-dcgmi-version.log", cmd: []string{"dcgmi", "-v"}},
		{
			name:       "03-dcgmproftester.log",
			cmd:        profCmd,
			env:        nvidiaVisibleDevicesEnv(gpus),
			collectGPU: true,
			gpuIndices: gpus,
		},
		{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	}
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", jobs, logFunc)
}
// RunNvidiaTargetedPowerPack runs the DCGM "targeted_power" diagnostic on
// the selected GPUs (all detected GPUs when gpuIndices is empty), with
// nvidia-smi snapshots before and after. Artifacts land under baseDir.
func (s *System) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	gpus, err := resolveDCGMGPUIndices(gpuIndices)
	if err != nil {
		return "", err
	}
	jobs := []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{
			name:       "02-dcgmi-targeted-power.log",
			cmd:        nvidiaDCGMNamedDiagCommand("targeted_power", normalizeNvidiaBurnDuration(durationSec), gpus),
			collectGPU: true,
			gpuIndices: gpus,
		},
		{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	}
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-power", jobs, logFunc)
}
// RunNvidiaPulseTestPack runs the DCGM "pulse_test" diagnostic on the
// selected GPUs (all detected GPUs when gpuIndices is empty), with
// nvidia-smi snapshots before and after. Artifacts land under baseDir.
func (s *System) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	gpus, err := resolveDCGMGPUIndices(gpuIndices)
	if err != nil {
		return "", err
	}
	jobs := []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{
			name:       "02-dcgmi-pulse-test.log",
			cmd:        nvidiaDCGMNamedDiagCommand("pulse_test", normalizeNvidiaBurnDuration(durationSec), gpus),
			collectGPU: true,
			gpuIndices: gpus,
		},
		{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	}
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-pulse", jobs, logFunc)
}
// RunNvidiaBandwidthPack runs the DCGM "nvbandwidth" diagnostic (no duration
// parameter) on the selected GPUs, with nvidia-smi snapshots before and
// after. Artifacts land under baseDir.
func (s *System) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
	gpus, err := resolveDCGMGPUIndices(gpuIndices)
	if err != nil {
		return "", err
	}
	jobs := []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{
			name:       "02-dcgmi-nvbandwidth.log",
			cmd:        nvidiaDCGMNamedDiagCommand("nvbandwidth", 0, gpus),
			collectGPU: true,
			gpuIndices: gpus,
		},
		{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	}
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-bandwidth", jobs, logFunc)
}
func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) { func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc) return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
} }
@@ -293,6 +377,23 @@ func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc) return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc)
} }
// RunNvidiaTargetedStressValidatePack runs the DCGM "targeted_stress"
// diagnostic on the selected GPUs (all detected GPUs when gpuIndices is
// empty), with nvidia-smi snapshots before and after. Artifacts land under
// baseDir.
func (s *System) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
	gpus, err := resolveDCGMGPUIndices(gpuIndices)
	if err != nil {
		return "", err
	}
	jobs := []satJob{
		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
		{
			name:       "02-dcgmi-targeted-stress.log",
			cmd:        nvidiaDCGMNamedDiagCommand("targeted_stress", normalizeNvidiaBurnDuration(durationSec), gpus),
			collectGPU: true,
			gpuIndices: gpus,
		},
		{name: "03-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
	}
	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-stress", jobs, logFunc)
}
func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) { func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
if len(gpuIndices) > 0 { if len(gpuIndices) > 0 {
return dedupeSortedIndices(gpuIndices), nil return dedupeSortedIndices(gpuIndices), nil
@@ -307,6 +408,25 @@ func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
return all, nil return all, nil
} }
// memoryStressSizeArg picks the stress-ng --vm size argument. An explicit
// BEE_VM_STRESS_SIZE_MB wins; otherwise it targets two thirds of the free
// memory reported by satFreeMemBytes, rounded down to a 256 MiB multiple so
// mixed burn runs keep OS headroom. Falls back to "80%" when free memory is
// unknown or the computed target is not positive.
func memoryStressSizeArg() string {
	if override := envInt("BEE_VM_STRESS_SIZE_MB", 0); override > 0 {
		return fmt.Sprintf("%dM", override)
	}
	freeBytes := satFreeMemBytes()
	if freeBytes <= 0 {
		return "80%"
	}
	target := (freeBytes / (1024 * 1024)) * 2 / 3
	if target >= 256 {
		target -= target % 256
	}
	if target <= 0 {
		return "80%"
	}
	return fmt.Sprintf("%dM", target)
}
func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) { func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128) sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
passes := envInt("BEE_MEMTESTER_PASSES", 1) passes := envInt("BEE_MEMTESTER_PASSES", 1)
@@ -322,11 +442,9 @@ func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durati
if seconds <= 0 { if seconds <= 0 {
seconds = envInt("BEE_VM_STRESS_SECONDS", 300) seconds = envInt("BEE_VM_STRESS_SECONDS", 300)
} }
// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB. // Base the default on current MemAvailable and keep headroom for the OS and
sizeArg := "80%" // concurrent stressors so mixed burn runs do not trip the OOM killer.
if mb := envInt("BEE_VM_STRESS_SIZE_MB", 0); mb > 0 { sizeArg := memoryStressSizeArg()
sizeArg = fmt.Sprintf("%dM", mb)
}
return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{ return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
{name: "01-free-before.log", cmd: []string{"free", "-h"}}, {name: "01-free-before.log", cmd: []string{"free", "-h"}},
{name: "02-stress-ng-vm.log", cmd: []string{ {name: "02-stress-ng-vm.log", cmd: []string{
@@ -473,6 +591,31 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
} }
} }
// nvidiaDCGMNamedDiagCommand builds a `dcgmi diag -r <name>` command line,
// optionally pinning the per-test duration and restricting the run to the
// given GPU indices.
func nvidiaDCGMNamedDiagCommand(name string, durationSec int, gpuIndices []int) []string {
	cmd := make([]string, 0, 8)
	cmd = append(cmd, "dcgmi", "diag", "-r", name)
	if durationSec > 0 {
		cmd = append(cmd, "-p", fmt.Sprintf("%s.test_duration=%d", name, durationSec))
	}
	if len(gpuIndices) > 0 {
		cmd = append(cmd, "-i", joinIndexList(gpuIndices))
	}
	return cmd
}
// normalizeNvidiaBurnDuration returns the requested burn duration in
// seconds, substituting the 300-second default for zero or negative input.
func normalizeNvidiaBurnDuration(durationSec int) int {
	if durationSec > 0 {
		return durationSec
	}
	return 300
}
// nvidiaVisibleDevicesEnv returns the CUDA_VISIBLE_DEVICES assignment for
// the selected GPUs, or nil when no selection was made (meaning all GPUs
// stay visible).
func nvidiaVisibleDevicesEnv(gpuIndices []int) []string {
	if len(gpuIndices) > 0 {
		return []string{"CUDA_VISIBLE_DEVICES=" + joinIndexList(gpuIndices)}
	}
	return nil
}
func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) { func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
if ctx == nil { if ctx == nil {
ctx = context.Background() ctx = context.Background()
@@ -642,6 +785,7 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
} }
if strings.Contains(text, "unsupported") || if strings.Contains(text, "unsupported") ||
strings.Contains(text, "not supported") || strings.Contains(text, "not supported") ||
strings.Contains(text, "not found in path") ||
strings.Contains(text, "invalid opcode") || strings.Contains(text, "invalid opcode") ||
strings.Contains(text, "unknown command") || strings.Contains(text, "unknown command") ||
strings.Contains(text, "not implemented") || strings.Contains(text, "not implemented") ||
@@ -748,6 +892,15 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm") return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
} }
// resolveDCGMProfTesterCommand finds the first dcgmproftester binary from
// the candidate list (versioned names included) and prepends its resolved
// path to the caller's arguments.
func resolveDCGMProfTesterCommand(args ...string) ([]string, error) {
	for _, name := range dcgmProfTesterCandidates {
		path, err := satLookPath(name)
		if err != nil {
			continue
		}
		return append([]string{path}, args...), nil
	}
	return nil, errors.New("dcgmproftester not found in PATH")
}
func ensureAMDRuntimeReady() error { func ensureAMDRuntimeReady() error {
if _, err := os.Stat("/dev/kfd"); err == nil { if _, err := os.Stat("/dev/kfd"); err == nil {
return nil return nil

View File

@@ -195,6 +195,53 @@ func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
} }
} }
// TestResolveDCGMProfTesterCommandUsesVersionedBinary swaps the package's
// satLookPath hook for a fake in which only the versioned
// "dcgmproftester13" binary exists, then checks that the resolver picks it
// up and prepends its full path ahead of the caller-supplied arguments.
func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
	oldLookPath := satLookPath
	// Fake PATH: only dcgmproftester13 resolves; everything else is missing.
	satLookPath = func(file string) (string, error) {
		switch file {
		case "dcgmproftester13":
			return "/usr/bin/dcgmproftester13", nil
		default:
			return "", exec.ErrNotFound
		}
	}
	// Restore the real lookup even if the test fails early.
	t.Cleanup(func() { satLookPath = oldLookPath })
	cmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004")
	if err != nil {
		t.Fatalf("resolveDCGMProfTesterCommand error: %v", err)
	}
	// Resolved binary path plus the three pass-through arguments.
	if len(cmd) != 4 {
		t.Fatalf("cmd len=%d want 4 (%v)", len(cmd), cmd)
	}
	if cmd[0] != "/usr/bin/dcgmproftester13" {
		t.Fatalf("cmd[0]=%q want /usr/bin/dcgmproftester13", cmd[0])
	}
}
// TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection verifies that a
// named diag command carries both the per-test duration parameter and the
// GPU selection flag, in order and without reordering the indices.
func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
	got := nvidiaDCGMNamedDiagCommand("targeted_power", 900, []int{3, 1})
	want := []string{"dcgmi", "diag", "-r", "targeted_power", "-p", "targeted_power.test_duration=900", "-i", "3,1"}
	if len(got) != len(want) {
		t.Fatalf("cmd len=%d want %d (%v)", len(got), len(want), got)
	}
	for i, expected := range want {
		if got[i] != expected {
			t.Fatalf("cmd[%d]=%q want %q", i, got[i], expected)
		}
	}
}
// TestNvidiaVisibleDevicesEnvUsesSelectedGPUs checks that a GPU selection
// maps to exactly one CUDA_VISIBLE_DEVICES entry with comma-joined indices.
func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
	got := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
	if len(got) != 1 {
		t.Fatalf("env len=%d want 1 (%v)", len(got), got)
	}
	if got[0] != "CUDA_VISIBLE_DEVICES=0,2,4" {
		t.Fatalf("env[0]=%q want CUDA_VISIBLE_DEVICES=0,2,4", got[0])
	}
}
func TestNvidiaStressArchivePrefixByLoader(t *testing.T) { func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
t.Parallel() t.Parallel()
@@ -229,6 +276,37 @@ func TestEnvIntFallback(t *testing.T) {
} }
} }
// TestMemoryStressSizeArgUsesAvailableMemory stubs satFreeMemBytes to report
// 96 GiB free and expects two thirds of that (98304 MB * 2/3 = 65536 MB),
// already a multiple of 256, as the size argument.
// NOTE(review): assumes BEE_VM_STRESS_SIZE_MB is unset in the test
// environment — confirm, or clear it explicitly with t.Setenv.
func TestMemoryStressSizeArgUsesAvailableMemory(t *testing.T) {
	oldFreeMemBytes := satFreeMemBytes
	satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
	t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
	if got := memoryStressSizeArg(); got != "65536M" {
		t.Fatalf("sizeArg=%q want 65536M", got)
	}
}
// TestMemoryStressSizeArgRespectsOverride sets BEE_VM_STRESS_SIZE_MB and
// verifies the override wins even though plenty of free memory is reported
// by the (stubbed) satFreeMemBytes hook.
func TestMemoryStressSizeArgRespectsOverride(t *testing.T) {
	oldFreeMemBytes := satFreeMemBytes
	satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
	t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
	t.Setenv("BEE_VM_STRESS_SIZE_MB", "4096")
	if got := memoryStressSizeArg(); got != "4096M" {
		t.Fatalf("sizeArg=%q want 4096M", got)
	}
}
// TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown verifies the "80%"
// fallback when satFreeMemBytes cannot determine free memory (returns 0).
// NOTE(review): like the sibling test, this assumes BEE_VM_STRESS_SIZE_MB
// is unset in the environment — confirm.
func TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown(t *testing.T) {
	oldFreeMemBytes := satFreeMemBytes
	satFreeMemBytes = func() int64 { return 0 }
	t.Cleanup(func() { satFreeMemBytes = oldFreeMemBytes })
	if got := memoryStressSizeArg(); got != "80%" {
		t.Fatalf("sizeArg=%q want 80%%", got)
	}
}
func TestClassifySATResult(t *testing.T) { func TestClassifySATResult(t *testing.T) {
tests := []struct { tests := []struct {
name string name string

View File

@@ -10,17 +10,30 @@ import (
func (s *System) ListBeeServices() ([]string, error) { func (s *System) ListBeeServices() ([]string, error) {
seen := map[string]bool{} seen := map[string]bool{}
var out []string var out []string
for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} { for _, pattern := range []string{
"/etc/systemd/system/bee-*.service",
"/lib/systemd/system/bee-*.service",
"/etc/systemd/system/bee-*.timer",
"/lib/systemd/system/bee-*.timer",
} {
matches, err := filepath.Glob(pattern) matches, err := filepath.Glob(pattern)
if err != nil { if err != nil {
return nil, err return nil, err
} }
for _, match := range matches { for _, match := range matches {
name := strings.TrimSuffix(filepath.Base(match), ".service") base := filepath.Base(match)
name := base
if strings.HasSuffix(base, ".service") {
name = strings.TrimSuffix(base, ".service")
}
// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query. // Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
if strings.HasSuffix(name, "@") { if strings.HasSuffix(name, "@") {
continue continue
} }
// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
continue
}
if !seen[name] { if !seen[name] {
seen[name] = true seen[name] = true
out = append(out, name) out = append(out, name)

View File

@@ -44,12 +44,12 @@ type StaticIPv4Config struct {
} }
type RemovableTarget struct { type RemovableTarget struct {
Device string Device string `json:"device"`
FSType string FSType string `json:"fs_type"`
Size string Size string `json:"size"`
Label string Label string `json:"label"`
Model string Model string `json:"model"`
Mountpoint string Mountpoint string `json:"mountpoint"`
} }
type ToolStatus struct { type ToolStatus struct {

View File

@@ -0,0 +1,31 @@
package platform
import (
"encoding/json"
"strings"
"testing"
)
// TestRemovableTargetJSONUsesFrontendFieldNames guards the wire contract:
// RemovableTarget must marshal with the snake_case keys the web frontend
// reads, not Go's default exported field names.
func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
	t.Parallel()
	data, err := json.Marshal(RemovableTarget{
		Device: "/dev/sdb1",
		FSType: "exfat",
		Size:   "1.8T",
		Label:  "USB",
		Model:  "Flash",
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	raw := string(data)
	// Every tagged key must appear in the encoded output.
	for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
		if !strings.Contains(raw, key) {
			t.Fatalf("json missing key %s: %s", key, raw)
		}
	}
	// And none of the untagged Go spellings may leak through.
	if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
		t.Fatalf("json still contains Go field names: %s", raw)
	}
}

View File

@@ -110,6 +110,11 @@ func streamCmdJob(j *jobState, cmd *exec.Cmd) error {
scanDone := make(chan error, 1) scanDone := make(chan error, 1)
go func() { go func() {
defer func() {
if rec := recover(); rec != nil {
scanDone <- fmt.Errorf("stream scanner panic: %v", rec)
}
}()
scanner := bufio.NewScanner(pr) scanner := bufio.NewScanner(pr)
scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024) scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
for scanner.Scan() { for scanner.Scan() {
@@ -227,6 +232,54 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
} }
} }
// handleAPIBenchmarkNvidiaRun enqueues an NVIDIA benchmark task from an
// optional JSON body. All fields are optional: an absent or empty body
// queues a default run. RunNCCL is a *bool so "field absent" (defaults to
// true) can be distinguished from an explicit false. The response carries
// the queued task id under both "task_id" and "job_id".
func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
	if h.opts.App == nil {
		writeError(w, http.StatusServiceUnavailable, "app not configured")
		return
	}
	var body struct {
		Profile           string `json:"profile"`
		SizeMB            int    `json:"size_mb"`
		GPUIndices        []int  `json:"gpu_indices"`
		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
		RunNCCL           *bool  `json:"run_nccl"`
		DisplayName       string `json:"display_name"`
	}
	if r.Body != nil {
		// An empty body decodes to io.EOF and is accepted; only malformed
		// JSON is rejected.
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
			writeError(w, http.StatusBadRequest, "invalid request body")
			return
		}
	}
	// NCCL runs by default unless the request explicitly disables it.
	runNCCL := true
	if body.RunNCCL != nil {
		runNCCL = *body.RunNCCL
	}
	t := &Task{
		ID:        newJobID("benchmark-nvidia"),
		Name:      taskDisplayName("nvidia-benchmark", "", ""),
		Target:    "nvidia-benchmark",
		Priority:  15, // NOTE(review): relative to other task priorities — confirm ordering convention
		Status:    TaskPending,
		CreatedAt: time.Now(),
		params: taskParams{
			GPUIndices:        body.GPUIndices,
			ExcludeGPUIndices: body.ExcludeGPUIndices,
			SizeMB:            body.SizeMB,
			BenchmarkProfile:  body.Profile,
			RunNCCL:           runNCCL,
			DisplayName:       body.DisplayName,
		},
	}
	// A caller-supplied display name overrides the generated default.
	if strings.TrimSpace(body.DisplayName) != "" {
		t.Name = body.DisplayName
	}
	globalQueue.enqueue(t)
	writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
}
func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
id := r.URL.Query().Get("job_id") id := r.URL.Query().Get("job_id")
if id == "" { if id == "" {
@@ -486,6 +539,22 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques
// ── GPU presence ────────────────────────────────────────────────────────────── // ── GPU presence ──────────────────────────────────────────────────────────────
// handleAPIGNVIDIAGPUs lists the detected NVIDIA GPUs as JSON. A nil result
// from the app layer is normalized to an empty slice so the response body
// is [] rather than null, which frontend code can iterate unconditionally.
func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
	if h.opts.App == nil {
		writeError(w, http.StatusServiceUnavailable, "app not configured")
		return
	}
	gpus, err := h.opts.App.ListNvidiaGPUs()
	if err != nil {
		writeError(w, http.StatusInternalServerError, err.Error())
		return
	}
	if gpus == nil {
		gpus = []platform.NvidiaGPU{}
	}
	writeJSON(w, gpus)
}
func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
if h.opts.App == nil { if h.opts.App == nil {
writeError(w, http.StatusServiceUnavailable, "app not configured") writeError(w, http.StatusServiceUnavailable, "app not configured")
@@ -511,14 +580,33 @@ func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
_, amdErr := os.Stat("/dev/kfd") _, amdErr := os.Stat("/dev/kfd")
nvidiaUp := nvidiaErr == nil nvidiaUp := nvidiaErr == nil
amdUp := amdErr == nil amdUp := amdErr == nil
_, dcgmErr := exec.LookPath("dcgmi")
_, ncclStressErr := exec.LookPath("bee-nccl-gpu-stress")
_, johnErr := exec.LookPath("bee-john-gpu-stress")
_, beeBurnErr := exec.LookPath("bee-gpu-burn")
_, nvBandwidthErr := exec.LookPath("nvbandwidth")
profErr := lookPathAny("dcgmproftester", "dcgmproftester13", "dcgmproftester12", "dcgmproftester11")
writeJSON(w, []toolEntry{ writeJSON(w, []toolEntry{
{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"}, {ID: "nvidia-compute", Available: nvidiaUp && profErr == nil, Vendor: "nvidia"},
{ID: "john", Available: nvidiaUp, Vendor: "nvidia"}, {ID: "nvidia-targeted-power", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"}, {ID: "nvidia-pulse", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
{ID: "nvidia-interconnect", Available: nvidiaUp && ncclStressErr == nil, Vendor: "nvidia"},
{ID: "nvidia-bandwidth", Available: nvidiaUp && dcgmErr == nil && nvBandwidthErr == nil, Vendor: "nvidia"},
{ID: "bee-gpu-burn", Available: nvidiaUp && beeBurnErr == nil, Vendor: "nvidia"},
{ID: "john", Available: nvidiaUp && johnErr == nil, Vendor: "nvidia"},
{ID: "rvs", Available: amdUp, Vendor: "amd"}, {ID: "rvs", Available: amdUp, Vendor: "amd"},
}) })
} }
func lookPathAny(names ...string) error {
for _, name := range names {
if _, err := exec.LookPath(name); err == nil {
return nil
}
}
return exec.ErrNotFound
}
// ── System ──────────────────────────────────────────────────────────────────── // ── System ────────────────────────────────────────────────────────────────────
func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
@@ -557,7 +645,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)
var standardTools = []string{ var standardTools = []string{
"dmidecode", "smartctl", "nvme", "lspci", "ipmitool", "dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
"nvidia-smi", "memtester", "stress-ng", "nvtop", "nvidia-smi", "dcgmi", "nv-hostengine", "memtester", "stress-ng", "nvtop",
"mstflint", "qrencode", "mstflint", "qrencode",
} }

View File

@@ -64,6 +64,42 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
} }
} }
// TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs posts a benchmark
// request with an explicit GPU selection and run_nccl=false, then asserts
// exactly one task was enqueued carrying those parameters. The shared
// global queue's task list is snapshotted and restored so concurrent or
// later tests are unaffected.
func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
	// Isolate the shared queue for the duration of this test.
	globalQueue.mu.Lock()
	originalTasks := globalQueue.tasks
	globalQueue.tasks = nil
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = originalTasks
		globalQueue.mu.Unlock()
	})
	h := &handler{opts: HandlerOptions{App: &app.App{}}}
	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
	rec := httptest.NewRecorder()
	h.handleAPIBenchmarkNvidiaRun(rec, req)
	if rec.Code != 200 {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	if len(globalQueue.tasks) != 1 {
		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
	}
	task := globalQueue.tasks[0]
	if task.Target != "nvidia-benchmark" {
		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
	}
	// Selection must be preserved in order, not deduped or sorted here.
	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
		t.Fatalf("gpu indices=%v want [1 3]", got)
	}
	if task.params.RunNCCL {
		t.Fatal("RunNCCL should reflect explicit false from request")
	}
}
func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) { func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
h := &handler{} h := &handler{}

View File

@@ -0,0 +1,773 @@
package webui
import (
"fmt"
"math"
"sort"
"strconv"
"strings"
"sync"
"time"
"bee/audit/internal/platform"
)
// chartTimelineSegment is one span of the chart's time axis, flagged as
// task-active or idle; renderers shade the idle spans behind the plot.
type chartTimelineSegment struct {
	Start  time.Time
	End    time.Time
	Active bool
}

// chartScale is a Y-axis value range together with the tick values drawn
// along it.
type chartScale struct {
	Min   float64
	Max   float64
	Ticks []float64
}

// chartLayout is the pixel geometry of a rendered SVG chart: the overall
// canvas size and the bounds of the inner plot rectangle.
type chartLayout struct {
	Width      int
	Height     int
	PlotLeft   int
	PlotRight  int
	PlotTop    int
	PlotBottom int
}

// metricChartSeries is one named line on a chart: its color, an optional
// axis caption, and the per-point values.
type metricChartSeries struct {
	Name      string
	AxisTitle string
	Color     string
	Values    []float64
}

// metricChartPalette supplies line colors, assigned to series round-robin
// by index.
var metricChartPalette = []string{
	"#5794f2",
	"#73bf69",
	"#f2cc0c",
	"#ff9830",
	"#f2495c",
	"#b877d9",
	"#56d2f7",
	"#8ab8ff",
	"#9adf8f",
	"#ffbe5c",
}

// gpuLabelCache memoizes GPU index → display label lookups under a mutex.
// NOTE(review): loadedAt presumably drives cache refresh in the lookup
// helper (not visible here) — confirm there.
var gpuLabelCache struct {
	mu       sync.Mutex
	loadedAt time.Time
	byIndex  map[int]string
}
// renderMetricChartSVG renders a single-Y-axis line chart as SVG bytes.
// labels/times form the X axis (padded or synthesized when shorter than the
// widest input), datasets/names form the series paired by index, yMin/yMax
// optionally pin the Y scale, canvasHeight sets the SVG height, and
// timeline shades idle spans behind the plot. The error result is currently
// always nil; the signature leaves room for future failures.
func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
	// Point count is the wider of labels/times, never zero.
	pointCount := len(labels)
	if len(times) > pointCount {
		pointCount = len(times)
	}
	if pointCount == 0 {
		pointCount = 1
		labels = []string{""}
		times = []time.Time{time.Time{}}
	}
	if len(labels) < pointCount {
		padded := make([]string, pointCount)
		copy(padded, labels)
		labels = padded
	}
	if len(times) < pointCount {
		times = synthesizeChartTimes(times, pointCount)
	}
	// Empty datasets render as flat zero lines rather than being dropped.
	for i := range datasets {
		if len(datasets[i]) == 0 {
			datasets[i] = make([]float64, pointCount)
		}
	}
	statsLabel := chartStatsLabel(datasets)
	// One legend entry per name; a name without a matching dataset keeps
	// all-zero values.
	legendItems := []metricChartSeries{}
	for i, name := range names {
		color := metricChartPalette[i%len(metricChartPalette)]
		values := make([]float64, pointCount)
		if i < len(datasets) {
			copy(values, coalesceDataset(datasets[i], pointCount))
		}
		legendItems = append(legendItems, metricChartSeries{
			Name:   name,
			Color:  color,
			Values: values,
		})
	}
	scale := singleAxisChartScale(datasets, yMin, yMax)
	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
	start, end := chartTimeBounds(times)
	var b strings.Builder
	// Paint order matters: frame, idle shading, grids, boundaries, border,
	// axis, X labels, series lines, legend.
	writeSVGOpen(&b, layout.Width, layout.Height)
	writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
	writeTimelineIdleSpans(&b, layout, start, end, timeline)
	writeVerticalGrid(&b, layout, times, pointCount, 8)
	writeHorizontalGrid(&b, layout, scale)
	writeTimelineBoundaries(&b, layout, start, end, timeline)
	writePlotBorder(&b, layout)
	writeSingleAxisY(&b, layout, scale)
	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
	for _, item := range legendItems {
		writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
	}
	writeLegend(&b, layout, legendItems)
	writeSVGClose(&b)
	return []byte(b.String()), nil
}
// renderGPUOverviewChartSVG builds the three-series overview chart (temp,
// power, core clock) for GPU idx from live metric samples. The bool result
// is false — and no SVG is produced — when the samples hold no data at all
// for that GPU.
func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) {
	temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
	power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
	coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
	if temp == nil && power == nil && coreClock == nil {
		return nil, false, nil
	}
	labels := sampleTimeLabels(samples)
	times := sampleTimes(samples)
	// Series order is fixed: drawGPUOverviewChartSVG requires exactly these
	// three, each getting its own Y axis.
	svg, err := drawGPUOverviewChartSVG(
		gpuDisplayLabel(idx)+" Overview",
		labels,
		times,
		[]metricChartSeries{
			{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
			{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
			{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
		},
		timeline,
	)
	if err != nil {
		return nil, false, err
	}
	return svg, true, nil
}
// drawGPUOverviewChartSVG renders a fixed-size chart with exactly three
// series, each plotted against its own independently-scaled Y axis: two
// axes sit left of the plot (labels to their left) and one sits right of it
// (labels to its right). Gridlines follow the first series' scale only.
func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) {
	if len(series) != 3 {
		return nil, fmt.Errorf("gpu overview requires 3 series, got %d", len(series))
	}
	// Fixed canvas and plot geometry for the overview chart.
	const (
		width      = 1400
		height     = 840
		plotLeft   = 180
		plotRight  = 1220
		plotTop    = 96
		plotBottom = 660
	)
	// X positions of the three per-series axis lines (two left, one right).
	const (
		leftOuterAxis  = 72
		leftInnerAxis  = 132
		rightInnerAxis = 1268
	)
	layout := chartLayout{
		Width:      width,
		Height:     height,
		PlotLeft:   plotLeft,
		PlotRight:  plotRight,
		PlotTop:    plotTop,
		PlotBottom: plotBottom,
	}
	axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis}
	// Pad labels/times to a common point count, synthesizing times if short.
	pointCount := len(labels)
	if len(times) > pointCount {
		pointCount = len(times)
	}
	if pointCount == 0 {
		pointCount = 1
		labels = []string{""}
		times = []time.Time{time.Time{}}
	}
	if len(labels) < pointCount {
		padded := make([]string, pointCount)
		copy(padded, labels)
		labels = padded
	}
	if len(times) < pointCount {
		times = synthesizeChartTimes(times, pointCount)
	}
	for i := range series {
		if len(series[i].Values) == 0 {
			series[i].Values = make([]float64, pointCount)
		}
	}
	// Each series gets its own scale snapped to its nice-tick bounds.
	scales := make([]chartScale, len(series))
	for i := range series {
		min, max := chartSeriesBounds(series[i].Values)
		ticks := chartNiceTicks(min, max, 8)
		scales[i] = chartScale{
			Min:   ticks[0],
			Max:   ticks[len(ticks)-1],
			Ticks: ticks,
		}
	}
	start, end := chartTimeBounds(times)
	var b strings.Builder
	writeSVGOpen(&b, width, height)
	writeChartFrame(&b, title, "", width, height)
	writeTimelineIdleSpans(&b, layout, start, end, timeline)
	writeVerticalGrid(&b, layout, times, pointCount, 8)
	// Horizontal gridlines track only the first series' scale.
	writeHorizontalGrid(&b, layout, scales[0])
	writeTimelineBoundaries(&b, layout, start, end, timeline)
	writePlotBorder(&b, layout)
	for i, axisLineX := range axisX {
		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
			axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
		fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
			axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
		for _, tick := range scales[i].Ticks {
			y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
			label := sanitizeChartText(chartYAxisNumber(tick))
			// Left-hand axes: tick marks point right, labels sit left.
			if i < 2 {
				fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
					axisLineX, y, axisLineX+6, y, series[i].Color)
				fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
					axisLineX-8, y, series[i].Color, label)
				continue
			}
			// Right-hand axis: tick marks point left, labels sit right.
			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
				axisLineX, y, axisLineX-6, y, series[i].Color)
			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
				axisLineX+8, y, series[i].Color, label)
		}
	}
	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
	for i := range series {
		writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
	}
	writeLegend(&b, layout, series)
	writeSVGClose(&b)
	return []byte(b.String()), nil
}
// metricsTimelineSegments derives active/idle shading segments covering the
// sample window, using the global task queue's run history. Returns nil
// when there are no samples or no usable time bounds.
func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
	if len(samples) == 0 {
		return nil
	}
	times := sampleTimes(samples)
	start, end := chartTimeBounds(times)
	if start.IsZero() || end.IsZero() {
		return nil
	}
	return chartTimelineSegmentsForRange(start, end, now, snapshotTaskHistory())
}
// snapshotTaskHistory copies the global queue's task list under its lock,
// dereferencing each entry so callers can read task timing without holding
// the lock or racing queue mutations.
func snapshotTaskHistory() []Task {
	globalQueue.mu.Lock()
	defer globalQueue.mu.Unlock()
	tasks := globalQueue.tasks
	out := make([]Task, len(tasks))
	for i := range tasks {
		out[i] = *tasks[i]
	}
	return out
}
// chartTimelineSegmentsForRange splits [start, end] into alternating
// active/idle segments based on when tasks were running. A task is counted
// from StartedAt until DoneAt (or "now" if still running); intervals are
// clamped to the range, merged when overlapping or touching, and the gaps
// between them become idle segments. Returns nil only for zero bounds; an
// entirely task-free range yields one idle segment spanning it.
func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment {
	if start.IsZero() || end.IsZero() {
		return nil
	}
	if end.Before(start) {
		start, end = end, start
	}
	type interval struct {
		start time.Time
		end   time.Time
	}
	// Collect each task's run interval, clamped to [start, end].
	active := make([]interval, 0, len(tasks))
	for _, task := range tasks {
		if task.StartedAt == nil {
			continue
		}
		intervalStart := task.StartedAt.UTC()
		intervalEnd := now.UTC()
		if task.DoneAt != nil {
			intervalEnd = task.DoneAt.UTC()
		}
		// Drop empty/inverted intervals and ones fully outside the range.
		if !intervalEnd.After(intervalStart) {
			continue
		}
		if intervalEnd.Before(start) || intervalStart.After(end) {
			continue
		}
		if intervalStart.Before(start) {
			intervalStart = start
		}
		if intervalEnd.After(end) {
			intervalEnd = end
		}
		active = append(active, interval{start: intervalStart, end: intervalEnd})
	}
	// Sort by start (then end) so overlapping intervals are adjacent.
	sort.Slice(active, func(i, j int) bool {
		if active[i].start.Equal(active[j].start) {
			return active[i].end.Before(active[j].end)
		}
		return active[i].start.Before(active[j].start)
	})
	// Merge overlapping or touching intervals into disjoint active spans.
	merged := make([]interval, 0, len(active))
	for _, span := range active {
		if len(merged) == 0 {
			merged = append(merged, span)
			continue
		}
		last := &merged[len(merged)-1]
		if !span.start.After(last.end) {
			if span.end.After(last.end) {
				last.end = span.end
			}
			continue
		}
		merged = append(merged, span)
	}
	// Walk the range, emitting idle segments between the active spans.
	segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
	cursor := start
	for _, span := range merged {
		if span.start.After(cursor) {
			segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
		}
		segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
		cursor = span.end
	}
	if cursor.Before(end) {
		segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
	}
	if len(segments) == 0 {
		segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
	}
	return segments
}
// sampleTimes extracts the timestamp column from a metric sample series,
// preserving order.
func sampleTimes(samples []platform.LiveMetricSample) []time.Time {
	out := make([]time.Time, len(samples))
	for i := range samples {
		out[i] = samples[i].Timestamp
	}
	return out
}
// singleAxisChartScale derives the shared Y scale for all datasets of a
// single-axis chart. Explicit yMin/yMax pins override the observed data
// bounds; the final scale snaps to the first and last nice tick.
func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
	lo, hi := 0.0, 1.0
	if yMin != nil && yMax != nil {
		lo, hi = *yMin, *yMax
	} else {
		lo, hi = chartSeriesBounds(flattenDatasets(datasets))
		if yMin != nil {
			lo = *yMin
		}
		if yMax != nil {
			hi = *yMax
		}
	}
	ticks := chartNiceTicks(lo, hi, 8)
	return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
}
// flattenDatasets concatenates every dataset into a single slice, sized up
// front to the combined length.
func flattenDatasets(datasets [][]float64) []float64 {
	n := 0
	for _, ds := range datasets {
		n += len(ds)
	}
	flat := make([]float64, 0, n)
	for _, ds := range datasets {
		flat = append(flat, ds...)
	}
	return flat
}
// singleAxisChartLayout computes the fixed-width canvas geometry for a
// single-axis chart, shrinking the plot bottom to reserve room under it for
// the legend when one will be shown (up to 4 columns, 24px per row plus
// 24px of padding).
func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
	rows := 0
	if chartLegendVisible(seriesCount) && seriesCount > 0 {
		cols := seriesCount
		if cols > 4 {
			cols = 4
		}
		rows = (seriesCount + cols - 1) / cols
	}
	reserved := 0
	if rows > 0 {
		reserved = rows*24 + 24
	}
	return chartLayout{
		Width:      1400,
		Height:     canvasHeight,
		PlotLeft:   96,
		PlotRight:  1352,
		PlotTop:    72,
		PlotBottom: canvasHeight - 60 - reserved,
	}
}
func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
if len(times) == 0 {
return time.Time{}, time.Time{}
}
start := times[0].UTC()
end := start
for _, ts := range times[1:] {
t := ts.UTC()
if t.Before(start) {
start = t
}
if t.After(end) {
end = t
}
}
return start, end
}
func synthesizeChartTimes(times []time.Time, count int) []time.Time {
if count <= 0 {
return nil
}
if len(times) == count {
return times
}
if len(times) == 1 {
out := make([]time.Time, count)
for i := range out {
out[i] = times[0].Add(time.Duration(i) * time.Minute)
}
return out
}
base := time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
out := make([]time.Time, count)
for i := range out {
out[i] = base.Add(time.Duration(i) * time.Minute)
}
return out
}
func writeSVGOpen(b *strings.Builder, width, height int) {
fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
}
func writeSVGClose(b *strings.Builder) {
b.WriteString("</svg>\n")
}
// writeChartFrame draws the rounded background card, the centered title,
// and — when non-blank — a smaller subtitle line beneath it.
func writeChartFrame(b *strings.Builder, title, subtitle string, width, height int) {
	fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
	fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
		width/2, sanitizeChartText(title))
	if strings.TrimSpace(subtitle) != "" {
		fmt.Fprintf(b, `<text x="%d" y="50" text-anchor="middle" font-family="sans-serif" font-size="12" font-weight="600" fill="#64748b">%s</text>`+"\n",
			width/2, sanitizeChartText(subtitle))
	}
}
// writePlotBorder outlines the inner plot rectangle.
func writePlotBorder(b *strings.Builder, layout chartLayout) {
	fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
		layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
}
// writeHorizontalGrid draws one light horizontal gridline per Y tick.
func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) {
	b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
	for _, tick := range scale.Ticks {
		y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
		fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
			layout.PlotLeft, y, layout.PlotRight, y)
	}
	b.WriteString(`</g>` + "\n")
}
// writeVerticalGrid draws faint vertical gridlines at roughly `target`
// evenly-chosen sample indices, positioned by timestamp.
func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) {
	if pointCount <= 0 {
		return
	}
	start, end := chartTimeBounds(times)
	b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
	for _, idx := range gpuChartLabelIndices(pointCount, target) {
		ts := chartPointTime(times, idx)
		x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
		fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
			x, layout.PlotTop, x, layout.PlotBottom)
	}
	b.WriteString(`</g>` + "\n")
}
// writeSingleAxisY draws the left Y axis line with one tick mark and
// right-aligned numeric label per tick value.
func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) {
	fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
		layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
	for _, tick := range scale.Ticks {
		y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
		fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
			layout.PlotLeft, y, layout.PlotLeft-6, y)
		fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
			layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
	}
}
// writeXAxisLabels renders roughly `target` time labels under the plot at
// their timestamp positions, plus a centered "Time" axis caption. Indices
// beyond the labels slice render as empty strings.
func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) {
	pointCount := len(labels)
	if len(times) > pointCount {
		pointCount = len(times)
	}
	b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
	for _, idx := range gpuChartLabelIndices(pointCount, target) {
		x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
		label := ""
		if idx < len(labels) {
			label = labels[idx]
		}
		fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
	}
	b.WriteString(`</g>` + "\n")
	fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
		(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
}
// writeSeriesPolyline draws one data series as a polyline, then marks its
// peak: a single-point series gets a plain dot, while longer series get a
// highlighted circle plus a small triangle above the peak. With `>=` in the
// scan, the LAST occurrence of the maximum value wins.
func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) {
	if len(values) == 0 {
		return
	}
	// Build the space-separated "x,y x,y ..." point list.
	var points strings.Builder
	for idx, value := range values {
		if idx > 0 {
			points.WriteByte(' ')
		}
		x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
		y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom)
		points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
		points.WriteByte(',')
		points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
	}
	fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
		points.String(), color)
	if len(values) == 1 {
		x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
		y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
		fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
		return
	}
	// Locate the peak (last index of the max value).
	peakIdx := 0
	peakValue := values[0]
	for idx, value := range values[1:] {
		if value >= peakValue {
			peakIdx = idx + 1
			peakValue = value
		}
	}
	x := chartXForTime(chartPointTime(times, peakIdx), start, end, layout.PlotLeft, layout.PlotRight)
	y := chartYForValue(peakValue, scale, layout.PlotTop, layout.PlotBottom)
	fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="4.2" fill="%s" stroke="#ffffff" stroke-width="1.6"/>`+"\n", x, y, color)
	// Downward-pointing triangle hovering just above the peak marker.
	fmt.Fprintf(b, `<path d="M %.1f %.1f L %.1f %.1f L %.1f %.1f Z" fill="%s" opacity="0.9"/>`+"\n",
		x, y-10, x-5, y-18, x+5, y-18, color)
}
// writeLegend draws a legend grid beneath the plot area: up to four
// entries per row, each entry a colored swatch line followed by the
// (sanitized) series name.
func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) {
	if !chartLegendVisible(len(series)) || len(series) == 0 {
		return
	}
	cols := len(series)
	if cols > 4 {
		cols = 4
	}
	cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols)
	baseY := layout.PlotBottom + 74
	for i, entry := range series {
		x := float64(layout.PlotLeft) + cellWidth*float64(i%cols) + 8
		y := float64(baseY + (i/cols)*24)
		fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
			x, y, x+28, y, entry.Color)
		fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
			x+38, y+4, sanitizeChartText(entry.Name))
	}
}
// writeTimelineIdleSpans shades the idle (non-active) timeline segments as
// translucent rectangles spanning the full plot height. Zero- or
// negative-width segments are skipped; drawn spans are at least 1px wide.
func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
	if len(segments) == 0 {
		return
	}
	b.WriteString(`<g data-role="timeline-overlay">` + "\n")
	for _, seg := range segments {
		if seg.Active {
			continue
		}
		if !seg.End.After(seg.Start) {
			continue
		}
		left := chartXForTime(seg.Start, start, end, layout.PlotLeft, layout.PlotRight)
		right := chartXForTime(seg.End, start, end, layout.PlotLeft, layout.PlotRight)
		fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
			left, layout.PlotTop, math.Max(1, right-left), layout.PlotBottom-layout.PlotTop)
	}
	b.WriteString(`</g>` + "\n")
}
// writeTimelineBoundaries draws vertical separator lines at the interior
// boundaries between timeline segments (the start of every segment after
// the first, and the end of every segment before the last). Boundaries
// that round to the same pixel column are drawn only once.
func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) {
	if len(segments) == 0 {
		return
	}
	seen := map[int]bool{}
	b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
	// drawAt emits one vertical line at the pixel column for ts, skipping
	// columns already drawn. Extracted because the original duplicated this
	// logic verbatim for segment starts and ends.
	drawAt := func(ts time.Time) {
		x := int(math.Round(chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)))
		if seen[x] {
			return
		}
		seen[x] = true
		fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
	}
	for i, segment := range segments {
		if i > 0 {
			drawAt(segment.Start)
		}
		if i < len(segments)-1 {
			drawAt(segment.End)
		}
	}
	b.WriteString(`</g>` + "\n")
}
func chartXForTime(ts, start, end time.Time, left, right int) float64 {
if !end.After(start) {
return float64(left+right) / 2
}
if ts.Before(start) {
ts = start
}
if ts.After(end) {
ts = end
}
ratio := float64(ts.Sub(start)) / float64(end.Sub(start))
return float64(left) + ratio*float64(right-left)
}
func chartPointTime(times []time.Time, idx int) time.Time {
if idx >= 0 && idx < len(times) && !times[idx].IsZero() {
return times[idx].UTC()
}
if len(times) > 0 && !times[0].IsZero() {
return times[0].UTC().Add(time.Duration(idx) * time.Minute)
}
return time.Now().UTC().Add(time.Duration(idx) * time.Minute)
}
// chartYForValue maps value onto the vertical pixel range of the plot,
// placing scale.Min at plotBottom and scale.Max at plotTop. A degenerate
// scale (Max <= Min) collapses to the vertical midpoint.
func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 {
	if scale.Max <= scale.Min {
		return float64(plotTop+plotBottom) / 2
	}
	frac := (value - scale.Min) / (scale.Max - scale.Min)
	return float64(plotBottom) - frac*float64(plotBottom-plotTop)
}
// chartSeriesBounds returns a display range [lo, hi] covering values.
// A flat series is padded by 10% of its magnitude (or 1 when it sits at
// zero); a strictly positive lower bound gains 20%-of-span padding on both
// sides with the low end clamped at zero. Empty input yields [0, 1].
func chartSeriesBounds(values []float64) (float64, float64) {
	if len(values) == 0 {
		return 0, 1
	}
	lo, hi := values[0], values[0]
	for _, v := range values[1:] {
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}
	if lo == hi {
		if hi == 0 {
			return 0, 1
		}
		pad := math.Abs(hi) * 0.1
		if pad == 0 {
			pad = 1
		}
		lo, hi = lo-pad, hi+pad
	}
	if lo > 0 {
		pad := (hi - lo) * 0.2
		if pad == 0 {
			pad = hi * 0.1
		}
		lo -= pad
		if lo < 0 {
			lo = 0
		}
		hi += pad
	}
	return lo, hi
}
// chartNiceTicks produces axis tick positions spanning [min, max] using a
// "nice" step from the 1/2/5/10 progression, aiming for roughly target
// intervals. Ticks snap to the step grid and are rounded to 1e-9 to
// suppress accumulated floating-point error.
func chartNiceTicks(min, max float64, target int) []float64 {
	if min == max {
		max = min + 1
	}
	span := max - min
	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
	for _, mult := range []float64{1, 2, 5, 10} {
		if span/(mult*step) <= float64(target)*1.5 {
			step *= mult
			break
		}
	}
	first := math.Floor(min/step) * step
	last := math.Ceil(max/step) * step
	var ticks []float64
	// The tiny step*0.001 slack keeps the final tick despite float drift.
	for v := first; v <= last+step*0.001; v += step {
		ticks = append(ticks, math.Round(v*1e9)/1e9)
	}
	return ticks
}
// valueClamp constrains value to the scale's [Min, Max] range.
func valueClamp(value float64, scale chartScale) float64 {
	switch {
	case value < scale.Min:
		return scale.Min
	case value > scale.Max:
		return scale.Max
	default:
		return value
	}
}
// chartStatsLabel formats a "min … avg … max …" summary for the datasets,
// or returns the empty string when all three aggregates are non-positive
// (nothing worth labeling).
func chartStatsLabel(datasets [][]float64) string {
	lo, mean, hi := globalStats(datasets)
	if lo <= 0 && mean <= 0 && hi <= 0 {
		return ""
	}
	return fmt.Sprintf("min %s avg %s max %s",
		chartLegendNumber(lo),
		chartLegendNumber(mean),
		chartLegendNumber(hi),
	)
}
// gpuDisplayLabel returns "GPU <idx> — <model>" when the model name is
// known, falling back to the bare index label otherwise.
func gpuDisplayLabel(idx int) string {
	name := gpuModelNameByIndex(idx)
	if name == "" {
		return fmt.Sprintf("GPU %d", idx)
	}
	return fmt.Sprintf("GPU %d — %s", idx, name)
}
// gpuModelNameByIndex returns the trimmed model name cached for the GPU at
// idx, or "" if unknown. The cache is refreshed from the platform layer at
// most once every 30 seconds; the refresh runs while holding the cache
// lock, so concurrent callers wait rather than stampede.
func gpuModelNameByIndex(idx int) string {
	gpuLabelCache.mu.Lock()
	defer gpuLabelCache.mu.Unlock()
	now := time.Now()
	if gpuLabelCache.byIndex == nil || now.Sub(gpuLabelCache.loadedAt) > 30*time.Second {
		gpuLabelCache.loadedAt = now
		gpuLabelCache.byIndex = loadGPUModelNames()
	}
	return strings.TrimSpace(gpuLabelCache.byIndex[idx])
}
// loadGPUModelNames queries the platform layer for NVIDIA GPUs and returns
// a map of GPU index to non-empty, whitespace-trimmed model name. Lookup
// errors yield an empty map (best-effort; callers treat missing as
// "unknown model").
func loadGPUModelNames() map[int]string {
	names := map[int]string{}
	gpus, err := platform.New().ListNvidiaGPUs()
	if err != nil {
		return names
	}
	for _, gpu := range gpus {
		if name := strings.TrimSpace(gpu.Name); name != "" {
			names[gpu.Index] = name
		}
	}
	return names
}

View File

@@ -9,13 +9,14 @@ import (
// jobState holds the output lines and completion status of an async job. // jobState holds the output lines and completion status of an async job.
type jobState struct { type jobState struct {
lines []string lines []string
done bool done bool
err string err string
mu sync.Mutex mu sync.Mutex
subs []chan string subs []chan string
cancel func() // optional cancel function; nil if job is not cancellable cancel func() // optional cancel function; nil if job is not cancellable
logPath string logPath string
serialPrefix string
} }
// abort cancels the job if it has a cancel function and is not yet done. // abort cancels the job if it has a cancel function and is not yet done.
@@ -36,6 +37,9 @@ func (j *jobState) append(line string) {
if j.logPath != "" { if j.logPath != "" {
appendJobLog(j.logPath, line) appendJobLog(j.logPath, line)
} }
if j.serialPrefix != "" {
taskSerialWriteLine(j.serialPrefix + line)
}
for _, ch := range j.subs { for _, ch := range j.subs {
select { select {
case ch <- line: case ch <- line:
@@ -84,12 +88,12 @@ func (m *jobManager) create(id string) *jobState {
j := &jobState{} j := &jobState{}
m.jobs[id] = j m.jobs[id] = j
// Schedule cleanup after 30 minutes // Schedule cleanup after 30 minutes
go func() { goRecoverOnce("job cleanup", func() {
time.Sleep(30 * time.Minute) time.Sleep(30 * time.Minute)
m.mu.Lock() m.mu.Lock()
delete(m.jobs, id) delete(m.jobs, id)
m.mu.Unlock() m.mu.Unlock()
}() })
return j return j
} }
@@ -107,8 +111,11 @@ func (m *jobManager) get(id string) (*jobState, bool) {
return j, ok return j, ok
} }
func newTaskJobState(logPath string) *jobState { func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
j := &jobState{logPath: logPath} j := &jobState{logPath: logPath}
if len(serialPrefix) > 0 {
j.serialPrefix = serialPrefix[0]
}
if logPath == "" { if logPath == "" {
return j return j
} }

View File

@@ -17,10 +17,10 @@ import (
// It supports multiple concurrent SAT tasks: a shared event window is open // It supports multiple concurrent SAT tasks: a shared event window is open
// while any SAT task is running, and flushed when all tasks complete. // while any SAT task is running, and flushed when all tasks complete.
type kmsgWatcher struct { type kmsgWatcher struct {
mu sync.Mutex mu sync.Mutex
activeCount int // number of in-flight SAT tasks activeCount int // number of in-flight SAT tasks
window *kmsgWindow window *kmsgWindow
statusDB *app.ComponentStatusDB statusDB *app.ComponentStatusDB
} }
type kmsgWindow struct { type kmsgWindow struct {
@@ -48,36 +48,39 @@ func newKmsgWatcher(statusDB *app.ComponentStatusDB) *kmsgWatcher {
// start launches the background kmsg reading goroutine. // start launches the background kmsg reading goroutine.
func (w *kmsgWatcher) start() { func (w *kmsgWatcher) start() {
go w.run() goRecoverLoop("kmsg watcher", 5*time.Second, w.run)
} }
func (w *kmsgWatcher) run() { func (w *kmsgWatcher) run() {
f, err := os.Open("/dev/kmsg") for {
if err != nil { f, err := os.Open("/dev/kmsg")
slog.Warn("kmsg watcher unavailable", "err", err) if err != nil {
return slog.Warn("kmsg watcher unavailable", "err", err)
} time.Sleep(30 * time.Second)
defer f.Close()
// Best-effort seek to end so we only capture events from now forward.
_, _ = f.Seek(0, io.SeekEnd)
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 64*1024), 64*1024)
for scanner.Scan() {
line := scanner.Text()
evt, ok := parseKmsgLine(line)
if !ok {
continue continue
} }
w.mu.Lock() // Best-effort seek to end so we only capture events from now forward.
if w.window != nil { _, _ = f.Seek(0, io.SeekEnd)
w.recordEvent(evt)
scanner := bufio.NewScanner(f)
scanner.Buffer(make([]byte, 64*1024), 64*1024)
for scanner.Scan() {
line := scanner.Text()
evt, ok := parseKmsgLine(line)
if !ok {
continue
}
w.mu.Lock()
if w.window != nil {
w.recordEvent(evt)
}
w.mu.Unlock()
} }
w.mu.Unlock() if err := scanner.Err(); err != nil {
} slog.Warn("kmsg watcher stopped", "err", err)
if err := scanner.Err(); err != nil { }
slog.Warn("kmsg watcher stopped", "err", err) _ = f.Close()
time.Sleep(2 * time.Second)
} }
} }
@@ -134,7 +137,7 @@ func (w *kmsgWatcher) NotifyTaskFinished(taskID string) {
if window == nil || len(window.events) == 0 { if window == nil || len(window.events) == 0 {
return return
} }
go w.flushWindow(window) goRecoverOnce("kmsg watcher flush", func() { w.flushWindow(window) })
} }
func (w *kmsgWatcher) flushWindow(window *kmsgWindow) { func (w *kmsgWatcher) flushWindow(window *kmsgWindow) {
@@ -229,7 +232,8 @@ func truncate(s string, max int) string {
// isSATTarget returns true for task targets that run hardware acceptance tests. // isSATTarget returns true for task targets that run hardware acceptance tests.
func isSATTarget(target string) bool { func isSATTarget(target string) bool {
switch target { switch target {
case "nvidia", "nvidia-stress", "memory", "memory-stress", "storage", case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress", "cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
"platform-stress": "platform-stress":
return true return true

View File

@@ -8,6 +8,7 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"strconv" "strconv"
"strings"
"time" "time"
"bee/audit/internal/platform" "bee/audit/internal/platform"
@@ -21,6 +22,13 @@ type MetricsDB struct {
db *sql.DB db *sql.DB
} }
func (m *MetricsDB) Close() error {
if m == nil || m.db == nil {
return nil
}
return m.db.Close()
}
// openMetricsDB opens (or creates) the metrics database at the given path. // openMetricsDB opens (or creates) the metrics database at the given path.
func openMetricsDB(path string) (*MetricsDB, error) { func openMetricsDB(path string) (*MetricsDB, error) {
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
@@ -54,6 +62,8 @@ CREATE TABLE IF NOT EXISTS gpu_metrics (
usage_pct REAL, usage_pct REAL,
mem_usage_pct REAL, mem_usage_pct REAL,
power_w REAL, power_w REAL,
clock_mhz REAL,
mem_clock_mhz REAL,
PRIMARY KEY (ts, gpu_index) PRIMARY KEY (ts, gpu_index)
); );
CREATE TABLE IF NOT EXISTS fan_metrics ( CREATE TABLE IF NOT EXISTS fan_metrics (
@@ -70,6 +80,38 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
PRIMARY KEY (ts, name) PRIMARY KEY (ts, name)
); );
`) `)
if err != nil {
return err
}
if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
return err
}
return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
}
func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
rows, err := db.Query("PRAGMA table_info(" + table + ")")
if err != nil {
return err
}
defer rows.Close()
for rows.Next() {
var cid int
var name, ctype string
var notNull, pk int
var dflt sql.NullString
if err := rows.Scan(&cid, &name, &ctype, &notNull, &dflt, &pk); err != nil {
return err
}
if strings.EqualFold(name, column) {
return nil
}
}
if err := rows.Err(); err != nil {
return err
}
_, err = db.Exec("ALTER TABLE " + table + " ADD COLUMN " + column + " " + definition)
return err return err
} }
@@ -91,8 +133,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
} }
for _, g := range s.GPUs { for _, g := range s.GPUs {
_, err = tx.Exec( _, err = tx.Exec(
`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`, `INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz) VALUES(?,?,?,?,?,?,?,?)`,
ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, ts, g.GPUIndex, g.TempC, g.UsagePct, g.MemUsagePct, g.PowerW, g.ClockMHz, g.MemClockMHz,
) )
if err != nil { if err != nil {
return err return err
@@ -129,6 +171,23 @@ func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil) return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
} }
// LoadBetween returns samples in chronological order within the given time window.
func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSample, error) {
if m == nil {
return nil, nil
}
if start.IsZero() || end.IsZero() {
return nil, nil
}
if end.Before(start) {
start, end = end, start
}
return m.loadSamples(
`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
start.Unix(), end.Unix(),
)
}
// loadSamples reconstructs LiveMetricSample rows from the normalized tables. // loadSamples reconstructs LiveMetricSample rows from the normalized tables.
func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) { func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
rows, err := m.db.Query(query, args...) rows, err := m.db.Query(query, args...)
@@ -163,7 +222,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
} }
gpuData := map[gpuKey]platform.GPUMetricRow{} gpuData := map[gpuKey]platform.GPUMetricRow{}
gRows, err := m.db.Query( gRows, err := m.db.Query(
`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`, `SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w,IFNULL(clock_mhz,0),IFNULL(mem_clock_mhz,0) FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
minTS, maxTS, minTS, maxTS,
) )
if err == nil { if err == nil {
@@ -171,7 +230,7 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
for gRows.Next() { for gRows.Next() {
var ts int64 var ts int64
var g platform.GPUMetricRow var g platform.GPUMetricRow
if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil { if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW, &g.ClockMHz, &g.MemClockMHz); err == nil {
gpuData[gpuKey{ts, g.GPUIndex}] = g gpuData[gpuKey{ts, g.GPUIndex}] = g
} }
} }
@@ -283,7 +342,8 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
func (m *MetricsDB) ExportCSV(w io.Writer) error { func (m *MetricsDB) ExportCSV(w io.Writer) error {
rows, err := m.db.Query(` rows, err := m.db.Query(`
SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w, SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w,
g.clock_mhz, g.mem_clock_mhz
FROM sys_metrics s FROM sys_metrics s
LEFT JOIN gpu_metrics g ON g.ts = s.ts LEFT JOIN gpu_metrics g ON g.ts = s.ts
ORDER BY s.ts, g.gpu_index ORDER BY s.ts, g.gpu_index
@@ -294,13 +354,13 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
defer rows.Close() defer rows.Close()
cw := csv.NewWriter(w) cw := csv.NewWriter(w)
_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"}) _ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w", "gpu_clock_mhz", "gpu_mem_clock_mhz"})
for rows.Next() { for rows.Next() {
var ts int64 var ts int64
var cpu, mem, pwr float64 var cpu, mem, pwr float64
var gpuIdx sql.NullInt64 var gpuIdx sql.NullInt64
var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64 var gpuTemp, gpuUse, gpuMem, gpuPow, gpuClock, gpuMemClock sql.NullFloat64
if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil { if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow, &gpuClock, &gpuMemClock); err != nil {
continue continue
} }
row := []string{ row := []string{
@@ -316,9 +376,11 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64), strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64), strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64), strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuClock.Float64, 'f', 1, 64),
strconv.FormatFloat(gpuMemClock.Float64, 'f', 1, 64),
) )
} else { } else {
row = append(row, "", "", "", "", "") row = append(row, "", "", "", "", "", "", "")
} }
_ = cw.Write(row) _ = cw.Write(row)
} }
@@ -326,9 +388,6 @@ func (m *MetricsDB) ExportCSV(w io.Writer) error {
return cw.Error() return cw.Error()
} }
// Close closes the database.
func (m *MetricsDB) Close() { _ = m.db.Close() }
func nullFloat(v float64) sql.NullFloat64 { func nullFloat(v float64) sql.NullFloat64 {
return sql.NullFloat64{Float64: v, Valid: true} return sql.NullFloat64{Float64: v, Valid: true}
} }

View File

@@ -1,11 +1,13 @@
package webui package webui
import ( import (
"database/sql"
"path/filepath" "path/filepath"
"testing" "testing"
"time" "time"
"bee/audit/internal/platform" "bee/audit/internal/platform"
_ "modernc.org/sqlite"
) )
func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) { func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
@@ -67,3 +69,106 @@ func TestMetricsDBLoadSamplesKeepsChronologicalRangeForGPUs(t *testing.T) {
} }
} }
} }
func TestMetricsDBMigratesLegacyGPUSchema(t *testing.T) {
path := filepath.Join(t.TempDir(), "metrics.db")
raw, err := sql.Open("sqlite", path)
if err != nil {
t.Fatalf("sql.Open: %v", err)
}
_, err = raw.Exec(`
CREATE TABLE gpu_metrics (
ts INTEGER NOT NULL,
gpu_index INTEGER NOT NULL,
temp_c REAL,
usage_pct REAL,
mem_usage_pct REAL,
power_w REAL,
PRIMARY KEY (ts, gpu_index)
);
CREATE TABLE sys_metrics (
ts INTEGER NOT NULL,
cpu_load_pct REAL,
mem_load_pct REAL,
power_w REAL,
PRIMARY KEY (ts)
);
CREATE TABLE fan_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
rpm REAL,
PRIMARY KEY (ts, name)
);
CREATE TABLE temp_metrics (
ts INTEGER NOT NULL,
name TEXT NOT NULL,
grp TEXT NOT NULL,
celsius REAL,
PRIMARY KEY (ts, name)
);
`)
if err != nil {
t.Fatalf("create legacy schema: %v", err)
}
_ = raw.Close()
db, err := openMetricsDB(path)
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
now := time.Unix(1_700_000_100, 0).UTC()
err = db.Write(platform.LiveMetricSample{
Timestamp: now,
GPUs: []platform.GPUMetricRow{
{GPUIndex: 0, ClockMHz: 1410, MemClockMHz: 2600},
},
})
if err != nil {
t.Fatalf("Write: %v", err)
}
samples, err := db.LoadAll()
if err != nil {
t.Fatalf("LoadAll: %v", err)
}
if len(samples) != 1 || len(samples[0].GPUs) != 1 {
t.Fatalf("samples=%+v", samples)
}
if got := samples[0].GPUs[0].ClockMHz; got != 1410 {
t.Fatalf("ClockMHz=%v want 1410", got)
}
if got := samples[0].GPUs[0].MemClockMHz; got != 2600 {
t.Fatalf("MemClockMHz=%v want 2600", got)
}
}
func TestMetricsDBLoadBetweenFiltersWindow(t *testing.T) {
db, err := openMetricsDB(filepath.Join(t.TempDir(), "metrics.db"))
if err != nil {
t.Fatalf("openMetricsDB: %v", err)
}
defer db.Close()
base := time.Unix(1_700_000_000, 0).UTC()
for i := 0; i < 5; i++ {
if err := db.Write(platform.LiveMetricSample{
Timestamp: base.Add(time.Duration(i) * time.Minute),
CPULoadPct: float64(i),
}); err != nil {
t.Fatalf("Write(%d): %v", i, err)
}
}
got, err := db.LoadBetween(base.Add(1*time.Minute), base.Add(3*time.Minute))
if err != nil {
t.Fatalf("LoadBetween: %v", err)
}
if len(got) != 3 {
t.Fatalf("LoadBetween len=%d want 3", len(got))
}
if !got[0].Timestamp.Equal(base.Add(1*time.Minute)) || !got[2].Timestamp.Equal(base.Add(3*time.Minute)) {
t.Fatalf("window=%v..%v", got[0].Timestamp, got[2].Timestamp)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,41 @@
package webui
import (
"fmt"
"os"
"strings"
"time"
)
var taskSerialWriteLine = writeTaskSerialLine
func writeTaskSerialLine(line string) {
line = strings.TrimSpace(line)
if line == "" {
return
}
payload := fmt.Sprintf("%s %s\n", time.Now().UTC().Format("2006-01-02 15:04:05Z"), line)
for _, path := range []string{"/dev/ttyS0", "/dev/ttyS1", "/dev/console"} {
f, err := os.OpenFile(path, os.O_WRONLY|os.O_APPEND, 0)
if err != nil {
continue
}
_, _ = f.WriteString(payload)
_ = f.Close()
return
}
}
func taskSerialPrefix(t *Task) string {
if t == nil {
return "[task] "
}
return fmt.Sprintf("[task %s %s] ", t.ID, t.Name)
}
func taskSerialEvent(t *Task, event string) {
if t == nil {
return
}
taskSerialWriteLine(fmt.Sprintf("%s%s", taskSerialPrefix(t), strings.TrimSpace(event)))
}

View File

@@ -1,15 +1,19 @@
package webui package webui
import ( import (
"bufio"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"html" "html"
"io"
"log/slog" "log/slog"
"mime" "mime"
"net"
"net/http" "net/http"
"os" "os"
"path/filepath" "path/filepath"
"runtime/debug"
"sort" "sort"
"strings" "strings"
"sync" "sync"
@@ -18,7 +22,6 @@ import (
"bee/audit/internal/app" "bee/audit/internal/app"
"bee/audit/internal/platform" "bee/audit/internal/platform"
"bee/audit/internal/runtimeenv" "bee/audit/internal/runtimeenv"
gocharts "github.com/go-analyze/charts"
"reanimator/chart/viewer" "reanimator/chart/viewer"
"reanimator/chart/web" "reanimator/chart/web"
) )
@@ -234,6 +237,12 @@ func NewHandler(opts HandlerOptions) http.Handler {
// SAT // SAT
mux.HandleFunc("POST /api/sat/nvidia/run", h.handleAPISATRun("nvidia")) mux.HandleFunc("POST /api/sat/nvidia/run", h.handleAPISATRun("nvidia"))
mux.HandleFunc("POST /api/sat/nvidia-targeted-stress/run", h.handleAPISATRun("nvidia-targeted-stress"))
mux.HandleFunc("POST /api/sat/nvidia-compute/run", h.handleAPISATRun("nvidia-compute"))
mux.HandleFunc("POST /api/sat/nvidia-targeted-power/run", h.handleAPISATRun("nvidia-targeted-power"))
mux.HandleFunc("POST /api/sat/nvidia-pulse/run", h.handleAPISATRun("nvidia-pulse"))
mux.HandleFunc("POST /api/sat/nvidia-interconnect/run", h.handleAPISATRun("nvidia-interconnect"))
mux.HandleFunc("POST /api/sat/nvidia-bandwidth/run", h.handleAPISATRun("nvidia-bandwidth"))
mux.HandleFunc("POST /api/sat/nvidia-stress/run", h.handleAPISATRun("nvidia-stress")) mux.HandleFunc("POST /api/sat/nvidia-stress/run", h.handleAPISATRun("nvidia-stress"))
mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory")) mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory"))
mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage")) mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
@@ -247,6 +256,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress")) mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream) mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort) mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
// Tasks // Tasks
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList) mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
@@ -255,6 +265,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel) mux.HandleFunc("POST /api/tasks/{id}/cancel", h.handleAPITasksCancel)
mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority) mux.HandleFunc("POST /api/tasks/{id}/priority", h.handleAPITasksPriority)
mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream) mux.HandleFunc("GET /api/tasks/{id}/stream", h.handleAPITasksStream)
mux.HandleFunc("GET /tasks/{id}", h.handleTaskPage)
// Services // Services
mux.HandleFunc("GET /api/services", h.handleAPIServicesList) mux.HandleFunc("GET /api/services", h.handleAPIServicesList)
@@ -283,6 +294,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
// GPU presence / tools // GPU presence / tools
mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence) mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools) mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
// System // System
@@ -309,11 +321,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
mux.HandleFunc("GET /", h.handlePage) mux.HandleFunc("GET /", h.handlePage)
h.mux = mux h.mux = mux
return mux return recoverMiddleware(mux)
} }
func (h *handler) startMetricsCollector() { func (h *handler) startMetricsCollector() {
go func() { goRecoverLoop("metrics collector", 2*time.Second, func() {
ticker := time.NewTicker(metricsCollectInterval) ticker := time.NewTicker(metricsCollectInterval)
defer ticker.Stop() defer ticker.Stop()
for range ticker.C { for range ticker.C {
@@ -324,7 +336,7 @@ func (h *handler) startMetricsCollector() {
h.feedRings(sample) h.feedRings(sample)
h.setLatestMetric(sample) h.setLatestMetric(sample)
} }
}() })
} }
func (h *handler) setLatestMetric(sample platform.LiveMetricSample) { func (h *handler) setLatestMetric(sample platform.LiveMetricSample) {
@@ -345,7 +357,81 @@ func (h *handler) latestMetric() (platform.LiveMetricSample, bool) {
// ListenAndServe starts the HTTP server. // ListenAndServe starts the HTTP server.
func ListenAndServe(addr string, opts HandlerOptions) error { func ListenAndServe(addr string, opts HandlerOptions) error {
return http.ListenAndServe(addr, NewHandler(opts)) srv := &http.Server{
Addr: addr,
Handler: NewHandler(opts),
ReadHeaderTimeout: 5 * time.Second,
ReadTimeout: 30 * time.Second,
IdleTimeout: 2 * time.Minute,
}
return srv.ListenAndServe()
}
type trackingResponseWriter struct {
http.ResponseWriter
wroteHeader bool
}
func (w *trackingResponseWriter) WriteHeader(statusCode int) {
w.wroteHeader = true
w.ResponseWriter.WriteHeader(statusCode)
}
func (w *trackingResponseWriter) Write(p []byte) (int, error) {
w.wroteHeader = true
return w.ResponseWriter.Write(p)
}
func (w *trackingResponseWriter) Flush() {
w.wroteHeader = true
if f, ok := w.ResponseWriter.(http.Flusher); ok {
f.Flush()
}
}
func (w *trackingResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
h, ok := w.ResponseWriter.(http.Hijacker)
if !ok {
return nil, nil, fmt.Errorf("hijacking not supported")
}
return h.Hijack()
}
func (w *trackingResponseWriter) Push(target string, opts *http.PushOptions) error {
p, ok := w.ResponseWriter.(http.Pusher)
if !ok {
return http.ErrNotSupported
}
return p.Push(target, opts)
}
func (w *trackingResponseWriter) ReadFrom(r io.Reader) (int64, error) {
rf, ok := w.ResponseWriter.(io.ReaderFrom)
if !ok {
return io.Copy(w.ResponseWriter, r)
}
w.wroteHeader = true
return rf.ReadFrom(r)
}
func recoverMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
tw := &trackingResponseWriter{ResponseWriter: w}
defer func() {
if rec := recover(); rec != nil {
slog.Error("http handler panic",
"method", r.Method,
"path", r.URL.Path,
"panic", fmt.Sprint(rec),
"stack", string(debug.Stack()),
)
if !tw.wroteHeader {
http.Error(tw, "internal server error", http.StatusInternalServerError)
}
}
}()
next.ServeHTTP(tw, r)
})
} }
// ── Infrastructure handlers ────────────────────────────────────────────────── // ── Infrastructure handlers ──────────────────────────────────────────────────
@@ -475,13 +561,44 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
http.Error(w, "metrics database not available", http.StatusServiceUnavailable) http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
return return
} }
datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path) samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
timeline := metricsTimelineSegments(samples, time.Now())
if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if !ok {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return
}
w.Header().Set("Content-Type", "image/svg+xml")
w.Header().Set("Cache-Control", "no-store")
_, _ = w.Write(buf)
return
}
datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
if !ok { if !ok {
http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable) http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
return return
} }
buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax) buf, err := renderMetricChartSVG(
title,
labels,
sampleTimes(samples),
datasets,
names,
yMin,
yMax,
chartCanvasHeightForPath(path, len(names)),
timeline,
)
if err != nil { if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError) http.Error(w, err.Error(), http.StatusInternalServerError)
return return
@@ -491,14 +608,6 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
_, _ = w.Write(buf) _, _ = w.Write(buf)
} }
func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
samples, err := h.metricsDB.LoadAll()
if err != nil || len(samples) == 0 {
return nil, nil, nil, "", nil, nil, false
}
return chartDataFromSamples(path, samples)
}
func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) { func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
var datasets [][]float64 var datasets [][]float64
var names []string var names []string
@@ -578,18 +687,24 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = autoMax120(datasets...) yMax = autoMax120(datasets...)
case path == "gpu-all-clock":
title = "GPU Core Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
yMin, yMax = autoBounds120(datasets...)
case path == "gpu-all-memclock":
title = "GPU Memory Clock"
datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
yMin, yMax = autoBounds120(datasets...)
case strings.HasPrefix(path, "gpu/"): case strings.HasPrefix(path, "gpu/"):
rest := strings.TrimPrefix(path, "gpu/") idx, sub, ok := parseGPUChartPath(path)
sub := "" if !ok {
if i := strings.LastIndex(rest, "-"); i > 0 { return nil, nil, nil, "", nil, nil, false
sub = rest[i+1:]
rest = rest[:i]
} }
idx := 0
fmt.Sscanf(rest, "%d", &idx)
switch sub { switch sub {
case "load": case "load":
title = fmt.Sprintf("GPU %d Load", idx) title = gpuDisplayLabel(idx) + " Load"
util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct }) util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct }) mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
if util == nil && mem == nil { if util == nil && mem == nil {
@@ -600,7 +715,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = floatPtr(100) yMax = floatPtr(100)
case "temp": case "temp":
title = fmt.Sprintf("GPU %d Temperature", idx) title = gpuDisplayLabel(idx) + " Temperature"
temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC }) temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
if temp == nil { if temp == nil {
return nil, nil, nil, "", nil, nil, false return nil, nil, nil, "", nil, nil, false
@@ -609,8 +724,26 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
names = []string{"Temp °C"} names = []string{"Temp °C"}
yMin = floatPtr(0) yMin = floatPtr(0)
yMax = autoMax120(temp) yMax = autoMax120(temp)
case "clock":
title = gpuDisplayLabel(idx) + " Core Clock"
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false
}
datasets = [][]float64{clock}
names = []string{"Core Clock MHz"}
yMin, yMax = autoBounds120(clock)
case "memclock":
title = gpuDisplayLabel(idx) + " Memory Clock"
clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
if clock == nil {
return nil, nil, nil, "", nil, nil, false
}
datasets = [][]float64{clock}
names = []string{"Memory Clock MHz"}
yMin, yMax = autoBounds120(clock)
default: default:
title = fmt.Sprintf("GPU %d Power", idx) title = gpuDisplayLabel(idx) + " Power"
power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW }) power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
if power == nil { if power == nil {
return nil, nil, nil, "", nil, nil, false return nil, nil, nil, "", nil, nil, false
@@ -627,6 +760,26 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
return datasets, names, labels, title, yMin, yMax, len(datasets) > 0 return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
} }
// parseGPUChartPath splits a per-GPU chart path of the form "gpu/<index>" or
// "gpu/<index>-<sub>" (e.g. "gpu/3-load", "gpu/0-overview") into the GPU
// index and the chart sub-kind. It reports ok=false for anything that is not
// a well-formed GPU chart path.
//
// The index segment must consist solely of ASCII digits: with a bare
// fmt.Sscanf, inputs such as "gpu/5x-load" (trailing garbage), "gpu/ 5"
// (leading spaces) and "gpu/-1-load" (negative index) were accepted.
func parseGPUChartPath(path string) (idx int, sub string, ok bool) {
	rest, found := strings.CutPrefix(path, "gpu/")
	if !found || rest == "" {
		return 0, "", false
	}
	// "<index>-<sub>": the sub-kind follows the last dash. Requiring i > 0
	// keeps a leading dash from being treated as the separator.
	if i := strings.LastIndex(rest, "-"); i > 0 {
		sub = rest[i+1:]
		rest = rest[:i]
	}
	// Accept only a pure non-negative decimal index.
	for _, r := range rest {
		if r < '0' || r > '9' {
			return 0, "", false
		}
	}
	n, err := fmt.Sscanf(rest, "%d", &idx)
	if err != nil || n != 1 {
		return 0, "", false
	}
	return idx, sub, true
}
func sampleTimeLabels(samples []platform.LiveMetricSample) []string { func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
labels := make([]string, len(samples)) labels := make([]string, len(samples))
if len(samples) == 0 { if len(samples) == 0 {
@@ -719,7 +872,7 @@ func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetr
continue continue
} }
datasets = append(datasets, ds) datasets = append(datasets, ds)
names = append(names, fmt.Sprintf("GPU %d", idx)) names = append(names, gpuDisplayLabel(idx))
} }
return datasets, names return datasets, names
} }
@@ -852,64 +1005,37 @@ func autoBounds120(datasets ...[]float64) (*float64, *float64) {
return floatPtr(low), floatPtr(high) return floatPtr(low), floatPtr(high)
} }
// renderChartSVG renders a line chart SVG with a fixed Y-axis range. func gpuChartLabelIndices(total, target int) []int {
func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) { if total <= 0 {
n := len(labels) return nil
if n == 0 {
n = 1
labels = []string{""}
} }
for i := range datasets { if total == 1 {
if len(datasets[i]) == 0 { return []int{0}
datasets[i] = make([]float64, n)
}
} }
// Append global min/avg/max to title. step := total / target
mn, avg, mx := globalStats(datasets) if step < 1 {
if mx > 0 { step = 1
title = fmt.Sprintf("%s ↓%s ~%s ↑%s",
title,
chartLegendNumber(mn),
chartLegendNumber(avg),
chartLegendNumber(mx),
)
} }
title = sanitizeChartText(title) var indices []int
names = sanitizeChartTexts(names) for i := 0; i < total; i += step {
sparse := sanitizeChartTexts(sparseLabels(labels, 6)) indices = append(indices, i)
}
if indices[len(indices)-1] != total-1 {
indices = append(indices, total-1)
}
return indices
}
opt := gocharts.NewLineChartOptionWithData(datasets) func chartCanvasHeightForPath(path string, seriesCount int) int {
opt.Title = gocharts.TitleOption{Text: title} height := chartCanvasHeight(seriesCount)
opt.XAxis.Labels = sparse if isGPUChartPath(path) {
opt.Legend = gocharts.LegendOption{SeriesNames: names} return height * 2
if chartLegendVisible(len(names)) {
opt.Legend.Offset = gocharts.OffsetStr{Top: gocharts.PositionBottom}
opt.Legend.OverlayChart = gocharts.Ptr(false)
} else {
opt.Legend.Show = gocharts.Ptr(false)
}
opt.Symbol = gocharts.SymbolNone
// Right padding: reserve space for the MarkLine label (library recommendation).
opt.Padding = gocharts.NewBox(20, 20, 80, 20)
if yMin != nil || yMax != nil {
opt.YAxis = []gocharts.YAxisOption{chartYAxisOption(yMin, yMax)}
} }
return height
}
// Add a single peak mark line on the series that holds the global maximum. func isGPUChartPath(path string) bool {
peakIdx, _ := globalPeakSeries(datasets) return strings.HasPrefix(path, "gpu-all-") || strings.HasPrefix(path, "gpu/")
if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
}
p := gocharts.NewPainter(gocharts.PainterOptions{
OutputFormat: gocharts.ChartOutputSVG,
Width: 1400,
Height: chartCanvasHeight(len(names)),
}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
if err := p.LineChart(opt); err != nil {
return nil, err
}
return p.Bytes()
} }
func chartLegendVisible(seriesCount int) bool { func chartLegendVisible(seriesCount int) bool {
@@ -923,30 +1049,6 @@ func chartCanvasHeight(seriesCount int) int {
return 288 return 288
} }
// chartYAxisOption builds the shared Y-axis configuration: optional fixed
// min/max bounds, eleven tick labels, and compact number formatting via
// chartYAxisNumber.
func chartYAxisOption(yMin, yMax *float64) gocharts.YAxisOption {
	opt := gocharts.YAxisOption{
		Min:            yMin,
		Max:            yMax,
		LabelCount:     11,
		ValueFormatter: chartYAxisNumber,
	}
	return opt
}
// globalPeakSeries reports which series holds the largest value seen across
// all datasets, together with that value. When no value exceeds zero
// (including empty input) it returns idx == -1 and peak == 0.
func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
	idx = -1
	for seriesNo := range datasets {
		for _, sample := range datasets[seriesNo] {
			if sample <= peak {
				continue
			}
			peak = sample
			idx = seriesNo
		}
	}
	return idx, peak
}
// globalStats returns min, average, and max across all values in all datasets. // globalStats returns min, average, and max across all values in all datasets.
func globalStats(datasets [][]float64) (mn, avg, mx float64) { func globalStats(datasets [][]float64) (mn, avg, mx float64) {
var sum float64 var sum float64
@@ -986,21 +1088,6 @@ func sanitizeChartText(s string) string {
}, s)) }, s))
} }
// sanitizeChartTexts applies sanitizeChartText to every element and returns
// a new slice of the same length; the input is left untouched.
func sanitizeChartTexts(in []string) []string {
	out := make([]string, 0, len(in))
	for _, s := range in {
		out = append(out, sanitizeChartText(s))
	}
	return out
}
// safeIdx returns s[i], or 0 when i is past the end of s.
func safeIdx(s []float64, i int) float64 {
	if i >= len(s) {
		return 0
	}
	return s[i]
}
func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) { func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
var datasets [][]float64 var datasets [][]float64
var names []string var names []string
@@ -1087,20 +1174,6 @@ func chartYAxisNumber(v float64) string {
return out return out
} }
// sparseLabels returns a copy of labels where only every step-th entry is
// kept and the rest are blanked, so the X axis shows roughly n tick labels.
// Blanked positions hold the empty string, preserving slice length and index
// alignment with the data points.
//
// n < 1 is treated as "keep every label" instead of panicking with a
// division by zero (the original computed len(labels) / n unconditionally).
func sparseLabels(labels []string, n int) []string {
	out := make([]string, len(labels))
	step := 1
	if n > 0 {
		if s := len(labels) / n; s > 1 {
			step = s
		}
	}
	for i, l := range labels {
		if i%step == 0 {
			out[i] = l
		}
	}
	return out
}
func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) { func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
if h.metricsDB == nil { if h.metricsDB == nil {
http.Error(w, "metrics database not available", http.StatusServiceUnavailable) http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
@@ -1116,6 +1189,11 @@ func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Reque
func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) { func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Cache-Control", "no-store") w.Header().Set("Cache-Control", "no-store")
if strings.TrimSpace(h.opts.AuditPath) == "" {
w.WriteHeader(http.StatusOK)
_, _ = w.Write([]byte("ready"))
return
}
if _, err := os.Stat(h.opts.AuditPath); err != nil { if _, err := os.Stat(h.opts.AuditPath); err != nil {
w.WriteHeader(http.StatusServiceUnavailable) w.WriteHeader(http.StatusServiceUnavailable)
_, _ = w.Write([]byte("starting")) _, _ = w.Write([]byte("starting"))

View File

@@ -34,6 +34,49 @@ func TestChartLegendNumber(t *testing.T) {
} }
} }
// TestRecoverMiddlewareReturns500OnPanic verifies that a panic inside a
// wrapped handler becomes a 500 response containing "internal server error"
// instead of crashing the server.
func TestRecoverMiddlewareReturns500OnPanic(t *testing.T) {
	handler := recoverMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		panic("boom")
	}))
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, "/panic", nil)
	handler.ServeHTTP(rec, req)
	if rec.Code != http.StatusInternalServerError {
		t.Fatalf("status=%d want %d", rec.Code, http.StatusInternalServerError)
	}
	if !strings.Contains(rec.Body.String(), "internal server error") {
		t.Fatalf("body=%q", rec.Body.String())
	}
}
// TestRecoverMiddlewarePreservesStreamingInterfaces ensures the recover
// wrapper does not break SSE streaming: sseStart must be able to set the
// text/event-stream content type through the wrapped ResponseWriter and
// sseWrite's event/data frames must reach the client.
func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
	handler := recoverMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if !sseStart(w) {
			return
		}
		if !sseWrite(w, "tick", "ok") {
			t.Fatal("expected sse write to succeed")
		}
	}))
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, "/stream", nil)
	handler.ServeHTTP(rec, req)
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	if got := rec.Header().Get("Content-Type"); got != "text/event-stream" {
		t.Fatalf("content-type=%q", got)
	}
	body := rec.Body.String()
	if !strings.Contains(body, "event: tick\n") || !strings.Contains(body, "data: ok\n\n") {
		t.Fatalf("body=%q", body)
	}
}
func TestChartDataFromSamplesUsesFullHistory(t *testing.T) { func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
samples := []platform.LiveMetricSample{ samples := []platform.LiveMetricSample{
{ {
@@ -136,6 +179,39 @@ func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
} }
} }
// TestChartDataFromSamplesIncludesGPUClockCharts covers the "gpu-all-clock"
// aggregate chart: each GPU present in the samples must yield a named series
// (in stable index order) carrying its core-clock values.
func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
	samples := []platform.LiveMetricSample{
		{
			Timestamp: time.Now().Add(-2 * time.Minute),
			GPUs: []platform.GPUMetricRow{
				{GPUIndex: 0, ClockMHz: 1400},
				{GPUIndex: 3, ClockMHz: 1500},
			},
		},
		{
			Timestamp: time.Now().Add(-1 * time.Minute),
			GPUs: []platform.GPUMetricRow{
				{GPUIndex: 0, ClockMHz: 1410},
				{GPUIndex: 3, ClockMHz: 1510},
			},
		},
	}
	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
	if !ok {
		t.Fatal("gpu-all-clock returned ok=false")
	}
	if title != "GPU Core Clock" {
		t.Fatalf("title=%q", title)
	}
	if len(names) != 2 || names[0] != "GPU 0" || names[1] != "GPU 3" {
		t.Fatalf("names=%v", names)
	}
	// Second series (GPU 3), second sample: the 1510 MHz reading.
	if got := datasets[1][1]; got != 1510 {
		t.Fatalf("GPU 3 core clock=%v want 1510", got)
	}
}
func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) { func TestNormalizePowerSeriesHoldsLastPositive(t *testing.T) {
got := normalizePowerSeries([]float64{0, 480, 0, 0, 510, 0}) got := normalizePowerSeries([]float64{0, 480, 0, 0, 510, 0})
want := []float64{0, 480, 480, 480, 510, 510} want := []float64{0, 480, 480, 480, 510, 510}
@@ -157,6 +233,21 @@ func TestRenderMetricsUsesBufferedChartRefresh(t *testing.T) {
if !strings.Contains(body, "el.dataset.loading === '1'") { if !strings.Contains(body, "el.dataset.loading === '1'") {
t.Fatalf("metrics page should avoid overlapping chart reloads: %s", body) t.Fatalf("metrics page should avoid overlapping chart reloads: %s", body)
} }
if !strings.Contains(body, `id="gpu-metrics-section" style="display:none`) {
t.Fatalf("metrics page should keep gpu charts in a hidden dedicated section until GPUs are detected: %s", body)
}
if !strings.Contains(body, `id="gpu-chart-toggle"`) {
t.Fatalf("metrics page should render GPU chart mode toggle: %s", body)
}
if !strings.Contains(body, `/api/metrics/chart/gpu-all-clock.svg`) {
t.Fatalf("metrics page should include GPU core clock chart: %s", body)
}
if strings.Contains(body, `/api/metrics/chart/gpu-all-memclock.svg`) {
t.Fatalf("metrics page should not include GPU memory clock chart: %s", body)
}
if !strings.Contains(body, `renderGPUOverviewCards(indices, names)`) {
t.Fatalf("metrics page should build per-GPU chart cards dynamically: %s", body)
}
} }
func TestChartLegendVisible(t *testing.T) { func TestChartLegendVisible(t *testing.T) {
@@ -199,6 +290,124 @@ func TestChartCanvasHeight(t *testing.T) {
} }
} }
// TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps checks that
// overlapping task windows merge into a single active segment and that the
// gaps before, between, and after them come back as idle segments covering
// the whole requested range.
func TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps(t *testing.T) {
	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
	end := start.Add(10 * time.Minute)
	// taskWindow builds a completed task spanning [start+offsetStart, start+offsetEnd].
	taskWindow := func(offsetStart, offsetEnd time.Duration) Task {
		s := start.Add(offsetStart)
		e := start.Add(offsetEnd)
		return Task{
			Name:      "task",
			Status:    TaskDone,
			StartedAt: &s,
			DoneAt:    &e,
		}
	}
	// Windows 1-3m and 2-5m overlap and should merge; 7-8m stands alone.
	segments := chartTimelineSegmentsForRange(start, end, end, []Task{
		taskWindow(1*time.Minute, 3*time.Minute),
		taskWindow(2*time.Minute, 5*time.Minute),
		taskWindow(7*time.Minute, 8*time.Minute),
	})
	if len(segments) != 5 {
		t.Fatalf("segments=%d want 5: %#v", len(segments), segments)
	}
	wantActive := []bool{false, true, false, true, false}
	wantMinutes := [][2]int{{0, 1}, {1, 5}, {5, 7}, {7, 8}, {8, 10}}
	for i, segment := range segments {
		if segment.Active != wantActive[i] {
			t.Fatalf("segment[%d].Active=%v want %v", i, segment.Active, wantActive[i])
		}
		if got := int(segment.Start.Sub(start).Minutes()); got != wantMinutes[i][0] {
			t.Fatalf("segment[%d] start=%d want %d", i, got, wantMinutes[i][0])
		}
		if got := int(segment.End.Sub(start).Minutes()); got != wantMinutes[i][1] {
			t.Fatalf("segment[%d] end=%d want %d", i, got, wantMinutes[i][1])
		}
	}
}
// TestRenderMetricChartSVGIncludesTimelineOverlay renders a small power chart
// with one idle and one active timeline segment and asserts the SVG output
// carries the timeline overlay group, the idle-shading opacity, and the
// chart title.
func TestRenderMetricChartSVGIncludesTimelineOverlay(t *testing.T) {
	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
	labels := []string{"12:00", "12:01", "12:02"}
	times := []time.Time{start, start.Add(time.Minute), start.Add(2 * time.Minute)}
	svg, err := renderMetricChartSVG(
		"System Power",
		labels,
		times,
		[][]float64{{300, 320, 310}},
		[]string{"Power W"},
		floatPtr(0),
		floatPtr(400),
		360,
		[]chartTimelineSegment{
			{Start: start, End: start.Add(time.Minute), Active: false},
			{Start: start.Add(time.Minute), End: start.Add(2 * time.Minute), Active: true},
		},
	)
	if err != nil {
		t.Fatal(err)
	}
	body := string(svg)
	if !strings.Contains(body, `data-role="timeline-overlay"`) {
		t.Fatalf("svg missing timeline overlay: %s", body)
	}
	if !strings.Contains(body, `opacity="0.10"`) {
		t.Fatalf("svg missing idle overlay opacity: %s", body)
	}
	if !strings.Contains(body, `System Power`) {
		t.Fatalf("svg missing chart title: %s", body)
	}
}
// TestHandleMetricsChartSVGRendersCustomSVG drives the chart handler end to
// end against a real on-disk metrics DB and asserts the custom SVG renderer
// produced the response (timeline overlay, rounded polyline styling).
func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
	dir := t.TempDir()
	db, err := openMetricsDB(filepath.Join(dir, "metrics.db"))
	if err != nil {
		t.Fatal(err)
	}
	t.Cleanup(func() { _ = db.db.Close() })
	// Seed three one-minute-apart power samples for the chart to draw.
	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
	for i, sample := range []platform.LiveMetricSample{
		{Timestamp: start, PowerW: 300},
		{Timestamp: start.Add(time.Minute), PowerW: 320},
		{Timestamp: start.Add(2 * time.Minute), PowerW: 310},
	} {
		if err := db.Write(sample); err != nil {
			t.Fatalf("write sample %d: %v", i, err)
		}
	}
	// Install one finished task so the timeline overlay has an active span;
	// restore the previous global queue contents on cleanup.
	globalQueue.mu.Lock()
	prevTasks := globalQueue.tasks
	s := start.Add(30 * time.Second)
	e := start.Add(90 * time.Second)
	globalQueue.tasks = []*Task{{Name: "Burn", Status: TaskDone, StartedAt: &s, DoneAt: &e}}
	globalQueue.mu.Unlock()
	t.Cleanup(func() {
		globalQueue.mu.Lock()
		globalQueue.tasks = prevTasks
		globalQueue.mu.Unlock()
	})
	h := &handler{opts: HandlerOptions{ExportDir: dir}, metricsDB: db}
	rec := httptest.NewRecorder()
	req := httptest.NewRequest(http.MethodGet, "/api/metrics/chart/server-power.svg", nil)
	h.handleMetricsChartSVG(rec, req)
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	body := rec.Body.String()
	if !strings.Contains(body, `data-role="timeline-overlay"`) {
		t.Fatalf("custom svg response missing timeline overlay: %s", body)
	}
	if !strings.Contains(body, `stroke-linecap="round"`) {
		t.Fatalf("custom svg response missing custom polyline styling: %s", body)
	}
}
func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) { func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0}) got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
want := []float64{4200, 4200, 4200, 4300, 4300} want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -212,21 +421,6 @@ func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
} }
} }
// TestChartYAxisOption pins the shared Y-axis configuration: the min/max
// pointers pass through unchanged, eleven tick labels are requested, and the
// value formatter abbreviates 1000 as "1к" (compact thousands suffix).
func TestChartYAxisOption(t *testing.T) {
	min := floatPtr(0)
	max := floatPtr(100)
	opt := chartYAxisOption(min, max)
	if opt.Min != min || opt.Max != max {
		t.Fatalf("chartYAxisOption min/max mismatch: %#v", opt)
	}
	if opt.LabelCount != 11 {
		t.Fatalf("chartYAxisOption labelCount=%d want 11", opt.LabelCount)
	}
	if got := opt.ValueFormatter(1000); got != "1к" {
		t.Fatalf("chartYAxisOption formatter(1000)=%q want 1к", got)
	}
}
func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) { func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
r1 := newMetricsRing(4) r1 := newMetricsRing(4)
r2 := newMetricsRing(4) r2 := newMetricsRing(4)
@@ -335,7 +529,7 @@ func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
t.Fatalf("status=%d", rec.Code) t.Fatalf("status=%d", rec.Code)
} }
body := rec.Body.String() body := rec.Body.String()
if !strings.Contains(body, `Run Audit`) { if !strings.Contains(body, `onclick="auditModalRun()">Run audit</button>`) {
t.Fatalf("dashboard missing run audit button: %s", body) t.Fatalf("dashboard missing run audit button: %s", body)
} }
if strings.Contains(body, `No audit data`) { if strings.Contains(body, `No audit data`) {
@@ -343,6 +537,18 @@ func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
} }
} }
// TestReadyIsOKWhenAuditPathIsUnset confirms /api/ready reports "ready" when
// no audit path is configured, instead of failing the os.Stat probe on an
// empty path.
func TestReadyIsOKWhenAuditPathIsUnset(t *testing.T) {
	handler := NewHandler(HandlerOptions{})
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/ready", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	if strings.TrimSpace(rec.Body.String()) != "ready" {
		t.Fatalf("body=%q want ready", rec.Body.String())
	}
}
func TestAuditPageRendersViewerFrameAndActions(t *testing.T) { func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()
path := filepath.Join(dir, "audit.json") path := filepath.Join(dir, "audit.json")
@@ -365,7 +571,7 @@ func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
} }
} }
func TestTasksPageRendersLogModalAndPaginationControls(t *testing.T) { func TestTasksPageRendersOpenLinksAndPaginationControls(t *testing.T) {
handler := NewHandler(HandlerOptions{}) handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder() rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil)) handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil))
@@ -373,8 +579,8 @@ func TestTasksPageRendersLogModalAndPaginationControls(t *testing.T) {
t.Fatalf("status=%d", rec.Code) t.Fatalf("status=%d", rec.Code)
} }
body := rec.Body.String() body := rec.Body.String()
if !strings.Contains(body, `id="task-log-overlay"`) { if !strings.Contains(body, `Open a task to view its saved logs and charts.`) {
t.Fatalf("tasks page missing log modal overlay: %s", body) t.Fatalf("tasks page missing task report hint: %s", body)
} }
if !strings.Contains(body, `_taskPageSize = 50`) { if !strings.Contains(body, `_taskPageSize = 50`) {
t.Fatalf("tasks page missing pagination size config: %s", body) t.Fatalf("tasks page missing pagination size config: %s", body)
@@ -409,37 +615,111 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
} }
} }
func TestTasksPageRendersScrollableLogModal(t *testing.T) { func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
dir := t.TempDir() handler := NewHandler(HandlerOptions{})
path := filepath.Join(dir, "audit.json")
exportDir := filepath.Join(dir, "export")
if err := os.MkdirAll(exportDir, 0755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
t.Fatal(err)
}
handler := NewHandler(HandlerOptions{
Title: "Bee Hardware Audit",
AuditPath: path,
ExportDir: exportDir,
})
rec := httptest.NewRecorder() rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks", nil)) handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
if rec.Code != http.StatusOK { if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code) t.Fatalf("status=%d", rec.Code)
} }
body := rec.Body.String() body := rec.Body.String()
if !strings.Contains(body, `height:calc(100vh - 32px)`) { for _, needle := range []string{
t.Fatalf("tasks page missing bounded log modal height: %s", body) `href="/benchmark"`,
`id="benchmark-gpu-list"`,
`/api/gpu/nvidia`,
`/api/benchmark/nvidia/run`,
`benchmark-run-nccl`,
} {
if !strings.Contains(body, needle) {
t.Fatalf("benchmark page missing %q: %s", needle, body)
}
} }
if !strings.Contains(body, `flex:1;min-height:0;overflow:hidden`) { }
t.Fatalf("tasks page missing log modal overflow guard: %s", body)
func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
handler := NewHandler(HandlerOptions{})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
} }
if !strings.Contains(body, `height:100%;min-height:0;overflow:auto`) { body := rec.Body.String()
t.Fatalf("tasks page missing scrollable log wrapper: %s", body) for _, needle := range []string{
`NVIDIA GPU Targeted Stress`,
`nvidia-targeted-stress`,
`controlled NVIDIA DCGM load`,
`<code>dcgmi diag targeted_stress</code>`,
} {
if !strings.Contains(body, needle) {
t.Fatalf("validate page missing %q: %s", needle, body)
}
}
}
// TestBurnPageRendersGoalBasedNVIDIACards asserts the /burn page contains the
// goal-based NVIDIA cards (max compute load via dcgmproftester, NCCL
// interconnect test), the GPU selection list, and the cross-link telling
// users that targeted_stress lives on the Validate page.
func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
	handler := NewHandler(HandlerOptions{})
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d", rec.Code)
	}
	body := rec.Body.String()
	for _, needle := range []string{
		`NVIDIA Max Compute Load`,
		`dcgmproftester`,
		`targeted_stress remain in <a href="/validate">Validate</a>`,
		`NVIDIA Interconnect Test (NCCL all_reduce_perf)`,
		`id="burn-gpu-list"`,
	} {
		if !strings.Contains(body, needle) {
			t.Fatalf("burn page missing %q: %s", needle, body)
		}
	}
}
func TestTaskDetailPageRendersSavedReport(t *testing.T) {
dir := t.TempDir()
exportDir := filepath.Join(dir, "export")
reportDir := filepath.Join(exportDir, "tasks", "task-1_cpu_sat_done")
if err := os.MkdirAll(reportDir, 0755); err != nil {
t.Fatal(err)
}
reportPath := filepath.Join(reportDir, "report.html")
if err := os.WriteFile(reportPath, []byte(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">saved report</div></div>`), 0644); err != nil {
t.Fatal(err)
}
globalQueue.mu.Lock()
origTasks := globalQueue.tasks
globalQueue.tasks = []*Task{{
ID: "task-1",
Name: "CPU SAT",
Target: "cpu",
Status: TaskDone,
CreatedAt: time.Now(),
ArtifactsDir: reportDir,
ReportHTMLPath: reportPath,
}}
globalQueue.mu.Unlock()
t.Cleanup(func() {
globalQueue.mu.Lock()
globalQueue.tasks = origTasks
globalQueue.mu.Unlock()
})
handler := NewHandler(HandlerOptions{Title: "Bee Hardware Audit", ExportDir: exportDir})
rec := httptest.NewRecorder()
handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tasks/task-1", nil))
if rec.Code != http.StatusOK {
t.Fatalf("status=%d", rec.Code)
}
body := rec.Body.String()
if !strings.Contains(body, `saved report`) {
t.Fatalf("task detail page missing saved report: %s", body)
}
if !strings.Contains(body, `Back to Tasks`) {
t.Fatalf("task detail page missing back link: %s", body)
} }
} }
@@ -564,3 +844,98 @@ func TestRuntimeHealthEndpointReturnsJSON(t *testing.T) {
t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body) t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body)
} }
} }
// TestDashboardRendersRuntimeHealthTable renders the dashboard against fixture
// runtime-health.json and component-status.json files and asserts the Runtime
// Health table shows every check row, the component statuses, the missing
// tool, the inactive service, and the per-component SAT failure details.
func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "audit.json")
	exportDir := filepath.Join(dir, "export")
	if err := os.MkdirAll(exportDir, 0755); err != nil {
		t.Fatal(err)
	}
	// Minimal audit snapshot so the dashboard renders instead of the
	// "run audit" empty state.
	if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z","hardware":{"board":{"serial_number":"SERIAL-1"}}}`), 0644); err != nil {
		t.Fatal(err)
	}
	// Runtime health fixture: PARTIAL overall, CUDA not ready, one missing
	// tool and one inactive service.
	health := `{
  "status":"PARTIAL",
  "checked_at":"2026-03-16T10:00:00Z",
  "export_dir":"/tmp/export",
  "driver_ready":true,
  "cuda_ready":false,
  "network_status":"PARTIAL",
  "issues":[
    {"code":"dhcp_partial","description":"At least one interface did not obtain IPv4 connectivity."},
    {"code":"cuda_runtime_not_ready","description":"CUDA runtime is not ready for GPU SAT."}
  ],
  "tools":[
    {"name":"dmidecode","ok":true},
    {"name":"nvidia-smi","ok":false}
  ],
  "services":[
    {"name":"bee-web","status":"active"},
    {"name":"bee-nvidia","status":"inactive"}
  ]
}`
	if err := os.WriteFile(filepath.Join(exportDir, "runtime-health.json"), []byte(health), 0644); err != nil {
		t.Fatal(err)
	}
	// Component status fixture: one entry per hardware class with mixed
	// OK/Warning/Critical results from SAT runs.
	componentStatus := `[
  {
    "component_key":"cpu:all",
    "status":"Warning",
    "error_summary":"cpu SAT: FAILED",
    "history":[{"at":"2026-03-16T10:00:00Z","status":"Warning","source":"sat:cpu","detail":"cpu SAT: FAILED"}]
  },
  {
    "component_key":"memory:all",
    "status":"OK",
    "history":[{"at":"2026-03-16T10:01:00Z","status":"OK","source":"sat:memory","detail":"memory SAT: OK"}]
  },
  {
    "component_key":"storage:nvme0n1",
    "status":"Critical",
    "error_summary":"storage SAT: FAILED",
    "history":[{"at":"2026-03-16T10:02:00Z","status":"Critical","source":"sat:storage","detail":"storage SAT: FAILED"}]
  },
  {
    "component_key":"pcie:gpu:nvidia",
    "status":"Warning",
    "error_summary":"nvidia SAT: FAILED",
    "history":[{"at":"2026-03-16T10:03:00Z","status":"Warning","source":"sat:nvidia","detail":"nvidia SAT: FAILED"}]
  }
]`
	if err := os.WriteFile(filepath.Join(exportDir, "component-status.json"), []byte(componentStatus), 0644); err != nil {
		t.Fatal(err)
	}
	handler := NewHandler(HandlerOptions{AuditPath: path, ExportDir: exportDir})
	rec := httptest.NewRecorder()
	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
	if rec.Code != http.StatusOK {
		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
	}
	body := rec.Body.String()
	for _, needle := range []string{
		`Runtime Health`,
		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
		`Export Directory`,
		`Network`,
		`NVIDIA/AMD Driver`,
		`CUDA / ROCm`,
		`Required Utilities`,
		`Bee Services`,
		`<td>CPU</td>`,
		`<td>Memory</td>`,
		`<td>Storage</td>`,
		`<td>GPU</td>`,
		`CUDA runtime is not ready for GPU SAT.`,
		`Missing: nvidia-smi`,
		`bee-nvidia=inactive`,
		`cpu SAT: FAILED`,
		`storage SAT: FAILED`,
		`sat:nvidia`,
	} {
		if !strings.Contains(body, needle) {
			t.Fatalf("dashboard missing %q: %s", needle, body)
		}
	}
}

View File

@@ -0,0 +1,42 @@
package webui
import (
"fmt"
"log/slog"
"runtime/debug"
"time"
)
// goRecoverLoop runs fn on a background goroutine and restarts it after
// restartDelay each time it exits by panicking. The loop ends for good once
// fn returns normally.
func goRecoverLoop(name string, restartDelay time.Duration, fn func()) {
	go func() {
		for runRecoverable(name, fn) {
			if restartDelay > 0 {
				time.Sleep(restartDelay)
			}
		}
	}()
}
// goRecoverOnce runs fn on a background goroutine exactly once, logging and
// swallowing any panic instead of crashing the process.
func goRecoverOnce(name string, fn func()) {
	go func() {
		// Whether fn panicked is irrelevant here; the panic is already logged.
		_ = runRecoverable(name, fn)
	}()
}
// runRecoverable invokes fn synchronously and reports whether it panicked.
// A panic is recovered and logged with the component name and stack trace
// rather than propagating.
func runRecoverable(name string, fn func()) (panicked bool) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		panicked = true
		slog.Error("recovered panic",
			"component", name,
			"panic", fmt.Sprint(r),
			"stack", string(debug.Stack()),
		)
	}()
	fn()
	return
}

View File

@@ -0,0 +1,85 @@
package webui
import (
	"fmt"
	"html"
	"net/http"
	"net/url"
	"os"
	"strings"
)
// handleTaskPage serves the per-task detail page at /tasks/{id}; unknown IDs
// get a 404.
func (h *handler) handleTaskPage(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("id")
	task, ok := globalQueue.findByID(id)
	if !ok {
		http.NotFound(w, r)
		return
	}
	// Render from a point-in-time copy so a concurrently updating task does
	// not change mid-render. NOTE(review): assumes the Task value is safe to
	// shallow-copy without holding the queue lock — confirm Task carries no
	// lock or runner-mutated slice state.
	snapshot := *task
	body := renderTaskDetailPage(h.opts, snapshot)
	w.Header().Set("Cache-Control", "no-store")
	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	_, _ = w.Write([]byte(body))
}
// renderTaskDetailPage builds the full HTML document for one task: a back
// link, either the saved report fragment or a fallback summary card, and —
// while the task is pending or running — a live log panel fed by the task's
// SSE stream that reloads the page once the task finishes.
func renderTaskDetailPage(opts HandlerOptions, task Task) string {
	// Fall back to the task ID when the task has no human-readable name.
	title := task.Name
	if strings.TrimSpace(title) == "" {
		title = task.ID
	}
	var body strings.Builder
	body.WriteString(`<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">`)
	body.WriteString(`<a class="btn btn-secondary btn-sm" href="/tasks">Back to Tasks</a>`)
	body.WriteString(`<span style="font-size:12px;color:var(--muted)">Artifacts are saved in the task folder under <code>./tasks</code>.</span>`)
	body.WriteString(`</div>`)
	// Prefer the pre-rendered report saved with the task's artifacts; only
	// synthesize a summary card when no report file is available.
	if report := loadTaskReportFragment(task); report != "" {
		body.WriteString(report)
	} else {
		body.WriteString(`<div class="card"><div class="card-head">Task Summary</div><div class="card-body">`)
		body.WriteString(`<div style="font-size:18px;font-weight:700">` + html.EscapeString(title) + `</div>`)
		body.WriteString(`<div style="margin-top:8px">` + renderTaskStatusBadge(task.Status) + `</div>`)
		if strings.TrimSpace(task.ErrMsg) != "" {
			body.WriteString(`<div style="margin-top:8px;color:var(--crit-fg)">` + html.EscapeString(task.ErrMsg) + `</div>`)
		}
		body.WriteString(`</div></div>`)
	}
	if task.Status == TaskRunning || task.Status == TaskPending {
		body.WriteString(`<div class="card"><div class="card-head">Live Logs</div><div class="card-body">`)
		body.WriteString(`<div id="task-live-log" class="terminal" style="max-height:none;white-space:pre-wrap">Connecting...</div>`)
		body.WriteString(`</div></div>`)
		// The 'done' SSE event triggers a delayed reload so the report written
		// at completion replaces the live log view.
		// NOTE(review): task.ID is interpolated into a JS string using HTML
		// escaping only — fine for server-generated IDs; confirm IDs can never
		// contain quotes or backslashes.
		body.WriteString(`<script>
var _taskDetailES = new EventSource('/api/tasks/` + html.EscapeString(task.ID) + `/stream');
var _taskDetailTerm = document.getElementById('task-live-log');
_taskDetailES.onopen = function(){ _taskDetailTerm.textContent = ''; };
_taskDetailES.onmessage = function(e){ _taskDetailTerm.textContent += e.data + "\n"; _taskDetailTerm.scrollTop = _taskDetailTerm.scrollHeight; };
_taskDetailES.addEventListener('done', function(){ _taskDetailES.close(); setTimeout(function(){ window.location.reload(); }, 1000); });
_taskDetailES.onerror = function(){ _taskDetailES.close(); };
</script>`)
	}
	return layoutHead(opts.Title+" — "+title) +
		layoutNav("tasks", opts.BuildLabel) +
		`<div class="main"><div class="topbar"><h1>` + html.EscapeString(title) + `</h1></div><div class="content">` +
		body.String() +
		`</div></div></body></html>`
}
// loadTaskReportFragment reads the task's pre-rendered HTML report from disk.
// It returns the empty string when no report path is configured, the file
// cannot be read, or the file is empty.
func loadTaskReportFragment(task Task) string {
	if strings.TrimSpace(task.ReportHTMLPath) == "" {
		return ""
	}
	fragment, err := os.ReadFile(task.ReportHTMLPath)
	if err != nil {
		return ""
	}
	if len(fragment) == 0 {
		return ""
	}
	return string(fragment)
}
// taskArtifactDownloadLink builds the download URL for an artifact file that
// belongs to a task. It returns "" when absPath is blank.
//
// The path is query-escaped so that spaces, '&', '#', '%', or '+' in file
// names cannot truncate or corrupt the generated URL (the previous raw
// interpolation would break on such names).
func taskArtifactDownloadLink(task Task, absPath string) string {
	if strings.TrimSpace(absPath) == "" {
		return ""
	}
	return fmt.Sprintf(`/export/file?path=%s`, url.QueryEscape(absPath))
}

View File

@@ -0,0 +1,286 @@
package webui
import (
"encoding/json"
"fmt"
"html"
"os"
"path/filepath"
"sort"
"strings"
"time"
"bee/audit/internal/platform"
)
// taskReportMetricsDBPath is the metrics database path consulted when
// building task reports. It is a package-level indirection so tests can
// redirect it to a temporary database.
var taskReportMetricsDBPath = metricsDBPath

// taskReport is the machine-readable document persisted as report.json in a
// task's artifacts directory, summarising the run and its generated charts.
type taskReport struct {
	ID          string            `json:"id"`
	Name        string            `json:"name"`
	Target      string            `json:"target"`
	Status      string            `json:"status"`
	CreatedAt   time.Time         `json:"created_at"`
	StartedAt   *time.Time        `json:"started_at,omitempty"`
	DoneAt      *time.Time        `json:"done_at,omitempty"`
	DurationSec int               `json:"duration_sec,omitempty"`
	Error       string            `json:"error,omitempty"`
	LogFile     string            `json:"log_file,omitempty"` // base name of the task log, relative to the artifacts dir
	Charts      []taskReportChart `json:"charts,omitempty"`
	GeneratedAt time.Time         `json:"generated_at"` // UTC timestamp of report generation
}
// taskReportChart names one chart file referenced by a task report.
type taskReportChart struct {
	Title string `json:"title"`
	File  string `json:"file"` // SVG file name, relative to the artifacts dir
}

// taskChartSpec pairs a dashboard chart path with its output SVG file name.
type taskChartSpec struct {
	Path string // chart identifier passed to the chart renderer
	File string // output file name within the artifacts dir
}

// taskDashboardChartSpecs lists the standard server- and GPU-level charts
// rendered into every task report, in display order.
var taskDashboardChartSpecs = []taskChartSpec{
	{Path: "server-load", File: "server-load.svg"},
	{Path: "server-temp-cpu", File: "server-temp-cpu.svg"},
	{Path: "server-temp-ambient", File: "server-temp-ambient.svg"},
	{Path: "server-power", File: "server-power.svg"},
	{Path: "server-fans", File: "server-fans.svg"},
	{Path: "gpu-all-load", File: "gpu-all-load.svg"},
	{Path: "gpu-all-memload", File: "gpu-all-memload.svg"},
	{Path: "gpu-all-clock", File: "gpu-all-clock.svg"},
	{Path: "gpu-all-power", File: "gpu-all-power.svg"},
	{Path: "gpu-all-temp", File: "gpu-all-temp.svg"},
}
// writeTaskReportArtifacts generates the on-disk report for a task: metric
// charts (SVG), a machine-readable report.json, and a self-contained
// report.html fragment, all inside t.ArtifactsDir. A nil task or a task
// without an artifacts directory is a silent no-op.
func writeTaskReportArtifacts(t *Task) error {
	if t == nil {
		return nil
	}
	ensureTaskReportPaths(t)
	if strings.TrimSpace(t.ArtifactsDir) == "" {
		return nil
	}
	if err := os.MkdirAll(t.ArtifactsDir, 0755); err != nil {
		return err
	}
	// Chart the metrics sampled during the task's active window; a load
	// failure simply yields an empty sample set (charts are best-effort).
	start, end := taskTimeWindow(t)
	samples, _ := loadTaskMetricSamples(start, end)
	charts, inlineCharts := writeTaskCharts(t.ArtifactsDir, start, end, samples)
	// Embed the task log verbatim; an unreadable log leaves it empty.
	logText := ""
	if data, err := os.ReadFile(t.LogPath); err == nil {
		logText = string(data)
	}
	report := taskReport{
		ID:          t.ID,
		Name:        t.Name,
		Target:      t.Target,
		Status:      t.Status,
		CreatedAt:   t.CreatedAt,
		StartedAt:   t.StartedAt,
		DoneAt:      t.DoneAt,
		DurationSec: taskElapsedSec(t, reportDoneTime(t)),
		Error:       t.ErrMsg,
		LogFile:     filepath.Base(t.LogPath),
		Charts:      charts,
		GeneratedAt: time.Now().UTC(),
	}
	if err := writeJSONFile(t.ReportJSONPath, report); err != nil {
		return err
	}
	return os.WriteFile(t.ReportHTMLPath, []byte(renderTaskReportFragment(report, inlineCharts, logText)), 0644)
}
// reportDoneTime returns the task's recorded completion time, or the current
// time when the task is nil or has no non-zero DoneAt.
func reportDoneTime(t *Task) time.Time {
	if t == nil || t.DoneAt == nil || t.DoneAt.IsZero() {
		return time.Now()
	}
	return *t.DoneAt
}
// taskTimeWindow derives the metric-sampling window for a task: from
// StartedAt (falling back to CreatedAt) until DoneAt (falling back to now),
// all in UTC. The end is clamped so it never precedes the start.
func taskTimeWindow(t *Task) (time.Time, time.Time) {
	now := time.Now().UTC()
	if t == nil {
		return now, now
	}
	start := t.CreatedAt.UTC()
	if s := t.StartedAt; s != nil && !s.IsZero() {
		start = s.UTC()
	}
	end := now
	if d := t.DoneAt; d != nil && !d.IsZero() {
		end = d.UTC()
	}
	if end.Before(start) {
		end = start
	}
	return start, end
}
// loadTaskMetricSamples opens the metrics database at taskReportMetricsDBPath
// and returns every sample recorded between start and end. The database is
// closed before returning.
func loadTaskMetricSamples(start, end time.Time) ([]platform.LiveMetricSample, error) {
	db, err := openMetricsDB(taskReportMetricsDBPath)
	if err != nil {
		return nil, err
	}
	defer db.Close()
	return db.LoadBetween(start, end)
}
// writeTaskCharts renders the standard dashboard charts plus one overview
// chart per GPU present in the samples. Each chart is written as an SVG file
// into dir and also returned inline (file name → SVG markup) so the HTML
// report can embed it without extra requests. Charts that fail to render or
// write are skipped silently. Returns (nil, nil) when there are no samples.
func writeTaskCharts(dir string, start, end time.Time, samples []platform.LiveMetricSample) ([]taskReportChart, map[string]string) {
	if len(samples) == 0 {
		return nil, nil
	}
	// The whole task window is one continuous "active" timeline segment.
	timeline := []chartTimelineSegment{{Start: start, End: end, Active: true}}
	var charts []taskReportChart
	inline := make(map[string]string)
	for _, spec := range taskDashboardChartSpecs {
		title, svg, ok := renderTaskChartSVG(spec.Path, samples, timeline)
		if !ok || len(svg) == 0 {
			continue
		}
		path := filepath.Join(dir, spec.File)
		if err := os.WriteFile(path, svg, 0644); err != nil {
			continue
		}
		charts = append(charts, taskReportChart{Title: title, File: spec.File})
		inline[spec.File] = string(svg)
	}
	// Per-GPU overview charts, in ascending GPU-index order.
	for _, idx := range taskGPUIndices(samples) {
		file := fmt.Sprintf("gpu-%d-overview.svg", idx)
		svg, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
		if err != nil || !ok || len(svg) == 0 {
			continue
		}
		path := filepath.Join(dir, file)
		if err := os.WriteFile(path, svg, 0644); err != nil {
			continue
		}
		charts = append(charts, taskReportChart{Title: gpuDisplayLabel(idx) + " Overview", File: file})
		inline[file] = string(svg)
	}
	return charts, inline
}
// renderTaskChartSVG renders one named dashboard chart from the given samples.
// It returns the chart title, the SVG bytes, and ok=false when the chart has
// no data for these samples or rendering fails.
func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) (string, []byte, bool) {
	// chartDataFromSamples resolves the chart identifier into datasets,
	// series names, axis labels, title, and y-axis bounds.
	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
	if !ok {
		return "", nil, false
	}
	buf, err := renderMetricChartSVG(
		title,
		labels,
		sampleTimes(samples),
		datasets,
		names,
		yMin,
		yMax,
		chartCanvasHeightForPath(path, len(names)),
		timeline,
	)
	if err != nil {
		return "", nil, false
	}
	return title, buf, true
}
// taskGPUIndices returns the sorted set of distinct GPU indices observed
// anywhere in the sample window.
func taskGPUIndices(samples []platform.LiveMetricSample) []int {
	seen := make(map[int]struct{})
	var indices []int
	for _, sample := range samples {
		for _, gpu := range sample.GPUs {
			if _, dup := seen[gpu.GPUIndex]; dup {
				continue
			}
			seen[gpu.GPUIndex] = struct{}{}
			indices = append(indices, gpu.GPUIndex)
		}
	}
	sort.Ints(indices)
	return indices
}
// writeJSONFile marshals v as indented JSON and writes it to path with
// permissions 0644. Marshalling errors are returned before anything is
// written.
func writeJSONFile(path string, v any) error {
	payload, err := json.MarshalIndent(v, "", " ")
	if err != nil {
		return err
	}
	return os.WriteFile(path, payload, 0644)
}
// renderTaskReportFragment builds the embeddable HTML fragment persisted as
// report.html: a header card with name/target/status/timings, the inlined
// chart SVGs (or an informational notice when none exist), and the full task
// log in a terminal block. charts maps chart file name → inline SVG markup.
func renderTaskReportFragment(report taskReport, charts map[string]string, logText string) string {
	var b strings.Builder
	b.WriteString(`<div class="card"><div class="card-head">Task Report</div><div class="card-body">`)
	b.WriteString(`<div class="grid2">`)
	b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Task</div><div style="font-size:16px;font-weight:700">` + html.EscapeString(report.Name) + `</div>`)
	b.WriteString(`<div style="font-size:13px;color:var(--muted)">` + html.EscapeString(report.Target) + `</div></div>`)
	b.WriteString(`<div><div style="font-size:12px;color:var(--muted);margin-bottom:6px">Status</div><div>` + renderTaskStatusBadge(report.Status) + `</div>`)
	if strings.TrimSpace(report.Error) != "" {
		b.WriteString(`<div style="margin-top:8px;font-size:13px;color:var(--crit-fg)">` + html.EscapeString(report.Error) + `</div>`)
	}
	b.WriteString(`</div></div>`)
	// Timing summary: StartedAt falls back to CreatedAt; DoneAt has no
	// fallback and renders "n/a" when unset.
	b.WriteString(`<div style="margin-top:14px;font-size:13px;color:var(--muted)">`)
	b.WriteString(`Started: ` + formatTaskTime(report.StartedAt, report.CreatedAt) + ` | Finished: ` + formatTaskTime(report.DoneAt, time.Time{}) + ` | Duration: ` + formatTaskDuration(report.DurationSec))
	b.WriteString(`</div></div></div>`)
	if len(report.Charts) > 0 {
		// SVG markup is inlined as-is; it is generated by this package, not
		// user input, so it is not HTML-escaped.
		b.WriteString(`<div class="grid2">`)
		for _, chart := range report.Charts {
			b.WriteString(`<div class="card"><div class="card-head">` + html.EscapeString(chart.Title) + `</div><div class="card-body" style="padding:12px">`)
			b.WriteString(charts[chart.File])
			b.WriteString(`</div></div>`)
		}
		b.WriteString(`</div>`)
	} else {
		b.WriteString(`<div class="alert alert-info">No metric samples were captured during this task window.</div>`)
	}
	b.WriteString(`<div class="card"><div class="card-head">Logs</div><div class="card-body">`)
	b.WriteString(`<div class="terminal" style="max-height:none;white-space:pre-wrap">` + html.EscapeString(strings.TrimSpace(logText)) + `</div>`)
	b.WriteString(`</div></div>`)
	return b.String()
}
// renderTaskStatusBadge maps a task status string to a coloured badge <span>.
// Unknown or empty statuses render as the neutral "unknown" badge.
func renderTaskStatusBadge(status string) string {
	var className string
	switch status {
	case TaskRunning, TaskDone:
		className = "badge-ok"
	case TaskFailed:
		className = "badge-err"
	case TaskPending, TaskCancelled:
		className = "badge-unknown"
	default:
		className = "badge-unknown"
	}
	label := strings.TrimSpace(status)
	if label == "" {
		label = "unknown"
	}
	return `<span class="badge ` + className + `">` + html.EscapeString(label) + `</span>`
}
func formatTaskTime(ts *time.Time, fallback time.Time) string {
if ts != nil && !ts.IsZero() {
return ts.Local().Format("2006-01-02 15:04:05")
}
if !fallback.IsZero() {
return fallback.Local().Format("2006-01-02 15:04:05")
}
return "n/a"
}
// formatTaskDuration renders a second count as "Ns", "Nm SSs", or
// "Nh MMm SSs"; non-positive values render as "n/a".
func formatTaskDuration(sec int) string {
	switch {
	case sec <= 0:
		return "n/a"
	case sec < 60:
		return fmt.Sprintf("%ds", sec)
	case sec < 3600:
		return fmt.Sprintf("%dm %02ds", sec/60, sec%60)
	default:
		return fmt.Sprintf("%dh %02dm %02ds", sec/3600, (sec%3600)/60, sec%60)
	}
}

View File

@@ -4,10 +4,12 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log/slog"
"net/http" "net/http"
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"runtime/debug"
"sort" "sort"
"strings" "strings"
"sync" "sync"
@@ -28,22 +30,29 @@ const (
// taskNames maps target → human-readable name for validate (SAT) runs. // taskNames maps target → human-readable name for validate (SAT) runs.
var taskNames = map[string]string{ var taskNames = map[string]string{
"nvidia": "NVIDIA SAT", "nvidia": "NVIDIA SAT",
"nvidia-stress": "NVIDIA GPU Stress", "nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
"memory": "Memory SAT", "nvidia-benchmark": "NVIDIA Benchmark",
"storage": "Storage SAT", "nvidia-compute": "NVIDIA Max Compute Load (dcgmproftester)",
"cpu": "CPU SAT", "nvidia-targeted-power": "NVIDIA Targeted Power (dcgmi diag targeted_power)",
"amd": "AMD GPU SAT", "nvidia-pulse": "NVIDIA Pulse Test (dcgmi diag pulse_test)",
"amd-mem": "AMD GPU MEM Integrity", "nvidia-interconnect": "NVIDIA Interconnect Test (NCCL all_reduce_perf)",
"amd-bandwidth": "AMD GPU MEM Bandwidth", "nvidia-bandwidth": "NVIDIA Bandwidth Test (NVBandwidth)",
"amd-stress": "AMD GPU Burn-in", "nvidia-stress": "NVIDIA GPU Stress",
"memory-stress": "Memory Burn-in", "memory": "Memory SAT",
"sat-stress": "SAT Stress (stressapptest)", "storage": "Storage SAT",
"platform-stress": "Platform Thermal Cycling", "cpu": "CPU SAT",
"audit": "Audit", "amd": "AMD GPU SAT",
"support-bundle": "Support Bundle", "amd-mem": "AMD GPU MEM Integrity",
"install": "Install to Disk", "amd-bandwidth": "AMD GPU MEM Bandwidth",
"install-to-ram": "Install to RAM", "amd-stress": "AMD GPU Burn-in",
"memory-stress": "Memory Burn-in",
"sat-stress": "SAT Stress (stressapptest)",
"platform-stress": "Platform Thermal Cycling",
"audit": "Audit",
"support-bundle": "Support Bundle",
"install": "Install to Disk",
"install-to-ram": "Install to RAM",
} }
// burnNames maps target → human-readable name when a burn profile is set. // burnNames maps target → human-readable name when a burn profile is set.
@@ -83,17 +92,20 @@ func taskDisplayName(target, profile, loader string) string {
// Task represents one unit of work in the queue. // Task represents one unit of work in the queue.
type Task struct { type Task struct {
ID string `json:"id"` ID string `json:"id"`
Name string `json:"name"` Name string `json:"name"`
Target string `json:"target"` Target string `json:"target"`
Priority int `json:"priority"` Priority int `json:"priority"`
Status string `json:"status"` Status string `json:"status"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
StartedAt *time.Time `json:"started_at,omitempty"` StartedAt *time.Time `json:"started_at,omitempty"`
DoneAt *time.Time `json:"done_at,omitempty"` DoneAt *time.Time `json:"done_at,omitempty"`
ElapsedSec int `json:"elapsed_sec,omitempty"` ElapsedSec int `json:"elapsed_sec,omitempty"`
ErrMsg string `json:"error,omitempty"` ErrMsg string `json:"error,omitempty"`
LogPath string `json:"log_path,omitempty"` LogPath string `json:"log_path,omitempty"`
ArtifactsDir string `json:"artifacts_dir,omitempty"`
ReportJSONPath string `json:"report_json_path,omitempty"`
ReportHTMLPath string `json:"report_html_path,omitempty"`
// runtime fields (not serialised) // runtime fields (not serialised)
job *jobState job *jobState
@@ -106,67 +118,81 @@ type taskParams struct {
DiagLevel int `json:"diag_level,omitempty"` DiagLevel int `json:"diag_level,omitempty"`
GPUIndices []int `json:"gpu_indices,omitempty"` GPUIndices []int `json:"gpu_indices,omitempty"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"` ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
SizeMB int `json:"size_mb,omitempty"`
Loader string `json:"loader,omitempty"` Loader string `json:"loader,omitempty"`
BurnProfile string `json:"burn_profile,omitempty"` BurnProfile string `json:"burn_profile,omitempty"`
BenchmarkProfile string `json:"benchmark_profile,omitempty"`
RunNCCL bool `json:"run_nccl,omitempty"`
DisplayName string `json:"display_name,omitempty"` DisplayName string `json:"display_name,omitempty"`
Device string `json:"device,omitempty"` // for install Device string `json:"device,omitempty"` // for install
PlatformComponents []string `json:"platform_components,omitempty"` PlatformComponents []string `json:"platform_components,omitempty"`
} }
type persistedTask struct { type persistedTask struct {
ID string `json:"id"` ID string `json:"id"`
Name string `json:"name"` Name string `json:"name"`
Target string `json:"target"` Target string `json:"target"`
Priority int `json:"priority"` Priority int `json:"priority"`
Status string `json:"status"` Status string `json:"status"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
StartedAt *time.Time `json:"started_at,omitempty"` StartedAt *time.Time `json:"started_at,omitempty"`
DoneAt *time.Time `json:"done_at,omitempty"` DoneAt *time.Time `json:"done_at,omitempty"`
ErrMsg string `json:"error,omitempty"` ErrMsg string `json:"error,omitempty"`
LogPath string `json:"log_path,omitempty"` LogPath string `json:"log_path,omitempty"`
Params taskParams `json:"params,omitempty"` ArtifactsDir string `json:"artifacts_dir,omitempty"`
ReportJSONPath string `json:"report_json_path,omitempty"`
ReportHTMLPath string `json:"report_html_path,omitempty"`
Params taskParams `json:"params,omitempty"`
} }
type burnPreset struct { type burnPreset struct {
NvidiaDiag int
DurationSec int DurationSec int
} }
func resolveBurnPreset(profile string) burnPreset { func resolveBurnPreset(profile string) burnPreset {
switch profile { switch profile {
case "overnight": case "overnight":
return burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60} return burnPreset{DurationSec: 8 * 60 * 60}
case "acceptance": case "acceptance":
return burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60} return burnPreset{DurationSec: 60 * 60}
default: default:
return burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60} return burnPreset{DurationSec: 5 * 60}
} }
} }
func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions { func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
acceptanceCycles := []platform.PlatformStressCycle{
{LoadSec: 85, IdleSec: 5},
{LoadSec: 80, IdleSec: 10},
{LoadSec: 55, IdleSec: 5},
{LoadSec: 60, IdleSec: 0},
{LoadSec: 100, IdleSec: 10},
{LoadSec: 145, IdleSec: 15},
{LoadSec: 190, IdleSec: 20},
{LoadSec: 235, IdleSec: 25},
{LoadSec: 280, IdleSec: 30},
{LoadSec: 325, IdleSec: 35},
{LoadSec: 370, IdleSec: 40},
{LoadSec: 415, IdleSec: 45},
{LoadSec: 460, IdleSec: 50},
{LoadSec: 510, IdleSec: 0},
}
switch profile { switch profile {
case "overnight": case "overnight":
return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{ cycles := make([]platform.PlatformStressCycle, 0, len(acceptanceCycles)*8)
{LoadSec: 600, IdleSec: 120}, for range 8 {
{LoadSec: 600, IdleSec: 60}, cycles = append(cycles, acceptanceCycles...)
{LoadSec: 600, IdleSec: 30}, }
{LoadSec: 600, IdleSec: 120}, return platform.PlatformStressOptions{Cycles: cycles}
{LoadSec: 600, IdleSec: 60},
{LoadSec: 600, IdleSec: 30},
{LoadSec: 600, IdleSec: 120},
{LoadSec: 600, IdleSec: 60},
}}
case "acceptance": case "acceptance":
return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{ return platform.PlatformStressOptions{Cycles: acceptanceCycles}
{LoadSec: 300, IdleSec: 60},
{LoadSec: 300, IdleSec: 30},
{LoadSec: 300, IdleSec: 60},
{LoadSec: 300, IdleSec: 30},
}}
default: // smoke default: // smoke
return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{ return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
{LoadSec: 90, IdleSec: 60}, {LoadSec: 85, IdleSec: 5},
{LoadSec: 90, IdleSec: 30}, {LoadSec: 80, IdleSec: 10},
{LoadSec: 55, IdleSec: 5},
{LoadSec: 60, IdleSec: 0},
}} }}
} }
} }
@@ -232,6 +258,7 @@ func (q *taskQueue) enqueue(t *Task) {
q.prune() q.prune()
q.persistLocked() q.persistLocked()
q.mu.Unlock() q.mu.Unlock()
taskSerialEvent(t, "queued")
select { select {
case q.trigger <- struct{}{}: case q.trigger <- struct{}{}:
default: default:
@@ -377,7 +404,7 @@ func (q *taskQueue) startWorker(opts *HandlerOptions) {
if !q.started { if !q.started {
q.loadLocked() q.loadLocked()
q.started = true q.started = true
go q.worker() goRecoverLoop("task worker", 2*time.Second, q.worker)
} }
hasPending := q.nextPending() != nil hasPending := q.nextPending() != nil
q.mu.Unlock() q.mu.Unlock()
@@ -392,78 +419,115 @@ func (q *taskQueue) startWorker(opts *HandlerOptions) {
func (q *taskQueue) worker() { func (q *taskQueue) worker() {
for { for {
<-q.trigger <-q.trigger
setCPUGovernor("performance") func() {
setCPUGovernor("performance")
defer setCPUGovernor("powersave")
// Drain all pending tasks and start them in parallel. // Drain all pending tasks and start them in parallel.
q.mu.Lock() q.mu.Lock()
var batch []*Task var batch []*Task
for { for {
t := q.nextPending() t := q.nextPending()
if t == nil { if t == nil {
break break
}
now := time.Now()
t.Status = TaskRunning
t.StartedAt = &now
t.DoneAt = nil
t.ErrMsg = ""
j := newTaskJobState(t.LogPath, taskSerialPrefix(t))
t.job = j
batch = append(batch, t)
} }
now := time.Now() if len(batch) > 0 {
t.Status = TaskRunning q.persistLocked()
t.StartedAt = &now }
t.DoneAt = nil q.mu.Unlock()
t.ErrMsg = ""
j := newTaskJobState(t.LogPath)
t.job = j
batch = append(batch, t)
}
if len(batch) > 0 {
q.persistLocked()
}
q.mu.Unlock()
var wg sync.WaitGroup var wg sync.WaitGroup
for _, t := range batch { for _, t := range batch {
t := t t := t
j := t.job j := t.job
taskCtx, taskCancel := context.WithCancel(context.Background()) taskCtx, taskCancel := context.WithCancel(context.Background())
j.cancel = taskCancel j.cancel = taskCancel
wg.Add(1) wg.Add(1)
go func() { goRecoverOnce("task "+t.Target, func() {
defer wg.Done() defer wg.Done()
defer taskCancel()
if q.kmsgWatcher != nil && isSATTarget(t.Target) { q.executeTask(t, j, taskCtx)
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target) })
} }
wg.Wait()
q.runTask(t, j, taskCtx)
if q.kmsgWatcher != nil {
q.kmsgWatcher.NotifyTaskFinished(t.ID)
}
if len(batch) > 0 {
q.mu.Lock() q.mu.Lock()
now2 := time.Now() q.prune()
t.DoneAt = &now2
if t.Status == TaskRunning {
if j.err != "" {
t.Status = TaskFailed
t.ErrMsg = j.err
} else {
t.Status = TaskDone
}
}
q.persistLocked() q.persistLocked()
q.mu.Unlock() q.mu.Unlock()
}() }
} }()
wg.Wait()
if len(batch) > 0 {
q.mu.Lock()
q.prune()
q.persistLocked()
q.mu.Unlock()
}
setCPUGovernor("powersave")
} }
} }
func (q *taskQueue) executeTask(t *Task, j *jobState, ctx context.Context) {
startedKmsgWatch := false
defer q.finalizeTaskRun(t, j)
defer func() {
if startedKmsgWatch && q.kmsgWatcher != nil {
q.kmsgWatcher.NotifyTaskFinished(t.ID)
}
}()
defer func() {
if rec := recover(); rec != nil {
msg := fmt.Sprintf("task panic: %v", rec)
slog.Error("task panic",
"task_id", t.ID,
"target", t.Target,
"panic", fmt.Sprint(rec),
"stack", string(debug.Stack()),
)
j.append("ERROR: " + msg)
j.finish(msg)
}
}()
if q.kmsgWatcher != nil && isSATTarget(t.Target) {
q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
startedKmsgWatch = true
}
q.runTask(t, j, ctx)
}
func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
q.mu.Lock()
now := time.Now()
t.DoneAt = &now
if t.Status == TaskRunning {
if j.err != "" {
t.Status = TaskFailed
t.ErrMsg = j.err
} else {
t.Status = TaskDone
t.ErrMsg = ""
}
}
q.finalizeTaskArtifactPathsLocked(t)
q.persistLocked()
q.mu.Unlock()
if err := writeTaskReportArtifacts(t); err != nil {
appendJobLog(t.LogPath, "WARN: task report generation failed: "+err.Error())
}
if t.ErrMsg != "" {
taskSerialEvent(t, "finished with status="+t.Status+" error="+t.ErrMsg)
return
}
taskSerialEvent(t, "finished with status="+t.Status)
}
// setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files. // setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files.
// Silently ignores errors (e.g. when cpufreq is not available). // Silently ignores errors (e.g. when cpufreq is not available).
func setCPUGovernor(governor string) { func setCPUGovernor(governor string) {
@@ -502,9 +566,6 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
break break
} }
diagLevel := t.params.DiagLevel diagLevel := t.params.DiagLevel
if t.params.BurnProfile != "" && diagLevel <= 0 {
diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
}
if len(t.params.GPUIndices) > 0 || diagLevel > 0 { if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
result, e := a.RunNvidiaAcceptancePackWithOptions( result, e := a.RunNvidiaAcceptancePackWithOptions(
ctx, "", diagLevel, t.params.GPUIndices, j.append, ctx, "", diagLevel, t.params.GPUIndices, j.append,
@@ -517,6 +578,78 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
} else { } else {
archive, err = a.RunNvidiaAcceptancePack("", j.append) archive, err = a.RunNvidiaAcceptancePack("", j.append)
} }
case "nvidia-targeted-stress":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if dur <= 0 {
dur = 300
}
archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
case "nvidia-benchmark":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
Profile: t.params.BenchmarkProfile,
SizeMB: t.params.SizeMB,
GPUIndices: t.params.GPUIndices,
ExcludeGPUIndices: t.params.ExcludeGPUIndices,
RunNCCL: t.params.RunNCCL,
}, j.append)
case "nvidia-compute":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
case "nvidia-targeted-power":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = a.RunNvidiaTargetedPowerPack(ctx, "", dur, t.params.GPUIndices, j.append)
case "nvidia-pulse":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = a.RunNvidiaPulseTestPack(ctx, "", dur, t.params.GPUIndices, j.append)
case "nvidia-bandwidth":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = a.RunNvidiaBandwidthPack(ctx, "", t.params.GPUIndices, j.append)
case "nvidia-interconnect":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
dur := t.params.Duration
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
DurationSec: dur,
Loader: platform.NvidiaStressLoaderNCCL,
GPUIndices: t.params.GPUIndices,
}, j.append)
case "nvidia-stress": case "nvidia-stress":
if a == nil { if a == nil {
err = fmt.Errorf("app not configured") err = fmt.Errorf("app not configured")
@@ -731,6 +864,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
now := time.Now() now := time.Now()
t.DoneAt = &now t.DoneAt = &now
globalQueue.persistLocked() globalQueue.persistLocked()
taskSerialEvent(t, "finished with status="+t.Status)
writeJSON(w, map[string]string{"status": "cancelled"}) writeJSON(w, map[string]string{"status": "cancelled"})
case TaskRunning: case TaskRunning:
if t.job != nil { if t.job != nil {
@@ -740,6 +874,7 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
now := time.Now() now := time.Now()
t.DoneAt = &now t.DoneAt = &now
globalQueue.persistLocked() globalQueue.persistLocked()
taskSerialEvent(t, "finished with status="+t.Status)
writeJSON(w, map[string]string{"status": "cancelled"}) writeJSON(w, map[string]string{"status": "cancelled"})
default: default:
writeError(w, http.StatusConflict, "task is not running or pending") writeError(w, http.StatusConflict, "task is not running or pending")
@@ -780,6 +915,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
case TaskPending: case TaskPending:
t.Status = TaskCancelled t.Status = TaskCancelled
t.DoneAt = &now t.DoneAt = &now
taskSerialEvent(t, "finished with status="+t.Status)
n++ n++
case TaskRunning: case TaskRunning:
if t.job != nil { if t.job != nil {
@@ -787,6 +923,7 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
} }
t.Status = TaskCancelled t.Status = TaskCancelled
t.DoneAt = &now t.DoneAt = &now
taskSerialEvent(t, "finished with status="+t.Status)
n++ n++
} }
} }
@@ -805,6 +942,7 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
case TaskPending: case TaskPending:
t.Status = TaskCancelled t.Status = TaskCancelled
t.DoneAt = &now t.DoneAt = &now
taskSerialEvent(t, "finished with status="+t.Status)
cancelled++ cancelled++
case TaskRunning: case TaskRunning:
if t.job != nil { if t.job != nil {
@@ -812,6 +950,7 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
} }
t.Status = TaskCancelled t.Status = TaskCancelled
t.DoneAt = &now t.DoneAt = &now
taskSerialEvent(t, "finished with status="+t.Status)
cancelled++ cancelled++
} }
} }
@@ -875,10 +1014,10 @@ func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
} }
func (q *taskQueue) assignTaskLogPathLocked(t *Task) { func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
if t.LogPath != "" || q.logsDir == "" || t.ID == "" { if q.logsDir == "" || t.ID == "" {
return return
} }
t.LogPath = filepath.Join(q.logsDir, t.ID+".log") q.ensureTaskArtifactPathsLocked(t)
} }
func (q *taskQueue) loadLocked() { func (q *taskQueue) loadLocked() {
@@ -895,17 +1034,20 @@ func (q *taskQueue) loadLocked() {
} }
for _, pt := range persisted { for _, pt := range persisted {
t := &Task{ t := &Task{
ID: pt.ID, ID: pt.ID,
Name: pt.Name, Name: pt.Name,
Target: pt.Target, Target: pt.Target,
Priority: pt.Priority, Priority: pt.Priority,
Status: pt.Status, Status: pt.Status,
CreatedAt: pt.CreatedAt, CreatedAt: pt.CreatedAt,
StartedAt: pt.StartedAt, StartedAt: pt.StartedAt,
DoneAt: pt.DoneAt, DoneAt: pt.DoneAt,
ErrMsg: pt.ErrMsg, ErrMsg: pt.ErrMsg,
LogPath: pt.LogPath, LogPath: pt.LogPath,
params: pt.Params, ArtifactsDir: pt.ArtifactsDir,
ReportJSONPath: pt.ReportJSONPath,
ReportHTMLPath: pt.ReportHTMLPath,
params: pt.Params,
} }
q.assignTaskLogPathLocked(t) q.assignTaskLogPathLocked(t)
if t.Status == TaskRunning { if t.Status == TaskRunning {
@@ -936,17 +1078,20 @@ func (q *taskQueue) persistLocked() {
state := make([]persistedTask, 0, len(q.tasks)) state := make([]persistedTask, 0, len(q.tasks))
for _, t := range q.tasks { for _, t := range q.tasks {
state = append(state, persistedTask{ state = append(state, persistedTask{
ID: t.ID, ID: t.ID,
Name: t.Name, Name: t.Name,
Target: t.Target, Target: t.Target,
Priority: t.Priority, Priority: t.Priority,
Status: t.Status, Status: t.Status,
CreatedAt: t.CreatedAt, CreatedAt: t.CreatedAt,
StartedAt: t.StartedAt, StartedAt: t.StartedAt,
DoneAt: t.DoneAt, DoneAt: t.DoneAt,
ErrMsg: t.ErrMsg, ErrMsg: t.ErrMsg,
LogPath: t.LogPath, LogPath: t.LogPath,
Params: t.params, ArtifactsDir: t.ArtifactsDir,
ReportJSONPath: t.ReportJSONPath,
ReportHTMLPath: t.ReportHTMLPath,
Params: t.params,
}) })
} }
data, err := json.MarshalIndent(state, "", " ") data, err := json.MarshalIndent(state, "", " ")
@@ -977,3 +1122,88 @@ func taskElapsedSec(t *Task, now time.Time) int {
} }
return int(end.Sub(start).Round(time.Second) / time.Second) return int(end.Sub(start).Round(time.Second) / time.Second)
} }
func taskFolderStatus(status string) string {
status = strings.TrimSpace(strings.ToLower(status))
switch status {
case TaskRunning, TaskDone, TaskFailed, TaskCancelled:
return status
default:
return TaskPending
}
}
func sanitizeTaskFolderPart(s string) string {
s = strings.TrimSpace(strings.ToLower(s))
if s == "" {
return "task"
}
var b strings.Builder
lastDash := false
for _, r := range s {
isAlnum := (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')
if isAlnum {
b.WriteRune(r)
lastDash = false
continue
}
if !lastDash {
b.WriteByte('-')
lastDash = true
}
}
out := strings.Trim(b.String(), "-")
if out == "" {
return "task"
}
return out
}
func taskArtifactsDir(root string, t *Task, status string) string {
if strings.TrimSpace(root) == "" || t == nil {
return ""
}
return filepath.Join(root, fmt.Sprintf("%s_%s_%s", t.ID, sanitizeTaskFolderPart(t.Name), taskFolderStatus(status)))
}
func ensureTaskReportPaths(t *Task) {
if t == nil || strings.TrimSpace(t.ArtifactsDir) == "" {
return
}
if t.LogPath == "" || filepath.Base(t.LogPath) == "task.log" {
t.LogPath = filepath.Join(t.ArtifactsDir, "task.log")
}
t.ReportJSONPath = filepath.Join(t.ArtifactsDir, "report.json")
t.ReportHTMLPath = filepath.Join(t.ArtifactsDir, "report.html")
}
func (q *taskQueue) ensureTaskArtifactPathsLocked(t *Task) {
if t == nil || strings.TrimSpace(q.logsDir) == "" || strings.TrimSpace(t.ID) == "" {
return
}
if strings.TrimSpace(t.ArtifactsDir) == "" {
t.ArtifactsDir = taskArtifactsDir(q.logsDir, t, t.Status)
}
if t.ArtifactsDir != "" {
_ = os.MkdirAll(t.ArtifactsDir, 0755)
}
ensureTaskReportPaths(t)
}
func (q *taskQueue) finalizeTaskArtifactPathsLocked(t *Task) {
if t == nil || strings.TrimSpace(q.logsDir) == "" || strings.TrimSpace(t.ID) == "" {
return
}
q.ensureTaskArtifactPathsLocked(t)
dstDir := taskArtifactsDir(q.logsDir, t, t.Status)
if dstDir == "" {
return
}
if t.ArtifactsDir != "" && t.ArtifactsDir != dstDir {
if _, err := os.Stat(dstDir); err != nil {
_ = os.Rename(t.ArtifactsDir, dstDir)
}
t.ArtifactsDir = dstDir
}
ensureTaskReportPaths(t)
}

View File

@@ -2,6 +2,7 @@ package webui
import ( import (
"context" "context"
"encoding/json"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
@@ -12,6 +13,7 @@ import (
"time" "time"
"bee/audit/internal/app" "bee/audit/internal/app"
"bee/audit/internal/platform"
) )
func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) { func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
@@ -248,15 +250,133 @@ func TestHandleAPITasksStreamPendingTaskStartsSSEImmediately(t *testing.T) {
t.Fatalf("stream did not emit queued status promptly, body=%q", rec.Body.String()) t.Fatalf("stream did not emit queued status promptly, body=%q", rec.Body.String())
} }
// TestFinalizeTaskRunCreatesReportFolderAndArtifacts verifies that finishing
// a running task (a) flips its status to done, (b) renames the artifacts
// folder to the "_done"-suffixed layout, and (c) writes report.json and
// report.html, with chart data sourced from metrics-DB samples recorded
// during the run window.
func TestFinalizeTaskRunCreatesReportFolderAndArtifacts(t *testing.T) {
	dir := t.TempDir()
	metricsPath := filepath.Join(dir, "metrics.db")
	// Swap the package-level metrics DB path for a temp file; restore on cleanup.
	prevMetricsPath := taskReportMetricsDBPath
	taskReportMetricsDBPath = metricsPath
	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
	db, err := openMetricsDB(metricsPath)
	if err != nil {
		t.Fatalf("openMetricsDB: %v", err)
	}
	// Seed one live-metric sample 45s in the past so it falls inside the
	// task's run window (task started 90s ago, see below) and report
	// generation has something to chart.
	base := time.Now().UTC().Add(-45 * time.Second)
	if err := db.Write(platform.LiveMetricSample{
		Timestamp:  base,
		CPULoadPct: 42,
		MemLoadPct: 35,
		PowerW:     510,
	}); err != nil {
		t.Fatalf("Write: %v", err)
	}
	_ = db.Close()
	q := &taskQueue{
		statePath: filepath.Join(dir, "tasks-state.json"),
		logsDir:   filepath.Join(dir, "tasks"),
		trigger:   make(chan struct{}, 1),
	}
	if err := os.MkdirAll(q.logsDir, 0755); err != nil {
		t.Fatal(err)
	}
	started := time.Now().UTC().Add(-90 * time.Second)
	task := &Task{
		ID:        "task-1",
		Name:      "CPU SAT",
		Target:    "cpu",
		Status:    TaskRunning,
		CreatedAt: started.Add(-10 * time.Second),
		StartedAt: &started,
	}
	q.assignTaskLogPathLocked(task)
	appendJobLog(task.LogPath, "line-1")
	// Simulate a job that completed without error, then finalize.
	job := newTaskJobState(task.LogPath)
	job.finish("")
	q.finalizeTaskRun(task, job)
	if task.Status != TaskDone {
		t.Fatalf("status=%q want %q", task.Status, TaskDone)
	}
	// The artifacts directory must carry the final-status suffix.
	if !strings.Contains(filepath.Base(task.ArtifactsDir), "_done") {
		t.Fatalf("artifacts dir=%q", task.ArtifactsDir)
	}
	if _, err := os.Stat(task.ReportJSONPath); err != nil {
		t.Fatalf("report json: %v", err)
	}
	if _, err := os.Stat(task.ReportHTMLPath); err != nil {
		t.Fatalf("report html: %v", err)
	}
	// report.json must round-trip, reference this task, and contain charts.
	var report taskReport
	data, err := os.ReadFile(task.ReportJSONPath)
	if err != nil {
		t.Fatalf("ReadFile(report.json): %v", err)
	}
	if err := json.Unmarshal(data, &report); err != nil {
		t.Fatalf("Unmarshal(report.json): %v", err)
	}
	if report.ID != task.ID || report.Status != TaskDone {
		t.Fatalf("report=%+v", report)
	}
	if len(report.Charts) == 0 {
		t.Fatalf("expected charts in report, got none")
	}
}
// TestTaskLifecycleMirrorsToSerialConsole verifies that task lifecycle
// events and job log lines are mirrored to the serial console through the
// taskSerialWriteLine hook: after enqueue → run → finalize, the captured
// serial output contains the lifecycle markers and the job's own log lines.
func TestTaskLifecycleMirrorsToSerialConsole(t *testing.T) {
	// Capture serial output by swapping the package-level write hook;
	// restore the original on cleanup.
	var lines []string
	prev := taskSerialWriteLine
	taskSerialWriteLine = func(line string) { lines = append(lines, line) }
	t.Cleanup(func() { taskSerialWriteLine = prev })
	dir := t.TempDir()
	q := &taskQueue{
		statePath: filepath.Join(dir, "tasks-state.json"),
		logsDir:   filepath.Join(dir, "tasks"),
		trigger:   make(chan struct{}, 1),
	}
	task := &Task{
		ID:        "task-serial-1",
		Name:      "CPU SAT",
		Target:    "cpu",
		Status:    TaskPending,
		CreatedAt: time.Now().UTC(),
	}
	q.enqueue(task)
	// Move the task to running by hand, then drive a job that logs two lines.
	started := time.Now().UTC()
	task.Status = TaskRunning
	task.StartedAt = &started
	// The job state carries a per-task serial prefix so interleaved tasks
	// remain distinguishable on the console.
	job := newTaskJobState(task.LogPath, taskSerialPrefix(task))
	job.append("Starting CPU SAT...")
	job.append("CPU stress duration: 60s")
	job.finish("")
	q.finalizeTaskRun(task, job)
	// Every expected marker — lifecycle ("queued", "finished with
	// status=done") and the job's own lines — must appear in the mirror.
	joined := strings.Join(lines, "\n")
	for _, needle := range []string{
		"queued",
		"Starting CPU SAT...",
		"CPU stress duration: 60s",
		"finished with status=done",
	} {
		if !strings.Contains(joined, needle) {
			t.Fatalf("serial mirror missing %q in %q", needle, joined)
		}
	}
}
func TestResolveBurnPreset(t *testing.T) { func TestResolveBurnPreset(t *testing.T) {
tests := []struct { tests := []struct {
profile string profile string
want burnPreset want burnPreset
}{ }{
{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}}, {profile: "smoke", want: burnPreset{DurationSec: 5 * 60}},
{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}}, {profile: "acceptance", want: burnPreset{DurationSec: 60 * 60}},
{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}}, {profile: "overnight", want: burnPreset{DurationSec: 8 * 60 * 60}},
{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}}, {profile: "", want: burnPreset{DurationSec: 5 * 60}},
} }
for _, tc := range tests { for _, tc := range tests {
if got := resolveBurnPreset(tc.profile); got != tc.want { if got := resolveBurnPreset(tc.profile); got != tc.want {
@@ -467,3 +587,52 @@ func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
t.Fatalf("unexpected error: %q", j.err) t.Fatalf("unexpected error: %q", j.err)
} }
} }
// TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow verifies that a
// panic raised inside a task runner is recovered by executeTask: the task is
// marked failed with a "task panic: ..." message, DoneAt is stamped, the job
// records the same error, and the kmsg capture window opened for the run is
// closed again (no active captures, no lingering window).
func TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow(t *testing.T) {
	dir := t.TempDir()
	q := &taskQueue{
		opts:        &HandlerOptions{App: &app.App{}},
		statePath:   filepath.Join(dir, "tasks-state.json"),
		logsDir:     filepath.Join(dir, "tasks"),
		kmsgWatcher: newKmsgWatcher(nil),
	}
	tk := &Task{
		ID:        "cpu-panic-1",
		Name:      "CPU SAT",
		Target:    "cpu",
		Status:    TaskRunning,
		CreatedAt: time.Now(),
	}
	j := &jobState{}
	// Swap the CPU runner hook for one that panics; restore it afterwards.
	orig := runCPUAcceptancePackCtx
	runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, _ int, _ func(string)) (string, error) {
		panic("boom")
	}
	defer func() { runCPUAcceptancePackCtx = orig }()
	q.executeTask(tk, j, context.Background())
	if tk.Status != TaskFailed {
		t.Fatalf("status=%q want %q", tk.Status, TaskFailed)
	}
	if tk.DoneAt == nil {
		t.Fatal("expected done_at to be set")
	}
	if !strings.Contains(tk.ErrMsg, "task panic: boom") {
		t.Fatalf("task error=%q", tk.ErrMsg)
	}
	if !strings.Contains(j.err, "task panic: boom") {
		t.Fatalf("job error=%q", j.err)
	}
	// Inspect watcher state under its lock: it must be back to idle.
	q.kmsgWatcher.mu.Lock()
	activeCount := q.kmsgWatcher.activeCount
	window := q.kmsgWatcher.window
	q.kmsgWatcher.mu.Unlock()
	if activeCount != 0 {
		t.Fatalf("activeCount=%d want 0", activeCount)
	}
	if window != nil {
		t.Fatalf("expected kmsg window to be cleared, got %+v", window)
	}
}

2
bible

Submodule bible updated: 688b87e98d...1d89a4918e

View File

@@ -302,6 +302,12 @@ memtest_fail() {
return 0 return 0
} }
# nvidia_runtime_fail MSG — report a failed NVIDIA runtime check on stderr
# and abort the whole build. Note: this exits the script, not just the
# calling function.
nvidia_runtime_fail() {
	echo "ERROR: $1" >&2
	exit 1
}
iso_memtest_present() { iso_memtest_present() {
iso_path="$1" iso_path="$1"
iso_files="$(mktemp)" iso_files="$(mktemp)"
@@ -439,6 +445,44 @@ validate_iso_memtest() {
echo "=== memtest validation OK ===" echo "=== memtest validation OK ==="
} }
# validate_iso_nvidia_runtime ISO_PATH
# Final-ISO guard for NVIDIA builds: extracts live/filesystem.squashfs from
# the ISO and asserts that the DCGM userspace tools (dcgmi, nv-hostengine,
# dcgmproftester*) made it into the image. Any failure goes through
# nvidia_runtime_fail, which exits the whole build.
validate_iso_nvidia_runtime() {
	iso_path="$1"
	# Only NVIDIA images ship the DCGM stack; other vendors skip the check.
	[ "$BEE_GPU_VENDOR" = "nvidia" ] || return 0
	echo "=== validating NVIDIA runtime in ISO ==="
	[ -f "$iso_path" ] || nvidia_runtime_fail "ISO not found for NVIDIA runtime validation: $iso_path"
	require_iso_reader "$iso_path" >/dev/null 2>&1 || nvidia_runtime_fail "ISO reader unavailable for NVIDIA runtime validation"
	command -v unsquashfs >/dev/null 2>&1 || nvidia_runtime_fail "unsquashfs is required for NVIDIA runtime validation"
	squashfs_tmp="$(mktemp)"
	squashfs_list="$(mktemp)"
	# From here on, each exiting failure path removes both temp files first
	# (nvidia_runtime_fail never returns, so cleanup must precede it).
	iso_read_member "$iso_path" live/filesystem.squashfs "$squashfs_tmp" || {
		rm -f "$squashfs_tmp" "$squashfs_list"
		nvidia_runtime_fail "failed to extract live/filesystem.squashfs from ISO"
	}
	# List the squashfs contents once; the tool checks below grep this listing.
	unsquashfs -ll "$squashfs_tmp" > "$squashfs_list" 2>/dev/null || {
		rm -f "$squashfs_tmp" "$squashfs_list"
		nvidia_runtime_fail "failed to inspect filesystem.squashfs from ISO"
	}
	grep -Eq 'usr/bin/dcgmi$' "$squashfs_list" || {
		rm -f "$squashfs_tmp" "$squashfs_list"
		nvidia_runtime_fail "dcgmi missing from final NVIDIA ISO"
	}
	grep -Eq 'usr/bin/nv-hostengine$' "$squashfs_list" || {
		rm -f "$squashfs_tmp" "$squashfs_list"
		nvidia_runtime_fail "nv-hostengine missing from final NVIDIA ISO"
	}
	# dcgmproftester binaries may carry a CUDA-major suffix (e.g.
	# dcgmproftester13); accept both plain and suffixed names.
	grep -Eq 'usr/bin/dcgmproftester([0-9]+)?$' "$squashfs_list" || {
		rm -f "$squashfs_tmp" "$squashfs_list"
		nvidia_runtime_fail "dcgmproftester missing from final NVIDIA ISO"
	}
	rm -f "$squashfs_tmp" "$squashfs_list"
	echo "=== NVIDIA runtime validation OK ==="
}
append_memtest_grub_entry() { append_memtest_grub_entry() {
grub_cfg="$1" grub_cfg="$1"
[ -f "$grub_cfg" ] || return 1 [ -f "$grub_cfg" ] || return 1
@@ -1144,6 +1188,7 @@ if [ -f "$ISO_RAW" ]; then
fi fi
fi fi
validate_iso_memtest "$ISO_RAW" validate_iso_memtest "$ISO_RAW"
validate_iso_nvidia_runtime "$ISO_RAW"
cp "$ISO_RAW" "$ISO_OUT" cp "$ISO_RAW" "$ISO_OUT"
echo "" echo ""
echo "=== done (${BEE_GPU_VENDOR}) ===" echo "=== done (${BEE_GPU_VENDOR}) ==="

View File

@@ -30,6 +30,7 @@ systemctl enable bee-preflight.service
systemctl enable bee-audit.service systemctl enable bee-audit.service
systemctl enable bee-web.service systemctl enable bee-web.service
systemctl enable bee-sshsetup.service systemctl enable bee-sshsetup.service
systemctl enable bee-selfheal.timer
systemctl enable ssh.service systemctl enable ssh.service
systemctl enable lightdm.service 2>/dev/null || true systemctl enable lightdm.service 2>/dev/null || true
systemctl enable qemu-guest-agent.service 2>/dev/null || true systemctl enable qemu-guest-agent.service 2>/dev/null || true
@@ -58,6 +59,7 @@ chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
chmod +x /usr/local/bin/bee 2>/dev/null || true chmod +x /usr/local/bin/bee 2>/dev/null || true
chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
chmod +x /usr/local/bin/bee-selfheal 2>/dev/null || true
if [ "$GPU_VENDOR" = "nvidia" ]; then if [ "$GPU_VENDOR" = "nvidia" ]; then
chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true

View File

@@ -1,6 +1,10 @@
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing. # NVIDIA DCGM (Data Center GPU Manager).
# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with CUDA 13 userspace, # Validate uses dcgmi diagnostics; Burn uses dcgmproftester as the official
# so install the CUDA 13 build plus proprietary diagnostic components explicitly. # NVIDIA max-compute recipe. The smoketest/runtime contract treats
# dcgmproftester as required in the LiveCD.
# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with
# CUDA 13 userspace, so install the CUDA 13 build plus proprietary components
# explicitly.
datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%% datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%% datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%% datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%

View File

@@ -52,6 +52,31 @@ else
fail "nvidia-smi: NOT FOUND" fail "nvidia-smi: NOT FOUND"
fi fi
if p=$(PATH="/usr/local/bin:$PATH" command -v dcgmi 2>/dev/null); then
ok "dcgmi found: $p"
else
fail "dcgmi: NOT FOUND"
fi
if p=$(PATH="/usr/local/bin:$PATH" command -v nv-hostengine 2>/dev/null); then
ok "nv-hostengine found: $p"
else
fail "nv-hostengine: NOT FOUND"
fi
DCGM_PROFTESTER=""
for tool in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
DCGM_PROFTESTER="$p"
break
fi
done
if [ -n "$DCGM_PROFTESTER" ]; then
ok "dcgmproftester found: $DCGM_PROFTESTER"
else
fail "dcgmproftester: NOT FOUND"
fi
for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf; do for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf; do
if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
ok "$tool found: $p" ok "$tool found: $p"
@@ -60,6 +85,12 @@ for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf
fi fi
done done
if p=$(PATH="/usr/local/bin:$PATH" command -v nvbandwidth 2>/dev/null); then
ok "nvbandwidth found: $p"
else
warn "nvbandwidth: NOT FOUND"
fi
echo "" echo ""
echo "-- NVIDIA modules --" echo "-- NVIDIA modules --"
KO_DIR="/usr/local/lib/nvidia" KO_DIR="/usr/local/lib/nvidia"
@@ -171,6 +202,12 @@ for svc in bee-nvidia bee-network bee-preflight bee-audit bee-web; do
fi fi
done done
if systemctl is-active --quiet bee-selfheal.timer 2>/dev/null; then
ok "timer active: bee-selfheal.timer"
else
fail "timer NOT active: bee-selfheal.timer"
fi
echo "" echo ""
echo "-- runtime health --" echo "-- runtime health --"
if [ -f /appdata/bee/export/runtime-health.json ] && [ -s /appdata/bee/export/runtime-health.json ]; then if [ -f /appdata/bee/export/runtime-health.json ] && [ -s /appdata/bee/export/runtime-health.json ]; then

View File

@@ -1,7 +1,6 @@
[Unit] [Unit]
Description=Bee: hardware audit Description=Bee: hardware audit
After=bee-preflight.service bee-network.service bee-nvidia.service After=bee-preflight.service bee-network.service bee-nvidia.service
Before=bee-web.service
[Service] [Service]
Type=oneshot Type=oneshot

View File

@@ -0,0 +1,9 @@
# bee-selfheal.service — one-shot self-heal pass, triggered periodically by
# bee-selfheal.timer (no [Install] section: the timer activates it).
[Unit]
Description=Bee: periodic runtime self-heal
# Order after the services this pass may inspect/restart.
After=bee-web.service bee-audit.service bee-preflight.service
[Service]
Type=oneshot
# bee-log-run tees output into the export dir alongside the other bee logs.
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-selfheal.log /usr/local/bin/bee-selfheal
StandardOutput=journal
StandardError=journal

View File

@@ -0,0 +1,11 @@
# bee-selfheal.timer — fires bee-selfheal.service 45s after boot and then
# every 60s while the system is up.
[Unit]
Description=Bee: run self-heal checks periodically
[Timer]
OnBootSec=45sec
OnUnitActiveSec=60sec
# Allow up to 15s of coalescing jitter so timer wakeups can be batched.
AccuracySec=15sec
Unit=bee-selfheal.service
[Install]
WantedBy=timers.target

View File

@@ -1,12 +1,12 @@
[Unit] [Unit]
Description=Bee: hardware audit web viewer Description=Bee: hardware audit web viewer
After=bee-audit.service StartLimitIntervalSec=0
[Service] [Service]
Type=simple Type=simple
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit" ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
Restart=always Restart=always
RestartSec=2 RestartSec=3
StandardOutput=journal StandardOutput=journal
StandardError=journal StandardError=journal
LimitMEMLOCK=infinity LimitMEMLOCK=infinity

View File

@@ -0,0 +1,99 @@
#!/bin/bash
# bee-selfheal — periodic best-effort recovery for critical live ISO services.
set -u
LOG_PREFIX="bee-selfheal"
EXPORT_DIR="/appdata/bee/export"
AUDIT_JSON="${EXPORT_DIR}/bee-audit.json"
RUNTIME_JSON="${EXPORT_DIR}/runtime-health.json"
LOCK_DIR="/run/bee-selfheal.lock"
# log — emit a self-heal message with the standard prefix so journal lines
# are easy to grep.
log() {
	printf '[%s] %s\n' "${LOG_PREFIX}" "$*"
}
# have_nvidia_gpu — true when an NVIDIA PCI device is present
# (10de is the NVIDIA PCI vendor ID).
have_nvidia_gpu() {
	lspci -nn 2>/dev/null | grep -qi '10de:'
}
# service_active UNIT — true when systemd reports the unit as active.
service_active() {
	systemctl is-active --quiet "$1" 2>/dev/null
}
# restart_service UNIT — restart a systemd unit, logging the outcome.
# Returns 0 on success, 1 on failure; callers treat failure as best-effort.
restart_service() {
	local unit="$1"
	if ! systemctl restart "$unit" >/dev/null 2>&1; then
		log "WARN: failed to restart ${unit}"
		return 1
	fi
	log "restarted ${unit}"
	return 0
}
# file_ready PATH — true when PATH exists and is non-empty.
# NOTE(review): not referenced anywhere in this script (artifact_state
# performs the same -s check) — confirm it is intentional before removing.
file_ready() {
	[ -s "$1" ]
}
# artifact_state PATH — classify an exported artifact on stdout:
#   ready       — file exists and is non-empty
#   interrupted — a leftover PATH.tmp indicates a write that never completed
#   missing     — neither the file nor its temp sibling is usable
artifact_state() {
	local f="$1"
	if [ -s "${f}" ]; then
		echo "ready"
	elif [ -e "${f}.tmp" ]; then
		echo "interrupted"
	else
		echo "missing"
	fi
}
# web_healthy — probe the local web UI without curl/wget: open a raw TCP
# connection to 127.0.0.1:80 via bash's /dev/tcp, send GET /healthz, and
# require a line that is exactly "ok" somewhere in the response.
web_healthy() {
	bash -c 'exec 3<>/dev/tcp/127.0.0.1/80 && printf "GET /healthz HTTP/1.0\r\nHost: localhost\r\n\r\n" >&3 && grep -q "^ok$" <&3' \
		>/dev/null 2>&1
}
mkdir -p "${EXPORT_DIR}" /run
# Single-instance guard: mkdir on a fixed path is atomic, so a second
# timer-fired run bails out instead of racing the first.
if ! mkdir "${LOCK_DIR}" 2>/dev/null; then
	log "another self-heal run is already active"
	exit 0
fi
# Release the lock on any exit path, including signals that run the EXIT trap.
trap 'rmdir "${LOCK_DIR}" >/dev/null 2>&1 || true' EXIT
log "start"
# NVIDIA device node missing despite an NVIDIA GPU → kick the driver unit.
if have_nvidia_gpu && [ ! -e /dev/nvidia0 ]; then
	log "NVIDIA GPU detected but /dev/nvidia0 is missing"
	restart_service bee-nvidia.service || true
fi
# runtime-health.json absent/empty/interrupted → re-run preflight.
runtime_state="$(artifact_state "${RUNTIME_JSON}")"
if [ "${runtime_state}" != "ready" ]; then
	if [ "${runtime_state}" = "interrupted" ]; then
		log "runtime-health.json.tmp exists — interrupted runtime-health write detected"
	else
		log "runtime-health.json missing or empty"
	fi
	restart_service bee-preflight.service || true
fi
# bee-audit.json absent/empty/interrupted → re-run the audit.
audit_state="$(artifact_state "${AUDIT_JSON}")"
if [ "${audit_state}" != "ready" ]; then
	if [ "${audit_state}" = "interrupted" ]; then
		log "bee-audit.json.tmp exists — interrupted audit write detected"
	else
		log "bee-audit.json missing or empty"
	fi
	restart_service bee-audit.service || true
fi
# Web UI: restart when the unit is down, or up but failing the health probe.
# All restarts are best-effort (|| true) so one failure never aborts the pass.
if ! service_active bee-web.service; then
	log "bee-web.service is not active"
	restart_service bee-web.service || true
elif ! web_healthy; then
	log "bee-web health check failed"
	restart_service bee-web.service || true
fi
log "done"