Commit remaining workspace changes

Unify NVIDIA GPU recovery paths
fix(grub): fix bitmap error and menu rendering
2026-04-23 20:32:26 +03:00 · 2026-04-23 20:31:41 +03:00 · 2026-04-22 22:05:16 +03:00 · 2026-04-22 20:39:27 +03:00 · 2026-04-22 19:01:50 +03:00 · 2026-04-22 19:00:04 +03:00
108 changed files with 16954 additions and 4271 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 .DS_Store
 dist/
 iso/out/
 build-cache/
 audit/bee
--- a/audit/bee
+++ b/audit/bee
--- a/audit/cmd/bee/main.go
+++ b/audit/cmd/bee/main.go
@@ -71,6 +71,8 @@ func run(args []string, stdout, stderr io.Writer) (exitCode int) {
 		return runSAT(args[1:], stdout, stderr)
 	case "benchmark":
 		return runBenchmark(args[1:], stdout, stderr)
 	case "bee-worker":
 		return runBeeWorker(args[1:], stdout, stderr)
 	case "version", "--version", "-version":
 		fmt.Fprintln(stdout, Version)
 		return 0
@@ -90,6 +92,7 @@ func printRootUsage(w io.Writer) {
  bee web     --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
  bee sat nvidia|memory|storage|cpu [--duration <seconds>]
  bee benchmark nvidia [--profile standard|stability|overnight]
  bee bee-worker --export-dir `+app.DefaultExportDir+` --task-id TASK-001
  bee version
  bee help [command]`)
 }
@@ -110,6 +113,8 @@ func runHelp(args []string, stdout, stderr io.Writer) int {
 		return runSAT([]string{"--help"}, stdout, stderr)
 	case "benchmark":
 		return runBenchmark([]string{"--help"}, stdout, stderr)
 	case "bee-worker":
 		return runBeeWorker([]string{"--help"}, stdout, stderr)
 	case "version":
 		fmt.Fprintln(stdout, "usage: bee version")
 		return 0
@@ -382,9 +387,9 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 			archive, err = application.RunNvidiaAcceptancePack("", logLine)
 		}
 	case "memory":
-		archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
+		archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", 256, 1, logLine)
 	case "storage":
-		archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", logLine)
+		archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", false, logLine)
 	case "cpu":
 		dur := *duration
 		if dur <= 0 {
@@ -462,6 +467,28 @@ func runBenchmark(args []string, stdout, stderr io.Writer) int {
 	return 0
 }
 // runBeeWorker implements the "bee bee-worker" subcommand.
 //
 // It parses --export-dir (default app.DefaultExportDir) and --task-id from
 // args, writing flag diagnostics and usage text to stderr. It returns 0 when
 // help was requested, 2 on a flag parse error or when positional arguments
 // are present, and otherwise whatever webui.RunPersistedTask returns.
 func runBeeWorker(args []string, stdout, stderr io.Writer) int {
 	var (
 		exportDir string
 		taskID    string
 	)
 	flags := flag.NewFlagSet("bee-worker", flag.ContinueOnError)
 	flags.SetOutput(stderr)
 	flags.StringVar(&exportDir, "export-dir", app.DefaultExportDir, "directory with task state and artifacts")
 	flags.StringVar(&taskID, "task-id", "", "task identifier, e.g. TASK-001")
 	flags.Usage = func() {
 		fmt.Fprintf(stderr, "usage: bee bee-worker --export-dir %s --task-id TASK-001\n", app.DefaultExportDir)
 		flags.PrintDefaults()
 	}
 	switch err := flags.Parse(args); {
 	case err == nil:
 		// Flags parsed cleanly; continue below.
 	case err == flag.ErrHelp:
 		// -h/--help already printed the usage text.
 		return 0
 	default:
 		return 2
 	}
 	// The subcommand takes flags only; stray positional arguments are an error.
 	if flags.NArg() > 0 {
 		flags.Usage()
 		return 2
 	}
 	return webui.RunPersistedTask(exportDir, taskID, stdout, stderr)
 }
 func parseBenchmarkIndexCSV(raw string) ([]int, error) {
 	raw = strings.TrimSpace(raw)
 	if raw == "" {
--- a/audit/go.mod
+++ b/audit/go.mod
@@ -5,22 +5,18 @@ go 1.25.0
 replace reanimator/chart => ../internal/chart
 require (
-	github.com/go-analyze/charts v0.5.26
+	modernc.org/sqlite v1.48.0
 	reanimator/chart v0.0.0-00010101000000-000000000000
 )
 require (
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/go-analyze/bulk v0.1.3 // indirect
 	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/ncruces/go-strftime v1.0.0 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	golang.org/x/image v0.24.0 // indirect
 	golang.org/x/sys v0.42.0 // indirect
-	modernc.org/libc v1.70.0 // indirect
+	modernc.org/libc v1.72.0 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
 	modernc.org/memory v1.11.0 // indirect
 	modernc.org/sqlite v1.48.0 // indirect
 )
--- a/audit/go.sum
+++ b/audit/go.sum
@@ -1,37 +1,51 @@
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/go-analyze/bulk v0.1.3 h1:pzRdBqzHDAT9PyROt0SlWE0YqPtdmTcEpIJY0C3vF0c=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
-github.com/go-analyze/bulk v0.1.3/go.mod h1:afon/KtFJYnekIyN20H/+XUvcLFjE8sKR1CfpqfClgM=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00ZxY=
 github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
 github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
-golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
-golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
 golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
-modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
+modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
-modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
+modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
 modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
 modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
 modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
 modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
 modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
 modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
 modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
 modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
 modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
 modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
 modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
 modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
 modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
 modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
 modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
 modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
 modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
 modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
 modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
 modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
 modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
 modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
 modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
 modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
 modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
 modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -19,18 +19,22 @@ import (
 )
 var (
-	DefaultExportDir        = "/appdata/bee/export"
+	DefaultExportDir                     = "/appdata/bee/export"
-	DefaultAuditJSONPath    = DefaultExportDir + "/bee-audit.json"
+	DefaultAuditJSONPath                 = DefaultExportDir + "/bee-audit.json"
-	DefaultAuditLogPath     = DefaultExportDir + "/bee-audit.log"
+	DefaultAuditLogPath                  = DefaultExportDir + "/bee-audit.log"
-	DefaultWebLogPath       = DefaultExportDir + "/bee-web.log"
+	DefaultWebLogPath                    = DefaultExportDir + "/bee-web.log"
-	DefaultNetworkLogPath   = DefaultExportDir + "/bee-network.log"
+	DefaultNetworkLogPath                = DefaultExportDir + "/bee-network.log"
-	DefaultNvidiaLogPath    = DefaultExportDir + "/bee-nvidia.log"
+	DefaultNvidiaLogPath                 = DefaultExportDir + "/bee-nvidia.log"
-	DefaultSSHLogPath       = DefaultExportDir + "/bee-sshsetup.log"
+	DefaultSSHLogPath                    = DefaultExportDir + "/bee-sshsetup.log"
-	DefaultRuntimeJSONPath  = DefaultExportDir + "/runtime-health.json"
+	DefaultRuntimeJSONPath               = DefaultExportDir + "/runtime-health.json"
-	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
+	DefaultRuntimeLogPath                = DefaultExportDir + "/runtime-health.log"
-	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
+	DefaultTechDumpDir                   = DefaultExportDir + "/techdump"
-	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
+	DefaultSATBaseDir                    = DefaultExportDir + "/bee-sat"
-	DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
+	DefaultBeeBenchBaseDir               = DefaultExportDir + "/bee-bench"
 	DefaultBeeBenchAutotuneDir           = DefaultBeeBenchBaseDir + "/autotune"
 	DefaultBeeBenchPerfDir               = DefaultBeeBenchBaseDir + "/perf"
 	DefaultBeeBenchPowerDir              = DefaultBeeBenchBaseDir + "/power"
 	DefaultBeeBenchPowerSourceConfigPath = DefaultBeeBenchBaseDir + "/power-source-autotune.json"
 )
 type App struct {
@@ -84,6 +88,7 @@ type installer interface {
 	InstallToDisk(ctx context.Context, device string, logFile string) error
 	IsLiveMediaInRAM() bool
 	LiveBootSource() platform.LiveBootSource
 	LiveMediaRAMState() platform.LiveMediaRAMState
 	RunInstallToRAM(ctx context.Context, logFunc func(string)) error
 }
@@ -108,6 +113,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource {
 	return a.installer.LiveBootSource()
 }
 // LiveMediaRAMState returns the value reported by the underlying installer's
 // LiveMediaRAMState method, unchanged.
 func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState {
 	return a.installer.LiveMediaRAMState()
 }
 func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 	return a.installer.RunInstallToRAM(ctx, logFunc)
 }
@@ -117,13 +126,17 @@ type satRunner interface {
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
-	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error)
 	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
-	RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error)
-	RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	ResetNvidiaGPU(index int) (string, error)
 	RunMemoryAcceptancePack(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error)
 	RunStorageAcceptancePack(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error)
 	RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
 	DetectGPUVendor() string
@@ -136,7 +149,7 @@ type satRunner interface {
 	RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
 	RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
-	RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
 }
 type runtimeChecker interface {
@@ -188,6 +201,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
 	}
 	result := collector.Run(runtimeMode)
 	applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB)
 	writePSUStatusesToDB(a.StatusDB, result.Hardware.PowerSupplies)
 	if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
 		result.Runtime = &health
 	}
@@ -293,7 +307,7 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
 	}
 	filename := fmt.Sprintf("audit-%s-%s.json", sanitizeFilename(hostnameOr("unknown")), time.Now().UTC().Format("20060102-150405"))
 	tmpPath := filepath.Join(os.TempDir(), filename)
-	data, err := os.ReadFile(DefaultAuditJSONPath)
+	data, err := readFileLimited(DefaultAuditJSONPath, 100<<20)
 	if err != nil {
 		return "", err
 	}
@@ -521,6 +535,15 @@ func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
 	return a.sat.ListNvidiaGPUs()
 }
 // ListNvidiaGPUStatuses returns the NVIDIA GPU statuses (and any error)
 // reported by the SAT runner, unchanged.
 func (a *App) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
 	return a.sat.ListNvidiaGPUStatuses()
 }
 // ResetNvidiaGPU asks the SAT runner to reset the GPU at the given index and
 // wraps the outcome in an ActionResult.
 //
 // The result is populated even when the reset fails: Title names the GPU
 // index and Body carries the runner's output with surrounding whitespace
 // trimmed. The runner's error is returned as-is alongside it.
 func (a *App) ResetNvidiaGPU(index int) (ActionResult, error) {
 	output, err := a.sat.ResetNvidiaGPU(index)
 	result := ActionResult{
 		Title: fmt.Sprintf("Reset NVIDIA GPU %d", index),
 		Body:  strings.TrimSpace(output),
 	}
 	return result, err
 }
 func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (ActionResult, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
@@ -550,16 +573,66 @@ func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOp
 func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
-		baseDir = DefaultBenchmarkBaseDir
+		baseDir = DefaultBeeBenchPerfDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "performance", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }
-func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultBeeBenchPowerDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "power-fit", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
 }
 // RunNvidiaPowerSourceAutotuneCtx runs the SAT runner's NVIDIA power-source
 // autotune and returns its result unchanged.
 //
 // A baseDir that is empty or whitespace-only is replaced with
 // DefaultBeeBenchAutotuneDir; all other arguments are forwarded as given.
 func (a *App) RunNvidiaPowerSourceAutotuneCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
 	dir := baseDir
 	if len(strings.TrimSpace(dir)) == 0 {
 		dir = DefaultBeeBenchAutotuneDir
 	}
 	return a.sat.RunNvidiaPowerSourceAutotune(ctx, dir, opts, benchmarkKind, logFunc)
 }
 // LoadBenchmarkPowerAutotune loads the benchmark power autotune config from
 // DefaultBeeBenchPowerSourceConfigPath via platform.LoadBenchmarkPowerAutotuneConfig
 // and returns its result unchanged.
 func (a *App) LoadBenchmarkPowerAutotune() (*platform.BenchmarkPowerAutotuneConfig, error) {
 	return platform.LoadBenchmarkPowerAutotuneConfig(DefaultBeeBenchPowerSourceConfigPath)
 }
 // ensureBenchmarkPowerAutotune returns the benchmark power autotune config
 // for baseDir, running the autotune first when no config can be loaded.
 //
 // The config path is derived from baseDir with
 // platform.BenchmarkPowerSourceConfigPath. If loading it succeeds the saved
 // config is returned immediately. Otherwise autotune is run into the
 // "autotune" directory next to baseDir and the config is loaded again. Any
 // error from the autotune run or the second load is returned unwrapped
 // together with a zero-value config. Progress messages go to logFunc when it
 // is non-nil.
 func (a *App) ensureBenchmarkPowerAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (platform.BenchmarkPowerAutotuneConfig, error) {
 	var none platform.BenchmarkPowerAutotuneConfig
 	cfgPath := platform.BenchmarkPowerSourceConfigPath(baseDir)

 	saved, loadErr := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 	if loadErr == nil {
 		if logFunc != nil {
 			logFunc(fmt.Sprintf("benchmark autotune: using saved server power source %s", saved.SelectedSource))
 		}
 		return *saved, nil
 	}

 	// Any load failure is treated as "no usable config yet".
 	if logFunc != nil {
 		logFunc("benchmark autotune: no saved power source config, running autotune first")
 	}
 	siblingDir := filepath.Join(filepath.Dir(baseDir), "autotune")
 	_, runErr := a.RunNvidiaPowerSourceAutotuneCtx(ctx, siblingDir, opts, benchmarkKind, logFunc)
 	if runErr != nil {
 		return none, runErr
 	}

 	fresh, reloadErr := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 	if reloadErr != nil {
 		return none, reloadErr
 	}
 	return *fresh, nil
 }
 func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
+	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, staggerSec, logFunc)
 }
 func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
@@ -591,14 +664,14 @@ func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts p
 }
 func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
-	return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
+	return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, 256, 1, logFunc)
 }
-func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunMemoryAcceptancePack(ctx, baseDir, logFunc)
+	return a.sat.RunMemoryAcceptancePack(ctx, baseDir, sizeMB, passes, logFunc)
 }
 func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
@@ -623,14 +696,14 @@ func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (Actio
 }
 func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
-	return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, logFunc)
+	return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, false, logFunc)
 }
-func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunStorageAcceptancePack(ctx, baseDir, logFunc)
+	return a.sat.RunStorageAcceptancePack(ctx, baseDir, extended, logFunc)
 }
 func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
@@ -717,8 +790,15 @@ func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platfo
 	return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
 }
 // RunNCCLTests runs the SAT runner's NCCL tests for the given GPU indices and
 // returns its result unchanged.
 //
 // A baseDir that is empty or whitespace-only is replaced with
 // DefaultSATBaseDir; gpuIndices and logFunc are forwarded as given.
 func (a *App) RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
 	dir := baseDir
 	if len(strings.TrimSpace(dir)) == 0 {
 		dir = DefaultSATBaseDir
 	}
 	return a.sat.RunNCCLTests(ctx, dir, gpuIndices, logFunc)
 }
 func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
-	path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
+	path, err := a.RunNCCLTests(ctx, DefaultSATBaseDir, nil, nil)
 	body := "Results: " + path
 	if err != nil && err != context.Canceled {
 		body += "\nERROR: " + err.Error()
@@ -915,6 +995,41 @@ func bodyOr(body, fallback string) string {
 	return body
 }
 // writePSUStatusesToDB stores the PSU statuses gathered during audit in the
 // component-status DB so the Hardware Summary card can show them.
 // The statuses originate from IPMI (ipmitool fru + sdr) during audit.
 //
 // Each PSU with a non-nil Status is recorded under "psu:<slot>" ("psu:?" when
 // the slot is unknown) together with its error description, if any. PSUs
 // without a status are skipped. Finally an aggregate "psu:all" entry is
 // recorded with the worst status seen: "Critical" beats "Warning", which
 // beats the initial "OK". Nothing is written when db is nil or psus is empty.
 func writePSUStatusesToDB(db *ComponentStatusDB, psus []schema.HardwarePowerSupply) {
 	if db == nil || len(psus) == 0 {
 		return
 	}
 	const source = "audit:ipmi"
 	aggregate := "OK"
 	for i := range psus {
 		unit := &psus[i]
 		if unit.Status == nil {
 			continue
 		}
 		status := *unit.Status
 		slot, detail := "?", ""
 		if unit.Slot != nil {
 			slot = *unit.Slot
 		}
 		if unit.ErrorDescription != nil {
 			detail = *unit.ErrorDescription
 		}
 		db.Record("psu:"+slot, source, status, detail)
 		// Escalate the aggregate but never downgrade it.
 		if status == "Critical" {
 			aggregate = "Critical"
 		} else if status == "Warning" && aggregate != "Critical" {
 			aggregate = "Warning"
 		}
 	}
 	db.Record("psu:all", source, aggregate, "")
 }
 func ReadRuntimeHealth(path string) (schema.RuntimeHealth, error) {
 	raw, err := os.ReadFile(path)
 	if err != nil {
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"bee/audit/internal/platform"
@@ -122,11 +123,14 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaAutotuneFn       func(string, platform.NvidiaBenchmarkOptions, string) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
 	runNvidiaPulseFn          func(string, int, []int) (string, error)
 	runNvidiaBandwidthFn      func(string, []int) (string, error)
 	runNCCLFn                 func(string, []int) (string, error)
 	runNvidiaTargetedStressFn func(string, int, []int) (string, error)
 	runMemoryFn               func(string) (string, error)
 	runStorageFn              func(string) (string, error)
@@ -135,6 +139,8 @@ type fakeSAT struct {
 	listAMDGPUsFn             func() ([]platform.AMDGPUInfo, error)
 	runAMDPackFn              func(string) (string, error)
 	listNvidiaGPUsFn          func() ([]platform.NvidiaGPU, error)
 	listNvidiaGPUStatusesFn   func() ([]platform.NvidiaGPUStatus, error)
 	resetNvidiaGPUFn          func(int) (string, error)
 }
 func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
@@ -152,6 +158,20 @@ func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts plat
 	return f.runNvidiaFn(baseDir)
 }
 // RunNvidiaPowerBench is the fake power-bench entry point. It calls
 // runNvidiaPowerBenchFn with baseDir and opts when that hook is set, and
 // falls back to runNvidiaFn(baseDir) otherwise. The context and log callback
 // are ignored.
 func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
 	if f.runNvidiaPowerBenchFn == nil {
 		return f.runNvidiaFn(baseDir)
 	}
 	return f.runNvidiaPowerBenchFn(baseDir, opts)
 }
 // RunNvidiaPowerSourceAutotune is the fake autotune entry point. It calls
 // runNvidiaAutotuneFn with baseDir, opts and benchmarkKind when that hook is
 // set, and falls back to runNvidiaFn(baseDir) otherwise. The context and log
 // callback are ignored.
 func (f fakeSAT) RunNvidiaPowerSourceAutotune(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, _ func(string)) (string, error) {
 	if f.runNvidiaAutotuneFn == nil {
 		return f.runNvidiaFn(baseDir)
 	}
 	return f.runNvidiaAutotuneFn(baseDir, opts, benchmarkKind)
 }
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
@@ -159,7 +179,7 @@ func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir
 	return f.runNvidiaFn(baseDir)
 }
-func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ int, _ func(string)) (string, error) {
 	if f.runNvidiaComputeFn != nil {
 		return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
 	}
@@ -201,11 +221,25 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
 	return nil, nil
 }
-func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
+func (f fakeSAT) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
 	if f.listNvidiaGPUStatusesFn != nil {
 		return f.listNvidiaGPUStatusesFn()
 	}
 	return nil, nil
 }
 // ResetNvidiaGPU is the fake GPU reset. It calls resetNvidiaGPUFn with index
 // when that hook is set; with no hook it reports success with empty output.
 func (f fakeSAT) ResetNvidiaGPU(index int) (string, error) {
 	if f.resetNvidiaGPUFn == nil {
 		return "", nil
 	}
 	return f.resetNvidiaGPUFn(index)
 }
 func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _, _ int, _ func(string)) (string, error) {
 	return f.runMemoryFn(baseDir)
 }
-func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
+func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ bool, _ func(string)) (string, error) {
 	return f.runStorageFn(baseDir)
 }
@@ -263,10 +297,43 @@ func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.Platf
 	return "", nil
 }
-func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
+func (f fakeSAT) RunNCCLTests(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNCCLFn != nil {
 		return f.runNCCLFn(baseDir, gpuIndices)
 	}
 	return "", nil
 }
 func TestRunNCCLTestsPassesSelectedGPUs(t *testing.T) {
 	t.Parallel()
 	var gotBaseDir string
 	var gotGPUIndices []int
 	a := &App{
 		sat: fakeSAT{
 			runNCCLFn: func(baseDir string, gpuIndices []int) (string, error) {
 				gotBaseDir = baseDir
 				gotGPUIndices = append([]int(nil), gpuIndices...)
 				return "/tmp/nccl-tests.tar.gz", nil
 			},
 		},
 	}
 	path, err := a.RunNCCLTests(context.Background(), "/tmp/sat", []int{3, 1}, nil)
 	if err != nil {
 		t.Fatalf("RunNCCLTests error: %v", err)
 	}
 	if path != "/tmp/nccl-tests.tar.gz" {
 		t.Fatalf("path=%q want %q", path, "/tmp/nccl-tests.tar.gz")
 	}
 	if gotBaseDir != "/tmp/sat" {
 		t.Fatalf("baseDir=%q want %q", gotBaseDir, "/tmp/sat")
 	}
 	if len(gotGPUIndices) != 2 || gotGPUIndices[0] != 3 || gotGPUIndices[1] != 1 {
 		t.Fatalf("gpuIndices=%v want [3 1]", gotGPUIndices)
 	}
 }
 func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 	t.Parallel()
@@ -526,8 +593,6 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
 }
 func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
 	t.Parallel()
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -564,8 +629,6 @@ func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
 }
 func TestExportSupportBundleResultDoesNotPretendSuccessOnError(t *testing.T) {
 	t.Parallel()
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -627,8 +690,6 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
 }
 func TestRunSATDefaultsToExportDir(t *testing.T) {
 	t.Parallel()
 	oldSATBaseDir := DefaultSATBaseDir
 	DefaultSATBaseDir = "/tmp/export/bee-sat"
 	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
@@ -757,6 +818,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.MkdirAll(filepath.Join(exportDir, "bee-bench"), 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json"), []byte(`{"version":1,"updated_at":"2026-04-20T01:02:03Z","selected_source":"sdr_psu_input","reason":"selected lowest relative error"}`), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -784,6 +851,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	tr := tar.NewReader(gzr)
 	var names []string
 	var auditJSON string
 	var manifest string
 	for {
 		hdr, err := tr.Next()
 		if errors.Is(err, io.EOF) {
@@ -800,11 +868,21 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 			}
 			auditJSON = string(body)
 		}
 		if strings.HasSuffix(hdr.Name, "/manifest.txt") {
 			body, err := io.ReadAll(tr)
 			if err != nil {
 				t.Fatalf("read manifest entry: %v", err)
 			}
 			manifest = string(body)
 		}
 	}
 	for _, want := range []string{
 		"/system/ip-link.txt",
 		"/system/ip-link-stats.txt",
 		"/system/kernel-aer-nvidia.txt",
 		"/system/lspci-nvidia-bridges-vv.txt",
 		"/system/pcie-aer-sysfs.txt",
 		"/system/ethtool-info.txt",
 		"/system/ethtool-link.txt",
 		"/system/ethtool-module.txt",
@@ -840,6 +918,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
 		t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
 	}
 	if !contains(manifest, "files:") {
 		t.Fatalf("support bundle manifest missing files section:\n%s", manifest)
 	}
 	if !strings.Contains(manifest, "power_autotune_selected_source=sdr_psu_input") {
 		t.Fatalf("support bundle manifest missing autotune source:\n%s", manifest)
 	}
 }
 func TestMainBanner(t *testing.T) {
--- a/audit/internal/app/atomic_write.go
+++ b/audit/internal/app/atomic_write.go
@@ -2,10 +2,29 @@ package app
 import (
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 )
 // readFileLimited loads the whole file at path, but gives up with an error
 // when the file holds more than maxBytes bytes. The cap keeps a corrupted or
 // unexpectedly huge data file from exhausting memory.
 //
 // Errors from opening or reading the file are returned unchanged.
 func readFileLimited(path string, maxBytes int64) ([]byte, error) {
 	src, openErr := os.Open(path)
 	if openErr != nil {
 		return nil, openErr
 	}
 	defer src.Close()
 	// Request one byte beyond the cap: receiving it proves the file is too big
 	// without having to read the rest of it.
 	capped := io.LimitReader(src, maxBytes+1)
 	buf, readErr := io.ReadAll(capped)
 	switch {
 	case readErr != nil:
 		return nil, readErr
 	case int64(len(buf)) > maxBytes:
 		return nil, fmt.Errorf("file %s too large (exceeds %d bytes)", path, maxBytes)
 	}
 	return buf, nil
 }
 func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
--- a/audit/internal/app/component_status_db.go
+++ b/audit/internal/app/component_status_db.go
@@ -46,7 +46,7 @@ func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) {
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return nil, err
 	}
-	data, err := os.ReadFile(path)
+	data, err := readFileLimited(path, 10<<20)
 	if err != nil && !os.IsNotExist(err) {
 		return nil, err
 	}
--- a/audit/internal/app/sat_overlay.go
+++ b/audit/internal/app/sat_overlay.go
@@ -3,6 +3,7 @@ package app
 import (
 	"os"
 	"path/filepath"
 	"strconv"
 	"sort"
 	"strings"
@@ -18,6 +19,7 @@ func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string, db *C
 	}
 	if summary, ok := loadLatestSATSummary(baseDir, "gpu-nvidia-"); ok {
 		applyGPUVendorSAT(snap.PCIeDevices, "nvidia", summary)
 		applyNvidiaPerGPUStatus(snap.PCIeDevices, baseDir)
 	}
 	if summary, ok := loadLatestSATSummary(baseDir, "memory-"); ok {
 		applyMemorySAT(snap.Memory, summary)
@@ -32,6 +34,100 @@ func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string, db *C
 	applyComponentStatusDB(snap, db)
 }
 // nvidiaPerGPUStatus is the outcome recorded for one GPU in a
 // "gpu-*-status.txt" file of an NVIDIA SAT run directory.
 type nvidiaPerGPUStatus struct {
 	runStatus string // upper-cased, trimmed "run_status" value from the status file
 	reason    string // trimmed "reason" value from the status file; may be empty
 }
 // applyNvidiaPerGPUStatus overlays the per-GPU results of the latest NVIDIA
 // SAT run found under baseDir onto the matching entries of devs.
 //
 // A device is matched through its "nvidia_gpu_index" telemetry value; devices
 // without that value, with an unparsable value, or without a status entry for
 // their index are left untouched.
 func applyNvidiaPerGPUStatus(devs []schema.HardwarePCIeDevice, baseDir string) {
 	perGPU, runAt, found := loadLatestNvidiaPerGPUStatus(baseDir)
 	if !found {
 		return
 	}
 	for i := range devs {
 		dev := &devs[i]
 		// Indexing a nil map is safe in Go and simply reports "not present".
 		rawIndex, present := dev.Telemetry["nvidia_gpu_index"]
 		if !present {
 			continue
 		}
 		gpuIndex, valid := telemetryInt(rawIndex)
 		if !valid {
 			continue
 		}
 		entry, known := perGPU[gpuIndex]
 		if !known {
 			continue
 		}
 		// Use the recorded reason as the label, or a generic one when it is blank.
 		label := firstNonEmpty(strings.TrimSpace(entry.reason), "nvidia GPU SAT")
 		if status, description, mapped := satKeyStatus(entry.runStatus, label); mapped {
 			mergeComponentStatusPreferDetail(&dev.HardwareComponentStatus, runAt, status, description)
 		}
 	}
 }
 // loadLatestNvidiaPerGPUStatus reads the per-GPU status files of the newest
 // "gpu-nvidia-*" run directory under baseDir.
 //
 // It returns the statuses keyed by the "gpu_index" value of each
 // "gpu-*-status.txt" file, the trimmed "run_at_utc" value from the run's
 // summary.txt, and true on success. It returns false when no run directory
 // exists, its summary.txt cannot be read, or no status file yields a valid
 // GPU index. Status files that cannot be read or parsed are skipped.
 func loadLatestNvidiaPerGPUStatus(baseDir string) (map[int]nvidiaPerGPUStatus, string, bool) {
 	matches, err := filepath.Glob(filepath.Join(baseDir, "gpu-nvidia-*"))
 	if err != nil || len(matches) == 0 {
 		return nil, "", false
 	}
 	sort.Strings(matches)
 	// The glob also matches plain files such as "<run>.tar.gz" archives, which
 	// sort after the run directory they were built from. Pick the newest entry
 	// that is actually a directory instead of blindly taking the last match.
 	runDir := ""
 	for i := len(matches) - 1; i >= 0; i-- {
 		if info, statErr := os.Stat(matches[i]); statErr == nil && info.IsDir() {
 			runDir = matches[i]
 			break
 		}
 	}
 	if runDir == "" {
 		return nil, "", false
 	}
 	summaryRaw, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
 	if err != nil {
 		return nil, "", false
 	}
 	summaryKV := parseKeyValueSummary(string(summaryRaw))
 	runAtUTC := strings.TrimSpace(summaryKV["run_at_utc"])
 	files, err := filepath.Glob(filepath.Join(runDir, "gpu-*-status.txt"))
 	if err != nil || len(files) == 0 {
 		return nil, "", false
 	}
 	out := make(map[int]nvidiaPerGPUStatus, len(files))
 	for _, file := range files {
 		raw, err := os.ReadFile(file)
 		if err != nil {
 			continue
 		}
 		kv := parseKeyValueSummary(string(raw))
 		// The index comes from the file content, not from the file name.
 		idx, err := strconv.Atoi(strings.TrimSpace(kv["gpu_index"]))
 		if err != nil {
 			continue
 		}
 		out[idx] = nvidiaPerGPUStatus{
 			runStatus: strings.ToUpper(strings.TrimSpace(kv["run_status"])),
 			reason:    strings.TrimSpace(kv["reason"]),
 		}
 	}
 	if len(out) == 0 {
 		return nil, "", false
 	}
 	return out, runAtUTC, true
 }
 // telemetryInt converts a loosely typed telemetry value into an int.
 //
 // Supported inputs are int, int32, int64, float64 and decimal strings
 // (surrounding whitespace is ignored). A float64 is accepted only when it
 // holds a whole number: fractional values, NaN and infinities are rejected
 // rather than silently truncated into a misleading index. Any other type, or
 // a string that is not an integer, yields (0, false).
 func telemetryInt(v any) (int, bool) {
 	switch value := v.(type) {
 	case int:
 		return value, true
 	case int32:
 		return int(value), true
 	case int64:
 		return int(value), true
 	case float64:
 		// Numbers decoded from JSON into `any` arrive as float64. The round
 		// trip through int only compares equal for representable whole
 		// numbers; NaN never compares equal, so it is rejected as well.
 		n := int(value)
 		if float64(n) != value {
 			return 0, false
 		}
 		return n, true
 	case string:
 		n, err := strconv.Atoi(strings.TrimSpace(value))
 		if err != nil {
 			return 0, false
 		}
 		return n, true
 	default:
 		return 0, false
 	}
 }
 type satSummary struct {
 	runAtUTC string
 	overall  string
@@ -176,6 +272,31 @@ func mergeComponentStatus(component *schema.HardwareComponentStatus, changedAt,
 	}
 }
 // mergeComponentStatusPreferDetail folds a SAT result into component.
 //
 // When the component has no status, is "Unknown", or the SAT status is more
 // severe than the current one, the work is delegated to mergeComponentStatus.
 // When both have the same severity and description is not blank, the
 // component's status and error description are overwritten; if changedAt is
 // also not blank, the change timestamp is updated and a history entry is
 // appended. A nil component or an empty satStatus is a no-op.
 func mergeComponentStatusPreferDetail(component *schema.HardwareComponentStatus, changedAt, satStatus, description string) {
 	if component == nil || satStatus == "" {
 		return
 	}
 	existing := strings.TrimSpace(ptrString(component.Status))
 	incomingRank := statusSeverity(satStatus)
 	existingRank := statusSeverity(existing)
 	switch {
 	case existing == "" || existing == "Unknown" || incomingRank > existingRank:
 		mergeComponentStatus(component, changedAt, satStatus, description)
 	case incomingRank == existingRank && strings.TrimSpace(description) != "":
 		// Same severity: replace the text with the more specific description.
 		component.Status = appStringPtr(satStatus)
 		component.ErrorDescription = appStringPtr(description)
 		if strings.TrimSpace(changedAt) == "" {
 			return
 		}
 		component.StatusChangedAt = appStringPtr(changedAt)
 		entry := schema.HardwareStatusHistory{
 			Status:    satStatus,
 			ChangedAt: changedAt,
 			Details:   appStringPtr(description),
 		}
 		component.StatusHistory = append(component.StatusHistory, entry)
 	}
 }
 func statusSeverity(status string) int {
 	switch strings.TrimSpace(status) {
 	case "Critical":
--- a/audit/internal/app/sat_overlay_test.go
+++ b/audit/internal/app/sat_overlay_test.go
@@ -59,3 +59,51 @@ func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
 		t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
 	}
 }
 // TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile builds a single
 // NVIDIA SAT run directory holding a summary.txt and a status file for GPU
 // index 1 only, then checks that applyLatestSATStatuses marks the device
 // whose "nvidia_gpu_index" telemetry is 1 as Critical with a description
 // derived from the per-GPU reason.
 func TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile(t *testing.T) {
 	baseDir := t.TempDir()
 	runDir := filepath.Join(baseDir, "gpu-nvidia-20260407-162123")
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte("run_at_utc=2026-04-07T16:21:23Z\noverall_status=FAILED\n"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	// Only GPU 1 gets a per-GPU status file; GPU 0 has none.
 	if err := os.WriteFile(filepath.Join(runDir, "gpu-1-status.txt"), []byte("gpu_index=1\ngpu_name=NVIDIA H100 PCIe\nrun_status=FAILED\nreason=GPU requires reset\n"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	class := "VideoController"
 	manufacturer := "NVIDIA Corporation"
 	bdf0 := "0000:4b:00.0"
 	bdf1 := "0000:4f:00.0"
 	snap := schema.HardwareSnapshot{
 		PCIeDevices: []schema.HardwarePCIeDevice{
 			{
 				DeviceClass:  &class,
 				Manufacturer: &manufacturer,
 				BDF:          &bdf0,
 				Telemetry:    map[string]any{"nvidia_gpu_index": 0},
 			},
 			{
 				DeviceClass:  &class,
 				Manufacturer: &manufacturer,
 				BDF:          &bdf1,
 				Telemetry:    map[string]any{"nvidia_gpu_index": 1},
 			},
 		},
 	}
 	// A nil component status DB is passed; only the SAT overlay is exercised.
 	applyLatestSATStatuses(&snap, baseDir, nil)
 	if snap.PCIeDevices[1].Status == nil || *snap.PCIeDevices[1].Status != "Critical" {
 		t.Fatalf("gpu1 status=%v want Critical", snap.PCIeDevices[1].Status)
 	}
 	// The expected text is the reason from the status file plus " failed".
 	if snap.PCIeDevices[1].ErrorDescription == nil || *snap.PCIeDevices[1].ErrorDescription != "GPU requires reset failed" {
 		got := "<nil>"
 		if snap.PCIeDevices[1].ErrorDescription != nil {
 			got = *snap.PCIeDevices[1].ErrorDescription
 		}
 		t.Fatalf("gpu1 error=%q want per-gpu reason", got)
 	}
 }
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -2,6 +2,7 @@ package app
 import (
 	"archive/tar"
 	"bee/audit/internal/platform"
 	"compress/gzip"
 	"fmt"
 	"io"
@@ -22,6 +23,8 @@ var supportBundleServices = []string{
 	"bee-selfheal.service",
 	"bee-selfheal.timer",
 	"bee-sshsetup.service",
 	"nvidia-dcgm.service",
 	"nvidia-fabricmanager.service",
 }
 var supportBundleCommands = []struct {
@@ -40,17 +43,112 @@ var supportBundleCommands = []struct {
 	{name: "system/mount.txt", cmd: []string{"mount"}},
 	{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
 	{name: "system/dmesg.txt", cmd: []string{"dmesg"}},
 	{name: "system/kernel-aer-nvidia.txt", cmd: []string{"sh", "-c", `
 if command -v dmesg >/dev/null 2>&1; then
  dmesg | grep -iE 'AER|NVRM|Xid|pcieport|nvidia' || echo "no AER/NVRM/Xid kernel messages found"
 else
  echo "dmesg not found"
 fi
 `}},
 	{name: "system/nvidia-smi-q.txt", cmd: []string{"nvidia-smi", "-q"}},
 	{name: "system/nvidia-smi-topo.txt", cmd: []string{"sh", "-c", `
 if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi topo -m 2>&1 || true
 else
  echo "nvidia-smi not found"
 fi
 `}},
 	{name: "system/systemctl-nvidia-units.txt", cmd: []string{"sh", "-c", `
 if ! command -v systemctl >/dev/null 2>&1; then
  echo "systemctl not found"
  exit 0
 fi
 echo "=== unit files ==="
 systemctl list-unit-files --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
 echo
 echo "=== active units ==="
 systemctl list-units --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
 echo
 echo "=== failed units ==="
 systemctl --failed --no-pager 2>&1 | grep -iE 'nvidia|fabric' || echo "no failed nvidia/fabric units"
 `}},
 	{name: "system/fabric-manager-paths.txt", cmd: []string{"sh", "-c", `
 for candidate in \
  /usr/bin/nvidia-fabricmanager \
  /usr/bin/nv-fabricmanager \
  /usr/bin/nvidia-fabricmanagerd \
  /usr/bin/nvlsm; do
  if [ -e "$candidate" ]; then
    echo "=== $candidate ==="
    ls -l "$candidate" 2>&1 || true
    echo
  fi
 done
 if ! ls /usr/bin/nvidia-fabricmanager /usr/bin/nv-fabricmanager /usr/bin/nvidia-fabricmanagerd /usr/bin/nvlsm >/dev/null 2>&1; then
  echo "no fabric manager binaries found"
 fi
 `}},
 	{name: "system/lspci-nvidia-bridges-vv.txt", cmd: []string{"sh", "-c", `
 if ! command -v lspci >/dev/null 2>&1; then
  echo "lspci not found"
  exit 0
 fi
 found=0
 	for gpu in $(lspci -Dn | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ {print $1}'); do
  found=1
  echo "=== GPU $gpu ==="
  lspci -s "$gpu" -vv 2>&1 || true
  bridge=$(basename "$(readlink -f "/sys/bus/pci/devices/$gpu/.." 2>/dev/null)" 2>/dev/null)
  if [ -n "$bridge" ] && [ "$bridge" != "$gpu" ]; then
    echo
    echo "=== UPSTREAM $bridge for $gpu ==="
    lspci -s "$bridge" -vv 2>&1 || true
  fi
  echo
 done
 if [ "$found" -eq 0 ]; then
  echo "no NVIDIA PCI devices found"
 fi
 `}},
 	{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
 for d in /sys/bus/pci/devices/*/; do
  vendor=$(cat "$d/vendor" 2>/dev/null)
-  [ "$vendor" = "0x10de" ] || continue
+	  [ "$vendor" = "0x10de" ] || continue
-  dev=$(basename "$d")
+	  class=$(cat "$d/class" 2>/dev/null)
 	  case "$class" in
 	    0x030000|0x030200) ;;
 	    *) continue ;;
 	  esac
 	  dev=$(basename "$d")
  echo "=== $dev ==="
  for f in current_link_speed current_link_width max_link_speed max_link_width; do
    printf "  %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
  done
 done
 `}},
 	{name: "system/pcie-aer-sysfs.txt", cmd: []string{"sh", "-c", `
 found=0
 for dev in /sys/bus/pci/devices/*; do
  [ -e "$dev" ] || continue
  bdf=$(basename "$dev")
  block=""
  for f in aer_dev_correctable aer_dev_fatal aer_dev_nonfatal aer_rootport_total_err_cor aer_rootport_total_err_fatal aer_rootport_total_err_nonfatal; do
    if [ -r "$dev/$f" ]; then
      if [ -z "$block" ]; then
        block=1
        found=1
        echo "=== $bdf ==="
      fi
      printf "  %-30s %s\n" "$f" "$(cat "$dev/$f" 2>/dev/null)"
    fi
  done
  if [ -n "$block" ]; then
    echo
  fi
 done
 if [ "$found" -eq 0 ]; then
  echo "no PCIe AER sysfs counters found"
 fi
 `}},
 	{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
 if ! command -v ethtool >/dev/null 2>&1; then
@@ -137,9 +235,13 @@ var supportBundleOptionalFiles = []struct {
 }{
 	{name: "system/kern.log", src: "/var/log/kern.log"},
 	{name: "system/syslog.txt", src: "/var/log/syslog"},
 	{name: "system/fabricmanager.log", src: "/var/log/fabricmanager.log"},
 	{name: "system/nvlsm.log", src: "/var/log/nvlsm.log"},
 	{name: "system/fabricmanager/fabricmanager.log", src: "/var/log/fabricmanager/fabricmanager.log"},
 	{name: "system/fabricmanager/nvlsm.log", src: "/var/log/fabricmanager/nvlsm.log"},
 }
-const supportBundleGlob = "bee-support-*.tar.gz"
+const supportBundleGlob = "????-??-?? (BEE-SP*)*.tar.gz"
 func BuildSupportBundle(exportDir string) (string, error) {
 	exportDir = strings.TrimSpace(exportDir)
@@ -153,9 +255,14 @@ func BuildSupportBundle(exportDir string) (string, error) {
 		return "", err
 	}
-	host := sanitizeFilename(hostnameOr("unknown"))
+	now := time.Now().UTC()
-	ts := time.Now().UTC().Format("20060102-150405")
+	date := now.Format("2006-01-02")
-	stageRoot := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-%s-%s", host, ts))
+	tod := now.Format("150405")
 	ver := bundleVersion()
 	model := serverModelForBundle()
 	sn := serverSerialForBundle()
 	stageRoot := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-stage-%s-%s", sanitizeFilename(hostnameOr("unknown")), now.Format("20060102-150405")))
 	if err := os.MkdirAll(stageRoot, 0755); err != nil {
 		return "", err
 	}
@@ -187,7 +294,8 @@ func BuildSupportBundle(exportDir string) (string, error) {
 		return "", err
 	}
-	archivePath := filepath.Join(os.TempDir(), fmt.Sprintf("bee-support-%s-%s.tar.gz", host, ts))
+	archiveName := fmt.Sprintf("%s (BEE-SP v%s) %s %s %s.tar.gz", date, ver, model, sn, tod)
 	archivePath := filepath.Join(os.TempDir(), archiveName)
 	if err := createSupportTarGz(archivePath, stageRoot); err != nil {
 		return "", err
 	}
@@ -317,6 +425,13 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	fmt.Fprintf(&body, "host=%s\n", hostnameOr("unknown"))
 	fmt.Fprintf(&body, "generated_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
 	fmt.Fprintf(&body, "export_dir=%s\n", exportDir)
 	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json")); err == nil && cfg != nil {
 		fmt.Fprintf(&body, "power_autotune_selected_source=%s\n", cfg.SelectedSource)
 		fmt.Fprintf(&body, "power_autotune_updated_at=%s\n", cfg.UpdatedAt.UTC().Format(time.RFC3339))
 		if strings.TrimSpace(cfg.Reason) != "" {
 			fmt.Fprintf(&body, "power_autotune_reason=%s\n", cfg.Reason)
 		}
 	}
 	fmt.Fprintf(&body, "\nfiles:\n")
 	var files []string
@@ -344,6 +459,60 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	return os.WriteFile(dst, []byte(body.String()), 0644)
 }
 // bundleVersion returns the build version for use in the support bundle file
 // name: a leading "v" and then a leading "V" are stripped, and an empty or
 // "unknown" version is reported as "0.0".
 func bundleVersion() string {
 	switch version := strings.TrimPrefix(strings.TrimPrefix(buildVersion(), "v"), "V"); version {
 	case "", "unknown":
 		return "0.0"
 	default:
 		return version
 	}
 }
 // serverModelForBundle returns the DMI system "Product Name" for use in the
 // support bundle file name. Spaces are replaced with underscores, and path
 // separators and control characters are neutralised so the value can never
 // redirect the archive into another directory. It returns "unknown" when
 // dmidecode fails or reports no product name.
 func serverModelForBundle() string {
 	model := supportBundleDMIField("Product Name")
 	return supportBundleNamePart(strings.ReplaceAll(model, " ", "_"))
 }
 // serverSerialForBundle returns the DMI system "Serial Number" for use in the
 // support bundle file name, with path separators and control characters
 // neutralised. It returns "unknown" when dmidecode fails or reports no
 // serial number.
 func serverSerialForBundle() string {
 	return supportBundleNamePart(supportBundleDMIField("Serial Number"))
 }
 // supportBundleDMIField runs `dmidecode -t 1` and returns the trimmed value of
 // the first "<field>: <value>" line, or "unknown" when the command fails, the
 // field is absent, or its value is empty.
 func supportBundleDMIField(field string) string {
 	raw, err := exec.Command("dmidecode", "-t", "1").Output()
 	if err != nil {
 		return "unknown"
 	}
 	for _, line := range strings.Split(string(raw), "\n") {
 		key, val, ok := strings.Cut(strings.TrimSpace(line), ": ")
 		if !ok || strings.TrimSpace(key) != field {
 			continue
 		}
 		if val = strings.TrimSpace(val); val != "" {
 			return val
 		}
 		return "unknown"
 	}
 	return "unknown"
 }
 // supportBundleNamePart replaces characters that are unsafe inside a single
 // file name component (path separators and control characters) with "_".
 // Vendor DMI strings such as "X9DRi-LN4+/X9DR3-LN4+" would otherwise turn the
 // archive name into a path below a directory that does not exist.
 func supportBundleNamePart(v string) string {
 	return strings.Map(func(r rune) rune {
 		if r == '/' || r == '\\' || r < 0x20 || r == 0x7f {
 			return '_'
 		}
 		return r
 	}, v)
 }
 func buildVersion() string {
 	raw, err := exec.Command("bee", "version").CombinedOutput()
 	if err != nil {
--- a/audit/internal/collector/nic_mellanox.go
+++ b/audit/internal/collector/nic_mellanox.go
@@ -179,11 +179,3 @@ func commandOutputWithTimeout(timeout time.Duration, name string, args ...string
 	defer cancel()
 	return exec.CommandContext(ctx, name, args...).Output()
 }
 func interfaceHasCarrier(iface string) bool {
 	raw, err := readNetCarrierFile(iface)
 	if err != nil {
 		return false
 	}
 	return strings.TrimSpace(raw) == "1"
 }
--- a/audit/internal/collector/nic_telemetry.go
+++ b/audit/internal/collector/nic_telemetry.go
@@ -58,12 +58,10 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
 			}
 		}
-		if interfaceHasCarrier(iface) {
+		if out, err := ethtoolModuleQuery(iface); err == nil {
-			if out, err := ethtoolModuleQuery(iface); err == nil {
+			if injectSFPDOMTelemetry(&devs[i], out) {
-				if injectSFPDOMTelemetry(&devs[i], out) {
+				enriched++
-					enriched++
+				continue
 					continue
 				}
 			}
 		}
 		if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
@@ -115,8 +113,38 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 		}
 		key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
 		val := strings.TrimSpace(trimmed[idx+1:])
 		if val == "" || strings.EqualFold(val, "not supported") || strings.EqualFold(val, "unknown") {
 			continue
 		}
 		switch {
 		case key == "identifier":
 			s := parseSFPIdentifier(val)
 			dev.SFPIdentifier = &s
 			t := true
 			dev.SFPPresent = &t
 			changed = true
 		case key == "connector":
 			s := parseSFPConnector(val)
 			dev.SFPConnector = &s
 			changed = true
 		case key == "vendor name":
 			s := strings.TrimSpace(val)
 			dev.SFPVendor = &s
 			changed = true
 		case key == "vendor pn":
 			s := strings.TrimSpace(val)
 			dev.SFPPartNumber = &s
 			changed = true
 		case key == "vendor sn":
 			s := strings.TrimSpace(val)
 			dev.SFPSerialNumber = &s
 			changed = true
 		case strings.Contains(key, "laser wavelength"):
 			if f, ok := firstFloat(val); ok {
 				dev.SFPWavelengthNM = &f
 				changed = true
 			}
 		case strings.Contains(key, "module temperature"):
 			if f, ok := firstFloat(val); ok {
 				dev.SFPTemperatureC = &f
@@ -147,12 +175,61 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 	return changed
 }
 // parseSFPIdentifier reduces a raw ethtool identifier value to its
 // human-readable transceiver type: "0x03 (SFP)" becomes "SFP". A value
 // without a usable parenthesised part is returned as is.
 func parseSFPIdentifier(val string) string {
 	label := extractParens(val)
 	if label == "" {
 		return val
 	}
 	return label
 }
 // parseSFPConnector reduces a raw ethtool connector value to the connector
 // type: "0x07 (LC)" becomes "LC". A value without a usable parenthesised part
 // is returned as is.
 func parseSFPConnector(val string) string {
 	label := extractParens(val)
 	if label == "" {
 		return val
 	}
 	return label
 }
 // parenRe matches the first non-empty "(...)" group in a string.
 var parenRe = regexp.MustCompile(`\(([^)]+)\)`)
 // extractParens returns the whitespace-trimmed text of the first non-empty
 // parenthesised group in s, or "" when there is none.
 func extractParens(s string) string {
 	loc := parenRe.FindStringSubmatchIndex(s)
 	if loc == nil {
 		return ""
 	}
 	// loc[2]:loc[3] delimit the first capture group, i.e. the text inside the
 	// parentheses.
 	return strings.TrimSpace(s[loc[2]:loc[3]])
 }
 func parseSFPDOM(raw string) map[string]any {
 	dev := schema.HardwarePCIeDevice{}
 	if !injectSFPDOMTelemetry(&dev, raw) {
 		return map[string]any{}
 	}
 	out := map[string]any{}
 	if dev.SFPPresent != nil {
 		out["sfp_present"] = *dev.SFPPresent
 	}
 	if dev.SFPIdentifier != nil {
 		out["sfp_identifier"] = *dev.SFPIdentifier
 	}
 	if dev.SFPConnector != nil {
 		out["sfp_connector"] = *dev.SFPConnector
 	}
 	if dev.SFPVendor != nil {
 		out["sfp_vendor"] = *dev.SFPVendor
 	}
 	if dev.SFPPartNumber != nil {
 		out["sfp_part_number"] = *dev.SFPPartNumber
 	}
 	if dev.SFPSerialNumber != nil {
 		out["sfp_serial_number"] = *dev.SFPSerialNumber
 	}
 	if dev.SFPWavelengthNM != nil {
 		out["sfp_wavelength_nm"] = *dev.SFPWavelengthNM
 	}
 	if dev.SFPTemperatureC != nil {
 		out["sfp_temperature_c"] = *dev.SFPTemperatureC
 	}
--- a/audit/internal/collector/nic_telemetry_test.go
+++ b/audit/internal/collector/nic_telemetry_test.go
@@ -122,10 +122,7 @@ func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T)
 	readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
 	readNetCarrierFile = func(string) (string, error) { return "0", nil }
 	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
-	ethtoolModuleQuery = func(string) (string, error) {
+	ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("no module") }
 		t.Fatal("ethtool -m should not be called without carrier")
 		return "", nil
 	}
 	class := "EthernetController"
 	bdf := "0000:18:00.0"
--- a/audit/internal/collector/nvidia.go
+++ b/audit/internal/collector/nvidia.go
@@ -13,7 +13,9 @@ import (
 const nvidiaVendorID = 0x10de
 type nvidiaGPUInfo struct {
 	Index              int
 	BDF                string
 	Name               string
 	Serial             string
 	VBIOS              string
 	TemperatureC       *float64
@@ -72,6 +74,9 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 			continue
 		}
 		if v := strings.TrimSpace(info.Name); v != "" {
 			devs[i].Model = &v
 		}
 		if v := strings.TrimSpace(info.Serial); v != "" {
 			devs[i].SerialNumber = &v
 		}
@@ -98,7 +103,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
 	out, err := exec.Command(
 		"nvidia-smi",
-		"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
+		"--query-gpu=index,pci.bus_id,name,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
@@ -122,8 +127,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		if len(rec) == 0 {
 			continue
 		}
-		if len(rec) < 13 {
+		if len(rec) < 14 {
-			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 13", len(rec))
+			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 14", len(rec))
 		}
 		bdf := normalizePCIeBDF(rec[1])
@@ -132,18 +137,20 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		}
 		info := nvidiaGPUInfo{
 			Index:              parseRequiredInt(rec[0]),
 			BDF:                bdf,
-			Serial:             strings.TrimSpace(rec[2]),
+			Name:               strings.TrimSpace(rec[2]),
-			VBIOS:              strings.TrimSpace(rec[3]),
+			Serial:             strings.TrimSpace(rec[3]),
-			TemperatureC:       parseMaybeFloat(rec[4]),
+			VBIOS:              strings.TrimSpace(rec[4]),
-			PowerW:             parseMaybeFloat(rec[5]),
+			TemperatureC:       parseMaybeFloat(rec[5]),
-			ECCUncorrected:     parseMaybeInt64(rec[6]),
+			PowerW:             parseMaybeFloat(rec[6]),
-			ECCCorrected:       parseMaybeInt64(rec[7]),
+			ECCUncorrected:     parseMaybeInt64(rec[7]),
-			HWSlowdown:         parseMaybeBool(rec[8]),
+			ECCCorrected:       parseMaybeInt64(rec[8]),
-			PCIeLinkGenCurrent: parseMaybeInt(rec[9]),
+			HWSlowdown:         parseMaybeBool(rec[9]),
-			PCIeLinkGenMax:     parseMaybeInt(rec[10]),
+			PCIeLinkGenCurrent: parseMaybeInt(rec[10]),
-			PCIeLinkWidthCur:   parseMaybeInt(rec[11]),
+			PCIeLinkGenMax:     parseMaybeInt(rec[11]),
-			PCIeLinkWidthMax:   parseMaybeInt(rec[12]),
+			PCIeLinkWidthCur:   parseMaybeInt(rec[12]),
 			PCIeLinkWidthMax:   parseMaybeInt(rec[13]),
 		}
 		result[bdf] = info
 	}
@@ -187,6 +194,14 @@ func parseMaybeInt(v string) *int {
 	return &n
 }
 // parseRequiredInt parses v as a decimal integer after trimming surrounding
 // whitespace. It returns 0 when v is not a valid integer, so callers cannot
 // tell a parse failure from a genuine zero.
 func parseRequiredInt(v string) int {
 	if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
 		return parsed
 	}
 	return 0
 }
 // pcieLinkGenLabel renders a PCIe generation number as a "GenN" label,
 // e.g. 4 becomes "Gen4".
 func pcieLinkGenLabel(gen int) string {
 	// Sprint inserts no space here because one of the operands is a string.
 	return fmt.Sprint("Gen", gen)
 }
@@ -240,6 +255,10 @@ func setPCIeFallback(dev *schema.HardwarePCIeDevice) {
 }
 func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
 	if dev.Telemetry == nil {
 		dev.Telemetry = map[string]any{}
 	}
 	dev.Telemetry["nvidia_gpu_index"] = info.Index
 	if info.TemperatureC != nil {
 		dev.TemperatureC = info.TemperatureC
 	}
--- a/audit/internal/collector/nvidia_test.go
+++ b/audit/internal/collector/nvidia_test.go
@@ -6,7 +6,7 @@ import (
 )
 func TestParseNVIDIASMIQuery(t *testing.T) {
-	raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
+	raw := "0, 00000000:65:00.0, NVIDIA H100 80GB HBM3, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
 	byBDF, err := parseNVIDIASMIQuery(raw)
 	if err != nil {
 		t.Fatalf("parse failed: %v", err)
@@ -16,6 +16,9 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
 	if !ok {
 		t.Fatalf("gpu by normalized bdf not found")
 	}
 	if gpu.Name != "NVIDIA H100 80GB HBM3" {
 		t.Fatalf("name: got %q", gpu.Name)
 	}
 	if gpu.Serial != "GPU-SERIAL-1" {
 		t.Fatalf("serial: got %q", gpu.Serial)
 	}
@@ -86,6 +89,9 @@ func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
 	if out[0].Firmware == nil || *out[0].Firmware != "96.00.1F.00.02" {
 		t.Fatalf("firmware: got %v", out[0].Firmware)
 	}
 	if out[0].Telemetry == nil || out[0].Telemetry["nvidia_gpu_index"] != 0 {
 		t.Fatalf("telemetry nvidia_gpu_index: got %#v", out[0].Telemetry)
 	}
 	if out[0].Status == nil || *out[0].Status != statusWarning {
 		t.Fatalf("status: got %v", out[0].Status)
 	}
--- a/audit/internal/collector/pcie.go
+++ b/audit/internal/collector/pcie.go
@@ -2,6 +2,7 @@ package collector
 import (
 	"bee/audit/internal/schema"
 	"fmt"
 	"log/slog"
 	"os/exec"
 	"strconv"
@@ -79,6 +80,25 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
 		}
 	}
 	// Exclude BMC/management virtual VGA adapters — these are firmware video chips,
 	// not real GPUs, and pollute the GPU inventory (e.g. iBMC, iDRAC, iLO VGA).
 	if strings.Contains(c, "vga") || strings.Contains(c, "display") || strings.Contains(c, "3d") {
 		bmcPatterns := []string{
 			"management system chip",
 			"management controller",
 			"ibmc",
 			"idrac",
 			"ilo vga",
 			"aspeed",
 			"matrox",
 		}
 		for _, bad := range bmcPatterns {
 			if strings.Contains(d, bad) {
 				return false
 			}
 		}
 	}
 	if strings.Contains(v, "advanced micro devices") || strings.Contains(v, "[amd]") {
 		internalAMDPatterns := []string{
 			"dummy function",
@@ -153,6 +173,9 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
 	// SVendor/SDevice available but not in schema — skip
 	// Warn if PCIe link is running below its maximum negotiated speed.
 	applyPCIeLinkSpeedWarning(&dev)
 	return dev
 }
@@ -222,6 +245,41 @@ func readPCIStringAttribute(bdf, attribute string) (string, bool) {
 	return value, true
 }
 // applyPCIeLinkSpeedWarning sets the device status to Warning, with an
 // explanatory ErrorDescription, when the current PCIe link speed is below the
 // maximum speed reported for the link.
 //
 // Nothing is changed when dev is nil, when either speed is missing, or when
 // either speed is not a recognised "GenN" label: an unparsable current speed
 // must not be mistaken for a degraded link.
 func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
 	if dev == nil || dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
 		return
 	}
 	current := pcieLinkSpeedRank(*dev.LinkSpeed)
 	maximum := pcieLinkSpeedRank(*dev.MaxLinkSpeed)
 	// Rank 0 means "unrecognised"; comparing it would flag every device whose
 	// current speed could not be parsed.
 	if current == 0 || maximum == 0 || current >= maximum {
 		return
 	}
 	warn := statusWarning
 	dev.Status = &warn
 	desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
 	dev.ErrorDescription = &desc
 }
 // pcieLinkSpeedRank converts a normalized generation label into its number:
 // "Gen1" through "Gen6" map to 1 through 6. Any other string, including
 // different casing or extra characters, yields 0.
 func pcieLinkSpeedRank(gen string) int {
 	const prefix = "Gen"
 	// Exactly one character must follow the prefix.
 	if len(gen) != len(prefix)+1 || gen[:len(prefix)] != prefix {
 		return 0
 	}
 	if digit := gen[len(prefix)]; digit >= '1' && digit <= '6' {
 		return int(digit - '0')
 	}
 	return 0
 }
 func normalizePCILinkSpeed(raw string) string {
 	raw = strings.TrimSpace(strings.ToLower(raw))
 	switch {
--- a/audit/internal/collector/pcie_filter_test.go
+++ b/audit/internal/collector/pcie_filter_test.go
@@ -1,6 +1,7 @@
 package collector
 import (
 	"bee/audit/internal/schema"
 	"encoding/json"
 	"strings"
 	"testing"
@@ -29,6 +30,8 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
 		{name: "raid", class: "RAID bus controller", want: true},
 		{name: "nvme", class: "Non-Volatile memory controller", want: true},
 		{name: "vga", class: "VGA compatible controller", want: true},
 		{name: "ibmc vga", class: "VGA compatible controller", vendor: "Huawei Technologies Co., Ltd.", device: "Hi171x Series [iBMC Intelligent Management system chip w/VGA support]", want: false},
 		{name: "aspeed vga", class: "VGA compatible controller", vendor: "ASPEED Technology, Inc.", device: "ASPEED Graphics Family", want: false},
 		{name: "other encryption controller", class: "Encryption controller", vendor: "Intel Corporation", device: "QuickAssist", want: true},
 	}
@@ -139,3 +142,77 @@ func TestNormalizePCILinkSpeed(t *testing.T) {
 		}
 	}
 }
 // TestApplyPCIeLinkSpeedWarning is a table-driven test of
 // applyPCIeLinkSpeedWarning. Each case starts from a device whose status is
 // statusOK and checks whether the status becomes statusWarning and whether
 // ErrorDescription is set (and mentions the current speed) or stays nil.
 func TestApplyPCIeLinkSpeedWarning(t *testing.T) {
 	// ptr returns a pointer to a copy of s, for the optional speed fields.
 	ptr := func(s string) *string { return &s }
 	tests := []struct {
 		name        string
 		linkSpeed   *string
 		maxSpeed    *string
 		wantWarning bool
 		wantGenIn   string // substring expected in ErrorDescription when warning
 	}{
 		{
 			name:        "degraded Gen1 vs Gen5",
 			linkSpeed:   ptr("Gen1"),
 			maxSpeed:    ptr("Gen5"),
 			wantWarning: true,
 			wantGenIn:   "Gen1",
 		},
 		{
 			name:        "at max Gen5",
 			linkSpeed:   ptr("Gen5"),
 			maxSpeed:    ptr("Gen5"),
 			wantWarning: false,
 		},
 		{
 			name:        "degraded Gen4 vs Gen5",
 			linkSpeed:   ptr("Gen4"),
 			maxSpeed:    ptr("Gen5"),
 			wantWarning: true,
 			wantGenIn:   "Gen4",
 		},
 		{
 			name:        "missing current speed — no warning",
 			linkSpeed:   nil,
 			maxSpeed:    ptr("Gen5"),
 			wantWarning: false,
 		},
 		{
 			name:        "missing max speed — no warning",
 			linkSpeed:   ptr("Gen1"),
 			maxSpeed:    nil,
 			wantWarning: false,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Start from an OK device so any Warning comes from the call below.
 			dev := schema.HardwarePCIeDevice{}
 			ok := statusOK
 			dev.Status = &ok
 			dev.LinkSpeed = tt.linkSpeed
 			dev.MaxLinkSpeed = tt.maxSpeed
 			applyPCIeLinkSpeedWarning(&dev)
 			gotWarn := dev.Status != nil && *dev.Status == statusWarning
 			if gotWarn != tt.wantWarning {
 				t.Fatalf("wantWarning=%v gotWarning=%v (status=%v)", tt.wantWarning, gotWarn, dev.Status)
 			}
 			if tt.wantWarning {
 				if dev.ErrorDescription == nil {
 					t.Fatal("expected ErrorDescription to be set")
 				}
 				if !strings.Contains(*dev.ErrorDescription, tt.wantGenIn) {
 					t.Fatalf("ErrorDescription %q does not contain %q", *dev.ErrorDescription, tt.wantGenIn)
 				}
 			} else {
 				// Without a warning the description must stay untouched.
 				if dev.ErrorDescription != nil {
 					t.Fatalf("unexpected ErrorDescription: %s", *dev.ErrorDescription)
 				}
 			}
 		})
 	}
 }
--- a/audit/internal/collector/psu.go
+++ b/audit/internal/collector/psu.go
@@ -160,11 +160,57 @@ type psuSDR struct {
 }
 var psuSlotPatterns = []*regexp.Regexp{
-	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),
+	// MSI/underscore style: PSU1_POWER_IN, PSU2_POWER_OUT — underscore is \w so \b
-	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),
+	// does not fire after the digit; match explicitly with underscore terminator.
-	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),
+	regexp.MustCompile(`(?i)\bpsu([0-9]+)_`),
-	regexp.MustCompile(`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`),
+	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),                    // PSU1, PS1, ps 2
-	regexp.MustCompile(`(?i)\bbay\s*([0-9]+)\b`),
+	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),                      // PS 6, PS6
 	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),                     // PWS1
 	regexp.MustCompile(`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`), // Power Supply 1, Power Supply Bay 3
 	regexp.MustCompile(`(?i)\bbay\s*([0-9]+)\b`),                     // Bay 1
 	// Fallback for xFusion-style generic numbered PSU sensors (Power1, Power2, …).
 	// Must be last: "power supply N" is already caught by the pattern above.
 	regexp.MustCompile(`(?i)\bpower([0-9]+)\b`),
 }
 // isPSUInputPower reports whether name denotes an AC-input power sensor.
 // Vendor spellings covered (shown before lower-casing):
 //   MSI:     PSU1_POWER_IN, PSU1_PIN
 //   MLT:     PSU1_PIN
 //   xFusion: (matched via default fallback — no explicit keyword)
 //   HPE:     PS1 Input Power, PS1 Input Watts
 // The match is a case-sensitive substring test against lower-case keywords,
 // so name must already be lower-cased by the caller.
 func isPSUInputPower(name string) bool {
 	// "power_in" also covers "_power_in", so only the shorter form is listed.
 	for _, keyword := range [...]string{"input power", "input watts", "_pin", " pin", "power_in"} {
 		if strings.Contains(name, keyword) {
 			return true
 		}
 	}
 	return false
 }
 // isPSUOutputPower reports whether name denotes a DC-output power sensor.
 // Vendor spellings covered (shown before lower-casing):
 //   MSI:     PSU1_POWER_OUT
 //   MLT:     PSU1_POUT
 //   xFusion: PS1 POut
 // Bay-style names ("power supply bay", "psu bay") are treated as output too.
 // Keywords are lower-case and compared as plain substrings.
 func isPSUOutputPower(name string) bool {
 	outputKeywords := [...]string{
 		"output power",
 		"output watts",
 		"_pout",
 		" pout",
 		"_power_out",
 		"power_out",
 		"power supply bay",
 		"psu bay",
 	}
 	for _, keyword := range outputKeywords {
 		if strings.Contains(name, keyword) {
 			return true
 		}
 	}
 	return false
 }
 // parseBoundedFloat parses a numeric value from an SDR value field and
 // validates it is within (0, max]. Returns nil for unparsable, zero, negative,
 // or out-of-range values — these indicate missing/off/fault sensor readings.
 // NaN is rejected as well, should the underlying parser ever yield one.
 func parseBoundedFloat(raw string, max float64) *float64 {
 	v := parseFloatPtr(raw)
 	if v == nil {
 		return nil
 	}
 	// Written as a positive range test on purpose: every comparison with NaN
 	// is false, so NaN fails this check instead of slipping through a
 	// "<= 0 || > max" rejection test.
 	if *v > 0 && *v <= max {
 		return v
 	}
 	return nil
 }
 func parsePSUSDR(raw string) map[int]psuSDR {
@@ -194,24 +240,59 @@ func parsePSUSDR(raw string) map[int]psuSDR {
 		lowerName := strings.ToLower(name)
 		switch {
-		case strings.Contains(lowerName, "input power"):
+		case isPSUInputPower(lowerName):
-			entry.inputPowerW = parseFloatPtr(value)
+			entry.inputPowerW = parseBoundedFloat(value, 6000)
-		case strings.Contains(lowerName, "output power"):
+		case isPSUOutputPower(lowerName):
-			entry.outputPowerW = parseFloatPtr(value)
+			entry.outputPowerW = parseBoundedFloat(value, 6000)
 		case strings.Contains(lowerName, "power supply bay"), strings.Contains(lowerName, "psu bay"):
 			entry.outputPowerW = parseFloatPtr(value)
 		case strings.Contains(lowerName, "input voltage"), strings.Contains(lowerName, "ac input"):
 			entry.inputVoltage = parseFloatPtr(value)
 		case strings.Contains(lowerName, "temp"):
 			entry.temperatureC = parseFloatPtr(value)
 		case strings.Contains(lowerName, "health"), strings.Contains(lowerName, "remaining life"), strings.Contains(lowerName, "life remaining"):
 			entry.healthPct = parsePercentPtr(value)
 		default:
 			// Generic PSU power reading: sensor matched a slot pattern but carries
 			// no input/output keyword (e.g. xFusion "Power1", "Power2"). Treat as
 			// AC input if the value looks like wattage and no better data is set yet.
 			if entry.inputPowerW == nil {
 				entry.inputPowerW = parseBoundedFloat(value, 6000)
 			}
 		}
 		out[slot] = entry
 	}
 	return out
 }
 // PSUSlotPower holds SDR power readings for one PSU slot.
 // Slot key used by PSUSlotsFromSDR is the 0-based index string,
 // matching HardwarePowerSupply.Slot in the audit schema.
 // All fields are omitted from JSON when empty/nil.
 type PSUSlotPower struct {
 	// InputW is the slot's input power reading copied from the parsed SDR
 	// entry; nil when no usable reading was found.
 	InputW  *float64 `json:"input_w,omitempty"`
 	// OutputW is the slot's output power reading copied from the parsed SDR
 	// entry; nil when no usable reading was found.
 	OutputW *float64 `json:"output_w,omitempty"`
 	// Status is the status string carried by the parsed SDR entry.
 	Status  string   `json:"status,omitempty"`
 }
 // PSUSlotsFromSDR turns `ipmitool sdr` output into per-slot PSU readings,
 // reusing the slot patterns of the hardware audit collector (via parsePSUSDR).
 // Covers MSI (PSU1_POWER_IN), xFusion (Power1, PS1 POut) and MLT (PSU1_PIN).
 // Keys of the returned map are 0-based index strings matching
 // HardwarePowerSupply.Slot; nil is returned when no PSU sensor was parsed.
 func PSUSlotsFromSDR(sdrOutput string) map[string]PSUSlotPower {
 	parsed := parsePSUSDR(sdrOutput)
 	if len(parsed) == 0 {
 		return nil
 	}
 	slots := make(map[string]PSUSlotPower, len(parsed))
 	for number, reading := range parsed {
 		// Parsed slot numbers are shifted down by one because the audit
 		// schema uses 0-based slots.
 		slots[strconv.Itoa(number-1)] = PSUSlotPower{
 			InputW:  reading.inputPowerW,
 			OutputW: reading.outputPowerW,
 			Status:  reading.status,
 		}
 	}
 	return slots
 }
 func synthesizePSUsFromSDR(sdr map[int]psuSDR) []schema.HardwarePowerSupply {
 	if len(sdr) == 0 {
 		return nil
--- a/audit/internal/collector/psu_sdr_test.go
+++ b/audit/internal/collector/psu_sdr_test.go
@@ -49,6 +49,10 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 		{name: "PWS1 Status", want: 1},
 		{name: "Power Supply Bay 8", want: 8},
 		{name: "PS 6 Input Power", want: 6},
 		// MSI underscore format — \b does not fire between digit and '_'
 		{name: "PSU1_POWER_IN", want: 1},
 		{name: "PSU2_POWER_OUT", want: 2},
 		{name: "PSU4_STATUS", want: 4},
 	}
 	for _, tt := range tests {
@@ -59,6 +63,31 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 	}
 }
 // TestParsePSUSDRMSIFormat checks that parsePSUSDR understands MSI-style
 // underscore sensor names (PSUn_POWER_IN / PSUn_POWER_OUT) and assigns input
 // and output wattage to the right slot for both PSUs.
 func TestParsePSUSDRMSIFormat(t *testing.T) {
 	t.Parallel()
 	raw := `
 PSU1_STATUS      | F1h | ok
 PSU1_POWER_OUT   | 928 Watts | ok
 PSU1_POWER_IN    | 976 Watts | ok
 PSU2_STATUS      | F2h | ok
 PSU2_POWER_OUT   | 944 Watts | ok
 PSU2_POWER_IN    | 992 Watts | ok
 `
 	got := parsePSUSDR(raw)
 	if len(got) != 2 {
 		t.Fatalf("len(got)=%d want 2", len(got))
 	}
 	checks := []struct {
 		label string
 		got   *float64
 		want  float64
 	}{
 		{"psu1 input power", got[1].inputPowerW, 976},
 		{"psu1 output power", got[1].outputPowerW, 928},
 		{"psu2 input power", got[2].inputPowerW, 992},
 		{"psu2 output power", got[2].outputPowerW, 944},
 	}
 	for _, c := range checks {
 		if c.got == nil {
 			t.Fatalf("%s=<nil> want %v", c.label, c.want)
 		}
 		// Print the value, not the pointer, so a failure shows the reading.
 		if *c.got != c.want {
 			t.Fatalf("%s=%v want %v", c.label, *c.got, c.want)
 		}
 	}
 }
 func TestSynthesizePSUsFromSDR(t *testing.T) {
 	t.Parallel()
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_power_autotune.go
+++ b/audit/internal/platform/benchmark_power_autotune.go
@@ -0,0 +1,735 @@
 package platform
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"math"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"sort"
 	"strings"
 	"time"
 )
 // Defaults and tunables for the benchmark power-source autotune run.
 const (
 	// benchmarkPowerAutotuneVersion is stamped into a saved config whose
 	// Version field is unset (<= 0).
 	benchmarkPowerAutotuneVersion         = 1
 	// benchmarkPowerAutotuneIdleSec is the length of the idle sampling window, in seconds.
 	benchmarkPowerAutotuneIdleSec         = 60
 	// benchmarkPowerAutotuneLoadSec is the length of the full-load sampling window, in seconds.
 	benchmarkPowerAutotuneLoadSec         = 90
 	// benchmarkPowerAutotuneSampleInterval is the pause between samples, in
 	// seconds; it is also the sampler default when a non-positive interval is requested.
 	benchmarkPowerAutotuneSampleInterval  = 3
 	// defaultBenchmarkPowerSourceConfigPath is returned by
 	// BenchmarkPowerSourceConfigPath when no base directory is given.
 	defaultBenchmarkPowerSourceConfigPath = "/appdata/bee/export/bee-bench/power-source-autotune.json"
 )
 // BenchmarkPowerSourceConfigPath returns the location of the autotune config
 // file. For a non-blank baseDir the file lives in the PARENT directory of
 // baseDir; for a blank baseDir the built-in default path is returned.
 func BenchmarkPowerSourceConfigPath(baseDir string) string {
 	if trimmed := strings.TrimSpace(baseDir); trimmed != "" {
 		return filepath.Join(filepath.Dir(trimmed), "power-source-autotune.json")
 	}
 	return defaultBenchmarkPowerSourceConfigPath
 }
 // LoadBenchmarkPowerAutotuneConfig reads and decodes the autotune config at
 // path. Read and JSON errors are returned unchanged; a config whose
 // selected_source is blank is rejected with an error.
 func LoadBenchmarkPowerAutotuneConfig(path string) (*BenchmarkPowerAutotuneConfig, error) {
 	data, readErr := os.ReadFile(path)
 	if readErr != nil {
 		return nil, readErr
 	}
 	loaded := new(BenchmarkPowerAutotuneConfig)
 	if decodeErr := json.Unmarshal(data, loaded); decodeErr != nil {
 		return nil, decodeErr
 	}
 	if strings.TrimSpace(loaded.SelectedSource) == "" {
 		return nil, fmt.Errorf("autotune config missing selected_source")
 	}
 	return loaded, nil
 }
 // SaveBenchmarkPowerAutotuneConfig writes cfg as indented JSON to path,
 // creating the parent directory if needed. The data is first written to
 // "<path>.tmp" and then renamed over path so readers never observe a
 // half-written config. A non-positive cfg.Version is replaced by the current
 // autotune version. Returns an error for a blank path or any I/O/marshal
 // failure; on failure the temporary file is removed (best effort).
 func SaveBenchmarkPowerAutotuneConfig(path string, cfg BenchmarkPowerAutotuneConfig) error {
 	if strings.TrimSpace(path) == "" {
 		return fmt.Errorf("empty autotune config path")
 	}
 	if cfg.Version <= 0 {
 		cfg.Version = benchmarkPowerAutotuneVersion
 	}
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return err
 	}
 	data, err := json.MarshalIndent(cfg, "", "  ")
 	if err != nil {
 		return err
 	}
 	tmp := path + ".tmp"
 	if err := os.WriteFile(tmp, data, 0644); err != nil {
 		// A partially written temp file is useless; do not leave it behind.
 		// The removal error is ignored because the write error is what matters.
 		_ = os.Remove(tmp)
 		return err
 	}
 	if err := os.Rename(tmp, path); err != nil {
 		// Same cleanup when the final rename fails, so no stale ".tmp" remains.
 		_ = os.Remove(tmp)
 		return err
 	}
 	return nil
 }
 // LoadSystemPowerSourceConfig loads the autotune config from the path that
 // BenchmarkPowerSourceConfigPath derives from exportDir, and returns whatever
 // LoadBenchmarkPowerAutotuneConfig returns for that path.
 func LoadSystemPowerSourceConfig(exportDir string) (*BenchmarkPowerAutotuneConfig, error) {
 	return LoadBenchmarkPowerAutotuneConfig(BenchmarkPowerSourceConfigPath(exportDir))
 }
 // ResetBenchmarkPowerAutotuneConfig deletes the autotune config at path.
 // A blank path is an error; a file that does not exist counts as success.
 // Any other removal error is returned unchanged.
 func ResetBenchmarkPowerAutotuneConfig(path string) error {
 	if strings.TrimSpace(path) == "" {
 		return fmt.Errorf("empty autotune config path")
 	}
 	switch removeErr := os.Remove(path); {
 	case removeErr == nil, os.IsNotExist(removeErr):
 		return nil
 	default:
 		return removeErr
 	}
 }
 // normalizeBenchmarkPowerSource maps a free-form source name to one of the
 // two known power sources. The comparison ignores case and surrounding
 // whitespace; anything that is not the SDR PSU-input source becomes DCMI.
 func normalizeBenchmarkPowerSource(source string) string {
 	cleaned := strings.TrimSpace(strings.ToLower(source))
 	if cleaned == BenchmarkPowerSourceSDRPSUInput {
 		return BenchmarkPowerSourceSDRPSUInput
 	}
 	return BenchmarkPowerSourceDCMI
 }
 // ResolveSystemPowerDecision decides which system power source to use.
 //
 // When a valid autotune config can be loaded for exportDir, its (normalized)
 // selected source is returned with Mode "autotuned" and Configured set.
 // Otherwise a temporary fallback is chosen with Mode "fallback": the SDR PSU
 // input source if it currently yields a positive reading, else DCMI.
 func ResolveSystemPowerDecision(exportDir string) SystemPowerSourceDecision {
 	cfg, err := LoadSystemPowerSourceConfig(exportDir)
 	if err == nil && cfg != nil && strings.TrimSpace(cfg.SelectedSource) != "" {
 		selected := normalizeBenchmarkPowerSource(cfg.SelectedSource)
 		return SystemPowerSourceDecision{
 			Configured:      true,
 			SelectedSource:  selected,
 			EffectiveSource: selected,
 			Mode:            "autotuned",
 			Reason:          strings.TrimSpace(cfg.Reason),
 			ConfiguredAt:    cfg.UpdatedAt,
 		}
 	}
 	// Only the SDR reading influences the fallback choice, so probe just that
 	// source instead of sampling every source and discarding the DCMI value.
 	if value, sdrErr := queryBenchmarkPowerSourceW(BenchmarkPowerSourceSDRPSUInput); sdrErr == nil && value > 0 {
 		return SystemPowerSourceDecision{
 			Configured:      false,
 			EffectiveSource: BenchmarkPowerSourceSDRPSUInput,
 			Mode:            "fallback",
 			Reason:          "autotune config not found; using temporary fallback source sdr_psu_input",
 		}
 	}
 	return SystemPowerSourceDecision{
 		Configured:      false,
 		EffectiveSource: BenchmarkPowerSourceDCMI,
 		Mode:            "fallback",
 		Reason:          "autotune config not found; using temporary fallback source dcmi",
 	}
 }
 // SampleSystemPowerResolved resolves the system power source for exportDir
 // and returns one reading from it, in watts, together with the decision used.
 //
 // If the effective source yields no positive reading and the decision came
 // from a saved config, the other source (DCMI <-> SDR PSU input) is tried and
 // the decision is marked "degraded". An unconfigured decision is not retried.
 // Whenever the returned reading is 0 the returned error is non-nil.
 func SampleSystemPowerResolved(exportDir string) (float64, SystemPowerSourceDecision, error) {
 	decision := ResolveSystemPowerDecision(exportDir)
 	if decision.EffectiveSource == "" {
 		return 0, decision, fmt.Errorf("system power source unavailable")
 	}
 	value, err := queryBenchmarkPowerSourceW(decision.EffectiveSource)
 	if err == nil && value > 0 {
 		return value, decision, nil
 	}
 	if !decision.Configured {
 		return 0, decision, fmt.Errorf("system power source unavailable")
 	}
 	fallback := BenchmarkPowerSourceDCMI
 	if decision.EffectiveSource == BenchmarkPowerSourceDCMI {
 		fallback = BenchmarkPowerSourceSDRPSUInput
 	}
 	if fallbackValue, fallbackErr := queryBenchmarkPowerSourceW(fallback); fallbackErr == nil && fallbackValue > 0 {
 		decision.Mode = "degraded"
 		decision.Reason = fmt.Sprintf("configured source %s unavailable; using degraded fallback %s", decision.SelectedSource, fallback)
 		decision.EffectiveSource = fallback
 		return fallbackValue, decision, nil
 	}
 	decision.Mode = "degraded"
 	decision.Reason = fmt.Sprintf("configured source %s unavailable and no fallback source responded", decision.SelectedSource)
 	if err == nil {
 		// The configured source answered without an error but with a
 		// non-positive value; never report that as success with 0 W.
 		err = fmt.Errorf("configured source %s returned no power reading", decision.SelectedSource)
 	}
 	return 0, decision, err
 }
 // queryBenchmarkPowerSourceW takes one reading from the given power source.
 // The source name is normalized first: the SDR PSU-input source reads the
 // PSU input value from the IPMI SDR sample and fails when it is not positive;
 // every other name is served by the IPMI server power query.
 func queryBenchmarkPowerSourceW(source string) (float64, error) {
 	if normalizeBenchmarkPowerSource(source) != BenchmarkPowerSourceSDRPSUInput {
 		return queryIPMIServerPowerW()
 	}
 	if reading := sampleIPMISDRPowerSensors(); reading.PSUInW > 0 {
 		return reading.PSUInW, nil
 	}
 	return 0, fmt.Errorf("sdr psu input unavailable")
 }
 // sampleBenchmarkPowerSources takes one reading from each known power source
 // (DCMI first, then SDR PSU input) and returns the positive, error-free ones
 // keyed by source name. Sources without a usable reading are left out, so the
 // result may be empty but is never nil.
 func sampleBenchmarkPowerSources() map[string]float64 {
 	readings := map[string]float64{}
 	dcmiW, dcmiErr := queryIPMIServerPowerW()
 	if dcmiErr == nil && dcmiW > 0 {
 		readings[BenchmarkPowerSourceDCMI] = dcmiW
 	}
 	sdrW, sdrErr := queryBenchmarkPowerSourceW(BenchmarkPowerSourceSDRPSUInput)
 	if sdrErr == nil && sdrW > 0 {
 		readings[BenchmarkPowerSourceSDRPSUInput] = sdrW
 	}
 	return readings
 }
 // sampleBenchmarkPowerSourceSeries samples one power source for durationSec
 // seconds and returns the mean of the collected readings. The boolean is
 // false when the duration is not positive or no reading was collected.
 func sampleBenchmarkPowerSourceSeries(ctx context.Context, source string, durationSec, intervalSec int) (float64, bool) {
 	if durationSec <= 0 {
 		return 0, false
 	}
 	if readings := collectSelectedPowerSourceSamples(ctx, source, durationSec, intervalSec); len(readings) > 0 {
 		return benchmarkMean(readings), true
 	}
 	return 0, false
 }
 // collectSelectedPowerSourceSamples runs the background sampler for one power
 // source until durationSec seconds have passed or ctx is done, whichever
 // comes first, then stops it and returns the readings gathered so far.
 // A non-positive duration yields nil without sampling.
 func collectSelectedPowerSourceSamples(ctx context.Context, source string, durationSec, intervalSec int) []float64 {
 	if durationSec <= 0 {
 		return nil
 	}
 	stop := make(chan struct{})
 	results := startSelectedPowerSourceSampler(stop, source, intervalSec)
 	window := time.NewTimer(time.Duration(durationSec) * time.Second)
 	defer window.Stop()
 	select {
 	case <-window.C:
 	case <-ctx.Done():
 	}
 	// Closing stop tells the sampler to hand over its readings and exit.
 	close(stop)
 	return <-results
 }
 // startSelectedPowerSourceSampler starts a goroutine that reads the given
 // power source once immediately and then every intervalSec seconds, keeping
 // only error-free, positive readings. When stopCh is closed (or receives a
 // value) the goroutine sends the collected readings on the returned channel
 // exactly once, closes the channel, and exits. A non-positive intervalSec
 // falls back to benchmarkPowerAutotuneSampleInterval.
 func startSelectedPowerSourceSampler(stopCh <-chan struct{}, source string, intervalSec int) <-chan []float64 {
 	if intervalSec <= 0 {
 		intervalSec = benchmarkPowerAutotuneSampleInterval
 	}
 	// Buffered by one so the final send never blocks on a slow receiver.
 	ch := make(chan []float64, 1)
 	go func() {
 		defer close(ch)
 		var samples []float64
 		// record appends one reading; failed or non-positive reads are skipped.
 		record := func() {
 			if w, err := queryBenchmarkPowerSourceW(source); err == nil && w > 0 {
 				samples = append(samples, w)
 			}
 		}
 		// Take the first reading right away instead of waiting one interval.
 		record()
 		ticker := time.NewTicker(time.Duration(intervalSec) * time.Second)
 		defer ticker.Stop()
 		for {
 			select {
 			case <-stopCh:
 				ch <- samples
 				return
 			case <-ticker.C:
 				record()
 			}
 		}
 	}()
 	return ch
 }
 // benchmarkPowerAutotuneSample is one telemetry row taken during an autotune
 // phase: GPU and CPU load alongside the readings of every power source that
 // responded at that moment.
 type benchmarkPowerAutotuneSample struct {
 	// ElapsedSec is the time since the phase started, in seconds.
 	ElapsedSec     float64
 	// GPUAvgUsagePct is the mean usage across the sampled GPUs; it stays 0
 	// when GPU metrics could not be read.
 	GPUAvgUsagePct float64
 	// CPUUsagePct is the CPU load reported by sampleCPULoadPct.
 	CPUUsagePct    float64
 	// GPUSumPowerW is the sum of the power reported by the sampled GPUs.
 	GPUSumPowerW   float64
 	// Sources maps a power source name to its reading; only sources with a
 	// positive, error-free reading are present.
 	Sources        map[string]float64
 }
 // collectBenchmarkPowerAutotuneSamples gathers telemetry rows for one
 // autotune phase. It samples immediately, logs each row through logFunc, and
 // then repeats every benchmarkPowerAutotuneSampleInterval seconds until
 // durationSec seconds have elapsed or ctx is cancelled. It returns the rows
 // collected so far; a non-positive duration returns nil.
 func collectBenchmarkPowerAutotuneSamples(ctx context.Context, phase string, gpuIndices []int, durationSec int, logFunc func(string)) []benchmarkPowerAutotuneSample {
 	if durationSec <= 0 {
 		return nil
 	}
 	var out []benchmarkPowerAutotuneSample
 	deadline := time.Now().Add(time.Duration(durationSec) * time.Second)
 	start := time.Now()
 	for {
 		if ctx.Err() != nil {
 			return out
 		}
 		row := benchmarkPowerAutotuneSample{
 			ElapsedSec:  time.Since(start).Seconds(),
 			CPUUsagePct: sampleCPULoadPct(),
 			Sources:     sampleBenchmarkPowerSources(),
 		}
 		// GPU fields stay zero when GPU metrics are unavailable for this row.
 		if gpuRows, err := sampleGPUMetrics(gpuIndices); err == nil && len(gpuRows) > 0 {
 			var usageSum float64
 			for _, gpu := range gpuRows {
 				row.GPUSumPowerW += gpu.PowerW
 				usageSum += gpu.UsagePct
 			}
 			row.GPUAvgUsagePct = usageSum / float64(len(gpuRows))
 		}
 		out = append(out, row)
 		logBenchmarkPowerAutotuneSample(phase, row, logFunc)
 		// The deadline is checked after sampling, so the last row may be taken
 		// up to one interval past the nominal end of the window.
 		if time.Now().After(deadline) {
 			return out
 		}
 		select {
 		case <-ctx.Done():
 			return out
 		case <-time.After(benchmarkPowerAutotuneSampleInterval * time.Second):
 		}
 	}
 }
 // logBenchmarkPowerAutotuneSample emits one log line describing a telemetry
 // row: elapsed time, GPU/CPU load, summed GPU power, and the reading of each
 // known power source ("n/a" when a source has no positive reading).
 // Nothing is logged when logFunc is nil.
 func logBenchmarkPowerAutotuneSample(phase string, sample benchmarkPowerAutotuneSample, logFunc func(string)) {
 	if logFunc == nil {
 		return
 	}
 	knownSources := [...]string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput}
 	var readings []string
 	for _, name := range knownSources {
 		watts, present := sample.Sources[name]
 		if !present || watts <= 0 {
 			readings = append(readings, fmt.Sprintf("%s=n/a", name))
 			continue
 		}
 		readings = append(readings, fmt.Sprintf("%s=%.0fW", name, watts))
 	}
 	line := fmt.Sprintf(
 		"autotune %s sample t=%.0fs gpu_avg_util=%.1f%% gpu_sum_power=%.0fW cpu_load=%.1f%% %s",
 		phase,
 		sample.ElapsedSec,
 		sample.GPUAvgUsagePct,
 		sample.GPUSumPowerW,
 		sample.CPUUsagePct,
 		strings.Join(readings, " "),
 	)
 	logFunc(line)
 }
 // logBenchmarkPowerAutotunePhaseSummary emits one log line summarizing a
 // whole phase: sample count, mean and p95 GPU/CPU load, mean GPU power, and
 // the mean reading per known power source ("n/a" when a source never produced
 // a positive reading). Nothing is logged without samples or without logFunc.
 func logBenchmarkPowerAutotunePhaseSummary(phase string, samples []benchmarkPowerAutotuneSample, logFunc func(string)) {
 	if len(samples) == 0 || logFunc == nil {
 		return
 	}
 	var gpuUtil, cpuUtil, gpuWatts []float64
 	perSource := map[string][]float64{}
 	for i := range samples {
 		row := &samples[i]
 		gpuUtil = append(gpuUtil, row.GPUAvgUsagePct)
 		cpuUtil = append(cpuUtil, row.CPUUsagePct)
 		gpuWatts = append(gpuWatts, row.GPUSumPowerW)
 		for name, watts := range row.Sources {
 			if watts > 0 {
 				perSource[name] = append(perSource[name], watts)
 			}
 		}
 	}
 	var averages []string
 	for _, name := range [...]string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput} {
 		if readings := perSource[name]; len(readings) > 0 {
 			averages = append(averages, fmt.Sprintf("%s_avg=%.0fW", name, benchmarkMean(readings)))
 		} else {
 			averages = append(averages, fmt.Sprintf("%s_avg=n/a", name))
 		}
 	}
 	logFunc(fmt.Sprintf(
 		"autotune %s summary samples=%d gpu_avg_util=%.1f%% gpu_p95_util=%.1f%% gpu_avg_power=%.0fW cpu_avg=%.1f%% cpu_p95=%.1f%% %s",
 		phase,
 		len(samples),
 		benchmarkMean(gpuUtil),
 		benchmarkPercentile(gpuUtil, 95),
 		benchmarkMean(gpuWatts),
 		benchmarkMean(cpuUtil),
 		benchmarkPercentile(cpuUtil, 95),
 		strings.Join(averages, " "),
 	))
 }
 // logBenchmarkPowerAutotuneSelection logs every autotune candidate: either
 // that it was unavailable, or its idle/load averages, delta, relative error
 // and confidence, with " SELECTED" appended for the chosen source. A second
 // line carries the candidate's selection notes when it has any.
 // Nothing is logged when logFunc is nil.
 func logBenchmarkPowerAutotuneSelection(candidates []BenchmarkPowerAutotuneCandidate, selectedSource string, gpuDelta float64, logFunc func(string)) {
 	if logFunc == nil {
 		return
 	}
 	for i := range candidates {
 		entry := candidates[i]
 		if !entry.Available {
 			logFunc(fmt.Sprintf("autotune candidate %s unavailable", entry.Source))
 			continue
 		}
 		marker := ""
 		if entry.Source == selectedSource {
 			marker = " SELECTED"
 		}
 		logFunc(fmt.Sprintf(
 			"autotune candidate %s idle_avg=%.0fW load_avg=%.0fW delta=%.0fW gpu_delta=%.0fW relative_error=%.3f confidence=%.0f%%%s",
 			entry.Source,
 			entry.IdleAvgW,
 			entry.LoadAvgW,
 			entry.DeltaW,
 			gpuDelta,
 			entry.RelativeError,
 			entry.Confidence*100,
 			marker,
 		))
 		if strings.TrimSpace(entry.SelectionNotes) == "" {
 			continue
 		}
 		logFunc(fmt.Sprintf("autotune candidate %s reason: %s", entry.Source, entry.SelectionNotes))
 	}
 }
 // validateBenchmarkPowerAutotuneIdle decides whether the idle-phase samples
 // really describe an idle machine. It reports mean and p95 GPU and CPU usage
 // (rounded to one decimal) and marks the result valid only when all four stay
 // within their limits: GPU avg <= 5%, GPU p95 <= 10%, CPU avg <= 20%,
 // CPU p95 <= 35%. The first exceeded limit is named in Reason. With no
 // samples at all the result is invalid with an explanatory Reason.
 func validateBenchmarkPowerAutotuneIdle(samples []benchmarkPowerAutotuneSample) *BenchmarkPowerAutotuneValidation {
 	result := &BenchmarkPowerAutotuneValidation{}
 	if len(samples) == 0 {
 		result.Reason = "no idle telemetry samples collected"
 		return result
 	}
 	var gpuUsage []float64
 	var cpuUsage []float64
 	for _, sample := range samples {
 		// NOTE(review): every sample counts as a GPU sample, including rows
 		// whose GPU usage is 0 — presumably also rows where GPU metrics could
 		// not be read; confirm that is intended.
 		gpuUsage = append(gpuUsage, sample.GPUAvgUsagePct)
 		// CPU readings of 0 (or below) are excluded from the CPU statistics.
 		if sample.CPUUsagePct > 0 {
 			cpuUsage = append(cpuUsage, sample.CPUUsagePct)
 		}
 	}
 	result.GPUSamples = len(gpuUsage)
 	result.CPUSamples = len(cpuUsage)
 	// Round to one decimal place so the stored values match the %.1f output.
 	result.GPUAvgUsagePct = math.Round(benchmarkMean(gpuUsage)*10) / 10
 	result.GPUP95UsagePct = math.Round(benchmarkPercentile(gpuUsage, 95)*10) / 10
 	result.CPUAvgUsagePct = math.Round(benchmarkMean(cpuUsage)*10) / 10
 	result.CPUP95UsagePct = math.Round(benchmarkPercentile(cpuUsage, 95)*10) / 10
 	// Checks run in this order; only the first failing limit is reported.
 	switch {
 	case result.GPUAvgUsagePct > 5:
 		result.Reason = fmt.Sprintf("idle validation failed: average GPU load %.1f%% exceeds 5%%", result.GPUAvgUsagePct)
 	case result.GPUP95UsagePct > 10:
 		result.Reason = fmt.Sprintf("idle validation failed: p95 GPU load %.1f%% exceeds 10%%", result.GPUP95UsagePct)
 	case result.CPUAvgUsagePct > 20:
 		result.Reason = fmt.Sprintf("idle validation failed: average CPU load %.1f%% exceeds 20%%", result.CPUAvgUsagePct)
 	case result.CPUP95UsagePct > 35:
 		result.Reason = fmt.Sprintf("idle validation failed: p95 CPU load %.1f%% exceeds 35%%", result.CPUP95UsagePct)
 	default:
 		result.Valid = true
 	}
 	return result
 }
 // chooseBenchmarkPowerAutotuneSource picks the server power source whose
 // idle-to-load increase best matches the increase reported by the GPUs.
 //
 // It returns the selected source name, one candidate record per known source
 // (DCMI, SDR PSU input) with the winner marked Selected, the mean GPU power
 // during idle and during load, and an error when no source has readings in
 // both phases with a positive delta.
 func chooseBenchmarkPowerAutotuneSource(idle, load []benchmarkPowerAutotuneSample) (string, []BenchmarkPowerAutotuneCandidate, float64, float64, error) {
 	idleBySource := map[string][]float64{}
 	loadBySource := map[string][]float64{}
 	var idleGPU []float64
 	var loadGPU []float64
 	// Bucket positive readings per source for each phase; GPU power is kept
 	// for every sample regardless of its value.
 	for _, sample := range idle {
 		idleGPU = append(idleGPU, sample.GPUSumPowerW)
 		for source, value := range sample.Sources {
 			if value > 0 {
 				idleBySource[source] = append(idleBySource[source], value)
 			}
 		}
 	}
 	for _, sample := range load {
 		loadGPU = append(loadGPU, sample.GPUSumPowerW)
 		for source, value := range sample.Sources {
 			if value > 0 {
 				loadBySource[source] = append(loadBySource[source], value)
 			}
 		}
 	}
 	idleGPUAvg := benchmarkMean(idleGPU)
 	loadGPUAvg := benchmarkMean(loadGPU)
 	gpuDelta := loadGPUAvg - idleGPUAvg
 	// When load is not above idle, fall back to the absolute load average as
 	// the reference instead of a zero or negative delta.
 	if gpuDelta <= 0 {
 		gpuDelta = loadGPUAvg
 	}
 	candidates := []BenchmarkPowerAutotuneCandidate{
 		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceDCMI, idleBySource[BenchmarkPowerSourceDCMI], loadBySource[BenchmarkPowerSourceDCMI], gpuDelta),
 		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceSDRPSUInput, idleBySource[BenchmarkPowerSourceSDRPSUInput], loadBySource[BenchmarkPowerSourceSDRPSUInput], gpuDelta),
 	}
 	// Only sources seen in both phases whose power actually rose under load
 	// are eligible for selection.
 	available := make([]BenchmarkPowerAutotuneCandidate, 0, len(candidates))
 	for _, candidate := range candidates {
 		if candidate.Available && candidate.DeltaW > 0 {
 			available = append(available, candidate)
 		}
 	}
 	if len(available) == 0 {
 		return "", candidates, idleGPUAvg, loadGPUAvg, fmt.Errorf("no usable server power source samples collected")
 	}
 	// Ordering: when two different sources are within 0.10 relative error of
 	// each other, SDR PSU input is preferred; otherwise the lower relative
 	// error wins, and with equal error the candidate with more samples wins.
 	sort.Slice(available, func(i, j int) bool {
 		if math.Abs(available[i].RelativeError-available[j].RelativeError) <= 0.10 {
 			if available[i].Source != available[j].Source {
 				return available[i].Source == BenchmarkPowerSourceSDRPSUInput
 			}
 		}
 		if available[i].RelativeError != available[j].RelativeError {
 			return available[i].RelativeError < available[j].RelativeError
 		}
 		return available[i].Samples > available[j].Samples
 	})
 	selected := available[0]
 	for idx := range candidates {
 		if candidates[idx].Source == selected.Source {
 			candidates[idx].Selected = true
 			// NOTE(review): the note always says "closest", but the tie-break
 			// above can select SDR PSU input even when DCMI has the smaller
 			// relative error (within 0.10) — consider rewording.
 			candidates[idx].SelectionNotes = fmt.Sprintf("selected because delta %.0f W is closest to GPU delta %.0f W (relative error %.3f)", selected.DeltaW, gpuDelta, selected.RelativeError)
 		}
 	}
 	return selected.Source, candidates, idleGPUAvg, loadGPUAvg, nil
 }
 // buildBenchmarkPowerAutotuneCandidate summarizes one power source for the
 // autotune comparison. A source is available only when it has readings in
 // both phases; Samples is the smaller of the two reading counts. For an
 // available source the idle/load means and their delta are filled in, and —
 // when gpuDelta is positive — the relative error against gpuDelta plus a
 // confidence of 1-error clamped at 0.
 func buildBenchmarkPowerAutotuneCandidate(source string, idle, load []float64, gpuDelta float64) BenchmarkPowerAutotuneCandidate {
 	seenInBothPhases := len(idle) > 0 && len(load) > 0
 	summary := BenchmarkPowerAutotuneCandidate{
 		Source:    source,
 		Available: seenInBothPhases,
 		Samples:   minInt(len(idle), len(load)),
 	}
 	if seenInBothPhases {
 		summary.IdleAvgW = benchmarkMean(idle)
 		summary.LoadAvgW = benchmarkMean(load)
 		summary.DeltaW = summary.LoadAvgW - summary.IdleAvgW
 		if gpuDelta > 0 {
 			summary.RelativeError = math.Abs(summary.DeltaW-gpuDelta) / gpuDelta
 			summary.Confidence = math.Max(0, 1-summary.RelativeError)
 		}
 	}
 	return summary
 }
 // renderBenchmarkPowerAutotuneSummary renders the autotune result as plain
 // "key=value" lines, one per line: run metadata first, then the selected
 // source (if any), the idle validation figures (if present), and finally the
 // per-candidate numbers. Candidate keys embed the source name.
 func renderBenchmarkPowerAutotuneSummary(result BenchmarkPowerAutotuneResult) string {
 	var b strings.Builder
 	// The timestamp is always written in UTC, RFC 3339 format.
 	fmt.Fprintf(&b, "generated_at=%s\n", result.GeneratedAt.UTC().Format(time.RFC3339))
 	fmt.Fprintf(&b, "status=%s\n", result.Status)
 	fmt.Fprintf(&b, "benchmark_kind=%s\n", result.BenchmarkKind)
 	fmt.Fprintf(&b, "profile=%s\n", result.Profile)
 	fmt.Fprintf(&b, "idle_duration_sec=%d\n", result.IdleDurationSec)
 	fmt.Fprintf(&b, "load_duration_sec=%d\n", result.LoadDurationSec)
 	fmt.Fprintf(&b, "sample_interval_sec=%d\n", result.SampleIntervalSec)
 	if result.SelectedSource != "" {
 		fmt.Fprintf(&b, "selected_source=%s\n", result.SelectedSource)
 	}
 	if result.IdleValidation != nil {
 		fmt.Fprintf(&b, "idle_valid=%t\n", result.IdleValidation.Valid)
 		fmt.Fprintf(&b, "idle_gpu_avg_usage_pct=%.1f\n", result.IdleValidation.GPUAvgUsagePct)
 		fmt.Fprintf(&b, "idle_gpu_p95_usage_pct=%.1f\n", result.IdleValidation.GPUP95UsagePct)
 		fmt.Fprintf(&b, "idle_cpu_avg_usage_pct=%.1f\n", result.IdleValidation.CPUAvgUsagePct)
 		fmt.Fprintf(&b, "idle_cpu_p95_usage_pct=%.1f\n", result.IdleValidation.CPUP95UsagePct)
 		if result.IdleValidation.Reason != "" {
 			fmt.Fprintf(&b, "idle_validation_error=%s\n", result.IdleValidation.Reason)
 		}
 	}
 	// Unavailable candidates only get their "available=false" line.
 	for _, candidate := range result.Candidates {
 		fmt.Fprintf(&b, "candidate_%s_available=%t\n", candidate.Source, candidate.Available)
 		if candidate.Available {
 			fmt.Fprintf(&b, "candidate_%s_idle_avg_w=%.0f\n", candidate.Source, candidate.IdleAvgW)
 			fmt.Fprintf(&b, "candidate_%s_load_avg_w=%.0f\n", candidate.Source, candidate.LoadAvgW)
 			fmt.Fprintf(&b, "candidate_%s_delta_w=%.0f\n", candidate.Source, candidate.DeltaW)
 			fmt.Fprintf(&b, "candidate_%s_relative_error=%.3f\n", candidate.Source, candidate.RelativeError)
 		}
 	}
 	return b.String()
 }
 // renderBenchmarkPowerAutotuneReport renders the autotune result as a
 // Markdown report: a header with run metadata, an optional "Idle Validation"
 // section, an optional "Candidates" table, and the result notes as a
 // trailing bullet list.
 func renderBenchmarkPowerAutotuneReport(result BenchmarkPowerAutotuneResult) string {
 	var b strings.Builder
 	b.WriteString("# Bee Bench Power Source Autotune\n\n")
 	// Header lines end with two spaces, Markdown's hard line break.
 	fmt.Fprintf(&b, "**Status:** %s  \n", result.Status)
 	fmt.Fprintf(&b, "**Benchmark kind:** %s  \n", result.BenchmarkKind)
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.Profile)
 	fmt.Fprintf(&b, "**Idle window:** %ds  \n", result.IdleDurationSec)
 	fmt.Fprintf(&b, "**Load window:** %ds  \n", result.LoadDurationSec)
 	fmt.Fprintf(&b, "**Sample interval:** %ds  \n", result.SampleIntervalSec)
 	if result.SelectedSource != "" {
 		fmt.Fprintf(&b, "**Selected source:** `%s`  \n", result.SelectedSource)
 	}
 	b.WriteString("\n")
 	if result.IdleValidation != nil {
 		b.WriteString("## Idle Validation\n\n")
 		fmt.Fprintf(&b, "- valid: %t\n", result.IdleValidation.Valid)
 		fmt.Fprintf(&b, "- GPU avg usage: %.1f%%\n", result.IdleValidation.GPUAvgUsagePct)
 		fmt.Fprintf(&b, "- GPU p95 usage: %.1f%%\n", result.IdleValidation.GPUP95UsagePct)
 		fmt.Fprintf(&b, "- CPU avg usage: %.1f%%\n", result.IdleValidation.CPUAvgUsagePct)
 		fmt.Fprintf(&b, "- CPU p95 usage: %.1f%%\n", result.IdleValidation.CPUP95UsagePct)
 		if result.IdleValidation.Reason != "" {
 			fmt.Fprintf(&b, "- reason: %s\n", result.IdleValidation.Reason)
 		}
 		b.WriteString("\n")
 	}
 	if len(result.Candidates) > 0 {
 		b.WriteString("## Candidates\n\n")
 		b.WriteString("| Source | Idle avg W | Load avg W | Delta W | Relative error | Selected |\n")
 		b.WriteString("|--------|------------|------------|---------|----------------|----------|\n")
 		for _, candidate := range result.Candidates {
 			// Unavailable sources get a placeholder row with dashes.
 			if !candidate.Available {
 				fmt.Fprintf(&b, "| %s | — | — | — | — | no |\n", candidate.Source)
 				continue
 			}
 			selected := "no"
 			if candidate.Selected {
 				selected = "yes"
 			}
 			fmt.Fprintf(&b, "| %s | %.0f | %.0f | %.0f | %.2f | %s |\n",
 				candidate.Source, candidate.IdleAvgW, candidate.LoadAvgW, candidate.DeltaW, candidate.RelativeError, selected)
 		}
 		b.WriteString("\n")
 	}
 	for _, note := range result.Notes {
 		fmt.Fprintf(&b, "- %s\n", note)
 	}
 	return b.String()
 }
 func benchmarkAutotuneLoadCommand(kind string, durationSec int, gpuIndices []int, sizeMB int) ([]string, string) {
 	allDevices := joinIndexList(gpuIndices)
 	switch strings.TrimSpace(strings.ToLower(kind)) {
 	case "power-fit", "power", "nvidia-bench-power":
 		cmd, _, err := resolveBenchmarkPowerLoadCommand(durationSec, gpuIndices)
 		if err == nil {
 			return cmd, "power-fit"
 		}
 		return nvidiaDCGMNamedDiagCommand("targeted_power", durationSec, gpuIndices), "power-fit"
 	default:
 		cmd := []string{
 			"bee-gpu-burn",
 			"--seconds", fmt.Sprintf("%d", durationSec),
 			"--devices", allDevices,
 		}
 		if sizeMB > 0 {
 			cmd = append(cmd, "--size-mb", fmt.Sprintf("%d", sizeMB))
 		}
 		return cmd, "performance"
 	}
 }
 // RunNvidiaPowerSourceAutotune measures which server power source (DCMI or
 // SDR PSU input) tracks GPU power best and saves that choice as the autotune
 // config.
 //
 // Steps: sample an idle window and validate that the machine is really idle;
 // run a full-load command while sampling a load window; compare each source's
 // idle-to-load delta with the GPU-reported delta; persist the selected source.
 // Artifacts (result.json, summary.txt, report.md, load log) are written to a
 // timestamped run directory under baseDir.
 //
 // A nil ctx is replaced by context.Background(), a nil logFunc by a no-op,
 // and a blank baseDir by "/var/log/bee-bench/autotune".
 //
 // Returns the run directory. On failure after the run directory exists, the
 // directory is still returned alongside the error so the artifacts can be
 // inspected; if the artifacts themselves cannot be written, the directory is
 // returned empty with the write error.
 func (s *System) RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
 	if ctx == nil {
 		ctx = context.Background()
 	}
 	if logFunc == nil {
 		logFunc = func(string) {}
 	}
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = "/var/log/bee-bench/autotune"
 	}
 	if err := os.MkdirAll(baseDir, 0755); err != nil {
 		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
 	}
 	selected, err := resolveNvidiaGPUSelection(nil, nil)
 	if err != nil {
 		return "", err
 	}
 	if len(selected) == 0 {
 		return "", fmt.Errorf("no NVIDIA GPUs detected for autotune")
 	}
 	ts := time.Now().UTC().Format("20060102-150405")
 	runDir := filepath.Join(baseDir, "autotune-"+ts)
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		return "", fmt.Errorf("mkdir %s: %w", runDir, err)
 	}
 	verboseLog := filepath.Join(runDir, "verbose.log")
 	hostname, _ := os.Hostname() // best effort: an empty hostname is acceptable in the report
 	loadCmd, normalizedKind := benchmarkAutotuneLoadCommand(benchmarkKind, benchmarkPowerAutotuneLoadSec, selected, opts.SizeMB)
 	// Status starts as FAILED and is flipped to OK only after a source is chosen.
 	result := BenchmarkPowerAutotuneResult{
 		GeneratedAt:       time.Now().UTC(),
 		Hostname:          hostname,
 		ServerModel:       readServerModel(),
 		BenchmarkKind:     normalizedKind,
 		Profile:           opts.Profile,
 		Status:            "FAILED",
 		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
 		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
 		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
 	}
 	// ── Idle stage ───────────────────────────────────────────────────────────
 	logFunc(fmt.Sprintf("autotune: idle validation window %ds on GPUs %s", benchmarkPowerAutotuneIdleSec, joinIndexList(selected)))
 	idleSamples := collectBenchmarkPowerAutotuneSamples(ctx, "idle", selected, benchmarkPowerAutotuneIdleSec, logFunc)
 	logBenchmarkPowerAutotunePhaseSummary("idle", idleSamples, logFunc)
 	result.IdleValidation = validateBenchmarkPowerAutotuneIdle(idleSamples)
 	if result.IdleValidation == nil || !result.IdleValidation.Valid {
 		if result.IdleValidation != nil {
 			result.IdleValidationError = result.IdleValidation.Reason
 			logFunc(result.IdleValidation.Reason)
 		}
 		result.Notes = append(result.Notes, "autotune stopped before load stage because idle validation failed")
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, fmt.Errorf("%s", result.IdleValidationError)
 	}
 	// ── Load stage ───────────────────────────────────────────────────────────
 	logFunc(fmt.Sprintf("autotune: full-load stage using %s for %ds", normalizedKind, benchmarkPowerAutotuneLoadSec))
 	// The sampler gets its own cancellable context so it can be stopped as
 	// soon as the load command exits. Otherwise it would keep sampling an
 	// unloaded machine for the rest of the window, which delays error
 	// reporting and drags the "load" averages down.
 	loadCtx, stopLoadSampling := context.WithCancel(ctx)
 	defer stopLoadSampling()
 	loadSamplesCh := make(chan []benchmarkPowerAutotuneSample, 1)
 	go func() {
 		loadSamplesCh <- collectBenchmarkPowerAutotuneSamples(loadCtx, "load", selected, benchmarkPowerAutotuneLoadSec, logFunc)
 	}()
 	out, runErr := runSATCommandCtx(ctx, verboseLog, "autotune-load.log", loadCmd, nil, logFunc)
 	stopLoadSampling()
 	loadSamples := <-loadSamplesCh
 	// Saving the raw load log is best effort, but a failure is reported
 	// rather than silently dropped.
 	if writeErr := os.WriteFile(filepath.Join(runDir, "autotune-load.log"), out, 0644); writeErr != nil {
 		logFunc(fmt.Sprintf("autotune: could not save autotune-load.log: %v", writeErr))
 	}
 	logBenchmarkPowerAutotunePhaseSummary("load", loadSamples, logFunc)
 	if runErr != nil {
 		result.Notes = append(result.Notes, "full-load stage failed: "+runErr.Error())
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, fmt.Errorf("autotune load stage: %w", runErr)
 	}
 	// ── Selection ────────────────────────────────────────────────────────────
 	selectedSource, candidates, idleGPUAvg, loadGPUAvg, chooseErr := chooseBenchmarkPowerAutotuneSource(idleSamples, loadSamples)
 	result.Candidates = candidates
 	result.GPUPowerIdleW = idleGPUAvg
 	result.GPUPowerLoadW = loadGPUAvg
 	if chooseErr != nil {
 		result.Notes = append(result.Notes, chooseErr.Error())
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, chooseErr
 	}
 	// Same reference delta the selection used: fall back to the absolute load
 	// average when load is not above idle.
 	gpuDelta := loadGPUAvg - idleGPUAvg
 	if gpuDelta <= 0 {
 		gpuDelta = loadGPUAvg
 	}
 	logBenchmarkPowerAutotuneSelection(candidates, selectedSource, gpuDelta, logFunc)
 	result.SelectedSource = selectedSource
 	result.Status = "OK"
 	var confidence float64
 	selectionReason := fmt.Sprintf("selected %s after comparing full-load average against GPU-reported delta", selectedSource)
 	for _, candidate := range candidates {
 		if candidate.Selected {
 			confidence = candidate.Confidence
 			if strings.TrimSpace(candidate.SelectionNotes) != "" {
 				selectionReason = candidate.SelectionNotes
 			}
 			break
 		}
 	}
 	// ── Persist ──────────────────────────────────────────────────────────────
 	cfg := BenchmarkPowerAutotuneConfig{
 		Version:           benchmarkPowerAutotuneVersion,
 		UpdatedAt:         time.Now().UTC(),
 		SelectedSource:    selectedSource,
 		BenchmarkKind:     normalizedKind,
 		Profile:           opts.Profile,
 		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
 		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
 		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
 		Confidence:        confidence,
 		Reason:            selectionReason,
 	}
 	result.Config = &cfg
 	configPath := BenchmarkPowerSourceConfigPath(baseDir)
 	if err := SaveBenchmarkPowerAutotuneConfig(configPath, cfg); err != nil {
 		result.Status = "FAILED"
 		result.Notes = append(result.Notes, "failed to save autotune config: "+err.Error())
 		if writeErr := writeBenchmarkPowerAutotuneArtifacts(runDir, result); writeErr != nil {
 			return "", writeErr
 		}
 		return runDir, err
 	}
 	logFunc(fmt.Sprintf("autotune conclusion: selected source %s; reason: %s", selectedSource, cfg.Reason))
 	result.Notes = append(result.Notes, "saved autotune config to "+configPath)
 	if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 		return "", err
 	}
 	return runDir, nil
 }
 // writeBenchmarkPowerAutotuneArtifacts stores the autotune result in runDir
 // as three files, written in this order: result.json (indented JSON),
 // summary.txt (key=value lines) and report.md (Markdown). The first failure
 // aborts and is returned wrapped with the name of the step that failed.
 func writeBenchmarkPowerAutotuneArtifacts(runDir string, result BenchmarkPowerAutotuneResult) error {
 	encoded, err := json.MarshalIndent(result, "", "  ")
 	if err != nil {
 		return fmt.Errorf("marshal autotune result: %w", err)
 	}
 	// Content is produced lazily so each file is rendered right before it is
 	// written, and not at all after an earlier write has failed.
 	artifacts := []struct {
 		name    string
 		content func() []byte
 	}{
 		{"result.json", func() []byte { return encoded }},
 		{"summary.txt", func() []byte { return []byte(renderBenchmarkPowerAutotuneSummary(result)) }},
 		{"report.md", func() []byte { return []byte(renderBenchmarkPowerAutotuneReport(result)) }},
 	}
 	for _, artifact := range artifacts {
 		if writeErr := os.WriteFile(filepath.Join(runDir, artifact.name), artifact.content(), 0644); writeErr != nil {
 			return fmt.Errorf("write autotune %s: %w", artifact.name, writeErr)
 		}
 	}
 	return nil
 }
 // minInt returns the smaller of a and b.
 func minInt(a, b int) int {
 	smallest := b
 	if a < b {
 		smallest = a
 	}
 	return smallest
 }
 var _ = exec.ErrNotFound
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -2,38 +2,74 @@ package platform
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"regexp"
 	"strings"
 	"time"
 )
 func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
-	return renderBenchmarkReportWithCharts(result, nil)
+	return renderBenchmarkReportWithCharts(result)
 }
-type benchmarkReportChart struct {
+func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 	Title   string
 	Content string
 }
 var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
 func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
 	var b strings.Builder
 	fmt.Fprintf(&b, "Bee NVIDIA Benchmark Report\n")
 	fmt.Fprintf(&b, "===========================\n\n")
 	fmt.Fprintf(&b, "Generated: %s\n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
 	fmt.Fprintf(&b, "Host: %s\n", result.Hostname)
 	fmt.Fprintf(&b, "Profile: %s\n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "Overall status: %s\n", result.OverallStatus)
 	fmt.Fprintf(&b, "Selected GPUs: %s\n", joinIndexList(result.SelectedGPUIndices))
 	fmt.Fprintf(&b, "Normalization: %s\n\n", result.Normalization.Status)
 	// ── Header ────────────────────────────────────────────────────────────────
 	b.WriteString("# Bee NVIDIA Benchmark Report\n\n")
 	// System identity block
 	if result.ServerModel != "" {
 		fmt.Fprintf(&b, "**Server:** %s  \n", result.ServerModel)
 	}
 	if result.Hostname != "" {
 		fmt.Fprintf(&b, "**Host:** %s  \n", result.Hostname)
 	}
 	// GPU models summary
 	if len(result.GPUs) > 0 {
 		modelCount := make(map[string]int)
 		var modelOrder []string
 		for _, g := range result.GPUs {
 			m := strings.TrimSpace(g.Name)
 			if m == "" {
 				m = "Unknown GPU"
 			}
 			if modelCount[m] == 0 {
 				modelOrder = append(modelOrder, m)
 			}
 			modelCount[m]++
 		}
 		var parts []string
 		for _, m := range modelOrder {
 			if modelCount[m] == 1 {
 				parts = append(parts, m)
 			} else {
 				parts = append(parts, fmt.Sprintf("%d× %s", modelCount[m], m))
 			}
 		}
 		fmt.Fprintf(&b, "**GPU(s):** %s  \n", strings.Join(parts, ", "))
 	}
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "**Benchmark version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
 	if result.RampStep > 0 && result.RampTotal > 0 {
 		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
 		if result.RampRunID != "" {
 			fmt.Fprintf(&b, "**Ramp-up run ID:** %s  \n", result.RampRunID)
 		}
 	} else if result.ParallelGPUs {
 		fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously)  \n")
 	}
 	if result.ScalabilityScore > 0 {
 		fmt.Fprintf(&b, "**Scalability score:** %.1f%%  \n", result.ScalabilityScore)
 	}
 	if result.PlatformPowerScore > 0 {
 		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n", result.PlatformPowerScore)
 	}
 	fmt.Fprintf(&b, "**Overall status:** %s  \n", result.OverallStatus)
 	b.WriteString("\n")
 	// ── Executive Summary ─────────────────────────────────────────────────────
 	if len(result.Findings) > 0 {
-		fmt.Fprintf(&b, "Executive Summary\n")
+		b.WriteString("## Executive Summary\n\n")
 		fmt.Fprintf(&b, "-----------------\n")
 		for _, finding := range result.Findings {
 			fmt.Fprintf(&b, "- %s\n", finding)
 		}
@@ -41,143 +77,466 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 	}
 	if len(result.Warnings) > 0 {
-		fmt.Fprintf(&b, "Warnings\n")
+		b.WriteString("## Warnings\n\n")
 		fmt.Fprintf(&b, "--------\n")
 		for _, warning := range result.Warnings {
 			fmt.Fprintf(&b, "- %s\n", warning)
 		}
 		b.WriteString("\n")
 	}
-	fmt.Fprintf(&b, "Per GPU Scorecard\n")
+	// ── Balanced Scorecard ────────────────────────────────────────────────────
-	fmt.Fprintf(&b, "-----------------\n")
+	b.WriteString("## Balanced Scorecard\n\n")
-	for _, gpu := range result.GPUs {
+
-		fmt.Fprintf(&b, "GPU %d  %s\n", gpu.Index, gpu.Name)
+	// Perspective 1: Compatibility — hard stops
-		fmt.Fprintf(&b, "  Status: %s\n", gpu.Status)
+	b.WriteString("### 1. Compatibility\n\n")
-		fmt.Fprintf(&b, "  Composite score: %.2f\n", gpu.Scores.CompositeScore)
+	{
-		fmt.Fprintf(&b, "  Compute score: %.2f\n", gpu.Scores.ComputeScore)
+		var rows [][]string
-		fmt.Fprintf(&b, "  Power sustain: %.1f\n", gpu.Scores.PowerSustainScore)
+		for _, gpu := range result.GPUs {
-		fmt.Fprintf(&b, "  Thermal sustain: %.1f\n", gpu.Scores.ThermalSustainScore)
+			thermalThrottle := "-"
-		fmt.Fprintf(&b, "  Stability: %.1f\n", gpu.Scores.StabilityScore)
+			if gpu.Scores.ThermalThrottlePct > 0 {
-		if gpu.Scores.InterconnectScore > 0 {
+				thermalThrottle = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
 			fmt.Fprintf(&b, "  Interconnect: %.1f\n", gpu.Scores.InterconnectScore)
 		}
 		if len(gpu.DegradationReasons) > 0 {
 			fmt.Fprintf(&b, "  Degradation reasons: %s\n", strings.Join(gpu.DegradationReasons, ", "))
 		}
 		fmt.Fprintf(&b, "  Avg power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.AvgPowerW, gpu.Steady.AvgTempC, gpu.Steady.AvgGraphicsClockMHz)
 		fmt.Fprintf(&b, "  P95 power/temp/clock: %.1f W / %.1f C / %.0f MHz\n", gpu.Steady.P95PowerW, gpu.Steady.P95TempC, gpu.Steady.P95GraphicsClockMHz)
 		if len(gpu.PrecisionResults) > 0 {
 			fmt.Fprintf(&b, "  Precision results:\n")
 			for _, precision := range gpu.PrecisionResults {
 				if precision.Supported {
 					fmt.Fprintf(&b, "    - %s: %.2f TOPS lanes=%d iterations=%d\n", precision.Name, precision.TeraOpsPerSec, precision.Lanes, precision.Iterations)
 				} else {
 					fmt.Fprintf(&b, "    - %s: unsupported (%s)\n", precision.Name, precision.Notes)
 				}
 			}
-		}
+			fanAtThrottle := "-"
-		fmt.Fprintf(&b, "  Throttle counters (us): sw_power=%d sw_thermal=%d sync_boost=%d hw_thermal=%d hw_power_brake=%d\n",
+			if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && gpu.Scores.ThermalThrottlePct > 0 {
-			gpu.Throttle.SWPowerCapUS,
+				fanAtThrottle = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
 			gpu.Throttle.SWThermalSlowdownUS,
 			gpu.Throttle.SyncBoostUS,
 			gpu.Throttle.HWThermalSlowdownUS,
 			gpu.Throttle.HWPowerBrakeSlowdownUS,
 		)
 		if len(gpu.Notes) > 0 {
 			fmt.Fprintf(&b, "  Notes:\n")
 			for _, note := range gpu.Notes {
 				fmt.Fprintf(&b, "    - %s\n", note)
 			}
 			ecc := "-"
 			if gpu.ECC.Uncorrected > 0 {
 				ecc = fmt.Sprintf("⛔ %d", gpu.ECC.Uncorrected)
 			}
 			compatStatus := "✓ OK"
 			if gpu.ECC.Uncorrected > 0 || (gpu.Scores.ThermalThrottlePct > 0 && result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && result.Cooling.P95FanDutyCyclePct < 95) {
 				compatStatus = "⛔ HARD STOP"
 			}
 			rows = append(rows, []string{fmt.Sprintf("GPU %d", gpu.Index), thermalThrottle, fanAtThrottle, ecc, compatStatus})
 		}
 		b.WriteString(fmtMDTable([]string{"GPU", "Thermal throttle", "Fan duty at throttle", "ECC uncorr", "Status"}, rows))
 		b.WriteString("\n")
 	}
 	// Perspective 2: Thermal headroom
 	b.WriteString("### 2. Thermal Headroom\n\n")
 	{
 		var rows [][]string
 		for _, gpu := range result.GPUs {
 			shutdownTemp := gpu.ShutdownTempC
 			if shutdownTemp <= 0 {
 				shutdownTemp = 90
 			}
 			slowdownTemp := gpu.SlowdownTempC
 			if slowdownTemp <= 0 {
 				slowdownTemp = 80
 			}
 			headroom := gpu.Scores.TempHeadroomC
 			thermalStatus := "✓ OK"
 			switch {
 			case headroom < 10:
 				thermalStatus = "⛔ CRITICAL"
 			case gpu.Steady.P95TempC >= slowdownTemp:
 				thermalStatus = "⚠ WARNING"
 			}
 			throttlePct := "-"
 			if gpu.Scores.ThermalThrottlePct > 0 {
 				throttlePct = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
 			}
 			rows = append(rows, []string{
 				fmt.Sprintf("GPU %d", gpu.Index),
 				fmt.Sprintf("%.1f°C", gpu.Steady.P95TempC),
 				fmt.Sprintf("%.0f°C", slowdownTemp),
 				fmt.Sprintf("%.0f°C", shutdownTemp),
 				fmt.Sprintf("%.1f°C", headroom),
 				throttlePct,
 				thermalStatus,
 			})
 		}
 		b.WriteString(fmtMDTable([]string{"GPU", "p95 temp", "Slowdown limit", "Shutdown limit", "Headroom", "Thermal throttle", "Status"}, rows))
 		b.WriteString("\n")
 	}
 	// Perspective 3: Power delivery
 	b.WriteString("### 3. Power Delivery\n\n")
 	{
 		var rows [][]string
 		for _, gpu := range result.GPUs {
 			powerCap := "-"
 			if gpu.Scores.PowerCapThrottlePct > 0 {
 				powerCap = fmt.Sprintf("%.1f%%", gpu.Scores.PowerCapThrottlePct)
 			}
 			fanDuty := "-"
 			if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable {
 				fanDuty = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
 			}
 			powerStatus := "✓ OK"
 			if gpu.Scores.PowerCapThrottlePct > 5 {
 				powerStatus = "⚠ POWER LIMITED"
 			}
 			rows = append(rows, []string{
 				fmt.Sprintf("GPU %d", gpu.Index),
 				powerCap,
 				fmt.Sprintf("%.1f", gpu.Scores.PowerSustainScore),
 				fanDuty,
 				powerStatus,
 			})
 		}
 		b.WriteString(fmtMDTable([]string{"GPU", "Power cap throttle", "Power stability", "Fan duty (p95)", "Status"}, rows))
 		b.WriteString("\n")
 	}
 	// Perspective 4: Performance
 	b.WriteString("### 4. Performance\n\n")
 	{
 		var rows [][]string
 		for _, gpu := range result.GPUs {
 			synthetic := "-"
 			if gpu.Scores.SyntheticScore > 0 {
 				synthetic = fmt.Sprintf("%.2f", gpu.Scores.SyntheticScore)
 			}
 			mixed := "-"
 			if gpu.Scores.MixedScore > 0 {
 				mixed = fmt.Sprintf("%.2f", gpu.Scores.MixedScore)
 			}
 			mixedEff := "-"
 			if gpu.Scores.MixedEfficiency > 0 {
 				mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
 			}
 			topsPerSM := "-"
 			if gpu.Scores.TOPSPerSMPerGHz > 0 {
 				topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
 			}
 			rows = append(rows, []string{
 				fmt.Sprintf("GPU %d", gpu.Index),
 				fmt.Sprintf("**%.2f**", gpu.Scores.CompositeScore),
 				synthetic, mixed, mixedEff, topsPerSM,
 			})
 		}
 		b.WriteString(fmtMDTable([]string{"GPU", "Compute TOPS", "Synthetic", "Mixed", "Mixed Eff.", "TOPS/SM/GHz"}, rows))
 		if len(result.PerformanceRampSteps) > 0 {
 			fmt.Fprintf(&b, "\n**Platform power score (scalability):** %.1f%%\n", result.PlatformPowerScore)
 		}
 		b.WriteString("\n")
 	}
 	// Perspective 5: Anomaly flags
 	b.WriteString("### 5. Anomalies\n\n")
 	{
 		var rows [][]string
 		for _, gpu := range result.GPUs {
 			eccCorr := "-"
 			if gpu.ECC.Corrected > 0 {
 				eccCorr = fmt.Sprintf("⚠ %d", gpu.ECC.Corrected)
 			}
 			syncBoost := "-"
 			if gpu.Scores.SyncBoostThrottlePct > 0 {
 				syncBoost = fmt.Sprintf("%.1f%%", gpu.Scores.SyncBoostThrottlePct)
 			}
 			powerVar := "OK"
 			if gpu.Scores.PowerSustainScore < 70 {
 				powerVar = "⚠ unstable"
 			}
 			thermalVar := "OK"
 			if gpu.Scores.ThermalSustainScore < 70 {
 				thermalVar = "⚠ unstable"
 			}
 			rows = append(rows, []string{fmt.Sprintf("GPU %d", gpu.Index), eccCorr, syncBoost, powerVar, thermalVar})
 		}
 		b.WriteString(fmtMDTable([]string{"GPU", "ECC corrected", "Sync boost throttle", "Power instability", "Thermal instability"}, rows))
 		b.WriteString("\n")
 	}
 	// ── Per GPU detail ────────────────────────────────────────────────────────
 	b.WriteString("## Per-GPU Details\n\n")
 	for _, gpu := range result.GPUs {
 		name := strings.TrimSpace(gpu.Name)
 		if name == "" {
 			name = "Unknown GPU"
 		}
 		fmt.Fprintf(&b, "### GPU %d — %s\n\n", gpu.Index, name)
 		// Identity
 		if gpu.BusID != "" {
 			fmt.Fprintf(&b, "- **Bus ID:** %s\n", gpu.BusID)
 		}
 		if gpu.VBIOS != "" {
 			fmt.Fprintf(&b, "- **vBIOS:** %s\n", gpu.VBIOS)
 		}
 		if gpu.ComputeCapability != "" {
 			fmt.Fprintf(&b, "- **Compute capability:** %s\n", gpu.ComputeCapability)
 		}
 		if gpu.MultiprocessorCount > 0 {
 			fmt.Fprintf(&b, "- **SMs:** %d\n", gpu.MultiprocessorCount)
 		}
 		if gpu.PowerLimitW > 0 {
 			fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
 		}
 		if gpu.PowerLimitDerated {
 			fmt.Fprintf(&b, "- **Power limit derating:** active (reduced limit %.0f W)\n", gpu.PowerLimitW)
 		}
 		if gpu.CalibratedPeakPowerW > 0 {
 			if gpu.CalibratedPeakTempC > 0 {
 				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95 at %.1f °C p95\n", gpu.CalibratedPeakPowerW, gpu.CalibratedPeakTempC)
 			} else {
 				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95\n", gpu.CalibratedPeakPowerW)
 			}
 		}
 		if gpu.LockedGraphicsClockMHz > 0 {
 			fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
 		}
 		b.WriteString("\n")
 		// Steady-state telemetry
 		if benchmarkTelemetryAvailable(gpu.Steady) {
 			fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
 			b.WriteString(fmtMDTable(
 				[]string{"", "Avg", "P95"},
 				[][]string{
 					{"Power", fmt.Sprintf("%.1f W", gpu.Steady.AvgPowerW), fmt.Sprintf("%.1f W", gpu.Steady.P95PowerW)},
 					{"Temperature", fmt.Sprintf("%.1f °C", gpu.Steady.AvgTempC), fmt.Sprintf("%.1f °C", gpu.Steady.P95TempC)},
 					{"GPU clock", fmt.Sprintf("%.0f MHz", gpu.Steady.AvgGraphicsClockMHz), fmt.Sprintf("%.0f MHz", gpu.Steady.P95GraphicsClockMHz)},
 					{"Memory clock", fmt.Sprintf("%.0f MHz", gpu.Steady.AvgMemoryClockMHz), fmt.Sprintf("%.0f MHz", gpu.Steady.P95MemoryClockMHz)},
 					{"GPU utilisation", fmt.Sprintf("%.1f %%", gpu.Steady.AvgUsagePct), "—"},
 				},
 			))
 			b.WriteString("\n")
 		} else {
 			b.WriteString("**Steady-state telemetry:** unavailable\n\n")
 		}
 		// Per-precision stability phases.
 		if len(gpu.PrecisionSteady) > 0 {
 			b.WriteString("**Per-precision stability:**\n\n")
 			var precRows [][]string
 			for _, p := range gpu.PrecisionSteady {
 				eccCorr := "—"
 				eccUncorr := "—"
 				if !p.ECC.IsZero() {
 					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
 					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
 				}
 				status := p.Status
 				if strings.TrimSpace(status) == "" {
 					status = "OK"
 				}
 				precRows = append(precRows, []string{
 					p.Precision, status,
 					fmt.Sprintf("%.1f%%", p.Steady.ClockCVPct),
 					fmt.Sprintf("%.1f%%", p.Steady.PowerCVPct),
 					fmt.Sprintf("%.1f%%", p.Steady.ClockDriftPct),
 					eccCorr, eccUncorr,
 				})
 			}
 			b.WriteString(fmtMDTable([]string{"Precision", "Status", "Clock CV", "Power CV", "Clock Drift", "ECC corr", "ECC uncorr"}, precRows))
 			b.WriteString("\n")
 		} else {
 			// Legacy: show combined-window variance.
 			fmt.Fprintf(&b, "**Clock/power variance (combined window):** clock CV %.1f%% · power CV %.1f%% · clock drift %.1f%%\n\n",
 				gpu.Steady.ClockCVPct, gpu.Steady.PowerCVPct, gpu.Steady.ClockDriftPct)
 		}
 		// ECC summary
 		if !gpu.ECC.IsZero() {
 			fmt.Fprintf(&b, "**ECC errors (total):** corrected=%d uncorrected=%d\n\n",
 				gpu.ECC.Corrected, gpu.ECC.Uncorrected)
 		}
 		// Throttle
 		throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
 		if throttle != "none" {
 			fmt.Fprintf(&b, "**Throttle:** %s\n\n", throttle)
 		}
 		// Precision results
 		if len(gpu.PrecisionResults) > 0 {
 			b.WriteString("**Precision results:**\n\n")
 			var presRows [][]string
 			for _, p := range gpu.PrecisionResults {
 				if p.Supported {
 					presRows = append(presRows, []string{
 						p.Name,
 						fmt.Sprintf("%.2f", p.TeraOpsPerSec),
 						fmt.Sprintf("×%.3g", p.Weight),
 						fmt.Sprintf("%.2f", p.WeightedTeraOpsPerSec),
 						fmt.Sprintf("%d", p.Lanes),
 						fmt.Sprintf("%d", p.Iterations),
 					})
 				} else {
 					presRows = append(presRows, []string{p.Name, "— (unsupported)", "—", "—", "—", "—"})
 				}
 			}
 			b.WriteString(fmtMDTable([]string{"Precision", "TOPS (raw)", "Weight", "TOPS (fp32-eq)", "Lanes", "Iterations"}, presRows))
 			b.WriteString("\n")
 		}
 		// Degradation / Notes
 		if len(gpu.DegradationReasons) > 0 {
 			fmt.Fprintf(&b, "**Degradation reasons:** %s\n\n", strings.Join(gpu.DegradationReasons, ", "))
 		}
 		if len(gpu.Notes) > 0 {
 			b.WriteString("**Notes:**\n\n")
 			for _, note := range gpu.Notes {
 				fmt.Fprintf(&b, "- %s\n", note)
 			}
 			b.WriteString("\n")
 		}
 	}
 	// ── Interconnect ──────────────────────────────────────────────────────────
 	if result.Interconnect != nil {
-		fmt.Fprintf(&b, "Interconnect\n")
+		b.WriteString("## Interconnect (NCCL)\n\n")
-		fmt.Fprintf(&b, "------------\n")
+		fmt.Fprintf(&b, "**Status:** %s\n\n", result.Interconnect.Status)
 		fmt.Fprintf(&b, "Status: %s\n", result.Interconnect.Status)
 		if result.Interconnect.Supported {
-			fmt.Fprintf(&b, "Avg algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.AvgBusBWGBps)
+			b.WriteString(fmtMDTable(
-			fmt.Fprintf(&b, "Max algbw / busbw: %.1f / %.1f GB/s\n", result.Interconnect.MaxAlgBWGBps, result.Interconnect.MaxBusBWGBps)
+				[]string{"Metric", "Avg", "Max"},
 				[][]string{
 					{"Alg BW", fmt.Sprintf("%.1f GB/s", result.Interconnect.AvgAlgBWGBps), fmt.Sprintf("%.1f GB/s", result.Interconnect.MaxAlgBWGBps)},
 					{"Bus BW", fmt.Sprintf("%.1f GB/s", result.Interconnect.AvgBusBWGBps), fmt.Sprintf("%.1f GB/s", result.Interconnect.MaxBusBWGBps)},
 				},
 			))
 			b.WriteString("\n")
 		}
 		for _, note := range result.Interconnect.Notes {
 			fmt.Fprintf(&b, "- %s\n", note)
 		}
-		b.WriteString("\n")
+		if len(result.Interconnect.Notes) > 0 {
-	}
+			b.WriteString("\n")
 	if len(charts) > 0 {
 		fmt.Fprintf(&b, "Terminal Charts\n")
 		fmt.Fprintf(&b, "---------------\n")
 		for _, chart := range charts {
 			content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
 			if content == "" {
 				continue
 			}
 			fmt.Fprintf(&b, "%s\n", chart.Title)
 			fmt.Fprintf(&b, "%s\n", strings.Repeat("~", len(chart.Title)))
 			fmt.Fprintf(&b, "%s\n\n", content)
 		}
 	}
-	fmt.Fprintf(&b, "Methodology\n")
+	// ── Server Power ───────────────────────────────────────────────────────────
-	fmt.Fprintf(&b, "-----------\n")
+	if sp := result.ServerPower; sp != nil {
-	fmt.Fprintf(&b, "- Profile %s uses standardized baseline, warmup, steady-state, interconnect, and cooldown phases.\n", result.BenchmarkProfile)
+		title := "## Server Power\n\n"
-	fmt.Fprintf(&b, "- Single-GPU compute score comes from bee-gpu-burn cuBLASLt output when available.\n")
+		if sp.Source != "" {
-	fmt.Fprintf(&b, "- Thermal and power limitations are inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
+			title = fmt.Sprintf("## Server Power (`%s`)\n\n", sp.Source)
-	fmt.Fprintf(&b, "- result.json is the canonical machine-readable source for this benchmark run.\n\n")
+		}
 		b.WriteString(title)
 		if !sp.Available {
 			b.WriteString("Server power measurement unavailable.\n\n")
 		} else {
 			spRows := [][]string{
 				{"Server idle", fmt.Sprintf("%.0f W", sp.IdleW)},
 				{"Server under load", fmt.Sprintf("%.0f W", sp.LoadedW)},
 				{"Server delta (load − idle)", fmt.Sprintf("%.0f W", sp.DeltaW)},
 				{"GPU-reported sum", fmt.Sprintf("%.0f W", sp.GPUReportedSumW)},
 			}
 			if sp.ReportingRatio > 0 {
 				spRows = append(spRows, []string{"Reporting ratio", fmt.Sprintf("%.2f (1.0 = accurate, <0.75 = GPU over-reports)", sp.ReportingRatio)})
 			}
 			b.WriteString(fmtMDTable([]string{"", "Value"}, spRows))
 			b.WriteString("\n")
 		}
 		for _, note := range sp.Notes {
 			fmt.Fprintf(&b, "- %s\n", note)
 		}
 		if len(sp.Notes) > 0 {
 			b.WriteString("\n")
 		}
 	}
-	fmt.Fprintf(&b, "Raw Files\n")
+	// ── PSU Issues ────────────────────────────────────────────────────────────
-	fmt.Fprintf(&b, "---------\n")
+	if len(result.PSUIssues) > 0 {
-	fmt.Fprintf(&b, "- result.json\n")
+		b.WriteString("## PSU Issues\n\n")
-	fmt.Fprintf(&b, "- report.txt\n")
+		b.WriteString("The following power supply anomalies were detected during the benchmark:\n\n")
-	fmt.Fprintf(&b, "- summary.txt\n")
+		for _, issue := range result.PSUIssues {
-	fmt.Fprintf(&b, "- verbose.log\n")
+			fmt.Fprintf(&b, "- ⛔ %s\n", issue)
-	fmt.Fprintf(&b, "- gpu-*-baseline-metrics.csv/html/term.txt\n")
+		}
-	fmt.Fprintf(&b, "- gpu-*-warmup.log\n")
+		b.WriteString("\n")
-	fmt.Fprintf(&b, "- gpu-*-steady.log\n")
+	}
-	fmt.Fprintf(&b, "- gpu-*-steady-metrics.csv/html/term.txt\n")
+
-	fmt.Fprintf(&b, "- gpu-*-cooldown-metrics.csv/html/term.txt\n")
+	// ── Cooling ───────────────────────────────────────────────────────────────
 	if cooling := result.Cooling; cooling != nil {
 		b.WriteString("## Cooling\n\n")
 		if cooling.Available {
 			dutyAvg, dutyP95 := "N/A", "N/A"
 			if cooling.FanDutyCycleAvailable {
 				dutyAvg = fmt.Sprintf("%.1f%%", cooling.AvgFanDutyCyclePct)
 				dutyP95 = fmt.Sprintf("%.1f%%", cooling.P95FanDutyCyclePct)
 			}
 			b.WriteString(fmtMDTable(
 				[]string{"Metric", "Value"},
 				[][]string{
 					{"Average fan speed", fmt.Sprintf("%.0f RPM", cooling.AvgFanRPM)},
 					{"Average fan duty cycle", dutyAvg},
 					{"P95 fan duty cycle", dutyP95},
 				},
 			))
 			b.WriteString("\n")
 		} else {
 			b.WriteString("Cooling telemetry unavailable.\n\n")
 		}
 		for _, note := range cooling.Notes {
 			fmt.Fprintf(&b, "- %s\n", note)
 		}
 		if len(cooling.Notes) > 0 {
 			b.WriteString("\n")
 		}
 	}
 	// ── Platform Scalability ──────────────────────────────────────────────────
 	if len(result.PerformanceRampSteps) > 0 {
 		b.WriteString("## Platform Scalability (Performance Ramp)\n\n")
 		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n\n", result.PlatformPowerScore)
 		var scalRows [][]string
 		for _, step := range result.PerformanceRampSteps {
 			scalRows = append(scalRows, []string{
 				fmt.Sprintf("%d", step.StepIndex),
 				joinIndexList(step.GPUIndices),
 				fmt.Sprintf("%.2f", step.TotalSyntheticTOPS),
 				fmt.Sprintf("%.1f%%", step.ScalabilityPct),
 			})
 		}
 		b.WriteString(fmtMDTable([]string{"k GPUs", "GPU Indices", "Total Synthetic TOPS", "Scalability"}, scalRows))
 		b.WriteString("\n")
 	}
 	// ── Raw files ─────────────────────────────────────────────────────────────
 	b.WriteString("## Raw Files\n\n")
 	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
 	b.WriteString("- `gpu-metrics.csv`\n- `gpu-metrics.html`\n- `gpu-burn.log`\n")
 	if result.Interconnect != nil {
-		fmt.Fprintf(&b, "- nccl-all-reduce.log\n")
+		b.WriteString("- `nccl-all-reduce.log`\n")
 	}
 	return b.String()
 }
-func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
+// formatThrottleLine renders throttle counters as human-readable percentages of
-	phases := []struct {
+// the steady-state window.  Only non-zero counters are shown.  When the steady
-		name  string
+// duration is unknown (0), raw seconds are shown instead.
 func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64) string {
 	type counter struct {
 		label string
-	}{
+		us    uint64
 		{name: "baseline", label: "Baseline"},
 		{name: "steady", label: "Steady State"},
 		{name: "cooldown", label: "Cooldown"},
 	}
-	var charts []benchmarkReportChart
+	counters := []counter{
-	for _, idx := range gpuIndices {
+		{"sw_power", t.SWPowerCapUS},
-		for _, phase := range phases {
+		{"sw_thermal", t.SWThermalSlowdownUS},
-			path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-%s-metrics-term.txt", idx, phase.name))
+		{"sync_boost", t.SyncBoostUS},
-			raw, err := os.ReadFile(path)
+		{"hw_thermal", t.HWThermalSlowdownUS},
-			if err != nil || len(raw) == 0 {
+		{"hw_power_brake", t.HWPowerBrakeSlowdownUS},
-				continue
+	}
-			}
+	var parts []string
-			charts = append(charts, benchmarkReportChart{
+	for _, c := range counters {
-				Title:   fmt.Sprintf("GPU %d %s", idx, phase.label),
+		if c.us == 0 {
-				Content: string(raw),
+			continue
-			})
+		}
 		sec := float64(c.us) / 1e6
 		if steadyDurationSec > 0 {
 			pct := sec / steadyDurationSec * 100
 			parts = append(parts, fmt.Sprintf("%s=%.1f%% (%.0fs)", c.label, pct, sec))
 		} else if sec < 1 {
 			parts = append(parts, fmt.Sprintf("%s=%.0fms", c.label, sec*1000))
 		} else {
 			parts = append(parts, fmt.Sprintf("%s=%.1fs", c.label, sec))
 		}
 	}
-	return charts
+	if len(parts) == 0 {
-}
+		return "none"
-
+	}
-func stripANSIEscapeSequences(raw string) string {
+	return strings.Join(parts, "  ")
 	return ansiEscapePattern.ReplaceAllString(raw, "")
 }
 func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
 	var b strings.Builder
 	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
 	fmt.Fprintf(&b, "benchmark_version=%s\n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
 	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
--- a/audit/internal/platform/benchmark_table.go
+++ b/audit/internal/platform/benchmark_table.go
@@ -0,0 +1,75 @@
 package platform
 import (
 	"strings"
 	"unicode/utf8"
 )
 // fmtMDTable renders a markdown table with column widths padded so the table
 // is readable as plain text without a markdown renderer.
 //
 // headers contains the column header strings; it fixes the number of columns.
 // rows contains the data rows. A row with fewer cells than headers is padded
 // with empty cells, and cells beyond the last header are ignored.
 //
 // Widths are measured in runes rather than bytes, so cells containing
 // multi-byte characters such as "°C", "✓" or "—" do not push their column out
 // of alignment. Rune count is still an approximation of display width:
 // double-width or combining characters may render slightly off.
 //
 // It returns the empty string when headers is empty.
 func fmtMDTable(headers []string, rows [][]string) string {
 	ncols := len(headers)
 	if ncols == 0 {
 		return ""
 	}
 	// cellAt returns the i-th cell of row, or "" when the row is too short.
 	cellAt := func(row []string, i int) string {
 		if i < len(row) {
 			return row[i]
 		}
 		return ""
 	}
 	// Compute max width (in runes) per column.
 	widths := make([]int, ncols)
 	for i, h := range headers {
 		widths[i] = utf8.RuneCountInString(h)
 	}
 	for _, row := range rows {
 		for i := 0; i < ncols; i++ {
 			if w := utf8.RuneCountInString(cellAt(row, i)); w > widths[i] {
 				widths[i] = w
 			}
 		}
 	}
 	var b strings.Builder
 	// writeRow emits one "| a | b |" line, padding every cell to its column width.
 	writeRow := func(cells []string) {
 		b.WriteByte('|')
 		for i := 0; i < ncols; i++ {
 			cell := cellAt(cells, i)
 			b.WriteByte(' ')
 			b.WriteString(cell)
 			b.WriteString(strings.Repeat(" ", widths[i]-utf8.RuneCountInString(cell)))
 			b.WriteString(" |")
 		}
 		b.WriteByte('\n')
 	}
 	// Header row.
 	writeRow(headers)
 	// Separator row: dashes span the cell plus its two padding spaces.
 	b.WriteByte('|')
 	for i := range headers {
 		b.WriteString(strings.Repeat("-", widths[i]+2))
 		b.WriteByte('|')
 	}
 	b.WriteByte('\n')
 	// Data rows.
 	for _, row := range rows {
 		writeRow(row)
 	}
 	return b.String()
 }
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -1,8 +1,13 @@
 package platform
 import (
 	"context"
 	"fmt"
 	"os/exec"
 	"path/filepath"
 	"strings"
 	"testing"
 	"time"
 )
 func TestResolveBenchmarkProfile(t *testing.T) {
@@ -16,17 +21,17 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 		{
 			name:    "default",
 			profile: "",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 45, SteadySec: 480, NCCLSec: 180, CooldownSec: 0},
 		},
 		{
 			name:    "stability",
 			profile: "stability",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 120, SteadySec: 3600, NCCLSec: 300, CooldownSec: 0},
 		},
 		{
 			name:    "overnight",
 			profile: "overnight",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 180, SteadySec: 27000, NCCLSec: 600, CooldownSec: 0},
 		},
 	}
@@ -41,6 +46,222 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 	}
 }
 func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
 	t.Parallel()
 	labels, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
 		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, SteadySec: 480},
 		benchmarkPrecisionPhases,
 		func(label string) string { return label },
 	)
 	if len(labels) != 5 || len(phases) != 5 {
 		t.Fatalf("labels=%d phases=%d want 5", len(labels), len(phases))
 	}
 	if basePhaseSec != 60 {
 		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
 	}
 	if mixedPhaseSec != 300 {
 		t.Fatalf("mixedPhaseSec=%d want 300", mixedPhaseSec)
 	}
 	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 {
 		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
 	}
 	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,300" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
 func TestBuildBenchmarkSteadyPlanStability(t *testing.T) {
 	t.Parallel()
 	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
 		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, SteadySec: 3600},
 		benchmarkPrecisionPhases,
 		func(label string) string { return label },
 	)
 	if basePhaseSec != 300 {
 		t.Fatalf("basePhaseSec=%d want 300", basePhaseSec)
 	}
 	if mixedPhaseSec != 3600 {
 		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
 	}
 	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,3600" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
 func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) {
 	t.Parallel()
 	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
 		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, SteadySec: 27000},
 		benchmarkPrecisionPhases,
 		func(label string) string { return label },
 	)
 	if basePhaseSec != 3600 {
 		t.Fatalf("basePhaseSec=%d want 3600", basePhaseSec)
 	}
 	if mixedPhaseSec != 14400 {
 		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
 	}
 	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,14400" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
 // TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations feeds four metric rows
 // (elapsed 5s, 15s, 25s, 65s) through a 10s/10s/50s plan and expects one row
 // under "fp8", one under "fp16" and two under "mixed".
 func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) {
 	t.Parallel()
 	plan := []benchmarkPlannedPhase{
 		{PlanLabel: "fp8", MetricStage: "fp8", DurationSec: 10},
 		{PlanLabel: "fp16", MetricStage: "fp16", DurationSec: 10},
 		{PlanLabel: "mixed", MetricStage: "mixed", DurationSec: 50},
 	}
 	samples := []GPUMetricRow{{ElapsedSec: 5}, {ElapsedSec: 15}, {ElapsedSec: 25}, {ElapsedSec: 65}}
 	byStage := splitBenchmarkRowsByPlannedPhase(samples, plan)
 	// Checked in plan order so the first mismatch reported matches phase order.
 	expectations := []struct {
 		stage string
 		want  int
 	}{
 		{"fp8", 1},
 		{"fp16", 1},
 		{"mixed", 2},
 	}
 	for _, exp := range expectations {
 		if n := len(byStage[exp.stage]); n != exp.want {
 			t.Fatalf("%s rows=%d want %d", exp.stage, n, exp.want)
 		}
 	}
 }
 // TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell asserts that compute
 // capabilities "9.0" and "10.0" both yield int8, fp8, fp16, fp32 with no fp4
 // entry.
 // NOTE(review): the test name suggests fp4 is only skipped before Blackwell,
 // yet the 10.0 case expects the same list — confirm this is intended.
 func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) {
 	t.Parallel()
 	const wantCSV = "int8,fp8,fp16,fp32"
 	for _, capability := range []string{"9.0", "10.0"} {
 		supported := benchmarkSupportedPrecisions(capability)
 		if strings.Join(supported, ",") != wantCSV {
 			t.Fatalf("supported=%v", supported)
 		}
 	}
 }
 // TestBenchmarkPlannedPhaseStatus runs benchmarkPlannedPhaseStatus over small
 // raw log snippets and checks the first returned value (the status) for the
 // OK, FAILED and UNSUPPORTED cases. The second return value is not inspected.
 func TestBenchmarkPlannedPhaseStatus(t *testing.T) {
 	t.Parallel()
 	type scenario struct {
 		name, raw, wantStatus string
 	}
 	scenarios := []scenario{
 		{"ok", "status=OK\n", "OK"},
 		{"failed", "phase_error=fp16\n", "FAILED"},
 		{"unsupported", "cublasLt_profiles=unsupported\nphase_error=fp4\n", "UNSUPPORTED"},
 	}
 	for i := range scenarios {
 		sc := scenarios[i]
 		t.Run(sc.name, func(t *testing.T) {
 			status, _ := benchmarkPlannedPhaseStatus([]byte(sc.raw))
 			if status != sc.wantStatus {
 				t.Fatalf("status=%q want %q", status, sc.wantStatus)
 			}
 		})
 	}
 }
 // TestBenchmarkCalibrationThrottleReasonIgnoresPowerReasons compares a zero
 // "before" counter set against an "after" set with a single counter raised.
 // Power-related counters must yield an empty reason, while the two thermal
 // counters must yield "hw_thermal" and "sw_thermal" respectively.
 func TestBenchmarkCalibrationThrottleReasonIgnoresPowerReasons(t *testing.T) {
 	t.Parallel()
 	// reasonFor evaluates the throttle reason against an all-zero baseline.
 	reasonFor := func(after BenchmarkThrottleCounters) string {
 		return benchmarkCalibrationThrottleReason(BenchmarkThrottleCounters{}, after)
 	}
 	if reason := reasonFor(BenchmarkThrottleCounters{SWPowerCapUS: 1_000_000}); reason != "" {
 		t.Fatalf("sw_power_cap should be ignored, got %q", reason)
 	}
 	if reason := reasonFor(BenchmarkThrottleCounters{HWPowerBrakeSlowdownUS: 1_000_000}); reason != "" {
 		t.Fatalf("hw_power_brake should be ignored, got %q", reason)
 	}
 	if reason := reasonFor(BenchmarkThrottleCounters{HWThermalSlowdownUS: 1_000_000}); reason != "hw_thermal" {
 		t.Fatalf("hw_thermal mismatch: got %q", reason)
 	}
 	if reason := reasonFor(BenchmarkThrottleCounters{SWThermalSlowdownUS: 1_000_000}); reason != "sw_thermal" {
 		t.Fatalf("sw_thermal mismatch: got %q", reason)
 	}
 }
 // TestResetBenchmarkGPUsSkipsWithoutRoot stubs the effective UID to 1000 and
 // makes the reset hook fail the test if called. It expects the "GPU reset
 // skipped" log line and every requested index (0 and 2) reported as failed.
 // The test swaps package-level hooks, so it does not call t.Parallel.
 func TestResetBenchmarkGPUsSkipsWithoutRoot(t *testing.T) {
 	savedGeteuid, savedReset := benchmarkGeteuid, benchmarkResetNvidiaGPU
 	t.Cleanup(func() {
 		benchmarkGeteuid = savedGeteuid
 		benchmarkResetNvidiaGPU = savedReset
 	})
 	benchmarkGeteuid = func() int { return 1000 }
 	benchmarkResetNvidiaGPU = func(int) (string, error) {
 		t.Fatal("unexpected reset call")
 		return "", nil
 	}
 	var captured []string
 	record := func(line string) { captured = append(captured, line) }
 	failed := resetBenchmarkGPUs(context.Background(), filepath.Join(t.TempDir(), "verbose.log"), []int{0, 2}, record)
 	const wantLog = "power benchmark pre-flight: root privileges unavailable, GPU reset skipped"
 	if joined := strings.Join(captured, "\n"); !strings.Contains(joined, wantLog) {
 		t.Fatalf("logs=%q want substring %q", joined, wantLog)
 	}
 	if len(failed) != 2 || failed[0] != 0 || failed[1] != 2 {
 		t.Fatalf("failed=%v want [0 2]", failed)
 	}
 }
 // TestResetBenchmarkGPUsResetsEachGPU stubs the effective UID to 0, makes sleep
 // a no-op and records which indices the reset hook receives. It expects no
 // failures and the hook to be called for 2 and then 5.
 // The test swaps package-level hooks, so it does not call t.Parallel.
 func TestResetBenchmarkGPUsResetsEachGPU(t *testing.T) {
 	savedGeteuid, savedSleep, savedReset := benchmarkGeteuid, benchmarkSleep, benchmarkResetNvidiaGPU
 	t.Cleanup(func() {
 		benchmarkGeteuid = savedGeteuid
 		benchmarkSleep = savedSleep
 		benchmarkResetNvidiaGPU = savedReset
 	})
 	var resetOrder []int
 	benchmarkGeteuid = func() int { return 0 }
 	benchmarkSleep = func(time.Duration) {}
 	benchmarkResetNvidiaGPU = func(index int) (string, error) {
 		resetOrder = append(resetOrder, index)
 		return "ok\n", nil
 	}
 	verboseLog := filepath.Join(t.TempDir(), "verbose.log")
 	failed := resetBenchmarkGPUs(context.Background(), verboseLog, []int{2, 5}, nil)
 	if len(failed) != 0 {
 		t.Fatalf("failed=%v want no failures", failed)
 	}
 	const want = "[2 5]"
 	if got := fmt.Sprint(resetOrder); got != want {
 		t.Fatalf("calls=%v want %s", resetOrder, want)
 	}
 }
 // TestResetBenchmarkGPUsTracksFailuresFromSharedReset stubs the reset hook so
 // that index 5 returns an error while other indices succeed, then expects only
 // index 5 in the returned failure list.
 // The test swaps package-level hooks, so it does not call t.Parallel.
 func TestResetBenchmarkGPUsTracksFailuresFromSharedReset(t *testing.T) {
 	savedGeteuid, savedSleep, savedReset := benchmarkGeteuid, benchmarkSleep, benchmarkResetNvidiaGPU
 	t.Cleanup(func() {
 		benchmarkGeteuid = savedGeteuid
 		benchmarkSleep = savedSleep
 		benchmarkResetNvidiaGPU = savedReset
 	})
 	benchmarkGeteuid = func() int { return 0 }
 	benchmarkSleep = func(time.Duration) {}
 	benchmarkResetNvidiaGPU = func(index int) (string, error) {
 		switch index {
 		case 5:
 			return "busy\n", exec.ErrNotFound
 		default:
 			return "ok\n", nil
 		}
 	}
 	verboseLog := filepath.Join(t.TempDir(), "verbose.log")
 	failed := resetBenchmarkGPUs(context.Background(), verboseLog, []int{2, 5}, nil)
 	const want = "[5]"
 	if got := fmt.Sprint(failed); got != want {
 		t.Fatalf("failed=%v want %s", failed, want)
 	}
 }
 func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	t.Parallel()
@@ -56,6 +277,59 @@ func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	}
 }
 // TestInitialBenchmarkCalibrationLimitW checks which starting power limit is
 // picked for calibration given a GPU's current, default and maximum power
 // limits, including the fallbacks used when some of those values are missing.
 func TestInitialBenchmarkCalibrationLimitW(t *testing.T) {
 	t.Parallel()
 	type scenario struct {
 		name string
 		info benchmarkGPUInfo
 		want int
 	}
 	scenarios := []scenario{
 		{
 			name: "prefers default tdp over current derated limit",
 			info: benchmarkGPUInfo{PowerLimitW: 500, DefaultPowerLimitW: 600, MaxPowerLimitW: 600},
 			want: 600,
 		},
 		{
 			name: "caps default tdp to reported max limit",
 			info: benchmarkGPUInfo{PowerLimitW: 500, DefaultPowerLimitW: 700, MaxPowerLimitW: 650},
 			want: 650,
 		},
 		{
 			name: "falls back to current limit when default missing",
 			info: benchmarkGPUInfo{PowerLimitW: 525, MaxPowerLimitW: 600},
 			want: 525,
 		},
 		{
 			name: "falls back to max limit when only that is known",
 			info: benchmarkGPUInfo{MaxPowerLimitW: 575},
 			want: 575,
 		},
 	}
 	for i := range scenarios {
 		// Copy per iteration so each subtest closure sees its own scenario.
 		sc := scenarios[i]
 		t.Run(sc.name, func(t *testing.T) {
 			got := initialBenchmarkCalibrationLimitW(sc.info)
 			if got == sc.want {
 				return
 			}
 			t.Fatalf("initialBenchmarkCalibrationLimitW(%+v)=%d want %d", sc.info, got, sc.want)
 		})
 	}
 }
 func TestParseBenchmarkBurnLog(t *testing.T) {
 	t.Parallel()
@@ -65,8 +339,10 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 		"[gpu 0] compute_capability=9.0",
 		"[gpu 0] backend=cublasLt",
 		"[gpu 0] duration_s=10",
 		"[gpu 0] int8_tensor[0]=READY dim=16384x16384x8192 block=128 stream=0",
 		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
 		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
 		"[gpu 0] int8_tensor_iterations=80",
 		"[gpu 0] fp16_tensor_iterations=200",
 		"[gpu 0] fp8_e4m3_iterations=50",
 		"[gpu 0] status=OK",
@@ -79,15 +355,24 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 	if got.ComputeCapability != "9.0" {
 		t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
 	}
-	if len(got.Profiles) != 2 {
+	if len(got.Profiles) != 3 {
-		t.Fatalf("profiles=%d want 2", len(got.Profiles))
+		t.Fatalf("profiles=%d want 3", len(got.Profiles))
 	}
 	if got.Profiles[0].TeraOpsPerSec <= 0 {
 		t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
 	}
 	if got.Profiles[0].Category != "fp16_bf16" {
 		t.Fatalf("profile[0] category=%q want fp16_bf16", got.Profiles[0].Category)
 	}
 	if got.Profiles[1].Category != "fp8" {
 		t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
 	}
 	if got.Profiles[2].Category != "int8" {
 		t.Fatalf("profile[2] category=%q want int8", got.Profiles[2].Category)
 	}
 	if got.Profiles[2].Weight != 0.25 {
 		t.Fatalf("profile[2] weight=%f want 0.25", got.Profiles[2].Weight)
 	}
 }
 func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
@@ -131,14 +416,25 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 				DegradationReasons: []string{"power_capped"},
 			},
 		},
 		Cooling: &BenchmarkCoolingSummary{
 			Available:             true,
 			AvgFanRPM:             9200,
 			FanDutyCycleAvailable: true,
 			AvgFanDutyCyclePct:    47.5,
 			P95FanDutyCyclePct:    62.0,
 		},
 	}
 	report := renderBenchmarkReport(result)
 	for _, needle := range []string{
 		"Executive Summary",
 		"GPU 0 spent measurable time under SW power cap.",
-		"Composite score: 1176.00",
+		"1176.00",
-		"fp16_tensor: 700.00 TOPS",
+		"fp16_tensor",
 		"700.00",
 		"Cooling",
 		"Average fan duty cycle",
 		"47.5%",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
@@ -146,34 +442,141 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 	}
 }
-func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
+func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) {
 	t.Parallel()
-	report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
+	report := renderBenchmarkReport(NvidiaBenchmarkResult{
 		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
 		OverallStatus:      "OK",
 		SelectedGPUIndices: []int{0},
 		Normalization: BenchmarkNormalization{
 			Status: "full",
 		},
 	}, []benchmarkReportChart{
 		{
 			Title:   "GPU 0 Steady State",
 			Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
 		},
 	})
 	for _, needle := range []string{
-		"Terminal Charts",
+		"gpu-metrics.csv",
-		"GPU 0 Steady State",
+		"gpu-metrics.html",
-		"GPU 0 chart",
+		"gpu-burn.log",
 		"42┤───",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
 		}
 	}
-	if strings.Contains(report, "\x1b[31m") {
+}
-		t.Fatalf("report should not contain ANSI escapes\n%s", report)
+
 func TestScoreBenchmarkGPUIgnoresDisabledPrecisions(t *testing.T) {
 	t.Parallel()
 	score := scoreBenchmarkGPUResult(BenchmarkGPUResult{
 		PrecisionSteady: []BenchmarkPrecisionSteadyPhase{
 			{Precision: "fp16", WeightedTeraOpsPerSec: 100},
 			{Precision: "fp64", WeightedTeraOpsPerSec: 999},
 			{Precision: "fp4", WeightedTeraOpsPerSec: 999},
 		},
 		PrecisionResults: []BenchmarkPrecisionResult{
 			{Category: "fp32_tf32", Supported: true, WeightedTeraOpsPerSec: 50},
 			{Category: "fp64", Supported: true, WeightedTeraOpsPerSec: 999},
 			{Category: "fp4", Supported: true, WeightedTeraOpsPerSec: 999},
 		},
 	})
 	if score.SyntheticScore != 100 {
 		t.Fatalf("SyntheticScore=%f want 100", score.SyntheticScore)
 	}
 	if score.MixedScore != 50 {
 		t.Fatalf("MixedScore=%f want 50", score.MixedScore)
 	}
 }
 // TestEnrichGPUInfoWithNvidiaSMIQ passes a two-GPU `nvidia-smi -q` style
 // fixture to enrichGPUInfoWithNvidiaSMIQ and checks that max clocks are filled
 // in for both GPUs (matched by bus ID) and that the four power limits are
 // filled in for GPU 0, the only GPU whose fixture section lists them.
 func TestEnrichGPUInfoWithNvidiaSMIQ(t *testing.T) {
 	t.Parallel()
 	nvsmiQ := []byte(`
 GPU 00000000:4E:00.0
    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
    Min Power Limit                       : 200.00 W
    Max Power Limit                       : 600.00 W
    Default Power Limit                   : 575.00 W
    Current Power Limit                   : 560.00 W
    Clocks
        Graphics                          : 2422 MHz
        Memory                            : 12481 MHz
    Max Clocks
        Graphics                          : 2430 MHz
        SM                                : 2430 MHz
        Memory                            : 12481 MHz
        Video                             : 2107 MHz
 GPU 00000000:4F:00.0
    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
    Max Clocks
        Graphics                          : 2430 MHz
        Memory                            : 12481 MHz
 `)
 	infoByIndex := map[int]benchmarkGPUInfo{
 		0: {Index: 0, BusID: "00000000:4E:00.0"},
 		1: {Index: 1, BusID: "00000000:4F:00.0"},
 	}
 	enrichGPUInfoWithNvidiaSMIQ(infoByIndex, nvsmiQ)
 	// Read the entries back only after enrichment: map values are copies.
 	gpu0, gpu1 := infoByIndex[0], infoByIndex[1]
 	if got := gpu0.MaxGraphicsClockMHz; got != 2430 {
 		t.Errorf("GPU 0 MaxGraphicsClockMHz = %v, want 2430", got)
 	}
 	if got := gpu0.MaxMemoryClockMHz; got != 12481 {
 		t.Errorf("GPU 0 MaxMemoryClockMHz = %v, want 12481", got)
 	}
 	if got := gpu1.MaxGraphicsClockMHz; got != 2430 {
 		t.Errorf("GPU 1 MaxGraphicsClockMHz = %v, want 2430", got)
 	}
 	if got := gpu1.MaxMemoryClockMHz; got != 12481 {
 		t.Errorf("GPU 1 MaxMemoryClockMHz = %v, want 12481", got)
 	}
 	if got := gpu0.MinPowerLimitW; got != 200 {
 		t.Errorf("GPU 0 MinPowerLimitW = %v, want 200", got)
 	}
 	if got := gpu0.MaxPowerLimitW; got != 600 {
 		t.Errorf("GPU 0 MaxPowerLimitW = %v, want 600", got)
 	}
 	if got := gpu0.DefaultPowerLimitW; got != 575 {
 		t.Errorf("GPU 0 DefaultPowerLimitW = %v, want 575", got)
 	}
 	if got := gpu0.PowerLimitW; got != 560 {
 		t.Errorf("GPU 0 PowerLimitW = %v, want 560", got)
 	}
 }
 // TestEnrichGPUInfoWithNvidiaSMIQSkipsPopulated checks that values already
 // present in the GPU info map survive enrichment even when the nvidia-smi
 // fixture reports different numbers for the same bus ID.
 func TestEnrichGPUInfoWithNvidiaSMIQSkipsPopulated(t *testing.T) {
 	t.Parallel()
 	nvsmiQ := []byte(`
 GPU 00000000:4E:00.0
    Min Power Limit                       : 100.00 W
    Max Power Limit                       : 900.00 W
    Max Clocks
        Graphics                          : 9999 MHz
        Memory                            : 9999 MHz
 `)
 	// Pre-filled entry: enrichment must leave these values untouched.
 	prefilled := benchmarkGPUInfo{
 		Index:               0,
 		BusID:               "00000000:4E:00.0",
 		MaxGraphicsClockMHz: 2430,
 		MaxMemoryClockMHz:   12481,
 		MinPowerLimitW:      200,
 		MaxPowerLimitW:      600,
 	}
 	infoByIndex := map[int]benchmarkGPUInfo{0: prefilled}
 	enrichGPUInfoWithNvidiaSMIQ(infoByIndex, nvsmiQ)
 	// Read the entry back only after enrichment: map values are copies.
 	after := infoByIndex[0]
 	if got := after.MaxGraphicsClockMHz; got != 2430 {
 		t.Errorf("expected existing value to be preserved, got %v", got)
 	}
 	if got := after.MinPowerLimitW; got != 200 {
 		t.Errorf("expected existing min power limit to be preserved, got %v", got)
 	}
 }
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -2,32 +2,192 @@ package platform
 import "time"
 // BenchmarkHostConfig holds static CPU and memory configuration captured at
 // benchmark start. Useful for correlating results across runs on different hardware.
 //
 // Every field is tagged omitempty, so a zero value (empty model string, zero
 // count, zero memory size) is dropped from the JSON output rather than being
 // emitted as 0 or "".
 type BenchmarkHostConfig struct {
 	CPUModel    string  `json:"cpu_model,omitempty"`
 	CPUSockets  int     `json:"cpu_sockets,omitempty"`
 	CPUCores    int     `json:"cpu_cores,omitempty"`
 	CPUThreads  int     `json:"cpu_threads,omitempty"`
 	MemTotalGiB float64 `json:"mem_total_gib,omitempty"`
 }
 // BenchmarkCPULoad summarises host CPU utilisation sampled during the GPU
 // steady-state phase. High or unstable CPU load during a GPU benchmark may
 // indicate a competing workload or a CPU-bound driver bottleneck.
 //
 // AvgPct, MaxPct and P95Pct are the average, maximum and 95th-percentile
 // utilisation percentages over Samples readings. All fields except Note are
 // always serialized, even when zero.
 type BenchmarkCPULoad struct {
 	AvgPct  float64 `json:"avg_pct"`
 	MaxPct  float64 `json:"max_pct"`
 	P95Pct  float64 `json:"p95_pct"`
 	Samples int     `json:"samples"`
 	// Status is "ok", "high", or "unstable".
 	Status string `json:"status"`
 	Note   string `json:"note,omitempty"`
 }
 // BenchmarkCoolingSummary captures fan telemetry averaged across the full
 // benchmark run.
 //
 // Available is always serialized; every other field is omitted from JSON when
 // zero. FanDutyCycleAvailable gates the two duty-cycle percentages (average
 // and 95th percentile).
 // NOTE(review): FanDutyCycleEstimated presumably marks duty-cycle values that
 // were derived rather than read directly; the producer is not visible in this
 // file, so confirm against the code that fills this struct.
 type BenchmarkCoolingSummary struct {
 	Available             bool     `json:"available"`
 	AvgFanRPM             float64  `json:"avg_fan_rpm,omitempty"`
 	FanDutyCycleAvailable bool     `json:"fan_duty_cycle_available,omitempty"`
 	FanDutyCycleEstimated bool     `json:"fan_duty_cycle_estimated,omitempty"`
 	AvgFanDutyCyclePct    float64  `json:"avg_fan_duty_cycle_pct,omitempty"`
 	P95FanDutyCyclePct    float64  `json:"p95_fan_duty_cycle_pct,omitempty"`
 	Notes                 []string `json:"notes,omitempty"`
 }
 // NVIDIA benchmark profile names. These are the values accepted by
 // `bee benchmark nvidia --profile` (standard|stability|overnight).
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
 	NvidiaBenchmarkProfileOvernight = "overnight"
 )
 // Identifiers for the load engine used by the power benchmark.
 // NOTE(review): the code that selects between these engines is not visible
 // in this file; presumably they map to a dcgmproftester load and a dcgmi
 // targeted_power run respectively — confirm against the caller.
 const (
 	BenchmarkPowerEngineDCGMProfTester = "dcgmproftester"
 	BenchmarkPowerEngineTargetedPower  = "targeted_power"
 )
 // Estimated wall-clock durations for benchmark runs, derived from real _v8 logs.
 // All values are in seconds.
 // Rule: when changing profile phase durations in resolveBenchmarkProfile(),
 // re-measure from actual task logs and update the constants here.
 //
 // Sources:
 //   - BenchmarkEstimatedPerfStandardSec:   MLT v8.22 ramp 1-4: 927 s; xFusion v8.22 parallel 8GPU: 1080 s
 //   - BenchmarkEstimatedPerfStabilitySec:  xFusion v8.22 ramp 1-8: 5532 s
 //   - BenchmarkEstimatedPerfOvernightSec:  derived from profile phases (SteadySec=27000)
 //   - BenchmarkEstimatedPowerStandardSec:  MLT v8.22 ramp 1-4: 2663 s; MSI v8.22 ramp 1-8: 2375 s
 //   - BenchmarkEstimatedPowerStabilitySec: target ~90 min with calibDurationSec=300 (8 GPU × ~2-3 attempts)
 //
 // NOTE(review): no source is listed for BenchmarkEstimatedPowerOvernightSec
 // (3 h); add the measurement or derivation when one is available.
 const (
 	// Performance Benchmark (bee-gpu-burn).
 	// Duration is per full ramp-up run (ramp 1→N) or per single parallel run.
 	// Sequential per-GPU mode scales approximately linearly.
 	BenchmarkEstimatedPerfStandardSec  = 960  // ~16 min; ramp-up 1-4: 927 s, parallel 8GPU: 1080 s
 	BenchmarkEstimatedPerfStabilitySec = 5532 // ~92 min; ramp-up 1-8 measured
 	BenchmarkEstimatedPerfOvernightSec = 8 * 3600
 	// Power / Thermal Fit (dcgmproftester load + nvidia-smi power-limit binary search).
 	// Duration is for the full ramp-up run; individual steps vary with convergence speed.
 	BenchmarkEstimatedPowerStandardSec  = 2600 // ~43 min; ramp 1-4: 2663 s, ramp 1-8: 2375 s
 	BenchmarkEstimatedPowerStabilitySec = 5400 // ~90 min; calibDurationSec=300 × 8 GPU × ~2-3 attempts
 	BenchmarkEstimatedPowerOvernightSec = 3 * 3600
 )
 // NvidiaBenchmarkOptions carries the caller-supplied settings for one NVIDIA
 // benchmark run. It has no JSON tags.
 //
 // GPUIndices and ExcludeGPUIndices select which GPUs take part; RunNCCL
 // toggles the NCCL portion of the run. The Ramp* fields identify a step
 // within a multi-step ramp-up run.
 // NOTE(review): Profile presumably holds one of the NvidiaBenchmarkProfile*
 // constants and ServerPowerSource one of the BenchmarkPowerSource* constants;
 // how empty values and the include/exclude lists are normalized is decided by
 // code outside this file — confirm there.
 type NvidiaBenchmarkOptions struct {
 	Profile           string
 	SizeMB            int
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	RunNCCL           bool
 	ServerPowerSource string
 	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
 	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
 	RampTotal         int    // total number of ramp-up steps in this run
 	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }
 // Identifiers for the server power reading source.
 // NOTE(review): presumably "dcmi" refers to `ipmitool dcmi power reading` and
 // "sdr_psu_input" to the per-PSU SDR input sensors described on
 // BenchmarkServerPower; confirm against the code that reads these values.
 const (
 	BenchmarkPowerSourceDCMI        = "dcmi"
 	BenchmarkPowerSourceSDRPSUInput = "sdr_psu_input"
 )
 // BenchmarkPowerAutotuneConfig is the JSON-serializable record of which server
 // power source was selected, together with the benchmark kind, profile and
 // sampling parameters it was selected under.
 //
 // Version, UpdatedAt and SelectedSource are always serialized; the remaining
 // fields are omitted from JSON when zero.
 // NOTE(review): where this config is persisted and how Version is used for
 // compatibility is not visible in this file.
 type BenchmarkPowerAutotuneConfig struct {
 	Version           int       `json:"version"`
 	UpdatedAt         time.Time `json:"updated_at"`
 	SelectedSource    string    `json:"selected_source"`
 	BenchmarkKind     string    `json:"benchmark_kind,omitempty"`
 	Profile           string    `json:"profile,omitempty"`
 	IdleDurationSec   int       `json:"idle_duration_sec,omitempty"`
 	LoadDurationSec   int       `json:"load_duration_sec,omitempty"`
 	SampleIntervalSec int       `json:"sample_interval_sec,omitempty"`
 	Confidence        float64   `json:"confidence,omitempty"`
 	Reason            string    `json:"reason,omitempty"`
 }
 // SystemPowerSourceDecision describes which server power source is in effect:
 // whether one has been configured, which source was selected, which source is
 // effectively used, and the mode and reason behind that outcome.
 //
 // NOTE(review): the omitempty option on ConfiguredAt has no effect with
 // encoding/json, because a time.Time struct value is never considered empty;
 // an unset timestamp is serialized as "0001-01-01T00:00:00Z". Use a pointer
 // or the omitzero option (Go 1.24+) if the field should really be dropped.
 type SystemPowerSourceDecision struct {
 	Configured      bool      `json:"configured"`
 	SelectedSource  string    `json:"selected_source,omitempty"`
 	EffectiveSource string    `json:"effective_source,omitempty"`
 	Mode            string    `json:"mode,omitempty"` // autotuned, fallback, degraded
 	Reason          string    `json:"reason,omitempty"`
 	ConfiguredAt    time.Time `json:"configured_at,omitempty"`
 }
 // BenchmarkPowerAutotuneResult is the JSON-serializable report of one power
 // source autotune run. It records the idle/load/sample timing parameters, the
 // idle validation outcome (or its error text), GPU power at idle and under
 // load, the evaluated candidate sources, and the resulting Config.
 //
 // GeneratedAt, Status and the three *Sec timing fields are always serialized;
 // the other fields are omitted from JSON when zero or nil.
 // NOTE(review): the set of Status values is defined by the producer, which is
 // not visible in this file.
 type BenchmarkPowerAutotuneResult struct {
 	GeneratedAt         time.Time                         `json:"generated_at"`
 	Hostname            string                            `json:"hostname,omitempty"`
 	ServerModel         string                            `json:"server_model,omitempty"`
 	BenchmarkKind       string                            `json:"benchmark_kind,omitempty"`
 	Profile             string                            `json:"profile,omitempty"`
 	Status              string                            `json:"status"`
 	IdleDurationSec     int                               `json:"idle_duration_sec"`
 	LoadDurationSec     int                               `json:"load_duration_sec"`
 	SampleIntervalSec   int                               `json:"sample_interval_sec"`
 	SelectedSource      string                            `json:"selected_source,omitempty"`
 	IdleValidationError string                            `json:"idle_validation_error,omitempty"`
 	IdleValidation      *BenchmarkPowerAutotuneValidation `json:"idle_validation,omitempty"`
 	GPUPowerIdleW       float64                           `json:"gpu_power_idle_w,omitempty"`
 	GPUPowerLoadW       float64                           `json:"gpu_power_load_w,omitempty"`
 	Candidates          []BenchmarkPowerAutotuneCandidate `json:"candidates,omitempty"`
 	Notes               []string                          `json:"notes,omitempty"`
 	Config              *BenchmarkPowerAutotuneConfig     `json:"config,omitempty"`
 }
 // BenchmarkPowerAutotuneValidation reports whether a validation window was
 // accepted (Valid) along with the average and 95th-percentile GPU and CPU
 // usage percentages, the number of samples behind them, and a Reason string.
 //
 // Only Valid is always serialized; the other fields are omitted when zero.
 // NOTE(review): the thresholds that decide Valid live outside this file.
 type BenchmarkPowerAutotuneValidation struct {
 	Valid          bool    `json:"valid"`
 	GPUAvgUsagePct float64 `json:"gpu_avg_usage_pct,omitempty"`
 	GPUP95UsagePct float64 `json:"gpu_p95_usage_pct,omitempty"`
 	CPUAvgUsagePct float64 `json:"cpu_avg_usage_pct,omitempty"`
 	CPUP95UsagePct float64 `json:"cpu_p95_usage_pct,omitempty"`
 	GPUSamples     int     `json:"gpu_samples,omitempty"`
 	CPUSamples     int     `json:"cpu_samples,omitempty"`
 	Reason         string  `json:"reason,omitempty"`
 }
 // BenchmarkPowerAutotuneCandidate holds the measurements for one candidate
 // power source considered during autotune: average power at idle and under
 // load, their delta, the sample count, and the error/confidence figures
 // attached to the candidate. Selected marks the chosen candidate.
 //
 // Source and Available are always serialized; the other fields are omitted
 // from JSON when zero.
 // NOTE(review): what RelativeError is measured against and how Confidence is
 // computed are not visible in this file.
 type BenchmarkPowerAutotuneCandidate struct {
 	Source         string  `json:"source"`
 	IdleAvgW       float64 `json:"idle_avg_w,omitempty"`
 	LoadAvgW       float64 `json:"load_avg_w,omitempty"`
 	DeltaW         float64 `json:"delta_w,omitempty"`
 	Samples        int     `json:"samples,omitempty"`
 	RelativeError  float64 `json:"relative_error,omitempty"`
 	Confidence     float64 `json:"confidence,omitempty"`
 	Selected       bool    `json:"selected,omitempty"`
 	Available      bool    `json:"available"`
 	SelectionNotes string  `json:"selection_notes,omitempty"`
 }
 type NvidiaBenchmarkResult struct {
-	BenchmarkVersion   string                       `json:"benchmark_version"`
+	BenchmarkVersion string    `json:"benchmark_version"`
-	GeneratedAt        time.Time                    `json:"generated_at"`
+	GeneratedAt      time.Time `json:"generated_at"`
-	Hostname           string                       `json:"hostname,omitempty"`
+	Hostname         string    `json:"hostname,omitempty"`
-	BenchmarkProfile   string                       `json:"benchmark_profile"`
+	ServerModel      string    `json:"server_model,omitempty"`
-	OverallStatus      string                       `json:"overall_status"`
+	BenchmarkProfile string    `json:"benchmark_profile"`
-	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
+	ParallelGPUs     bool      `json:"parallel_gpus,omitempty"`
-	Findings           []string                     `json:"findings,omitempty"`
+	RampStep         int       `json:"ramp_step,omitempty"`
-	Warnings           []string                     `json:"warnings,omitempty"`
+	RampTotal        int       `json:"ramp_total,omitempty"`
-	Normalization      BenchmarkNormalization       `json:"normalization"`
+	RampRunID        string    `json:"ramp_run_id,omitempty"`
-	GPUs               []BenchmarkGPUResult         `json:"gpus"`
+	ScalabilityScore float64   `json:"scalability_score,omitempty"`
-	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
+	// PlatformPowerScore is the mean compute scalability across ramp steps 2..N.
 	// 100% = each added GPU contributes exactly its single-card throughput.
 	// < 100% = throughput loss due to thermal throttle, power limits, or contention.
 	PlatformPowerScore   float64                      `json:"platform_power_score,omitempty"`
 	PerformanceRampSteps []NvidiaPerformanceRampStep  `json:"performance_ramp_steps,omitempty"`
 	OverallStatus        string                       `json:"overall_status"`
 	SelectedGPUIndices   []int                        `json:"selected_gpu_indices"`
 	Findings             []string                     `json:"findings,omitempty"`
 	Warnings             []string                     `json:"warnings,omitempty"`
 	Normalization        BenchmarkNormalization       `json:"normalization"`
 	HostConfig           *BenchmarkHostConfig         `json:"host_config,omitempty"`
 	CPULoad              *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
 	Cooling              *BenchmarkCoolingSummary     `json:"cooling,omitempty"`
 	GPUs                 []BenchmarkGPUResult         `json:"gpus"`
 	Interconnect         *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
 	ServerPower          *BenchmarkServerPower        `json:"server_power,omitempty"`
 	// PSUIssues holds power supply fault events detected by comparing IPMI PSU
 	// sensor states before and after the benchmark run. Empty when IPMI is
 	// unavailable or no PSU faults occurred during the test.
 	PSUIssues []string `json:"psu_issues,omitempty"`
 }
 type BenchmarkNormalization struct {
@@ -47,27 +207,51 @@ type BenchmarkNormalizationGPU struct {
 }
 type BenchmarkGPUResult struct {
-	Index                  int                        `json:"index"`
+	Index               int     `json:"index"`
-	UUID                   string                     `json:"uuid,omitempty"`
+	UUID                string  `json:"uuid,omitempty"`
-	Name                   string                     `json:"name,omitempty"`
+	Name                string  `json:"name,omitempty"`
-	BusID                  string                     `json:"bus_id,omitempty"`
+	BusID               string  `json:"bus_id,omitempty"`
-	VBIOS                  string                     `json:"vbios,omitempty"`
+	VBIOS               string  `json:"vbios,omitempty"`
-	ComputeCapability      string                     `json:"compute_capability,omitempty"`
+	ComputeCapability   string  `json:"compute_capability,omitempty"`
-	Backend                string                     `json:"backend,omitempty"`
+	Backend             string  `json:"backend,omitempty"`
-	Status                 string                     `json:"status"`
+	Status              string  `json:"status"`
-	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
+	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
-	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
+	PowerLimitDerated   bool    `json:"power_limit_derated,omitempty"`
-	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
+	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
-	LockedGraphicsClockMHz float64                    `json:"locked_graphics_clock_mhz,omitempty"`
+	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
-	LockedMemoryClockMHz   float64                    `json:"locked_memory_clock_mhz,omitempty"`
+	// ShutdownTempC is the hardware thermal shutdown threshold for this GPU,
-	Baseline               BenchmarkTelemetrySummary  `json:"baseline"`
+	// sourced from nvidia-smi -q ("GPU Shutdown Temp"). Fallback: 90°C.
-	Steady                 BenchmarkTelemetrySummary  `json:"steady"`
+	ShutdownTempC float64 `json:"shutdown_temp_c,omitempty"`
-	Cooldown               BenchmarkTelemetrySummary  `json:"cooldown"`
+	// SlowdownTempC is the software throttle onset threshold ("GPU Slowdown Temp").
-	Throttle               BenchmarkThrottleCounters  `json:"throttle_counters"`
+	// Fallback: 80°C.
-	PrecisionResults       []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
+	SlowdownTempC float64 `json:"slowdown_temp_c,omitempty"`
-	Scores                 BenchmarkScorecard         `json:"scores"`
+	// CalibratedPeakPowerW is the p95 power measured during a short
-	DegradationReasons     []string                   `json:"degradation_reasons,omitempty"`
+	// dcgmi targeted_power calibration run before the main benchmark.
-	Notes                  []string                   `json:"notes,omitempty"`
+	// Used as the reference denominator for PowerSustainScore instead of
 	// the hardware default limit, which bee-gpu-burn cannot reach.
 	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
 	CalibratedPeakTempC    float64                         `json:"calibrated_peak_temp_c,omitempty"`
 	PowerCalibrationTries  int                             `json:"power_calibration_tries,omitempty"`
 	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
 	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
 	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
 	LockedGraphicsClockMHz float64                         `json:"locked_graphics_clock_mhz,omitempty"`
 	LockedMemoryClockMHz   float64                         `json:"locked_memory_clock_mhz,omitempty"`
 	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
 	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
 	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
 	PrecisionFailures      []string                        `json:"precision_failures,omitempty"`
 	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
 	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
 	// ECC error delta accumulated over the full benchmark (all phases combined).
 	ECC                BenchmarkECCCounters       `json:"ecc,omitempty"`
 	PrecisionResults   []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
 	Scores             BenchmarkScorecard         `json:"scores"`
 	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
 	Notes              []string                   `json:"notes,omitempty"`
 	// CoolingWarning is non-empty when a thermal throttle event occurred with
 	// a clock drop ≥20% while server fans were not at 100% duty cycle.
 	CoolingWarning string `json:"cooling_warning,omitempty"`
 }
 type BenchmarkTelemetrySummary struct {
@@ -97,6 +281,18 @@ type BenchmarkThrottleCounters struct {
 	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
 }
 // BenchmarkECCCounters is a point-in-time sample of ECC error counts.
 // Corrected counts single-bit errors that ECC repaired (a sign of DRAM
 // degradation); Uncorrected counts double-bit errors that could not be
 // repaired (a serious fault). Both counters are volatile: they cover the
 // period since the last driver reset and are not persistent.
 type BenchmarkECCCounters struct {
 	Corrected   uint64 `json:"corrected"`
 	Uncorrected uint64 `json:"uncorrected"`
 }
 // Total returns the combined number of corrected and uncorrected errors.
 func (e BenchmarkECCCounters) Total() uint64 {
 	return e.Uncorrected + e.Corrected
 }
 // IsZero reports whether both counters are zero.
 func (e BenchmarkECCCounters) IsZero() bool {
 	return e == (BenchmarkECCCounters{})
 }
 type BenchmarkPrecisionResult struct {
 	Name          string  `json:"name"`
 	Category      string  `json:"category"`
@@ -107,16 +303,124 @@ type BenchmarkPrecisionResult struct {
 	K             uint64  `json:"k,omitempty"`
 	Iterations    uint64  `json:"iterations,omitempty"`
 	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
-	Notes         string  `json:"notes,omitempty"`
+	// Weight is the fp32-equivalence factor for this precision category.
 	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, int8/fp8 = 0.25, fp4 = 0.125.
 	// WeightedTOPS = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
 	Weight                float64 `json:"weight,omitempty"`
 	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
 	Notes                 string  `json:"notes,omitempty"`
 }
 type BenchmarkScorecard struct {
-	ComputeScore        float64 `json:"compute_score"`
+	ComputeScore float64 `json:"compute_score"`
 	// SyntheticScore is the sum of fp32-equivalent TOPS from per-precision
 	// steady phases (each precision ran alone, full GPU dedicated).
 	SyntheticScore float64 `json:"synthetic_score,omitempty"`
 	// MixedScore is the sum of fp32-equivalent TOPS from the combined phase
 	// (all precisions competing simultaneously — closer to real workloads).
 	MixedScore float64 `json:"mixed_score,omitempty"`
 	// MixedEfficiency = MixedScore / SyntheticScore. Measures how well the GPU
 	// sustains throughput under concurrent mixed-precision load.
 	MixedEfficiency     float64 `json:"mixed_efficiency,omitempty"`
 	PowerSustainScore   float64 `json:"power_sustain_score"`
 	ThermalSustainScore float64 `json:"thermal_sustain_score"`
-	StabilityScore      float64 `json:"stability_score"`
+	// StabilityScore: fraction of steady-state time the GPU spent throttling
-	InterconnectScore   float64 `json:"interconnect_score"`
+	// (thermal + power cap combined). 0% throttle = 100; 100% throttle = 0.
-	CompositeScore      float64 `json:"composite_score"`
+	StabilityScore float64 `json:"stability_score"`
 	// Throttle breakdown — percentage of steady-state time in each throttle type.
 	// Used for diagnosis: tells WHY the GPU throttled, not just whether it did.
 	ThermalThrottlePct   float64 `json:"thermal_throttle_pct"`   // HW+SW thermal slowdown
 	PowerCapThrottlePct  float64 `json:"power_cap_throttle_pct"` // SW power cap
 	SyncBoostThrottlePct float64 `json:"sync_boost_throttle_pct,omitempty"`
 	// Temperature headroom: distance to the 100°C destruction threshold.
 	// TempHeadroomC = 100 - P95TempC. < 20°C = warning; < 10°C = critical.
 	// Independent of throttle — a GPU at 86°C without throttle is still in the red zone.
 	TempHeadroomC float64 `json:"temp_headroom_c"`
 	InterconnectScore float64 `json:"interconnect_score"`
 	// ServerQualityScore (0–100) reflects server infrastructure quality independent
 	// of GPU model. Combines throttle time, power variance, and temp variance.
 	// Use this to compare servers with the same GPU, or to flag a bad server
 	// that throttles an otherwise fast GPU.
 	ServerQualityScore float64 `json:"server_quality_score"`
 	// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
 	// A throttling GPU will score lower here automatically — no quality multiplier.
 	CompositeScore float64 `json:"composite_score"`
 	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
 	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
 }
 // BenchmarkPSUSlotPower holds SDR power readings for one PSU slot sampled
 // during the benchmark. Slot keys match audit HardwarePowerSupply.Slot (0-based)
 // so benchmark and audit data can be correlated by slot.
 //
 // InputW and OutputW are pointers tagged omitempty: a nil pointer is dropped
 // from the JSON output, while a non-nil pointer is emitted even when the
 // value it points to is 0.
 type BenchmarkPSUSlotPower struct {
 	InputW  *float64 `json:"input_w,omitempty"`  // AC wall input (PSUx_POWER_IN)
 	OutputW *float64 `json:"output_w,omitempty"` // DC output (PSUx_POWER_OUT)
 	Status  string   `json:"status,omitempty"`
 }
 // BenchmarkServerPower captures server-side power from multiple independent
 // sources: IPMI DCMI (high-level), IPMI SDR per-PSU sensors (granular), and
 // GPU-reported power (nvidia-smi). Cross-comparing sources detects when DCMI
 // covers only a subset of installed PSUs (partial coverage).
 //
 // Source legend:
 //   - DCMI      — `ipmitool dcmi power reading`; fast but may miss PSUs
 //   - SDR       — `ipmitool sdr` PSUx_POWER_IN/OUT; per-PSU, reliable
 //   - nvidia-smi — GPU self-reported via internal shunt; accurate for GPU load
 //
 // Available is the only field that is always serialized; everything else is
 // omitted from JSON when zero, nil or empty.
 // NOTE(review): IdleW, LoadedW and DeltaW are annotated as DCMI readings.
 // Whether they still carry DCMI values when Source names a different power
 // source is not visible in this file — confirm against the producer.
 type BenchmarkServerPower struct {
 	Available         bool    `json:"available"`
 	Source            string  `json:"source,omitempty"`
 	Mode              string  `json:"mode,omitempty"`
 	Reason            string  `json:"reason,omitempty"`
 	SampleIntervalSec int     `json:"sample_interval_sec,omitempty"`
 	IdleW             float64 `json:"idle_w,omitempty"`   // DCMI at idle
 	LoadedW           float64 `json:"loaded_w,omitempty"` // DCMI at peak load
 	DeltaW            float64 `json:"delta_w,omitempty"`  // DCMI loaded − idle
 	GPUReportedSumW   float64 `json:"gpu_reported_sum_w,omitempty"`
 	ReportingRatio    float64 `json:"reporting_ratio,omitempty"`
 	// PSU AC input sum — sampled at idle and at peak load using collector's
 	// slot patterns (PSU1_POWER_IN, PSU1_PIN, PS1 POut, Power1…).
 	PSUInputIdleW   float64 `json:"psu_input_idle_w,omitempty"`
 	PSUInputLoadedW float64 `json:"psu_input_loaded_w,omitempty"`
 	// PSU DC output sum — power delivered to server internals after conversion.
 	PSUOutputIdleW   float64 `json:"psu_output_idle_w,omitempty"`
 	PSUOutputLoadedW float64 `json:"psu_output_loaded_w,omitempty"`
 	// Per-slot PSU readings at idle and at peak load.
 	// Keys are 0-based slot strings matching audit HardwarePowerSupply.Slot.
 	PSUSlotReadingsIdle   map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings_idle,omitempty"`
 	PSUSlotReadingsLoaded map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings_loaded,omitempty"`
 	// GPUSlotTotalW is the sum of GPU_POWER_SLOTx SDR sensors at peak load.
 	// PCIe slot delivery only (excludes 16-pin connector power).
 	GPUSlotTotalW float64 `json:"gpu_slot_total_w,omitempty"`
 	// DCMICoverageRatio = DCMI_idle / SDR_PSU_IN_idle.
 	// Near 1.0 → DCMI tracks all PSUs. Near 0.5 → DCMI tracks half the PSUs.
 	DCMICoverageRatio float64 `json:"dcmi_coverage_ratio,omitempty"`
 	Notes []string `json:"notes,omitempty"`
 }
 // BenchmarkPrecisionSteadyPhase holds per-precision-category telemetry collected
 // during a dedicated single-precision steady window.  Because only one kernel
 // type runs at a time the PowerCVPct here is a genuine stability signal.
 //
 // Precision and Steady are always serialized.
 // NOTE(review): the omitempty option on the struct-valued ECC field has no
 // effect with encoding/json, so an all-zero "ecc" object is still emitted.
 // BenchmarkECCCounters defines IsZero, so the omitzero option (Go 1.24+)
 // would drop it — only switch if JSON consumers tolerate the missing key.
 type BenchmarkPrecisionSteadyPhase struct {
 	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
 	Status                string                    `json:"status,omitempty"`
 	Steady                BenchmarkTelemetrySummary `json:"steady"`
 	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
 	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
 	// ECC errors accumulated during this precision phase only.
 	// Non-zero corrected = stress-induced DRAM errors for this kernel type.
 	// Any uncorrected = serious fault triggered by this precision workload.
 	ECC   BenchmarkECCCounters `json:"ecc,omitempty"`
 	Notes string               `json:"notes,omitempty"`
 }
 type BenchmarkInterconnectResult struct {
@@ -130,3 +434,103 @@ type BenchmarkInterconnectResult struct {
 	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
 	Notes              []string `json:"notes,omitempty"`
 }
 // NvidiaPowerBenchResult is the JSON-serialisable report of one NVIDIA power
 // benchmark run. It carries run metadata (benchmark version, generation time,
 // host, server model, profile), the GPU indices that were selected, the
 // per-GPU results (GPUs), the ramp steps (RampSteps), an overall status and
 // free-form findings, plus optional server-power and PSU observations.
 type NvidiaPowerBenchResult struct {
 	BenchmarkVersion     string                 `json:"benchmark_version"`
 	GeneratedAt          time.Time              `json:"generated_at"`
 	Hostname             string                 `json:"hostname,omitempty"`
 	ServerModel          string                 `json:"server_model,omitempty"`
 	BenchmarkProfile     string                 `json:"benchmark_profile"`
 	SelectedGPUIndices   []int                  `json:"selected_gpu_indices"`
 	RecommendedSlotOrder []int                  `json:"recommended_slot_order,omitempty"`
 	RampSteps            []NvidiaPowerBenchStep `json:"ramp_steps,omitempty"`
 	OverallStatus        string                 `json:"overall_status"`
 	// PlatformMaxTDPW is the sum of per-GPU stable power limits found during the
 	// cumulative thermal ramp. Represents the actual sustained power budget of
 	// this server under full GPU load. Use for rack power planning.
 	PlatformMaxTDPW float64 `json:"platform_max_tdp_w"`
 	// ServerPower captures IPMI server power delta (idle→loaded) measured in
 	// parallel with the thermal ramp. Use to compare GPU-reported TDP against
 	// actual wall-power draw as seen by the server's power supply.
 	ServerPower *BenchmarkServerPower `json:"server_power,omitempty"`
 	Findings    []string              `json:"findings,omitempty"`
 	GPUs        []NvidiaPowerBenchGPU `json:"gpus"`
 	// PSUIssues holds power supply fault events detected by comparing IPMI PSU
 	// sensor states before and after the power benchmark run. Empty when IPMI is
 	// unavailable or no PSU faults occurred during the test.
 	PSUIssues []string `json:"psu_issues,omitempty"`
 }
 // NvidiaPowerBenchGPU is the per-GPU entry of a power benchmark result. It
 // records the GPU's identity (index, name, bus ID), its power limits (the
 // default limit, the limit found during single-card calibration and the final
 // stable limit after the cumulative thermal ramp), the peak power and
 // temperature observed, and the server-power, telemetry and fan readings
 // captured for this GPU's single-card calibration.
 type NvidiaPowerBenchGPU struct {
 	Index              int     `json:"index"`
 	Name               string  `json:"name,omitempty"`
 	BusID              string  `json:"bus_id,omitempty"`
 	DefaultPowerLimitW float64 `json:"default_power_limit_w,omitempty"`
 	// AppliedPowerLimitW is the stable limit found during single-card calibration.
 	AppliedPowerLimitW float64 `json:"applied_power_limit_w,omitempty"`
 	// StablePowerLimitW is the final fixed limit for this GPU after the
 	// cumulative thermal ramp. This is the limit at which the GPU operated
 	// stably with all other GPUs running simultaneously at their own limits.
 	// May be lower than AppliedPowerLimitW if multi-GPU thermal load required
 	// additional derating.
 	StablePowerLimitW   float64  `json:"stable_power_limit_w,omitempty"`
 	MaxObservedPowerW   float64  `json:"max_observed_power_w,omitempty"`
 	MaxObservedTempC    float64  `json:"max_observed_temp_c,omitempty"`
 	CalibrationAttempts int      `json:"calibration_attempts,omitempty"`
 	Derated             bool     `json:"derated,omitempty"`
 	Status              string   `json:"status"`
 	Notes               []string `json:"notes,omitempty"`
 	// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
 	CoolingWarning string `json:"cooling_warning,omitempty"`
 	// ServerLoadedW is the IPMI server power reading captured during this
 	// GPU's single-card calibration run. ServerDeltaW = ServerLoadedW − idle.
 	ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
 	ServerDeltaW  float64 `json:"server_delta_w,omitempty"`
 	// Telemetry holds the aggregated stats from the final converged calibration
 	// attempt for this GPU (temperature, power, fan, clock percentiles).
 	Telemetry *BenchmarkTelemetrySummary `json:"telemetry,omitempty"`
 	// Fan state sampled at the end of single-card calibration.
 	AvgFanRPM          float64 `json:"avg_fan_rpm,omitempty"`
 	AvgFanDutyCyclePct float64 `json:"avg_fan_duty_cycle_pct,omitempty"`
 }
 // NvidiaPowerBenchStep describes one step of the power benchmark ramp. Each
 // step lists the GPUs involved (GPUIndices), names the GPU whose stable limit
 // was searched in the step (NewGPUIndex), and records the observed power
 // totals, step status, and the server-power, PSU, fan and per-GPU telemetry
 // readings taken for the step.
 type NvidiaPowerBenchStep struct {
 	StepIndex  int   `json:"step_index"`
 	GPUIndices []int `json:"gpu_indices"`
 	// NewGPUIndex is the GPU whose stable limit was searched in this step.
 	NewGPUIndex int `json:"new_gpu_index"`
 	// NewGPUStableLimitW is the stable power limit found for the new GPU.
 	NewGPUStableLimitW  float64  `json:"new_gpu_stable_limit_w,omitempty"`
 	TotalObservedPowerW float64  `json:"total_observed_power_w,omitempty"`
 	AvgObservedPowerW   float64  `json:"avg_observed_power_w,omitempty"`
 	Derated             bool     `json:"derated,omitempty"`
 	Status              string   `json:"status"`
 	Notes               []string `json:"notes,omitempty"`
 	// ServerLoadedW is the IPMI server power reading captured during this
 	// ramp step's calibration run. ServerDeltaW = ServerLoadedW − idle.
 	ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
 	ServerDeltaW  float64 `json:"server_delta_w,omitempty"`
 	// PSU slot readings sampled at end of this ramp step.
 	PSUSlotReadings map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings,omitempty"`
 	// Fan state at end of this ramp step.
 	AvgFanRPM          float64 `json:"avg_fan_rpm,omitempty"`
 	AvgFanDutyCyclePct float64 `json:"avg_fan_duty_cycle_pct,omitempty"`
 	// Per-GPU telemetry from this step's calibration, keyed by GPU index.
 	PerGPUTelemetry map[int]*BenchmarkTelemetrySummary `json:"per_gpu_telemetry,omitempty"`
 }
 // NvidiaPerformanceRampStep holds per-step performance data for the
 // scalability ramp-up phase of the performance benchmark.
 // Each step records which GPUs ran together (GPUIndices), their combined
 // throughput, and how that throughput compares with ideal linear scaling.
 type NvidiaPerformanceRampStep struct {
 	StepIndex  int   `json:"step_index"`
 	GPUIndices []int `json:"gpu_indices"`
 	// TotalSyntheticTOPS is the sum of per-GPU SyntheticScore (fp32-equivalent
 	// TOPS from dedicated single-precision phases) across all GPUs in this step.
 	TotalSyntheticTOPS float64 `json:"total_synthetic_tops"`
 	TotalMixedTOPS     float64 `json:"total_mixed_tops,omitempty"`
 	// ScalabilityPct = TotalSyntheticTOPS / (k × best_single_gpu_tops) × 100.
 	// k is presumably the number of GPUs in this step (len(GPUIndices)) — TODO confirm.
 	// 100% = perfect linear scaling. < 100% = thermal/power/interconnect loss.
 	ScalabilityPct float64  `json:"scalability_pct"`
 	Status         string   `json:"status"`
 	Notes          []string `json:"notes,omitempty"`
 }
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,14 +13,21 @@ import (
 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
-	ElapsedSec  float64 `json:"elapsed_sec"`
+	Stage                 string  `json:"stage,omitempty"`
-	GPUIndex    int     `json:"index"`
+	StageStartSec         float64 `json:"stage_start_sec,omitempty"`
-	TempC       float64 `json:"temp_c"`
+	StageEndSec           float64 `json:"stage_end_sec,omitempty"`
-	UsagePct    float64 `json:"usage_pct"`
+	ElapsedSec            float64 `json:"elapsed_sec"`
-	MemUsagePct float64 `json:"mem_usage_pct"`
+	GPUIndex              int     `json:"index"`
-	PowerW      float64 `json:"power_w"`
+	TempC                 float64 `json:"temp_c"`
-	ClockMHz    float64 `json:"clock_mhz"`
+	UsagePct              float64 `json:"usage_pct"`
-	MemClockMHz float64 `json:"mem_clock_mhz"`
+	MemUsagePct           float64 `json:"mem_usage_pct"`
 	PowerW                float64 `json:"power_w"`
 	ClockMHz              float64 `json:"clock_mhz"`
 	MemClockMHz           float64 `json:"mem_clock_mhz"`
 	FanAvgRPM             float64 `json:"fan_avg_rpm,omitempty"`
 	FanDutyCyclePct       float64 `json:"fan_duty_cycle_pct,omitempty"`
 	FanDutyCycleAvailable bool    `json:"fan_duty_cycle_available,omitempty"`
 	FanDutyCycleEstimated bool    `json:"fan_duty_cycle_estimated,omitempty"`
 }
 // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
@@ -141,14 +148,28 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
-	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available,fan_duty_cycle_estimated\n")
 	for _, r := range rows {
-		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
+		dutyAvail := 0
-			r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
+		if r.FanDutyCycleAvailable {
 			dutyAvail = 1
 		}
 		dutyEstimated := 0
 		if r.FanDutyCycleEstimated {
 			dutyEstimated = 1
 		}
 		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f,%.0f,%.1f,%d,%d\n",
 			strconv.Quote(strings.TrimSpace(r.Stage)), r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz, r.FanAvgRPM, r.FanDutyCyclePct, dutyAvail, dutyEstimated)
 	}
 	return os.WriteFile(path, b.Bytes(), 0644)
 }
 // gpuMetricStageSpan is a named interval on the chart's time axis, used to
 // shade and label a benchmark stage. Start and End are in seconds on the same
 // scale as GPUMetricRow.ElapsedSec; buildGPUMetricStageSpans guarantees
 // End > Start for every span it returns.
 type gpuMetricStageSpan struct {
 	Name  string
 	Start float64
 	End   float64
 }
 // WriteGPUMetricsHTML writes a standalone HTML file with one SVG chart per GPU.
 func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 	// Group by GPU index preserving order.
@@ -163,9 +184,25 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
 	}
 	stageSpans := buildGPUMetricStageSpans(rows)
 	stageColorByName := make(map[string]string, len(stageSpans))
 	for i, span := range stageSpans {
 		stageColorByName[span.Name] = gpuMetricStagePalette[i%len(gpuMetricStagePalette)]
 	}
 	var legend strings.Builder
 	if len(stageSpans) > 0 {
 		legend.WriteString(`<div class="stage-legend">`)
 		for _, span := range stageSpans {
 			fmt.Fprintf(&legend, `<span class="stage-chip"><span class="stage-swatch" style="background:%s"></span>%s</span>`,
 				stageColorByName[span.Name], gpuHTMLEscape(span.Name))
 		}
 		legend.WriteString(`</div>`)
 	}
 	var svgs strings.Builder
 	for _, gpuIdx := range order {
-		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx))
+		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx, stageSpans, stageColorByName))
 		svgs.WriteString("\n")
 	}
@@ -175,21 +212,39 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 <meta charset="utf-8">
 <title>GPU Stress Test Metrics</title>
 <style>
-body { font-family: sans-serif; background: #f0f0f0; margin: 0; padding: 20px; }
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6)}
-h1 { text-align: center; color: #333; margin: 0 0 8px; }
+*{box-sizing:border-box}
-p  { text-align: center; color: #888; font-size: 13px; margin: 0 0 24px; }
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);margin:0}
 .page{padding:24px}
 .card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);overflow:hidden}
 .card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px}
 .card-body{padding:16px}
 h1{font-size:22px;margin:0 0 6px}
 p{color:var(--muted);font-size:13px;margin:0 0 16px}
 .stage-legend{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 16px}
 .stage-chip{display:inline-flex;align-items:center;gap:8px;padding:4px 10px;border-radius:999px;background:var(--surface-2);border:1px solid var(--border-lite);font-size:12px}
 .stage-swatch{display:inline-block;width:12px;height:12px;border-radius:999px}
 .chart-block{margin-top:16px}
 </style>
 </head><body>
 <div class="page">
 <div class="card">
 <div class="card-head">GPU Stress Test Metrics</div>
 <div class="card-body">
 <h1>GPU Stress Test Metrics</h1>
 <p>Generated %s</p>
 %s
-</body></html>`, ts, svgs.String())
+<div class="chart-block">%s</div>
 </div>
 </div>
 </div>
 </body></html>`, ts, legend.String(), svgs.String())
 	return os.WriteFile(path, []byte(html), 0644)
 }
 // drawGPUChartSVG generates a self-contained SVG chart for one GPU.
-func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
+func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int, stageSpans []gpuMetricStageSpan, stageColorByName map[string]string) string {
 	// Layout
 	const W, H = 960, 520
 	const plotX1 = 120 // usage axis / chart left border
@@ -284,6 +339,23 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	}
 	b.WriteString("</g>\n")
 	// Stage backgrounds
 	for _, span := range stageSpans {
 		x1 := xv(span.Start)
 		x2 := xv(span.End)
 		if x2 < x1 {
 			x1, x2 = x2, x1
 		}
 		if x2-x1 < 1 {
 			x2 = x1 + 1
 		}
 		color := stageColorByName[span.Name]
 		fmt.Fprintf(&b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="%s" fill-opacity="0.18"/>`+"\n",
 			x1, plotY1, x2-x1, PH, color)
 		fmt.Fprintf(&b, `<text x="%.1f" y="%d" font-family="sans-serif" font-size="10" fill="#444" text-anchor="middle">%s</text>`+"\n",
 			x1+(x2-x1)/2, plotY1+12, gpuHTMLEscape(span.Name))
 	}
 	// Chart border
 	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
 		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
@@ -382,224 +454,6 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	return b.String()
 }
 // ANSI escape sequences that switch the terminal foreground colour; ansiReset
 // restores the default attributes. renderLineChart wraps every plot row in
 // one of these colours followed by ansiReset.
 const (
 	ansiRed    = "\033[31m"
 	ansiBlue   = "\033[34m"
 	ansiGreen  = "\033[32m"
 	ansiYellow = "\033[33m"
 	ansiReset  = "\033[0m"
 )
 // Dimensions RenderGPUTerminalChart passes to renderLineChart.
 // termChartWidth is the maximum number of plot columns (fewer are used when
 // the series has fewer samples); termChartHeight is the index of the bottom
 // plot row, so each chart is termChartHeight+1 rows tall.
 const (
 	termChartWidth  = 70
 	termChartHeight = 12
 )
 // RenderGPUTerminalChart renders, for every GPU present in rows, a heading
 // followed by four ANSI-coloured line charts (temperature, usage, power and
 // clock). GPUs appear in the order of their first sample. The result has no
 // trailing newlines. Used in SAT stress-test logs.
 func RenderGPUTerminalChart(rows []GPUMetricRow) string {
 	// Bucket samples per GPU while remembering first-seen order.
 	samplesByGPU := make(map[int][]GPUMetricRow)
 	var gpuOrder []int
 	for i := range rows {
 		idx := rows[i].GPUIndex
 		if _, known := samplesByGPU[idx]; !known {
 			gpuOrder = append(gpuOrder, idx)
 		}
 		samplesByGPU[idx] = append(samplesByGPU[idx], rows[i])
 	}
 	type chartSeries struct {
 		title string
 		ansi  string
 		pick  func(GPUMetricRow) float64
 	}
 	series := []chartSeries{
 		{title: "Temperature (°C)", ansi: ansiRed, pick: func(m GPUMetricRow) float64 { return m.TempC }},
 		{title: "GPU Usage (%)", ansi: ansiBlue, pick: func(m GPUMetricRow) float64 { return m.UsagePct }},
 		{title: "Power (W)", ansi: ansiGreen, pick: func(m GPUMetricRow) float64 { return m.PowerW }},
 		{title: "Clock (MHz)", ansi: ansiYellow, pick: func(m GPUMetricRow) float64 { return m.ClockMHz }},
 	}
 	var out strings.Builder
 	for _, idx := range gpuOrder {
 		samples := samplesByGPU[idx]
 		if len(samples) == 0 {
 			continue
 		}
 		first, last := samples[0], samples[len(samples)-1]
 		duration := last.ElapsedSec - first.ElapsedSec
 		fmt.Fprintf(&out, "GPU %d — Stress Test Metrics  (%.0f seconds)\n\n", idx, duration)
 		for _, s := range series {
 			values := extractGPUField(samples, s.pick)
 			chart := renderLineChart(values, s.ansi, s.title, termChartHeight, termChartWidth)
 			out.WriteString(chart)
 			out.WriteByte('\n')
 		}
 	}
 	return strings.TrimRight(out.String(), "\n")
 }
 // renderLineChart draws a single time-series line chart using box-drawing characters.
 // Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
 //
 // vals is the series to plot; it is resampled to at most width columns.
 // color is written before every plot row and each row is closed with ansiReset.
 // caption, when non-empty, is centred under the chart. height is the index of
 // the bottom row, so height+1 rows are drawn.
 //
 // When vals is empty, or the requested dimensions cannot hold a chart
 // (width <= 0 or height < 0), only the caption line is returned.
 func renderLineChart(vals []float64, color, caption string, height, width int) string {
 	if len(vals) == 0 || width <= 0 || height < 0 {
 		return caption + "\n"
 	}
 	mn, mx := gpuMinMax(vals)
 	if mn == mx {
 		// Flat series: widen the range so the scaling below never divides by zero.
 		mx = mn + 1
 	}
 	// Use the smaller of width or len(vals) to avoid stretching sparse data.
 	w := width
 	if len(vals) < w {
 		w = len(vals)
 	}
 	data := gpuDownsample(vals, w)
 	// row[i] = display row index: 0 = top = max value, height = bottom = min value.
 	row := make([]int, w)
 	for i, v := range data {
 		r := int(math.Round((mx - v) / (mx - mn) * float64(height)))
 		if r < 0 {
 			r = 0
 		}
 		if r > height {
 			r = height
 		}
 		row[i] = r
 	}
 	// Fill the character grid.
 	grid := make([][]rune, height+1)
 	for i := range grid {
 		grid[i] = make([]rune, w)
 		for j := range grid[i] {
 			grid[i][j] = ' '
 		}
 	}
 	for x := 0; x < w; x++ {
 		r := row[x]
 		if x == 0 {
 			grid[r][0] = '─'
 			continue
 		}
 		p := row[x-1]
 		switch {
 		case r == p:
 			grid[r][x] = '─'
 		case r < p: // value went up (row index decreased toward top)
 			grid[r][x] = '╭'
 			grid[p][x] = '╯'
 			for y := r + 1; y < p; y++ {
 				grid[y][x] = '│'
 			}
 		default: // r > p, value went down
 			grid[p][x] = '╮'
 			grid[r][x] = '╰'
 			for y := p + 1; y < r; y++ {
 				grid[y][x] = '│'
 			}
 		}
 	}
 	// Y axis tick labels.
 	ticks := gpuNiceTicks(mn, mx, height/2)
 	tickAtRow := make(map[int]string)
 	labelWidth := 4
 	for _, t := range ticks {
 		r := int(math.Round((mx - t) / (mx - mn) * float64(height)))
 		if r < 0 || r > height {
 			continue
 		}
 		s := gpuFormatTick(t)
 		tickAtRow[r] = s
 		if len(s) > labelWidth {
 			labelWidth = len(s)
 		}
 	}
 	var b strings.Builder
 	for r := 0; r <= height; r++ {
 		label := tickAtRow[r]
 		fmt.Fprintf(&b, "%*s", labelWidth, label)
 		switch {
 		case label != "":
 			b.WriteRune('┤')
 		case r == height:
 			b.WriteRune('┼')
 		default:
 			b.WriteRune('│')
 		}
 		b.WriteString(color)
 		b.WriteString(string(grid[r]))
 		b.WriteString(ansiReset)
 		b.WriteRune('\n')
 	}
 	// Bottom axis.
 	b.WriteString(strings.Repeat(" ", labelWidth))
 	b.WriteRune('└')
 	b.WriteString(strings.Repeat("─", w))
 	b.WriteRune('\n')
 	// Caption centered under the chart. Count runes, not bytes: captions such
 	// as "Temperature (°C)" contain multi-byte characters that occupy a single
 	// terminal column, and a byte count would shift them left.
 	if caption != "" {
 		total := labelWidth + 1 + w
 		if pad := (total - len([]rune(caption))) / 2; pad > 0 {
 			b.WriteString(strings.Repeat(" ", pad))
 		}
 		b.WriteString(caption)
 		b.WriteRune('\n')
 	}
 	return b.String()
 }
 // extractGPUField projects every row through fn and returns the resulting
 // values in row order. The result always has len(rows) elements.
 func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 {
 	values := make([]float64, 0, len(rows))
 	for idx := range rows {
 		values = append(values, fn(rows[idx]))
 	}
 	return values
 }
 // gpuDownsample resamples vals to exactly w points.
 //
 // When len(vals) >= w the samples are split into w contiguous buckets and each
 // output point is the mean of its bucket. When len(vals) < w each output point
 // copies the source sample at index i*(n-1)/(w-1) (integer division), so the
 // first and last samples are preserved. An empty vals yields w zeros.
 //
 // A non-positive w yields an empty slice; previously w == 0 with non-empty
 // input indexed bucket -1 and panicked, and a negative w panicked in make.
 func gpuDownsample(vals []float64, w int) []float64 {
 	if w <= 0 {
 		return []float64{}
 	}
 	n := len(vals)
 	result := make([]float64, w)
 	if n == 0 {
 		return result
 	}
 	if n >= w {
 		counts := make([]int, w)
 		for i, v := range vals {
 			// i < n, hence i*w/n < w: the bucket index is always in range.
 			bucket := i * w / n
 			result[bucket] += v
 			counts[bucket]++
 		}
 		for i, c := range counts {
 			if c > 0 {
 				result[i] /= float64(c)
 			}
 		}
 		return result
 	}
 	// Upsample. Here 1 <= n < w, so w-1 >= 1 and the division is safe; the
 	// source index never exceeds n-1 because i <= w-1.
 	for i := range result {
 		result[i] = vals[i*(n-1)/(w-1)]
 	}
 	return result
 }
 func gpuMinMax(vals []float64) (float64, float64) {
 	if len(vals) == 0 {
 		return 0, 1
@@ -644,3 +498,57 @@ func gpuFormatTick(v float64) string {
 	}
 	return strconv.FormatFloat(v, 'f', 1, 64)
 }
 // gpuMetricStagePalette lists the hex colours assigned to stage spans in the
 // HTML report. WriteGPUMetricsHTML picks entry i%len(palette) for the i-th
 // span, so colours repeat once there are more spans than entries.
 var gpuMetricStagePalette = []string{
 	"#d95c5c",
 	"#2185d0",
 	"#21ba45",
 	"#f2c037",
 	"#6435c9",
 	"#00b5ad",
 	"#a5673f",
 }
 // buildGPUMetricStageSpans collapses consecutive rows that share a stage name
 // into one span per run. Rows with a blank stage are labelled "run". A row
 // contributes its StageStartSec/StageEndSec when they form a positive interval
 // and its ElapsedSec otherwise. Every returned span satisfies End > Start:
 // degenerate spans are stretched to one second.
 func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
 	var spans []gpuMetricStageSpan
 	for i := range rows {
 		stage := strings.TrimSpace(rows[i].Stage)
 		if stage == "" {
 			stage = "run"
 		}
 		lo, hi := rows[i].StageStartSec, rows[i].StageEndSec
 		if hi <= lo {
 			lo, hi = rows[i].ElapsedSec, rows[i].ElapsedSec
 		}
 		last := len(spans) - 1
 		if last < 0 || spans[last].Name != stage {
 			// A new run of this stage begins here.
 			spans = append(spans, gpuMetricStageSpan{Name: stage, Start: lo, End: hi})
 			continue
 		}
 		// Same stage as the previous row: widen the open span.
 		open := &spans[last]
 		if lo < open.Start {
 			open.Start = lo
 		}
 		if hi > open.End {
 			open.End = hi
 		}
 	}
 	for i := range spans {
 		if sp := &spans[i]; sp.End <= sp.Start {
 			sp.End = sp.Start + 1
 		}
 	}
 	return spans
 }
 // gpuHTMLReplacer rewrites the five characters that are significant in HTML
 // text and attribute values into their entity forms.
 var gpuHTMLReplacer = strings.NewReplacer([]string{
 	"&", "&amp;",
 	"<", "&lt;",
 	">", "&gt;",
 	`"`, "&quot;",
 	"'", "&#39;",
 }...)
 // gpuHTMLEscape returns s with &, <, >, " and ' replaced by HTML entities so
 // it can be embedded safely in the generated report markup.
 func gpuHTMLEscape(s string) string {
 	escaped := gpuHTMLReplacer.Replace(s)
 	return escaped
 }
--- a/audit/internal/platform/gpu_metrics_test.go
+++ b/audit/internal/platform/gpu_metrics_test.go
@@ -0,0 +1,65 @@
 package platform
 import (
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 )
 // TestWriteGPUMetricsCSVIncludesStageColumn writes a single sample and checks
 // that the CSV header starts with the stage column and that the data line
 // carries the quoted stage name followed by the formatted metrics.
 func TestWriteGPUMetricsCSVIncludesStageColumn(t *testing.T) {
 	t.Parallel()
 	csvPath := filepath.Join(t.TempDir(), "gpu-metrics.csv")
 	sample := GPUMetricRow{Stage: "warmup", ElapsedSec: 1, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 80, PowerW: 420, ClockMHz: 1800, MemClockMHz: 1200}
 	if err := WriteGPUMetricsCSV(csvPath, []GPUMetricRow{sample}); err != nil {
 		t.Fatalf("WriteGPUMetricsCSV: %v", err)
 	}
 	data, err := os.ReadFile(csvPath)
 	if err != nil {
 		t.Fatalf("ReadFile: %v", err)
 	}
 	got := string(data)
 	wantFragments := []string{
 		"stage,elapsed_sec,gpu_index",
 		`"warmup",1.0,0,71.0,99.0,80.0,420.0,1800,1200`,
 	}
 	for _, want := range wantFragments {
 		if strings.Contains(got, want) {
 			continue
 		}
 		t.Fatalf("csv missing %q\n%s", want, got)
 	}
 }
 // TestWriteGPUMetricsHTMLShowsStageLegendAndLabels renders two stages for one
 // GPU and checks that the HTML contains the legend container, both stage
 // names and the page title.
 func TestWriteGPUMetricsHTMLShowsStageLegendAndLabels(t *testing.T) {
 	t.Parallel()
 	htmlPath := filepath.Join(t.TempDir(), "gpu-metrics.html")
 	samples := []GPUMetricRow{
 		{Stage: "baseline", ElapsedSec: 1, GPUIndex: 0, TempC: 50, UsagePct: 10, MemUsagePct: 5, PowerW: 100, ClockMHz: 500, MemClockMHz: 400},
 		{Stage: "baseline", ElapsedSec: 2, GPUIndex: 0, TempC: 51, UsagePct: 11, MemUsagePct: 5, PowerW: 101, ClockMHz: 510, MemClockMHz: 400},
 		{Stage: "steady-fp16", ElapsedSec: 3, GPUIndex: 0, TempC: 70, UsagePct: 98, MemUsagePct: 75, PowerW: 390, ClockMHz: 1700, MemClockMHz: 1100},
 		{Stage: "steady-fp16", ElapsedSec: 4, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 76, PowerW: 395, ClockMHz: 1710, MemClockMHz: 1110},
 	}
 	if err := WriteGPUMetricsHTML(htmlPath, samples); err != nil {
 		t.Fatalf("WriteGPUMetricsHTML: %v", err)
 	}
 	data, err := os.ReadFile(htmlPath)
 	if err != nil {
 		t.Fatalf("ReadFile: %v", err)
 	}
 	got := string(data)
 	wantFragments := []string{
 		"stage-legend",
 		"baseline",
 		"steady-fp16",
 		"GPU Stress Test Metrics",
 	}
 	for _, want := range wantFragments {
 		if strings.Contains(got, want) {
 			continue
 		}
 		t.Fatalf("html missing %q\n%s", want, got)
 	}
 }
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -11,12 +11,11 @@ import (
 	"strings"
 )
 // installToRAMDir is the directory the live-medium files are copied into by
 // the Copy to RAM workflow; the code treats it as a tmpfs location and globs
 // it for *.squashfs to detect an existing copy.
 const installToRAMDir = "/dev/shm/bee-live"
 // copyProgressLogStep is the minimum number of bytes (100 MiB) that must be
 // copied since the last progress line before shouldLogCopyProgress allows
 // another one.
 const copyProgressLogStep int64 = 100 * 1024 * 1024
 func (s *System) IsLiveMediaInRAM() bool {
-	fsType := mountFSType("/run/live/medium")
+	return s.LiveMediaRAMState().InRAM
 	if fsType == "" {
 		return toramActive()
 	}
 	return strings.EqualFold(fsType, "tmpfs")
 }
 func (s *System) LiveBootSource() LiveBootSource {
@@ -48,42 +47,164 @@ func (s *System) LiveBootSource() LiveBootSource {
 	return status
 }
-func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+func (s *System) LiveMediaRAMState() LiveMediaRAMState {
 	return evaluateLiveMediaRAMState(
 		s.LiveBootSource(),
 		toramActive(),
 		globPaths("/run/live/medium/live/*.squashfs"),
 		globPaths(filepath.Join(installToRAMDir, "*.squashfs")),
 	)
 }
 // evaluateLiveMediaRAMState classifies the Copy-to-RAM situation from already
 // collected facts, without touching the system itself.
 //
 // status is the current live boot source, toram reports whether the toram
 // boot parameter is active, sourceSquashfs lists the squashfs files on the
 // live medium and copiedSquashfs those found in the RAM copy directory.
 // When the medium is already served from RAM the state is "in_ram"; every
 // other outcome allows a copy to be started (CanStartCopy).
 func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
 	state := LiveMediaRAMState{
 		LiveBootSource: status,
 		ToramActive:    toram,
 		CopyPresent:    len(copiedSquashfs) > 0,
 	}
 	if status.InRAM {
 		state.CopyComplete = true
 		state.State = "in_ram"
 		state.Status = "ok"
 		state.Message = "Running from RAM — installation media can be safely disconnected."
 		return state
 	}
 	// Not in RAM: a (re)copy is always possible from here on.
 	state.CanStartCopy = true
 	wanted := pathBaseSet(sourceSquashfs)
 	present := pathBaseSet(copiedSquashfs)
 	// The copy is complete only if every source squashfs has a same-named copy.
 	state.CopyComplete = len(wanted) > 0 && setContainsAll(present, wanted)
 	if state.CopyComplete {
 		state.State = "partial"
 		state.Status = "partial"
 		state.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source."
 		return state
 	}
 	if state.CopyPresent {
 		state.State = "partial"
 		state.Status = "partial"
 		state.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled."
 		return state
 	}
 	if toram {
 		state.State = "toram_failed"
 		state.Status = "failed"
 		state.Message = "toram boot parameter is set but the live medium is not mounted from RAM."
 		return state
 	}
 	state.State = "not_in_ram"
 	state.Status = "warning"
 	state.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance."
 	return state
 }
 // globPaths returns the paths matching pattern. It is best-effort: a
 // malformed pattern is not reported, the caller simply gets whatever
 // filepath.Glob returned alongside the error.
 func globPaths(pattern string) []string {
 	found, err := filepath.Glob(pattern)
 	_ = err // deliberately ignored: callers treat "no matches" and "bad pattern" alike
 	return found
 }
 // pathBaseSet returns the set of base names (last path element, trimmed of
 // surrounding whitespace) of paths. Names that are empty after trimming are
 // left out; duplicates collapse into one entry.
 func pathBaseSet(paths []string) map[string]struct{} {
 	names := make(map[string]struct{}, len(paths))
 	for i := range paths {
 		name := strings.TrimSpace(filepath.Base(paths[i]))
 		if name == "" {
 			continue
 		}
 		names[name] = struct{}{}
 	}
 	return names
 }
 // setContainsAll reports whether every key of want is also a key of have.
 // An empty want is never considered satisfied and yields false.
 func setContainsAll(have, want map[string]struct{}) bool {
 	satisfied := len(want) > 0
 	for name := range want {
 		if _, found := have[name]; !found {
 			satisfied = false
 			break
 		}
 	}
 	return satisfied
 }
 func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
 	log := func(msg string) {
 		if logFunc != nil {
 			logFunc(msg)
 		}
 	}
-	if s.IsLiveMediaInRAM() {
+	state := s.LiveMediaRAMState()
 	if state.InRAM {
 		log("Already running from RAM — installation media can be safely disconnected.")
 		return nil
 	}
 	squashfsFiles, err := filepath.Glob("/run/live/medium/live/*.squashfs")
-	if err != nil || len(squashfsFiles) == 0 {
+	sourceAvailable := err == nil && len(squashfsFiles) > 0
 		return fmt.Errorf("no squashfs files found in /run/live/medium/live/")
 	}
-	free := freeMemBytes()
+	dstDir := installToRAMDir
-	var needed int64
+
-	for _, sf := range squashfsFiles {
+	// If the source medium is unavailable, check whether a previous run already
-		fi, err2 := os.Stat(sf)
+	// produced a complete copy in RAM. If so, skip the copy phase and proceed
-		if err2 != nil {
+	// directly to the loop-rebind / bind-mount steps.
-			return fmt.Errorf("stat %s: %v", sf, err2)
+	if !sourceAvailable {
 		copiedFiles, _ := filepath.Glob(filepath.Join(dstDir, "*.squashfs"))
 		if len(copiedFiles) > 0 {
 			log("Source medium not available, but a previous RAM copy was found — resuming from existing copy.")
 			// Proceed to rebind with the already-copied files.
 			for _, dst := range copiedFiles {
 				base := filepath.Base(dst)
 				// Re-associate the loop device that was originally backed by the
 				// source file (now gone); find it by the old source path pattern.
 				srcGuess := "/run/live/medium/live/" + base
 				loopDev, lerr := findLoopForFile(srcGuess)
 				if lerr != nil {
 					log(fmt.Sprintf("Loop device for %s not found (%v) — skipping re-association.", base, lerr))
 					continue
 				}
 				if rerr := reassociateLoopDevice(loopDev, dst); rerr != nil {
 					log(fmt.Sprintf("Warning: could not re-associate %s → %s: %v", loopDev, dst, rerr))
 				} else {
 					log(fmt.Sprintf("Loop device %s now backed by RAM copy.", loopDev))
 				}
 			}
 			goto bindMedium
 		}
-		needed += fi.Size()
+		return fmt.Errorf("no squashfs files found in /run/live/medium/live/ and no prior RAM copy in %s — reconnect the installation medium and retry", dstDir)
 	}
 	const headroom = 256 * 1024 * 1024
 	if free > 0 && needed+headroom > free {
 		return fmt.Errorf("insufficient RAM: need %s, available %s",
 			humanBytes(needed+headroom), humanBytes(free))
 	}
-	dstDir := "/dev/shm/bee-live"
+	{
 		free := freeMemBytes()
 		var needed int64
 		for _, sf := range squashfsFiles {
 			fi, err2 := os.Stat(sf)
 			if err2 != nil {
 				return fmt.Errorf("stat %s: %v", sf, err2)
 			}
 			needed += fi.Size()
 		}
 		const headroom = 256 * 1024 * 1024
 		if free > 0 && needed+headroom > free {
 			return fmt.Errorf("insufficient RAM: need %s, available %s",
 				humanBytes(needed+headroom), humanBytes(free))
 		}
 	}
 	if state.CopyPresent {
 		log("Removing stale partial RAM copy before retry...")
 	}
 	_ = os.RemoveAll(dstDir)
 	if err := os.MkdirAll(dstDir, 0755); err != nil {
 		return fmt.Errorf("create tmpfs dir: %v", err)
 	}
 	defer func() {
 		if retErr == nil {
 			return
 		}
 		_ = os.RemoveAll(dstDir)
 		log("Removed incomplete RAM copy.")
 	}()
 	for _, sf := range squashfsFiles {
 		if err := ctx.Err(); err != nil {
@@ -109,6 +230,7 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 		}
 	}
 bindMedium:
 	log("Copying remaining medium files...")
 	if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
 		log(fmt.Sprintf("Warning: partial copy: %v", err))
@@ -116,25 +238,47 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 	if err := ctx.Err(); err != nil {
 		return err
 	}
-	if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
+
-		log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
+	mediumRebound := false
 	if err := bindMount(dstDir, "/run/live/medium"); err != nil {
 		log(fmt.Sprintf("Warning: rebind /run/live/medium → %s failed: %v", dstDir, err))
 	} else {
 		mediumRebound = true
 	}
 	log("Verifying live medium now served from RAM...")
 	status := s.LiveBootSource()
-	if err := verifyInstallToRAMStatus(status); err != nil {
+	if err := verifyInstallToRAMStatus(status, dstDir, mediumRebound, log); err != nil {
 		return err
 	}
-	log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
+	if status.InRAM {
-	log("Done. Installation media can be safely disconnected.")
+		log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
 	}
 	log("Done. Squashfs files are in RAM. Installation media can be safely disconnected.")
 	return nil
 }
-func verifyInstallToRAMStatus(status LiveBootSource) error {
+func verifyInstallToRAMStatus(status LiveBootSource, dstDir string, mediumRebound bool, log func(string)) error {
 	if status.InRAM {
 		return nil
 	}
-	return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
+
 	// The live medium mount was not redirected to RAM. This is expected when
 	// booting from an ISO/CD-ROM: the squashfs loop device has a non-zero
 	// offset and LOOP_CHANGE_FD cannot be used; the bind mount also fails
 	// because the CD-ROM mount is in use. Check whether files were at least
 	// copied to the tmpfs directory — that is sufficient for safe disconnection
 	// once the kernel has paged in all actively-used data.
 	files, _ := filepath.Glob(filepath.Join(dstDir, "*.squashfs"))
 	if len(files) > 0 {
 		if !mediumRebound {
 			log(fmt.Sprintf("Note: squashfs copied to RAM (%s) but /run/live/medium still shows the original source.", dstDir))
 			log("This is normal for CD-ROM boots. For a fully transparent RAM boot, add 'toram' to the kernel parameters.")
 		}
 		return nil
 	}
 	return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s and no squashfs found in %s", describeLiveBootSource(status), dstDir)
 }
 func describeLiveBootSource(status LiveBootSource) string {
@@ -176,6 +320,7 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	defer out.Close()
 	total := fi.Size()
 	var copied int64
 	var lastLogged int64
 	buf := make([]byte, 4*1024*1024)
 	for {
 		if err := ctx.Err(); err != nil {
@@ -187,7 +332,8 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 				return werr
 			}
 			copied += int64(n)
-			if logFunc != nil && total > 0 {
+			if shouldLogCopyProgress(copied, total, lastLogged) {
 				lastLogged = copied
 				pct := int(float64(copied) / float64(total) * 100)
 				logFunc(fmt.Sprintf("  %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
 			}
@@ -202,6 +348,19 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	return out.Sync()
 }
 // shouldLogCopyProgress reports whether a progress line is due for a copy
 // that has written copied of total bytes, given that the previous progress
 // line was emitted at lastLogged bytes.
 //
 // Nothing is logged for an unknown/empty total or before any byte is copied.
 // Once the copy is complete a final line is due only if that position has not
 // been logged yet. Before completion a line is due each time at least
 // copyProgressLogStep bytes have been written since the last one.
 func shouldLogCopyProgress(copied, total, lastLogged int64) bool {
 	switch {
 	case total <= 0, copied <= 0:
 		return false
 	case copied >= total:
 		return lastLogged < copied
 	case copied < copyProgressLogStep:
 		return false
 	default:
 		return copied-lastLogged >= copyProgressLogStep
 	}
 }
 func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
 	return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
 		if ctx.Err() != nil {
@@ -247,7 +406,31 @@ func findLoopForFile(backingFile string) (string, error) {
 	return "", fmt.Errorf("no loop device found for %s", backingFile)
 }
 // loopDeviceOffset returns the byte offset configured for the loop device,
 // or -1 if it cannot be determined.
 //
 // The value is taken from the first entry of `losetup --json <loopDev>`.
 // The "offset" field is decoded through json.Number so that both a bare JSON
 // number and a quoted numeric string are accepted; decoding straight into an
 // int64 rejects the quoted form and would make every such device look
 // "unknown", silently disabling the non-zero-offset check in the caller.
 // A missing or null "offset" field is reported as 0, as before.
 func loopDeviceOffset(loopDev string) int64 {
 	out, err := exec.Command("losetup", "--json", loopDev).Output()
 	if err != nil {
 		return -1
 	}
 	var result struct {
 		Loopdevices []struct {
 			Offset json.Number `json:"offset"`
 		} `json:"loopdevices"`
 	}
 	if err := json.Unmarshal(out, &result); err != nil || len(result.Loopdevices) == 0 {
 		return -1
 	}
 	raw := result.Loopdevices[0].Offset
 	if raw == "" {
 		// Field absent or null: keep the zero value the int64 decoding produced.
 		return 0
 	}
 	off, err := raw.Int64()
 	if err != nil {
 		// Not an integer (e.g. fractional or exponent form): treat as unknown.
 		return -1
 	}
 	return off
 }
 func reassociateLoopDevice(loopDev, newFile string) error {
 	// LOOP_CHANGE_FD requires lo_offset == 0. ISO/CD-ROM loop devices are
 	// typically set up with a non-zero offset (squashfs lives inside the ISO),
 	// so the ioctl returns EINVAL. Detect this early for a clear error message.
 	if off := loopDeviceOffset(loopDev); off > 0 {
 		return fmt.Errorf("loop device has non-zero offset (%d bytes, typical for ISO/CD-ROM) — LOOP_CHANGE_FD not supported; use 'toram' kernel parameter for RAM boot", off)
 	}
 	if err := exec.Command("losetup", "--replace", loopDev, newFile).Run(); err == nil {
 		return nil
 	}
--- a/audit/internal/platform/install_to_ram_linux.go
+++ b/audit/internal/platform/install_to_ram_linux.go
@@ -26,3 +26,8 @@ func loopChangeFD(loopDev, newFile string) error {
 	}
 	return nil
 }
 // bindMount makes src visible at dst through an MS_BIND mount. The mount
 // syscall is issued directly instead of running an external mount command,
 // which avoids exec PATH issues.
 func bindMount(src, dst string) error {
 	const (
 		noFSType = ""
 		noData   = ""
 	)
 	var flags uintptr = syscall.MS_BIND
 	return syscall.Mount(src, dst, noFSType, flags, noData)
 }
--- a/audit/internal/platform/install_to_ram_other.go
+++ b/audit/internal/platform/install_to_ram_other.go
@@ -7,3 +7,7 @@ import "errors"
 // loopChangeFD is the stub used on platforms without LOOP_CHANGE_FD support.
 // It ignores both arguments and always returns an error.
 func loopChangeFD(loopDev, newFile string) error {
 	const reason = "LOOP_CHANGE_FD not available on this platform"
 	return errors.New(reason)
 }
 // bindMount is the stub used on platforms without bind-mount support.
 // It ignores both arguments and always returns an error.
 func bindMount(src, dst string) error {
 	const reason = "bind mount not available on this platform"
 	return errors.New(reason)
 }
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -33,14 +33,17 @@ func TestInferLiveBootKind(t *testing.T) {
 func TestVerifyInstallToRAMStatus(t *testing.T) {
 	t.Parallel()
-	if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
+	dstDir := t.TempDir()
 	if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}, dstDir, false, nil); err != nil {
 		t.Fatalf("expected success for RAM-backed status, got %v", err)
 	}
-	err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
+
 	err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}, dstDir, false, nil)
 	if err == nil {
 		t.Fatal("expected verification failure when media is still on USB")
 	}
-	if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)" {
+	if got := err.Error(); got != "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1) and no squashfs found in "+dstDir {
 		t.Fatalf("error=%q", got)
 	}
 }
@@ -55,3 +58,69 @@ func TestDescribeLiveBootSource(t *testing.T) {
 		t.Fatalf("got %q want /run/live/medium", got)
 	}
 }
 func TestEvaluateLiveMediaRAMState(t *testing.T) {
 	t.Parallel()
 	t.Run("in_ram", func(t *testing.T) {
 		state := evaluateLiveMediaRAMState(
 			LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"},
 			false,
 			nil,
 			nil,
 		)
 		if state.State != "in_ram" || state.Status != "ok" || state.CanStartCopy {
 			t.Fatalf("state=%+v", state)
 		}
 	})
 	t.Run("partial_copy_after_cancel", func(t *testing.T) {
 		state := evaluateLiveMediaRAMState(
 			LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
 			false,
 			[]string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"},
 			[]string{"/dev/shm/bee-live/filesystem.squashfs"},
 		)
 		if state.State != "partial" || state.Status != "partial" || !state.CanStartCopy {
 			t.Fatalf("state=%+v", state)
 		}
 		if state.CopyComplete {
 			t.Fatalf("CopyComplete=%v want false", state.CopyComplete)
 		}
 	})
 	t.Run("toram_failed", func(t *testing.T) {
 		state := evaluateLiveMediaRAMState(
 			LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
 			true,
 			nil,
 			nil,
 		)
 		if state.State != "toram_failed" || state.Status != "failed" || !state.CanStartCopy {
 			t.Fatalf("state=%+v", state)
 		}
 	})
 }
 // TestShouldLogCopyProgress walks a 250 MiB copy through its expected log
 // points: nothing just below the first 100 MiB, a line at 100 MiB, nothing
 // 16 MiB later, a line at 200 MiB, and a final line at completion.
 func TestShouldLogCopyProgress(t *testing.T) {
 	t.Parallel()
 	const (
 		mib   = int64(1024 * 1024)
 		total = 250 * mib
 		step  = 100 * mib
 	)
 	checks := []struct {
 		copied     int64
 		lastLogged int64
 		want       bool
 		failMsg    string
 	}{
 		{copied: step - 1, lastLogged: 0, want: false, failMsg: "progress logged too early"},
 		{copied: step, lastLogged: 0, want: true, failMsg: "expected log at first 100MB boundary"},
 		{copied: step + 16*mib, lastLogged: step, want: false, failMsg: "progress logged again before next 100MB"},
 		{copied: 2 * step, lastLogged: step, want: true, failMsg: "expected log at second 100MB boundary"},
 		{copied: total, lastLogged: 2 * step, want: true, failMsg: "expected final completion log"},
 	}
 	// Checks run in order and stop at the first mismatch.
 	for _, c := range checks {
 		if shouldLogCopyProgress(c.copied, total, c.lastLogged) != c.want {
 			t.Fatal(c.failMsg)
 		}
 	}
 }
--- a/audit/internal/platform/kill_workers.go
+++ b/audit/internal/platform/kill_workers.go
@@ -1,11 +1,14 @@
 package platform
 import (
 	"context"
 	"fmt"
 	"log/slog"
 	"os"
 	"strconv"
 	"strings"
 	"syscall"
 	"time"
 )
 // workerPatterns are substrings matched against /proc/<pid>/cmdline to identify
@@ -15,6 +18,7 @@ var workerPatterns = []string{
 	"stress-ng",
 	"stressapptest",
 	"memtester",
 	"nvbandwidth",
 	// DCGM diagnostic workers — nvvs is spawned by dcgmi diag and survives
 	// if dcgmi is killed mid-run, leaving the GPU occupied (DCGM_ST_IN_USE).
 	"nvvs",
@@ -30,7 +34,12 @@ type KilledProcess struct {
 // KillTestWorkers scans /proc for running test worker processes and sends
 // SIGKILL to each one found. It returns a list of killed processes.
 // Errors for individual processes (e.g. already exited) are silently ignored.
 // The scan runs under a 5-second deadline to avoid blocking if the process
 // table is very large (e.g. after a stress test with thousands of children).
 func KillTestWorkers() []KilledProcess {
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 	entries, err := os.ReadDir("/proc")
 	if err != nil {
 		return nil
@@ -38,6 +47,13 @@ func KillTestWorkers() []KilledProcess {
 	var killed []KilledProcess
 	for _, e := range entries {
 		select {
 		case <-ctx.Done():
 			slog.Warn("KillTestWorkers scan timed out", "killed_so_far", len(killed))
 			return killed
 		default:
 		}
 		if !e.IsDir() {
 			continue
 		}
@@ -56,13 +72,19 @@ func KillTestWorkers() []KilledProcess {
 		if idx := strings.LastIndexByte(exe, '/'); idx >= 0 {
 			base = exe[idx+1:]
 		}
-		for _, pat := range workerPatterns {
+		if shouldKillWorkerProcess(exe, base) {
-			if strings.Contains(base, pat) || strings.Contains(exe, pat) {
+			_ = syscall.Kill(pid, syscall.SIGKILL)
-				_ = syscall.Kill(pid, syscall.SIGKILL)
+			killed = append(killed, KilledProcess{PID: pid, Name: base})
 				killed = append(killed, KilledProcess{PID: pid, Name: base})
 				break
 			}
 		}
 	}
 	return killed
 }
 // shouldKillWorkerProcess reports whether a process should be treated as a
 // test worker: true when either its executable path (exe) or that path's
 // final element (base) contains any substring listed in workerPatterns.
 func shouldKillWorkerProcess(exe, base string) bool {
 	for i := range workerPatterns {
 		needle := workerPatterns[i]
 		if !strings.Contains(base, needle) && !strings.Contains(exe, needle) {
 			continue
 		}
 		return true
 	}
 	return false
 }
--- a/audit/internal/platform/kill_workers_test.go
+++ b/audit/internal/platform/kill_workers_test.go
@@ -0,0 +1,39 @@
 package platform
 import "testing"
 // TestShouldKillWorkerProcess verifies that known worker executables
 // (nvbandwidth, dcgmi) are selected for killing and that an unrelated
 // process (bash) is not.
 func TestShouldKillWorkerProcess(t *testing.T) {
 	type probe struct {
 		name string
 		exe  string
 		base string
 		want bool
 	}
 	probes := []probe{
 		{
 			name: "nvbandwidth executable",
 			exe:  "/usr/libexec/datacenter-gpu-manager-4/plugins/cuda13/nvbandwidth",
 			base: "nvbandwidth",
 			want: true,
 		},
 		{name: "dcgmi executable", exe: "/usr/bin/dcgmi", base: "dcgmi", want: true},
 		{name: "unrelated process", exe: "/usr/bin/bash", base: "bash", want: false},
 	}
 	for i := range probes {
 		p := probes[i]
 		t.Run(p.name, func(t *testing.T) {
 			got := shouldKillWorkerProcess(p.exe, p.base)
 			if got == p.want {
 				return
 			}
 			t.Fatalf("shouldKillWorkerProcess(%q, %q)=%v want %v", p.exe, p.base, got, p.want)
 		})
 	}
 }
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -1,8 +1,10 @@
 package platform
 import (
 	"bee/audit/internal/collector"
 	"bufio"
 	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
 	"sort"
@@ -14,13 +16,24 @@ import (
 // LiveMetricSample is a single point-in-time snapshot of server metrics
 // collected for the web UI metrics page.
 type LiveMetricSample struct {
-	Timestamp  time.Time      `json:"ts"`
+	Timestamp   time.Time      `json:"ts"`
-	Fans       []FanReading   `json:"fans"`
+	Fans        []FanReading   `json:"fans"`
-	Temps      []TempReading  `json:"temps"`
+	Temps       []TempReading  `json:"temps"`
-	PowerW     float64        `json:"power_w"`
+	PowerW      float64        `json:"power_w"`
-	CPULoadPct float64        `json:"cpu_load_pct"`
+	PowerSource string         `json:"power_source,omitempty"`
-	MemLoadPct float64        `json:"mem_load_pct"`
+	PowerMode   string         `json:"power_mode,omitempty"`
-	GPUs       []GPUMetricRow `json:"gpus"`
+	PowerReason string         `json:"power_reason,omitempty"`
 	PSUs        []PSUReading   `json:"psus,omitempty"`
 	CPULoadPct  float64        `json:"cpu_load_pct"`
 	MemLoadPct  float64        `json:"mem_load_pct"`
 	GPUs        []GPUMetricRow `json:"gpus"`
 }
 // PSUReading is a per-slot power supply power reading.
 // samplePSUPower builds these from IPMI SDR data; it prefers the input
 // power and falls back to the output power when no positive input reading
 // is available, so PowerW is not always an input-side value.
 type PSUReading struct {
 	// Slot is the numeric SDR slot key plus one.
 	Slot   int     `json:"slot"`
 	// Name is the display label, formatted as "PSU<Slot>".
 	Name   string  `json:"name"`
 	// PowerW is the reading in watts; samplePSUPower only emits values > 0.
 	PowerW float64 `json:"power_w"`
 }
 // TempReading is a named temperature sensor value.
@@ -54,8 +67,17 @@ func SampleLiveMetrics() LiveMetricSample {
 		}
 	}
-	// System power — returns 0 if unavailable
+	// Per-PSU power — populated when IPMI SDR has Power Supply entities with Watt readings
-	s.PowerW = sampleSystemPower()
+	s.PSUs = samplePSUPower()
 	// System power: use the global autotune-selected source when configured,
 	// otherwise fall back to the historical heuristic and mark the mode.
 	if powerW, decision, err := SampleSystemPowerResolved(""); err == nil {
 		s.PowerW = powerW
 		s.PowerSource = decision.EffectiveSource
 		s.PowerMode = decision.Mode
 		s.PowerReason = decision.Reason
 	}
 	// CPU load — from /proc/stat
 	s.CPULoadPct = sampleCPULoadPct()
@@ -326,3 +348,46 @@ func compactAmbientTempName(chip, name string) string {
 	}
 	return chip + " / " + name
 }
 // samplePSUPower collects one power reading per power supply from the output
 // of `ipmitool sdr`, parsed by collector.PSUSlotsFromSDR (name-based
 // matching, so it does not rely on PSU sensors carrying entity ID "10.N").
 //
 // Readings are ordered by numeric slot key; keys that are not integers are
 // dropped. For each slot the input power is used when it is positive,
 // otherwise the output power; slots with neither are omitted. The reported
 // slot number is the key plus one.
 //
 // Returns nil when ipmitool fails or prints nothing, when no PSU slots are
 // parsed, or when no slot has a positive reading.
 func samplePSUPower() []PSUReading {
 	sdr, err := exec.Command("ipmitool", "sdr").Output()
 	if err != nil || len(sdr) == 0 {
 		return nil
 	}
 	bySlot := collector.PSUSlotsFromSDR(string(sdr))
 	if len(bySlot) == 0 {
 		return nil
 	}
 	// Map iteration order is random, so sort the numeric keys for stable output.
 	var order []int
 	for key := range bySlot {
 		if n, convErr := strconv.Atoi(key); convErr == nil {
 			order = append(order, n)
 		}
 	}
 	sort.Ints(order)
 	var readings []PSUReading
 	for _, n := range order {
 		slot := bySlot[strconv.Itoa(n)]
 		watts := 0.0
 		switch {
 		case slot.InputW != nil && *slot.InputW > 0:
 			// AC input power is preferred.
 			watts = *slot.InputW
 		case slot.OutputW != nil && *slot.OutputW > 0:
 			// Fall back to DC output power.
 			watts = *slot.OutputW
 		}
 		if watts <= 0 {
 			continue
 		}
 		number := n + 1
 		readings = append(readings, PSUReading{Slot: number, Name: fmt.Sprintf("PSU%d", number), PowerW: watts})
 	}
 	if len(readings) == 0 {
 		return nil
 	}
 	return readings
 }
--- a/audit/internal/platform/nvidia_recover.go
+++ b/audit/internal/platform/nvidia_recover.go
@@ -0,0 +1,51 @@
 package platform
 import (
 	"fmt"
 	"os/exec"
 	"strconv"
 	"strings"
 	"time"
 )
 const nvidiaRecoverHelper = "/usr/local/bin/bee-nvidia-recover"
 // runNvidiaRecover runs the bee-nvidia-recover helper through sudo with the
 // given arguments and returns its combined stdout/stderr together with the
 // command error.
 //
 // When systemd-run is found on PATH the helper is launched through it as a
 // oneshot unit named bee-nvidia-recover-<unix-nanoseconds>; otherwise the
 // helper is executed directly under sudo.
 func runNvidiaRecover(args ...string) (string, error) {
 	var sudoArgs []string
 	if _, lookErr := exec.LookPath("systemd-run"); lookErr == nil {
 		unitName := fmt.Sprintf("bee-nvidia-recover-%d", time.Now().UnixNano())
 		sudoArgs = append(sudoArgs,
 			"systemd-run",
 			"--quiet",
 			"--pipe",
 			"--wait",
 			"--collect",
 			"--service-type=oneshot",
 			"--unit", unitName,
 		)
 	}
 	// In both modes the command line ends with the helper path and its arguments.
 	sudoArgs = append(sudoArgs, nvidiaRecoverHelper)
 	sudoArgs = append(sudoArgs, args...)
 	output, runErr := exec.Command("sudo", sudoArgs...).CombinedOutput()
 	return string(output), runErr
 }
 // resetNvidiaGPU asks the recovery helper to reset the GPU with the given
 // index ("reset-gpu <index>") and returns the helper output and error.
 // A negative index is rejected without running anything. When the helper
 // succeeds but prints only whitespace, a default confirmation message is
 // returned instead.
 func resetNvidiaGPU(index int) (string, error) {
 	if index < 0 {
 		return "", fmt.Errorf("gpu index must be >= 0")
 	}
 	output, err := runNvidiaRecover("reset-gpu", strconv.Itoa(index))
 	if err != nil || strings.TrimSpace(output) != "" {
 		return output, err
 	}
 	return "GPU reset completed.\n", nil
 }
 // restartNvidiaDrivers asks the recovery helper to run "restart-drivers" and
 // returns the helper output and error. When the helper succeeds but prints
 // only whitespace, a default confirmation message is returned instead.
 func restartNvidiaDrivers() (string, error) {
 	output, err := runNvidiaRecover("restart-drivers")
 	if err != nil || strings.TrimSpace(output) != "" {
 		return output, err
 	}
 	return "NVIDIA drivers restarted.\n", nil
 }
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -49,6 +49,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"--seconds", strconv.Itoa(opts.DurationSec),
 			"--size-mb", strconv.Itoa(opts.SizeMB),
 		}
 		if opts.StaggerSeconds > 0 && len(selected) > 1 {
 			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
 		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
@@ -63,6 +66,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"bee-john-gpu-stress",
 			"--seconds", strconv.Itoa(opts.DurationSec),
 		}
 		if opts.StaggerSeconds > 0 && len(selected) > 1 {
 			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
 		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -161,13 +161,7 @@ func (s *System) RunPlatformStress(
 	}
 	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
-	// Pack tar.gz
+	return runDir, nil
 	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
 	if err := packPlatformDir(runDir, archivePath); err != nil {
 		return "", fmt.Errorf("pack archive: %w", err)
 	}
 	_ = os.RemoveAll(runDir)
 	return archivePath, nil
 }
 // collectPhase samples live metrics every second until ctx is done.
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -1,6 +1,7 @@
 package platform
 import (
 	"bufio"
 	"os"
 	"os/exec"
 	"strings"
@@ -27,6 +28,8 @@ var runtimeTrackedServices = []string{
 	"bee-audit",
 	"bee-web",
 	"bee-sshsetup",
 	"nvidia-dcgm",
 	"nvidia-fabricmanager",
 }
 func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, error) {
@@ -114,6 +117,8 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
 	}
 	s.collectGPURuntimeHealth(vendor, &health)
 	s.collectToRAMHealth(&health)
 	s.collectUSBExportHealth(&health)
 	if health.Status != "FAILED" && len(health.Issues) > 0 {
 		health.Status = "PARTIAL"
@@ -168,6 +173,99 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }
 // collectToRAMHealth records whether the live system is running from RAM.
 // It copies the status from LiveMediaRAMState into health.ToRAMStatus and,
 // for the "failed" and "partial" statuses, appends a warning-severity issue
 // carrying the state's message. Any other status (including "ok") adds no
 // issue.
 //
 // Status values: "ok" = fully in RAM, "warning" = not copied, "partial" =
 // stale or incomplete RAM copy exists but runtime still depends on the boot
 // medium, "failed" = toram was requested but medium is not in RAM.
 func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
 	ram := s.LiveMediaRAMState()
 	health.ToRAMStatus = ram.Status
 	issue := schema.RuntimeIssue{
 		Severity:    "warning",
 		Description: ram.Message,
 	}
 	switch ram.Status {
 	case "failed":
 		issue.Code = "toram_copy_failed"
 	case "partial":
 		issue.Code = "toram_copy_partial"
 	default:
 		// Nothing to report for the remaining statuses.
 		return
 	}
 	health.Issues = append(health.Issues, issue)
 }
 // collectUSBExportHealth stores in health.USBExportPath the mount point
 // reported by findUSBExportMount: the first writable USB-backed filesystem
 // suitable for log export, or "" when there is none.
 func (s *System) collectUSBExportHealth(health *schema.RuntimeHealth) {
 	exportMount := findUSBExportMount()
 	health.USBExportPath = exportMount
 }
 // findUSBExportMount returns the mount point of the first writable USB filesystem
 // found in /proc/mounts (vfat, exfat, ext2/3/4, ntfs, ntfs3, fuseblk) whose
 // backing block device has USB transport. Returns "" if none found or if
 // /proc/mounts cannot be opened.
 //
 // The returned path is decoded from the /proc/mounts representation, where
 // space, tab, newline and backslash are written as three-digit octal escapes
 // (for example "\040" for a space), so it can be used directly as a
 // filesystem path even when the volume label contains spaces.
 func findUSBExportMount() string {
 	f, err := os.Open("/proc/mounts")
 	if err != nil {
 		return ""
 	}
 	defer f.Close()
 	// fs types that are expected on USB export drives
 	exportFSTypes := map[string]bool{
 		"vfat":    true,
 		"exfat":   true,
 		"ext2":    true,
 		"ext3":    true,
 		"ext4":    true,
 		"ntfs":    true,
 		"ntfs3":   true,
 		"fuseblk": true,
 	}
 	scanner := bufio.NewScanner(f)
 	for scanner.Scan() {
 		// fields: device mountpoint fstype options dump pass
 		fields := strings.Fields(scanner.Text())
 		if len(fields) < 4 {
 			continue
 		}
 		device, mountPoint, fsType, options := fields[0], fields[1], fields[2], fields[3]
 		if !exportFSTypes[strings.ToLower(fsType)] {
 			continue
 		}
 		// Skip read-only mounts
 		opts := strings.Split(options, ",")
 		readOnly := false
 		for _, o := range opts {
 			if strings.TrimSpace(o) == "ro" {
 				readOnly = true
 				break
 			}
 		}
 		if readOnly {
 			continue
 		}
 		// Check USB transport via lsblk on the device (or its parent disk for partitions).
 		if !strings.HasPrefix(device, "/dev/") {
 			continue
 		}
 		checkDev := device
 		// lsblk only reports TRAN for the whole disk, not for partitions (e.g. /dev/sdc1).
 		// Strip trailing partition digits to get the parent disk name.
 		// NOTE(review): this assumes sdX-style names; devices whose disk name
 		// itself ends in a digit (nvme0n1p1, mmcblk0p1) are not reduced to
 		// their parent disk by this rule — confirm that is acceptable.
 		if trimmed := strings.TrimRight(device, "0123456789"); trimmed != device && len(trimmed) > len("/dev/") {
 			checkDev = trimmed
 		}
 		if blockDeviceTransport(checkDev) == "usb" {
 			return unescapeMountField(mountPoint)
 		}
 	}
 	return ""
 }
 // unescapeMountField decodes the backslash + three-octal-digit escapes used
 // in /proc/mounts fields (e.g. "\040" -> " "). Sequences that are not a
 // valid one-byte octal escape are copied through unchanged.
 func unescapeMountField(field string) string {
 	if !strings.Contains(field, `\`) {
 		return field
 	}
 	isOctal := func(c byte) bool { return c >= '0' && c <= '7' }
 	var b strings.Builder
 	b.Grow(len(field))
 	for i := 0; i < len(field); i++ {
 		c := field[i]
 		// The first digit is capped at 3 so the decoded value fits in one byte.
 		if c == '\\' && i+3 < len(field) &&
 			field[i+1] <= '3' && isOctal(field[i+1]) && isOctal(field[i+2]) && isOctal(field[i+3]) {
 			b.WriteByte((field[i+1]-'0')<<6 | (field[i+2]-'0')<<3 | (field[i+3] - '0'))
 			i += 3
 			continue
 		}
 		b.WriteByte(c)
 	}
 	return b.String()
 }
 func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
 	lsmodText := commandText("lsmod")
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -20,6 +20,54 @@ import (
 	"time"
 )
 // Estimated wall-clock durations for each SAT/validate test, derived from real
 // production logs in _benchmark/_v8/.
 //
 // Rule: whenever the commands, timeout parameters, or number of sub-jobs inside
 // the corresponding Run*Pack function change, re-measure the wall-clock duration
 // from actual task logs and update the matching constant here.
 //
 // Sources:
 //   - SATEstimatedCPUValidateSec:                 xFusion v8.6 — 62 s
 //   - SATEstimatedMemoryValidateSec:               xFusion v8.6 — 68 s
 //   - SATEstimatedNvidiaGPUValidateSec:            xFusion v8.6/v8.22 — 77–87 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
 //   - SATEstimatedNvidiaGPUStressSec:              xFusion v8.6/v8.22 — 444–448 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
 //   - SATEstimatedNvidiaTargetedStressSec:         xFusion v8.6/v8.22 — 347–348 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
 //   - SATEstimatedNvidiaTargetedPowerSec:          MSI v8.22 / xFusion v8.6 — 346–351 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
 //   - SATEstimatedNvidiaPulseTestSec:              xFusion v8.6 — 4 926 s / 8 GPU (all simultaneous)
 //   - SATEstimatedNvidiaInterconnectSec:           xFusion v8.6/v8.22 — 210–384 s / 8 GPU (all simultaneous)
 //   - SATEstimatedNvidiaBandwidthSec:              xFusion v8.6/v8.22 — 2 664–2 688 s / 8 GPU (all simultaneous)
 const (
 	// CPU stress: stress-ng 60 s + lscpu/sensors overhead.
 	SATEstimatedCPUValidateSec = 65
 	// CPU stress: stress-ng 1800 s (stress mode default).
 	SATEstimatedCPUStressSec = 1800
 	// RAM: memtester 256 MB / 1 pass.
 	SATEstimatedMemoryValidateSec = 70
 	// RAM: memtester 512 MB / 1 pass (extrapolated from validate timing, linear with size).
 	SATEstimatedMemoryStressSec = 140
 	// NVIDIA dcgmi diag Level 2 (medium), all GPUs simultaneously.
 	SATEstimatedNvidiaGPUValidateSec = 85
 	// NVIDIA dcgmi diag Level 3 (targeted stress), all GPUs simultaneously.
 	SATEstimatedNvidiaGPUStressSec = 450
 	// NVIDIA dcgmi targeted_stress 300 s + overhead, all GPUs simultaneously.
 	SATEstimatedNvidiaTargetedStressSec = 350
 	// NVIDIA dcgmi targeted_power 300 s + overhead, all GPUs simultaneously.
 	SATEstimatedNvidiaTargetedPowerSec = 350
 	// NVIDIA dcgmi pulse_test, all GPUs simultaneously (not per-GPU).
 	SATEstimatedNvidiaPulseTestSec = 5000
 	// NCCL all_reduce_perf, all GPUs simultaneously.
 	SATEstimatedNvidiaInterconnectSec = 300
 	// nvbandwidth, all GPUs simultaneously. Tool runs all built-in tests
 	// without a user-configurable time limit; duration is determined by nvbandwidth itself.
 	SATEstimatedNvidiaBandwidthSec = 2700
 )
 var (
 	satExecCommand  = exec.Command
 	satLookPath     = exec.LookPath
@@ -88,6 +136,37 @@ type NvidiaGPU struct {
 	MemoryMB int    `json:"memory_mb"`
 }
 // NvidiaGPUStatus is one row of the per-GPU status list returned by
 // ListNvidiaGPUStatuses, built from a single nvidia-smi CSV line.
 type NvidiaGPUStatus struct {
 	// Index is the GPU index from the first CSV column (0 on parse failure).
 	Index        int    `json:"index"`
 	// Name is the trimmed second CSV column.
 	Name         string `json:"name"`
 	// BDF is the PCI bus ID after normalizeNvidiaBusID.
 	BDF          string `json:"bdf,omitempty"`
 	// Serial is the trimmed fourth CSV column.
 	Serial       string `json:"serial,omitempty"`
 	// Status is "OK", "RESET_REQUIRED", or "UNKNOWN" for unparseable lines.
 	Status       string `json:"status"`
 	// RawLine is the trimmed nvidia-smi output line this entry came from.
 	RawLine      string `json:"raw_line,omitempty"`
 	// NeedsReset is true when the line contains "GPU requires reset"
 	// (matched case-insensitively).
 	NeedsReset   bool   `json:"needs_reset"`
 	// ParseFailure is true when the line had fewer than four fields or a
 	// non-integer index; only RawLine and Status are meaningful then.
 	ParseFailure bool   `json:"parse_failure,omitempty"`
 }
 // nvidiaGPUHealth is an internal per-GPU health record.
 // NOTE(review): its producer and consumers are outside this view; the field
 // names mirror NvidiaGPUStatus (index, name, reset flag, raw nvidia-smi
 // line, parse-failure flag) — presumably with the same meaning, confirm.
 type nvidiaGPUHealth struct {
 	Index        int
 	Name         string
 	NeedsReset   bool
 	RawLine      string
 	ParseFailure bool
 }
 // nvidiaGPUStatusFile accumulates the per-GPU outcome of an acceptance pack
 // run. runAcceptancePackCtx keeps one instance per GPU index and marks it
 // Selected when that index appears in a job's gpuIndices.
 // NOTE(review): how the remaining fields are filled and where the record is
 // written is outside this view; the field comments below are inferred from
 // the names only — confirm.
 type nvidiaGPUStatusFile struct {
 	// Index is the GPU index this record belongs to.
 	Index      int
 	Name       string
 	// RunStatus / Reason: presumably the run verdict and its explanation.
 	RunStatus  string
 	Reason     string
 	// Health / HealthRaw: presumably a health verdict and the raw source line.
 	Health     string
 	HealthRaw  string
 	Observed   bool
 	// Selected is set when the GPU was targeted by at least one job.
 	Selected   bool
 	FailingJob string
 }
 // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
 type AMDGPUInfo struct {
 	Index int    `json:"index"`
@@ -269,12 +348,73 @@ func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
 	return gpus, nil
 }
-// RunNCCLTests runs nccl-tests all_reduce_perf across all NVIDIA GPUs.
 // ListNvidiaGPUStatuses queries nvidia-smi for every GPU and returns one
 // NvidiaGPUStatus per non-empty output line, sorted by Index.
 //
 // A line is reported as RESET_REQUIRED when it contains "GPU requires reset"
 // (case-insensitive) in any column, otherwise as OK. Lines with fewer than
 // four comma-separated fields or a non-integer index are kept as UNKNOWN
 // entries with ParseFailure set; their Index is 0, so they sort alongside
 // GPU 0. An error is returned only when the nvidia-smi command itself fails.
+func (s *System) ListNvidiaGPUStatuses() ([]NvidiaGPUStatus, error) {
 	out, err := satExecCommand(
 		"nvidia-smi",
 		"--query-gpu=index,name,pci.bus_id,serial,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
 		return nil, fmt.Errorf("nvidia-smi: %w", err)
 	}
 	var gpus []NvidiaGPUStatus
 	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
 		line = strings.TrimSpace(line)
 		if line == "" {
 			continue
 		}
 		// Only the first four columns are parsed; the rest are requested so
 		// that the reset marker can be found anywhere in the line.
 		parts := strings.Split(line, ",")
 		if len(parts) < 4 {
 			gpus = append(gpus, NvidiaGPUStatus{RawLine: line, Status: "UNKNOWN", ParseFailure: true})
 			continue
 		}
 		idx, err := strconv.Atoi(strings.TrimSpace(parts[0]))
 		if err != nil {
 			gpus = append(gpus, NvidiaGPUStatus{RawLine: line, Status: "UNKNOWN", ParseFailure: true})
 			continue
 		}
 		// Match the reset marker against the whole line, ignoring case.
 		upper := strings.ToUpper(line)
 		needsReset := strings.Contains(upper, "GPU REQUIRES RESET")
 		status := "OK"
 		if needsReset {
 			status = "RESET_REQUIRED"
 		}
 		gpus = append(gpus, NvidiaGPUStatus{
 			Index:      idx,
 			Name:       strings.TrimSpace(parts[1]),
 			BDF:        normalizeNvidiaBusID(strings.TrimSpace(parts[2])),
 			Serial:     strings.TrimSpace(parts[3]),
 			Status:     status,
 			RawLine:    line,
 			NeedsReset: needsReset,
 		})
 	}
 	sort.Slice(gpus, func(i, j int) bool { return gpus[i].Index < gpus[j].Index })
 	return gpus, nil
 }
 // normalizeNvidiaBusID lower-cases and trims a PCI bus ID and, when it has
 // the three-part "domain:bus:device.function" shape with a domain longer
 // than four characters, shortens the domain to its last four characters
 // (e.g. "00000000:3B:00.0" becomes "0000:3b:00.0"). Any other input is
 // returned lower-cased and trimmed but otherwise unchanged.
 func normalizeNvidiaBusID(v string) string {
 	id := strings.ToLower(v)
 	id = strings.TrimSpace(id)
 	// Exactly two colons means exactly three colon-separated parts.
 	if strings.Count(id, ":") != 2 {
 		return id
 	}
 	domainEnd := strings.IndexByte(id, ':')
 	if domainEnd <= 4 {
 		return id
 	}
 	// Drop everything before the last four characters of the domain.
 	return id[domainEnd-4:]
 }
 // ResetNvidiaGPU resets the NVIDIA GPU with the given index by delegating to
 // resetNvidiaGPU, and returns its output text and error unchanged.
 func (s *System) ResetNvidiaGPU(index int) (string, error) {
 	output, err := resetNvidiaGPU(index)
 	return output, err
 }
 // RunNCCLTests runs nccl-tests all_reduce_perf across the selected NVIDIA GPUs.
 // Measures collective communication bandwidth over NVLink/PCIe.
-func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+func (s *System) RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
-	// detect GPU count
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
-	out, _ := exec.Command("nvidia-smi", "--query-gpu=index", "--format=csv,noheader").Output()
+	if err != nil {
-	gpuCount := len(strings.Split(strings.TrimSpace(string(out)), "\n"))
+		return "", err
 	}
 	gpuCount := len(selected)
 	if gpuCount < 1 {
 		gpuCount = 1
 	}
@@ -283,18 +423,40 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
 		satJob{name: "02-all-reduce-perf.log", cmd: []string{
 			"all_reduce_perf", "-b", "512M", "-e", "4G", "-f", "2",
 			"-g", strconv.Itoa(gpuCount), "--iters", "20",
-		}},
+		}, env: nvidiaVisibleDevicesEnv(selected)},
 	), logFunc)
 }
-func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	selected, err := resolveDCGMGPUIndices(gpuIndices)
 	if err != nil {
 		return "", err
 	}
-	profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
+	var (
-	if err != nil {
+		profCmd []string
-		return "", err
+		profEnv []string
 	)
 	if len(selected) > 1 {
 		// For multiple GPUs, always spawn one dcgmproftester process per GPU via
 		// bee-dcgmproftester-staggered (stagger=0 means all start simultaneously).
 		// A single dcgmproftester process without -i only loads GPU 0 regardless
 		// of CUDA_VISIBLE_DEVICES.
 		stagger := staggerSec
 		if stagger < 0 {
 			stagger = 0
 		}
 		profCmd = []string{
 			"bee-dcgmproftester-staggered",
 			"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
 			"--stagger-seconds", strconv.Itoa(stagger),
 			"--devices", joinIndexList(selected),
 		}
 	} else {
 		profCmd, err = resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
 		if err != nil {
 			return "", err
 		}
 		profEnv = nvidiaVisibleDevicesEnv(selected)
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
@@ -302,7 +464,7 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 		satJob{
 			name:       "03-dcgmproftester.log",
 			cmd:        profCmd,
-			env:        nvidiaVisibleDevicesEnv(selected),
+			env:        profEnv,
 			collectGPU: true,
 			gpuIndices: selected,
 		},
@@ -315,6 +477,13 @@ func (s *System) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string,
 	if err != nil {
 		return "", err
 	}
 	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
 	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
 	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
 		for _, p := range killed {
 			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
 		}
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-power", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -332,6 +501,13 @@ func (s *System) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, dur
 	if err != nil {
 		return "", err
 	}
 	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
 	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
 	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
 		for _, p := range killed {
 			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
 		}
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-pulse", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -349,6 +525,13 @@ func (s *System) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpu
 	if err != nil {
 		return "", err
 	}
 	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
 	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
 	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
 		for _, p := range killed {
 			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
 		}
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-bandwidth", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -434,12 +617,26 @@ func memoryStressSizeArg() string {
 	return fmt.Sprintf("%dM", targetMB)
 }
-func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) {
-	sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
+	if sizeMB <= 0 {
-	passes := envInt("BEE_MEMTESTER_PASSES", 1)
+		sizeMB = 256
 	}
 	if passes <= 0 {
 		passes = 1
 	}
 	// Keep Validate Memory bounded to a quick diagnostic window. The timeout is
 	// intentionally conservative enough for healthy systems while avoiding the
 	// prior 30-80 minute hangs caused by memtester spinning on a bad subtest.
 	timeoutSec := sizeMB*passes*20/100 + 60
 	if timeoutSec < 180 {
 		timeoutSec = 180
 	}
 	if timeoutSec > 900 {
 		timeoutSec = 900
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
 		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
-		{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
+		{name: "02-memtester.log", cmd: []string{"timeout", fmt.Sprintf("%d", timeoutSec), "memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
 		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
 	}, logFunc)
 }
@@ -493,7 +690,7 @@ func (s *System) RunCPUAcceptancePack(ctx context.Context, baseDir string, durat
 	}, logFunc)
 }
-func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) {
 	if baseDir == "" {
 		baseDir = "/var/log/bee-sat"
 	}
@@ -525,7 +722,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, l
 			break
 		}
 		prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
-		commands := storageSATCommands(devPath)
+		commands := storageSATCommands(devPath, extended)
 		for cmdIndex, job := range commands {
 			if ctx.Err() != nil {
 				break
@@ -547,11 +744,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, l
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
 		return "", err
 	}
-	archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
+	return runDir, nil
 	if err := createTarGz(archive, runDir); err != nil {
 		return "", err
 	}
 	return archive, nil
 }
 type satJob struct {
@@ -604,7 +797,7 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
 		satJob{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
-		satJob{name: "04-dcgmi-diag.log", cmd: diagArgs},
+		satJob{name: "04-dcgmi-diag.log", cmd: diagArgs, gpuIndices: gpuIndices},
 	)
 }
@@ -652,11 +845,23 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 	var summary strings.Builder
 	stats := satStats{}
 	nvidiaPack := strings.HasPrefix(prefix, "gpu-nvidia")
 	perGPU := map[int]*nvidiaGPUStatusFile{}
 	selectedGPUIndices := map[int]struct{}{}
 	fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
 	for _, job := range jobs {
 		if ctx.Err() != nil {
 			break
 		}
 		for _, idx := range job.gpuIndices {
 			selectedGPUIndices[idx] = struct{}{}
 			status := perGPU[idx]
 			if status == nil {
 				status = &nvidiaGPUStatusFile{Index: idx}
 				perGPU[idx] = status
 			}
 			status.Selected = true
 		}
 		cmd := make([]string, 0, len(job.cmd))
 		for _, arg := range job.cmd {
 			cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
@@ -665,10 +870,37 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		var out []byte
 		var err error
-		if job.collectGPU {
+		if nvidiaPack && nvidiaJobNeedsHealthCheck(job) {
-			out, err = runSATCommandWithMetrics(ctx, verboseLog, job.name, cmd, job.env, job.gpuIndices, runDir, logFunc)
+			if msg, healthErr := checkNvidiaJobHealth(job.gpuIndices); healthErr != nil {
-		} else {
+				if logFunc != nil {
-			out, err = runSATCommandCtx(ctx, verboseLog, job.name, cmd, job.env, logFunc)
+					logFunc(msg)
 				}
 				out = []byte(msg + "\n")
 				err = healthErr
 			}
 		}
 		if err == nil {
 			if job.collectGPU {
 				out, err = runSATCommandWithMetrics(ctx, verboseLog, job.name, cmd, job.env, job.gpuIndices, runDir, logFunc)
 			} else {
 				out, err = runSATCommandCtx(ctx, verboseLog, job.name, cmd, job.env, logFunc)
 			}
 		}
 		if nvidiaPack && nvidiaJobNeedsHealthCheck(job) {
 			if msg, healthErr := checkNvidiaJobHealth(job.gpuIndices); healthErr != nil {
 				if logFunc != nil {
 					logFunc(msg)
 				}
 				if len(out) > 0 && !bytes.HasSuffix(out, []byte("\n")) {
 					out = append(out, '\n')
 				}
 				out = append(out, []byte(msg+"\n")...)
 				if err == nil {
 					err = healthErr
 				}
 			}
 		}
 		if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
@@ -679,6 +911,11 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		}
 		status, rc := classifySATResult(job.name, out, err)
 		stats.Add(status)
 		if nvidiaPack && len(job.gpuIndices) > 0 && nvidiaJobNeedsHealthCheck(job) {
 			for _, idx := range job.gpuIndices {
 				updateNvidiaGPUStatus(perGPU, idx, status, job.name, string(out))
 			}
 		}
 		key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
 		fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
 		fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
@@ -687,12 +924,204 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
 		return "", err
 	}
-
+	if nvidiaPack {
-	archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
+		if err := writeNvidiaGPUStatusFiles(runDir, stats.Overall(), perGPU, selectedGPUIndices); err != nil {
-	if err := createTarGz(archive, runDir); err != nil {
+			return "", err
-		return "", err
+		}
 	}
-	return archive, nil
+
 	return runDir, nil
 }
 // updateNvidiaGPUStatus folds the outcome of one job into the record kept for
 // the GPU at idx, creating that record on first use. The record is overwritten
 // whenever the new status is at least as severe as the stored one, so among
 // equally severe results the most recent job wins.
 func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
 	rec := perGPU[idx]
 	if rec == nil {
 		rec = &nvidiaGPUStatusFile{Index: idx}
 		perGPU[idx] = rec
 	}
 	if nvidiaSATStatusSeverity(status) < nvidiaSATStatusSeverity(rec.RunStatus) {
 		// A less severe result never masks a worse one already recorded.
 		return
 	}
 	rec.RunStatus = status
 	rec.FailingJob = jobName
 	rec.Reason = firstLine(detail)
 }
 // writeNvidiaGPUStatusFiles writes one gpu-<index>-status.txt file into runDir
 // for every GPU that has an entry in perGPU, is listed in selected, or is
 // reported by nvidia-smi when the function runs.
 //
 // perGPU holds the per-GPU results accumulated while jobs ran and is updated in
 // place. selected is the set of GPU indices that jobs targeted. overall is the
 // fallback run status for GPUs that have no status of their own. The first
 // file write error is returned.
 func writeNvidiaGPUStatusFiles(runDir, overall string, perGPU map[int]*nvidiaGPUStatusFile, selected map[int]struct{}) error {
 	// Health enrichment is best-effort: when nvidia-smi cannot be queried the
 	// files are still written, with health_status=UNKNOWN.
 	if health, err := readNvidiaGPUHealth(); err == nil {
 		for _, gpu := range health {
 			if gpu.ParseFailure {
 				// An unparseable line carries the zero index; using it would
 				// overwrite GPU 0's name and health with unrelated text.
 				continue
 			}
 			entry := perGPU[gpu.Index]
 			if entry == nil {
 				entry = &nvidiaGPUStatusFile{Index: gpu.Index}
 				perGPU[gpu.Index] = entry
 			}
 			entry.Name = gpu.Name
 			entry.Observed = true
 			entry.HealthRaw = gpu.RawLine
 			if !gpu.NeedsReset {
 				entry.Health = "OK"
 				continue
 			}
 			entry.Health = "RESET_REQUIRED"
 			// A GPU that needs a reset is a failure regardless of what the jobs
 			// reported; an existing reason is kept because it is more specific.
 			if entry.RunStatus == "" || nvidiaSATStatusSeverity("FAILED") >= nvidiaSATStatusSeverity(entry.RunStatus) {
 				entry.RunStatus = "FAILED"
 				if strings.TrimSpace(entry.Reason) == "" {
 					entry.Reason = "GPU requires reset"
 				}
 			}
 		}
 	}
 	for idx := range selected {
 		entry := perGPU[idx]
 		if entry == nil {
 			entry = &nvidiaGPUStatusFile{Index: idx}
 			perGPU[idx] = entry
 		}
 		entry.Selected = true
 	}
 	// Sort the indices so files are written in a deterministic order.
 	indices := make([]int, 0, len(perGPU))
 	for idx := range perGPU {
 		indices = append(indices, idx)
 	}
 	sort.Ints(indices)
 	for _, idx := range indices {
 		entry := perGPU[idx]
 		if entry.RunStatus == "" {
 			entry.RunStatus = overall
 		}
 		if entry.Health == "" {
 			entry.Health = "UNKNOWN"
 		}
 		if entry.Name == "" {
 			entry.Name = "Unknown GPU"
 		}
 		var body strings.Builder
 		fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
 		fmt.Fprintf(&body, "gpu_name=%s\n", entry.Name)
 		fmt.Fprintf(&body, "selected=%t\n", entry.Selected)
 		fmt.Fprintf(&body, "observed=%t\n", entry.Observed)
 		fmt.Fprintf(&body, "run_status=%s\n", entry.RunStatus)
 		fmt.Fprintf(&body, "health_status=%s\n", entry.Health)
 		// Optional keys are omitted entirely when they carry no information.
 		if strings.TrimSpace(entry.FailingJob) != "" {
 			fmt.Fprintf(&body, "failing_job=%s\n", entry.FailingJob)
 		}
 		if strings.TrimSpace(entry.Reason) != "" {
 			fmt.Fprintf(&body, "reason=%s\n", entry.Reason)
 		}
 		if strings.TrimSpace(entry.HealthRaw) != "" {
 			fmt.Fprintf(&body, "health_raw=%s\n", entry.HealthRaw)
 		}
 		if err := os.WriteFile(filepath.Join(runDir, fmt.Sprintf("gpu-%d-status.txt", idx)), []byte(body.String()), 0644); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 // nvidiaSATStatusSeverity ranks a SAT status string so results can be compared:
 // FAILED (3) > PARTIAL/UNSUPPORTED (2) > OK (1) > anything else (0).
 // Matching ignores case and surrounding whitespace.
 func nvidiaSATStatusSeverity(status string) int {
 	normalized := strings.ToUpper(strings.TrimSpace(status))
 	if normalized == "FAILED" {
 		return 3
 	}
 	if normalized == "PARTIAL" || normalized == "UNSUPPORTED" {
 		return 2
 	}
 	if normalized == "OK" {
 		return 1
 	}
 	return 0
 }
 // firstLine returns the first line of s with surrounding whitespace removed.
 // Leading blank lines are skipped because s is trimmed before it is split;
 // an empty or all-whitespace input yields "".
 func firstLine(s string) string {
 	head, _, _ := strings.Cut(strings.TrimSpace(s), "\n")
 	return strings.TrimSpace(head)
 }
 // nvidiaJobNeedsHealthCheck reports whether a job should be bracketed by GPU
 // health checks: either it collects GPU metrics, or its name mentions one of
 // the known GPU workload tools (case-insensitive substring match).
 func nvidiaJobNeedsHealthCheck(job satJob) bool {
 	if job.collectGPU {
 		return true
 	}
 	lowered := strings.ToLower(strings.TrimSpace(job.name))
 	for _, marker := range []string{"dcgmi", "gpu-burn", "gpu-stress", "dcgmproftester"} {
 		if strings.Contains(lowered, marker) {
 			return true
 		}
 	}
 	return false
 }
 // checkNvidiaJobHealth queries current GPU health and reports every GPU that
 // requires a reset. When selected is non-empty only those indices are
 // considered; otherwise all reported GPUs are. It returns a multi-line message
 // together with a non-nil error when at least one GPU is bad, and ("", nil)
 // when everything looks healthy.
 func checkNvidiaJobHealth(selected []int) (string, error) {
 	health, err := readNvidiaGPUHealth()
 	if err != nil {
 		// Best-effort: an unavailable nvidia-smi is not treated as a failure.
 		return "", nil
 	}
 	wanted := make(map[int]struct{}, len(selected))
 	for _, idx := range selected {
 		wanted[idx] = struct{}{}
 	}
 	var report []string
 	for _, gpu := range health {
 		if _, isWanted := wanted[gpu.Index]; len(wanted) > 0 && !isWanted {
 			continue
 		}
 		if !gpu.NeedsReset {
 			continue
 		}
 		report = append(report, fmt.Sprintf("gpu %d (%s) requires reset: %s", gpu.Index, gpu.Name, gpu.RawLine))
 	}
 	if len(report) == 0 {
 		return "", nil
 	}
 	message := "NVIDIA GPU health check failed:" + "\n" + strings.Join(report, "\n")
 	return message, errors.New("nvidia gpu requires reset")
 }
 // readNvidiaGPUHealth runs nvidia-smi in CSV query mode (through the
 // satExecCommand hook) and parses one health record per output line.
 // A command failure is returned wrapped with an "nvidia-smi" prefix.
 func readNvidiaGPUHealth() ([]nvidiaGPUHealth, error) {
 	query := satExecCommand(
 		"nvidia-smi",
 		"--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total",
 		"--format=csv,noheader,nounits",
 	)
 	csv, err := query.Output()
 	if err != nil {
 		return nil, fmt.Errorf("nvidia-smi: %w", err)
 	}
 	return parseNvidiaGPUHealth(string(csv)), nil
 }
 // parseNvidiaGPUHealth converts nvidia-smi CSV output into one record per
 // non-blank line. A line needs at least two comma-separated fields and an
 // integer first field; otherwise it is kept as a ParseFailure record holding
 // only the raw text. NeedsReset is set when the line contains
 // "GPU requires reset" in any letter case.
 func parseNvidiaGPUHealth(raw string) []nvidiaGPUHealth {
 	var records []nvidiaGPUHealth
 	for _, rawLine := range strings.Split(strings.TrimSpace(raw), "\n") {
 		row := strings.TrimSpace(rawLine)
 		if row == "" {
 			continue
 		}
 		var (
 			index  int
 			name   string
 			parsed bool
 		)
 		if fields := strings.Split(row, ","); len(fields) >= 2 {
 			if n, err := strconv.Atoi(strings.TrimSpace(fields[0])); err == nil {
 				index, name, parsed = n, strings.TrimSpace(fields[1]), true
 			}
 		}
 		if !parsed {
 			records = append(records, nvidiaGPUHealth{RawLine: row, ParseFailure: true})
 			continue
 		}
 		records = append(records, nvidiaGPUHealth{
 			Index:      index,
 			Name:       name,
 			NeedsReset: strings.Contains(strings.ToUpper(row), "GPU REQUIRES RESET"),
 			RawLine:    row,
 		})
 	}
 	return records
 }
 func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string, env []string, logFunc func(string)) ([]byte, error) {
@@ -749,17 +1178,25 @@ func listStorageDevices() ([]string, error) {
 	return parseStorageDevices(string(out)), nil
 }
-func storageSATCommands(devPath string) []satJob {
+func storageSATCommands(devPath string, extended bool) []satJob {
 	if strings.Contains(filepath.Base(devPath), "nvme") {
 		selfTestLevel := "1"
 		if extended {
 			selfTestLevel = "2"
 		}
 		return []satJob{
 			{name: "nvme-id-ctrl", cmd: []string{"nvme", "id-ctrl", devPath, "-o", "json"}},
 			{name: "nvme-smart-log", cmd: []string{"nvme", "smart-log", devPath, "-o", "json"}},
-			{name: "nvme-device-self-test", cmd: []string{"nvme", "device-self-test", devPath, "-s", "1", "--wait"}},
+			{name: "nvme-device-self-test", cmd: []string{"nvme", "device-self-test", devPath, "-s", selfTestLevel, "--wait"}},
 		}
 	}
 	smartTestType := "short"
 	if extended {
 		smartTestType = "long"
 	}
 	return []satJob{
 		{name: "smartctl-health", cmd: []string{"smartctl", "-H", "-A", devPath}},
-		{name: "smartctl-self-test-short", cmd: []string{"smartctl", "-t", "short", devPath}},
+		{name: "smartctl-self-test-short", cmd: []string{"smartctl", "-t", smartTestType, devPath}},
 	}
 }
@@ -818,6 +1255,11 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
 		// nvidia-smi on a machine with no NVIDIA GPU
 		strings.Contains(text, "couldn't communicate with the nvidia driver") ||
 		strings.Contains(text, "no nvidia gpu") ||
 		// Some NVMe firmwares start self-test but never expose progress to nvme-cli
 		// while waiting, so the CLI stops polling without proving device failure.
 		(strings.Contains(name, "self-test") &&
 			strings.Contains(text, "no progress for") &&
 			strings.Contains(text, "stop waiting")) ||
 		(strings.Contains(name, "self-test") && strings.Contains(text, "aborted")) {
 		return "UNSUPPORTED", rc
 	}
@@ -1022,8 +1464,6 @@ func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd
 	if len(metricRows) > 0 {
 		_ = WriteGPUMetricsCSV(filepath.Join(runDir, "gpu-metrics.csv"), metricRows)
 		_ = WriteGPUMetricsHTML(filepath.Join(runDir, "gpu-metrics.html"), metricRows)
 		chart := RenderGPUTerminalChart(metricRows)
 		_ = os.WriteFile(filepath.Join(runDir, "gpu-metrics-term.txt"), []byte(chart), 0644)
 	}
 	return out, err
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"math"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -20,7 +21,7 @@ type FanStressOptions struct {
 	Phase1DurSec int   // first load phase duration in seconds (default 300)
 	PauseSec     int   // pause between the two load phases (default 60)
 	Phase2DurSec int   // second load phase duration in seconds (default 300)
-	SizeMB       int   // GPU memory to allocate per GPU during stress (default 64)
+	SizeMB       int   // GPU memory to allocate per GPU during stress (0 = auto: 95% of VRAM)
 	GPUIndices   []int // which GPU indices to stress (empty = all detected)
 }
@@ -42,27 +43,56 @@ type GPUStressMetric struct {
 // FanStressRow is one second-interval telemetry sample covering all monitored dimensions.
 type FanStressRow struct {
-	TimestampUTC string
+	TimestampUTC   string
-	ElapsedSec   float64
+	ElapsedSec     float64
-	Phase        string // "baseline", "load1", "pause", "load2", "cooldown"
+	Phase          string // "baseline", "load1", "pause", "load2", "cooldown"
-	GPUs         []GPUStressMetric
+	GPUs           []GPUStressMetric
-	Fans         []FanReading
+	Fans           []FanReading
-	CPUMaxTempC  float64 // highest CPU temperature from ipmitool / sensors
+	CPUMaxTempC    float64 // highest CPU temperature from ipmitool / sensors
-	SysPowerW    float64 // DCMI system power reading
+	SysPowerW      float64
 	SysPowerSource string
 	SysPowerMode   string
 }
 // cachedPowerReading is the last positive system power sample together with
 // the source, mode, and reason that were reported alongside it.
 type cachedPowerReading struct {
 	Value     float64
 	Source    string
 	Mode      string
 	Reason    string
 	UpdatedAt time.Time
 }
 // fanObservationState is the per-fan maximum RPM table that is persisted as
 // JSON at fanObservationStatePath.
 type fanObservationState struct {
 	MaxRPM map[string]float64 `json:"max_rpm"`
 }
 // fanPeakCandidate tracks a fan reading above the stored maximum that has not
 // yet been held long enough to be accepted as the new maximum.
 type fanPeakCandidate struct {
 	FirstSeen time.Time
 	RPM       float64
 }
 // Package-level caches. systemPowerCacheMu guards systemPowerCache;
 // fanObservationMu guards fanObservation, fanObservationInit, and
 // fanPeakCandidates.
 var (
 	systemPowerCacheMu sync.Mutex
 	systemPowerCache   cachedPowerReading
 	fanObservationMu   sync.Mutex
 	fanObservation     fanObservationState
 	fanObservationInit bool
 	fanPeakCandidates  = make(map[string]fanPeakCandidate)
 )
 // systemPowerHoldTTL is how long a cached power value may stand in for a
 // missing (zero) reading.
 const systemPowerHoldTTL = 15 * time.Second
 // fanObservationStatePath is a variable rather than a constant so tests can
 // redirect persistence to a temporary directory.
 var fanObservationStatePath = "/var/log/bee-sat/fan-observation.json"
 // fanObservationMinPeakHold is the minimum time a higher RPM must persist
 // before it replaces the recorded maximum.
 const fanObservationMinPeakHold = time.Second
 // normalizeObservedFanMaxRPM rounds a positive RPM up to the next multiple of
 // 1000. Zero and negative inputs yield 0.
 func normalizeObservedFanMaxRPM(rpm float64) float64 {
 	const step = 1000.0
 	if rpm <= 0 {
 		return 0
 	}
 	steps := math.Ceil(rpm / step)
 	return steps * step
 }
 // RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
 // temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
 // Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
@@ -223,11 +253,7 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		return "", err
 	}
-	archive := filepath.Join(baseDir, "fan-stress-"+ts+".tar.gz")
+	return runDir, nil
 	if err := createTarGz(archive, runDir); err != nil {
 		return "", err
 	}
 	return archive, nil
 }
 func applyFanStressDefaults(opts *FanStressOptions) {
@@ -243,9 +269,8 @@ func applyFanStressDefaults(opts *FanStressOptions) {
 	if opts.Phase2DurSec <= 0 {
 		opts.Phase2DurSec = 300
 	}
-	if opts.SizeMB <= 0 {
+	// SizeMB == 0 means "auto" (worker picks 95% of GPU VRAM for maximum power draw).
-		opts.SizeMB = 64
+	// Leave at 0 to avoid passing a too-small size that starves the tensor-core path.
 	}
 }
 // sampleFanStressRow collects all metrics for one telemetry sample.
@@ -258,7 +283,7 @@ func sampleFanStressRow(gpuIndices []int, phase string, elapsed float64) FanStre
 	row.GPUs = sampleGPUStressMetrics(gpuIndices)
 	row.Fans, _ = sampleFanSpeeds()
 	row.CPUMaxTempC = sampleCPUMaxTemp()
-	row.SysPowerW = sampleSystemPower()
+	row.SysPowerW, row.SysPowerSource, row.SysPowerMode = sampleSystemPowerResolved()
 	return row
 }
@@ -315,11 +340,13 @@ func sampleFanSpeeds() ([]FanReading, error) {
 	out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
 	if err == nil {
 		if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
 			updateFanObservation(fans, time.Now())
 			return fans, nil
 		}
 	}
 	fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
 	if len(fans) > 0 {
 		updateFanObservation(fans, time.Now())
 		return fans, nil
 	}
 	if err != nil {
@@ -328,6 +355,119 @@ func sampleFanSpeeds() ([]FanReading, error) {
 	return nil, sensorsErr
 }
 // loadFanObservationLocked initialises the in-memory fan maximum table once,
 // seeding it from the JSON file at fanObservationStatePath when that file can
 // be read and decoded. Entries with a blank name or a non-positive RPM are
 // dropped. The "Locked" suffix indicates the caller holds fanObservationMu.
 func loadFanObservationLocked() {
 	if fanObservationInit {
 		return
 	}
 	// Mark as initialised up front so a missing or corrupt file is not retried.
 	fanObservationInit = true
 	fanObservation.MaxRPM = make(map[string]float64)
 	data, readErr := os.ReadFile(fanObservationStatePath)
 	if readErr != nil || len(data) == 0 {
 		return
 	}
 	var onDisk fanObservationState
 	if decodeErr := json.Unmarshal(data, &onDisk); decodeErr != nil {
 		return
 	}
 	for rawName, maxRPM := range onDisk.MaxRPM {
 		key := strings.TrimSpace(rawName)
 		if key == "" || maxRPM <= 0 {
 			continue
 		}
 		fanObservation.MaxRPM[key] = maxRPM
 	}
 }
 // saveFanObservationLocked persists the fan maximum table as indented JSON at
 // fanObservationStatePath, creating the parent directory when needed. It is
 // best-effort: an empty table is not written and every error is ignored.
 // The "Locked" suffix indicates the caller holds fanObservationMu.
 func saveFanObservationLocked() {
 	if len(fanObservation.MaxRPM) == 0 {
 		return
 	}
 	parent := filepath.Dir(fanObservationStatePath)
 	switch parent {
 	case "", ".":
 		parent = "/var/log/bee-sat"
 	}
 	if os.MkdirAll(parent, 0755) != nil {
 		return
 	}
 	encoded, err := json.MarshalIndent(fanObservation, "", "  ")
 	if err == nil {
 		// Persistence failure only costs the learned maxima, so it is ignored.
 		_ = os.WriteFile(fanObservationStatePath, encoded, 0644)
 	}
 }
 // updateFanObservation feeds one set of fan readings, taken at time now, into
 // the per-fan maximum RPM table. A reading above the stored maximum is not
 // accepted immediately: it becomes a candidate, and only a later reading that
 // is still above the maximum at least fanObservationMinPeakHold after the
 // candidate was first seen promotes it. The table is saved to disk whenever a
 // maximum changes. Readings with a blank name or non-positive RPM are ignored.
 func updateFanObservation(fans []FanReading, now time.Time) {
 	if len(fans) == 0 {
 		return
 	}
 	fanObservationMu.Lock()
 	defer fanObservationMu.Unlock()
 	loadFanObservationLocked()
 	changed := false
 	for _, fan := range fans {
 		name := strings.TrimSpace(fan.Name)
 		if name == "" || fan.RPM <= 0 {
 			continue
 		}
 		currentMax := fanObservation.MaxRPM[name]
 		if fan.RPM <= currentMax {
 			// Back at or below the known maximum: any pending peak was a spike.
 			delete(fanPeakCandidates, name)
 			continue
 		}
 		if cand, ok := fanPeakCandidates[name]; ok {
 			if now.Sub(cand.FirstSeen) >= fanObservationMinPeakHold {
 				// Held long enough: promote the highest value seen during the
 				// hold window, rounded up by normalizeObservedFanMaxRPM.
 				newMax := math.Max(cand.RPM, fan.RPM)
 				if newMax > currentMax {
 					fanObservation.MaxRPM[name] = normalizeObservedFanMaxRPM(newMax)
 					changed = true
 				}
 				delete(fanPeakCandidates, name)
 				continue
 			}
 			// Still inside the hold window: remember the highest RPM but keep
 			// the original FirstSeen so the window is not restarted.
 			if fan.RPM > cand.RPM {
 				fanPeakCandidates[name] = fanPeakCandidate{FirstSeen: cand.FirstSeen, RPM: fan.RPM}
 			}
 			continue
 		}
 		// First reading above the maximum: start a new hold window.
 		fanPeakCandidates[name] = fanPeakCandidate{FirstSeen: now, RPM: fan.RPM}
 	}
 	if changed {
 		saveFanObservationLocked()
 	}
 }
 // estimateFanDutyCyclePctFromObservation approximates the fan duty cycle as
 // the mean of current RPM divided by the recorded maximum RPM, per fan, as a
 // percentage clamped to [0, 100]. Fans with a blank name, a non-positive RPM,
 // or no recorded maximum are skipped. The second result is false when no fan
 // contributed a sample.
 func estimateFanDutyCyclePctFromObservation(fans []FanReading) (float64, bool) {
 	if len(fans) == 0 {
 		return 0, false
 	}
 	fanObservationMu.Lock()
 	defer fanObservationMu.Unlock()
 	loadFanObservationLocked()
 	var shares []float64
 	for _, reading := range fans {
 		key := strings.TrimSpace(reading.Name)
 		if key == "" || reading.RPM <= 0 {
 			continue
 		}
 		ceiling := fanObservation.MaxRPM[key]
 		if ceiling <= 0 {
 			continue
 		}
 		share := reading.RPM / ceiling * 100.0
 		switch {
 		case share > 100:
 			share = 100
 		case share < 0:
 			share = 0
 		}
 		shares = append(shares, share)
 	}
 	if len(shares) > 0 {
 		return benchmarkMean(shares), true
 	}
 	return 0, false
 }
 // parseFanSpeeds parses "ipmitool sdr type Fan" output.
 // Handles two formats:
 //
@@ -431,6 +571,116 @@ func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
 	return fans, nil
 }
 // sampleFanDutyCyclePct reports the average fan duty cycle in percent.
 // It parses PWM controls from `sensors -j`; only when that command fails or
 // prints nothing does it fall back to an estimate derived from fan RPM.
 //
 // Returns (percent, ok, estimated): ok is false when no value is available,
 // and estimated is true only for the RPM-based fallback.
 // NOTE(review): when `sensors -j` succeeds but exposes no PWM control the
 // result is (0, false, false) without trying the RPM estimate; confirm this
 // is intended.
 func sampleFanDutyCyclePct() (float64, bool, bool) {
 	out, err := exec.Command("sensors", "-j").Output()
 	if err != nil || len(out) == 0 {
 		fans, fanErr := sampleFanSpeeds()
 		if fanErr != nil {
 			return 0, false, false
 		}
 		return sampleFanDutyCyclePctFromFans(fans)
 	}
 	pct, ok := parseFanDutyCyclePctSensorsJSON(out)
 	return pct, ok, false
 }
 // sampleFanDutyCyclePctFromFans estimates the duty cycle from fan RPM readings
 // using the recorded per-fan maxima. Returns (percent, ok, estimated); on
 // success both flags are true, otherwise all results are zero values.
 func sampleFanDutyCyclePctFromFans(fans []FanReading) (float64, bool, bool) {
 	if len(fans) > 0 {
 		if estimate, ok := estimateFanDutyCyclePctFromObservation(fans); ok {
 			return estimate, true, true
 		}
 	}
 	return 0, false, false
 }
 // parseFanDutyCyclePctSensorsJSON extracts every fan duty value from
 // `sensors -j` output and returns their mean in percent. The document is
 // decoded as chip -> feature -> attributes; "Adapter" entries and features
 // that are not JSON objects are skipped. The second result is false when the
 // JSON is invalid or no duty value was found.
 func parseFanDutyCyclePctSensorsJSON(raw []byte) (float64, bool) {
 	var chips map[string]map[string]any
 	if json.Unmarshal(raw, &chips) != nil {
 		return 0, false
 	}
 	var duties []float64
 	for _, chip := range chips {
 		for label, node := range chip {
 			if strings.EqualFold(label, "Adapter") {
 				continue
 			}
 			attrs, isObject := node.(map[string]any)
 			if !isObject {
 				continue
 			}
 			duty, found := firstFanDutyValue(label, attrs)
 			if found {
 				duties = append(duties, duty)
 			}
 		}
 	}
 	if len(duties) == 0 {
 		return 0, false
 	}
 	return benchmarkMean(duties), true
 }
 // firstFanDutyValue returns the first usable duty-cycle value of one sensors
 // feature. Features whose name mentions "enable", "mode", or "alarm" are
 // rejected. For a feature named like a PWM control the "input", "value", and
 // "current" attributes are tried in that order; after that, any attribute
 // whose own key contains "pwm" (and is not a control flag) is tried in sorted
 // key order so the result is deterministic.
 func firstFanDutyValue(featureName string, feature map[string]any) (float64, bool) {
 	isControlFlag := func(lowered string) bool {
 		return strings.Contains(lowered, "enable") ||
 			strings.Contains(lowered, "mode") ||
 			strings.Contains(lowered, "alarm")
 	}
 	featureName = strings.ToLower(strings.TrimSpace(featureName))
 	if isControlFlag(featureName) {
 		return 0, false
 	}
 	if strings.Contains(featureName, "pwm") {
 		for _, preferred := range []string{"input", "value", "current"} {
 			candidate, present := feature[preferred]
 			if !present {
 				continue
 			}
 			if duty, ok := parseFanDutyValue(candidate); ok {
 				return duty, true
 			}
 		}
 	}
 	names := make([]string, 0, len(feature))
 	for name := range feature {
 		names = append(names, name)
 	}
 	sort.Strings(names)
 	for _, name := range names {
 		lowered := strings.ToLower(name)
 		if !strings.Contains(lowered, "pwm") || isControlFlag(lowered) {
 			continue
 		}
 		if duty, ok := parseFanDutyValue(feature[name]); ok {
 			return duty, true
 		}
 	}
 	return 0, false
 }
 // parseFanDutyValue converts a decoded JSON value into a duty-cycle percent.
 // Numbers are used directly and strings are parsed as floats after trimming;
 // any other type, or an unparseable string, yields (0, false).
 func parseFanDutyValue(value any) (float64, bool) {
 	if number, isNumber := value.(float64); isNumber {
 		return normalizePWMAsDutyPct(number)
 	}
 	text, isText := value.(string)
 	if !isText {
 		return 0, false
 	}
 	parsed, err := strconv.ParseFloat(strings.TrimSpace(text), 64)
 	if err != nil {
 		return 0, false
 	}
 	return normalizePWMAsDutyPct(parsed)
 }
 // normalizePWMAsDutyPct maps a raw PWM reading to a percentage. Values in
 // [0, 100] are taken as already being percent, values in (100, 255] are
 // scaled from the 0-255 range, and anything else is rejected.
 func normalizePWMAsDutyPct(raw float64) (float64, bool) {
 	switch {
 	case raw < 0:
 		return 0, false
 	case raw <= 100:
 		return raw, true
 	case raw <= 255:
 		return raw / 255.0 * 100.0, true
 	default:
 		return 0, false
 	}
 }
 func firstFanInputValue(feature map[string]any) (float64, bool) {
 	keys := make([]string, 0, len(feature))
 	for key := range feature {
@@ -518,19 +768,19 @@ func sampleCPUTempViaSensors() float64 {
 	return max
 }
-// sampleSystemPower reads system power draw via DCMI.
+// sampleSystemPowerResolved reads system power via the global autotune source,
-func sampleSystemPower() float64 {
+// falling back to the historical heuristic before autotune or when degraded.
 func sampleSystemPowerResolved() (float64, string, string) {
 	now := time.Now()
-	current := 0.0
+	current, decision, err := SampleSystemPowerResolved("")
 	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
 	if err == nil {
 		current = parseDCMIPowerReading(string(out))
 	}
 	systemPowerCacheMu.Lock()
 	defer systemPowerCacheMu.Unlock()
-	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	if err != nil {
 		current = 0
 	}
 	value, updated := effectiveSystemPowerReading(systemPowerCache, current, decision.EffectiveSource, decision.Mode, decision.Reason, now)
 	systemPowerCache = updated
-	return value
+	return value, updated.Source, updated.Mode
 }
 // parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
@@ -553,9 +803,9 @@ func parseDCMIPowerReading(raw string) float64 {
 	return 0
 }
-func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
+func effectiveSystemPowerReading(cache cachedPowerReading, current float64, source, mode, reason string, now time.Time) (float64, cachedPowerReading) {
 	if current > 0 {
-		cache = cachedPowerReading{Value: current, UpdatedAt: now}
+		cache = cachedPowerReading{Value: current, Source: source, Mode: mode, Reason: reason, UpdatedAt: now}
 		return current, cache
 	}
 	if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -1,6 +1,7 @@
 package platform
 import (
 	"path/filepath"
 	"testing"
 	"time"
 )
@@ -29,6 +30,74 @@ func TestFirstFanInputValue(t *testing.T) {
 	}
 }
 // TestParseFanDutyCyclePctSensorsJSON checks that only PWM controls contribute
 // to the averaged duty cycle: fan1 (an RPM input) and pwm1_enable (a control
 // flag) are ignored, pwm1=128 is scaled from the 0-255 range (~50.2%), and
 // pwm2=64 is taken as a percentage, giving a mean of ~57.1%.
 func TestParseFanDutyCyclePctSensorsJSON(t *testing.T) {
 	raw := []byte(`{
 		"chip0": {
 			"fan1": {"input": 9000},
 			"pwm1": {"input": 128},
 			"pwm1_enable": {"input": 1}
 		},
 		"chip1": {
 			"pwm2": {"input": 64}
 		}
 	}`)
 	got, ok := parseFanDutyCyclePctSensorsJSON(raw)
 	if !ok {
 		t.Fatalf("expected duty cycle telemetry to be parsed")
 	}
 	if got < 57 || got > 58 {
 		t.Fatalf("got=%v want ~57.1", got)
 	}
 }
 // TestEstimateFanDutyCyclePctFromObservation covers the observed-maximum
 // estimator end to end: a single high sample must not set the maximum, a peak
 // held for at least fanObservationMinPeakHold must (5200 RPM rounds up to
 // 6000), and the learned maximum must be reloaded from disk after the
 // in-memory state is cleared.
 //
 // The test deliberately does not call t.Parallel: it swaps package-level state
 // (state path, observation table, init flag, candidates), which would race
 // with any other test running concurrently.
 func TestEstimateFanDutyCyclePctFromObservation(t *testing.T) {
 	oldPath := fanObservationStatePath
 	oldState := fanObservation
 	oldInit := fanObservationInit
 	oldCandidates := fanPeakCandidates
 	fanObservationStatePath = filepath.Join(t.TempDir(), "fan-observation.json")
 	fanObservation = fanObservationState{}
 	fanObservationInit = false
 	fanPeakCandidates = make(map[string]fanPeakCandidate)
 	t.Cleanup(func() {
 		fanObservationStatePath = oldPath
 		fanObservation = oldState
 		fanObservationInit = oldInit
 		fanPeakCandidates = oldCandidates
 	})
 	start := time.Unix(100, 0)
 	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5000}}, start)
 	if _, ok := estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2500}}); ok {
 		t.Fatalf("single-sample spike should not establish observed max")
 	}
 	// Second sample is inside the hold window; the third is past it and
 	// promotes the highest RPM seen (5200 -> 6000 after rounding).
 	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5200}}, start.Add(500*time.Millisecond))
 	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5100}}, start.Add(1500*time.Millisecond))
 	got, ok := estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2600}})
 	if !ok {
 		t.Fatalf("expected estimated duty cycle from persisted observed max")
 	}
 	if got < 43 || got > 44 {
 		t.Fatalf("got=%v want ~43.3", got)
 	}
 	// Drop the in-memory state to force a reload from the persisted file.
 	fanObservation = fanObservationState{}
 	fanObservationInit = false
 	fanPeakCandidates = make(map[string]fanPeakCandidate)
 	got, ok = estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2600}})
 	if !ok {
 		t.Fatalf("expected persisted observed max to be reloaded from disk")
 	}
 	if got < 43 || got > 44 {
 		t.Fatalf("reloaded got=%v want ~43.3", got)
 	}
 }
 func TestParseDCMIPowerReading(t *testing.T) {
 	raw := `
 Instantaneous power reading:                   512 Watts
@@ -43,7 +112,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	now := time.Now()
 	cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}
-	got, updated := effectiveSystemPowerReading(cache, 0, now)
+	got, updated := effectiveSystemPowerReading(cache, 0, "", "", "", now)
 	if got != 480 {
 		t.Fatalf("got=%v want cached 480", got)
 	}
@@ -51,7 +120,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 		t.Fatalf("updated=%+v", updated)
 	}
-	got, updated = effectiveSystemPowerReading(cache, 530, now)
+	got, updated = effectiveSystemPowerReading(cache, 530, "dcmi", "fallback", "test", now)
 	if got != 530 {
 		t.Fatalf("got=%v want 530", got)
 	}
@@ -60,7 +129,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	}
 	expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
-	got, _ = effectiveSystemPowerReading(expired, 0, now)
+	got, _ = effectiveSystemPowerReading(expired, 0, "", "", "", now)
 	if got != 0 {
 		t.Fatalf("expired cache returned %v want 0", got)
 	}
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -14,12 +14,12 @@ import (
 func TestStorageSATCommands(t *testing.T) {
 	t.Parallel()
-	nvme := storageSATCommands("/dev/nvme0n1")
+	nvme := storageSATCommands("/dev/nvme0n1", false)
 	if len(nvme) != 3 || nvme[2].cmd[0] != "nvme" {
 		t.Fatalf("unexpected nvme commands: %#v", nvme)
 	}
-	sata := storageSATCommands("/dev/sda")
+	sata := storageSATCommands("/dev/sda", false)
 	if len(sata) != 2 || sata[0].cmd[0] != "smartctl" {
 		t.Fatalf("unexpected sata commands: %#v", sata)
 	}
@@ -216,6 +216,74 @@ func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
 	}
 }
 // TestParseNvidiaGPUHealthDetectsResetRequired feeds two nvidia-smi CSV rows,
 // one healthy and one whose fields read "[GPU requires reset]", and checks
 // that only the second record is flagged NeedsReset.
 func TestParseNvidiaGPUHealthDetectsResetRequired(t *testing.T) {
 	t.Parallel()
 	got := parseNvidiaGPUHealth("0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n")
 	if len(got) != 2 {
 		t.Fatalf("len=%d want 2", len(got))
 	}
 	if got[0].NeedsReset {
 		t.Fatalf("gpu0 unexpectedly marked reset-required")
 	}
 	if !got[1].NeedsReset {
 		t.Fatalf("gpu1 should be marked reset-required: %#v", got[1])
 	}
 }
 // TestCheckNvidiaJobHealthReturnsErrorForSelectedResetRequiredGPU replaces the
 // satExecCommand hook so that nvidia-smi output is simulated by a shell
 // printf, then checks that selecting the reset-required GPU (index 1) yields
 // an error and a message naming that GPU.
 func TestCheckNvidiaJobHealthReturnsErrorForSelectedResetRequiredGPU(t *testing.T) {
 	oldExecCommand := satExecCommand
 	satExecCommand = func(name string, args ...string) *exec.Cmd {
 		if name == "nvidia-smi" {
 			return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
 		}
 		return exec.Command(name, args...)
 	}
 	// Restore the real command hook once the test finishes.
 	t.Cleanup(func() { satExecCommand = oldExecCommand })
 	msg, err := checkNvidiaJobHealth([]int{1})
 	if err == nil {
 		t.Fatal("expected health check error")
 	}
 	if !strings.Contains(msg, "gpu 1") || !strings.Contains(strings.ToLower(msg), "requires reset") {
 		t.Fatalf("unexpected message: %q", msg)
 	}
 }
 // TestWriteNvidiaGPUStatusFilesCreatesPerGPUFiles writes status files for two
 // GPUs into a temporary directory, with nvidia-smi simulated through the
 // satExecCommand hook, and checks that the file for the reset-required GPU
 // carries the failed run status, the RESET_REQUIRED health status, and the
 // failing job name supplied by the caller.
 func TestWriteNvidiaGPUStatusFilesCreatesPerGPUFiles(t *testing.T) {
 	dir := t.TempDir()
 	oldExecCommand := satExecCommand
 	satExecCommand = func(name string, args ...string) *exec.Cmd {
 		if name == "nvidia-smi" {
 			return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
 		}
 		return exec.Command(name, args...)
 	}
 	// Restore the real command hook once the test finishes.
 	t.Cleanup(func() { satExecCommand = oldExecCommand })
 	perGPU := map[int]*nvidiaGPUStatusFile{
 		0: {Index: 0, RunStatus: "OK"},
 		1: {Index: 1, RunStatus: "FAILED", FailingJob: "02-dcgmi-targeted-stress.log", Reason: "NVIDIA GPU health check failed:"},
 	}
 	if err := writeNvidiaGPUStatusFiles(dir, "FAILED", perGPU, map[int]struct{}{0: {}, 1: {}}); err != nil {
 		t.Fatalf("writeNvidiaGPUStatusFiles error: %v", err)
 	}
 	raw, err := os.ReadFile(filepath.Join(dir, "gpu-1-status.txt"))
 	if err != nil {
 		t.Fatalf("ReadFile gpu-1-status.txt: %v", err)
 	}
 	text := string(raw)
 	if !strings.Contains(text, "run_status=FAILED") {
 		t.Fatalf("missing run status:\n%s", text)
 	}
 	if !strings.Contains(text, "health_status=RESET_REQUIRED") {
 		t.Fatalf("missing health status:\n%s", text)
 	}
 	if !strings.Contains(text, "failing_job=02-dcgmi-targeted-stress.log") {
 		t.Fatalf("missing failing job:\n%s", text)
 	}
 }
 func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
 	oldLookPath := satLookPath
 	satLookPath = func(file string) (string, error) {
@@ -253,6 +321,19 @@ func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
 	}
 }
 // TestNvidiaDCGMNamedDiagCommandSkipsDurationForNVBandwidth pins the exact
 // argument list built for the "nvbandwidth" diagnostic with a zero duration:
 // no duration argument is present and the GPU selection keeps caller order.
 func TestNvidiaDCGMNamedDiagCommandSkipsDurationForNVBandwidth(t *testing.T) {
 	cmd := nvidiaDCGMNamedDiagCommand("nvbandwidth", 0, []int{2, 0})
 	want := []string{"dcgmi", "diag", "-r", "nvbandwidth", "-i", "2,0"}
 	if len(cmd) != len(want) {
 		t.Fatalf("cmd len=%d want %d (%v)", len(cmd), len(want), cmd)
 	}
 	for i := range want {
 		if cmd[i] != want[i] {
 			t.Fatalf("cmd[%d]=%q want %q", i, cmd[i], want[i])
 		}
 	}
 }
 func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
 	env := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
 	if len(env) != 2 {
@@ -341,6 +422,7 @@ func TestClassifySATResult(t *testing.T) {
 	}{
 		{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
 		{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
 		{name: "nvme wait timeout without progress", job: "nvme-device-self-test", out: "Short Device self-test started\nWaiting for self test completion...\nno progress for 78 seconds, stop waiting", err: errors.New("rc 1"), status: "UNSUPPORTED"},
 		{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
 		{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
 	}
--- a/audit/internal/platform/services.go
+++ b/audit/internal/platform/services.go
@@ -61,6 +61,9 @@ func (s *System) ServiceState(name string) string {
 }
 func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
 	if name == "bee-nvidia" && action == ServiceRestart {
 		return restartNvidiaDrivers()
 	}
 	// bee-web runs as the bee user; sudo is required to control system services.
 	// /etc/sudoers.d/bee grants bee NOPASSWD:ALL.
 	raw, err := exec.Command("sudo", "systemctl", string(action), name).CombinedOutput()
--- a/audit/internal/platform/techdump.go
+++ b/audit/internal/platform/techdump.go
@@ -20,6 +20,7 @@ var techDumpFixedCommands = []struct {
 	{Name: "dmidecode", Args: []string{"-t", "4"}, File: "dmidecode-type4.txt"},
 	{Name: "dmidecode", Args: []string{"-t", "17"}, File: "dmidecode-type17.txt"},
 	{Name: "lspci", Args: []string{"-vmm", "-D"}, File: "lspci-vmm.txt"},
 	{Name: "lspci", Args: []string{"-vvv"}, File: "lspci-vvv.txt"},
 	{Name: "lsblk", Args: []string{"-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL"}, File: "lsblk.json"},
 	{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
 	{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -9,6 +9,17 @@ type LiveBootSource struct {
 	Device string `json:"device,omitempty"`
 }
 // LiveMediaRAMState combines the live boot source with the state of the
 // live-media copy in RAM. LiveBootSource is embedded without a JSON tag, so
 // its fields are serialized inline in the same JSON object.
 //
 // State and Status are always emitted; the boolean flags and Message are
 // omitted from JSON when they hold their zero value.
 // NOTE(review): the set of values State and Status may take is not visible
 // in this file section; confirm against the code that fills this struct.
 type LiveMediaRAMState struct {
 	LiveBootSource
 	State        string `json:"state"`
 	Status       string `json:"status"`
 	ToramActive  bool   `json:"toram_active,omitempty"`
 	CopyPresent  bool   `json:"copy_present,omitempty"`
 	CopyComplete bool   `json:"copy_complete,omitempty"`
 	CanStartCopy bool   `json:"can_start_copy,omitempty"`
 	Message      string `json:"message,omitempty"`
 }
 type InterfaceInfo struct {
 	Name  string
 	State string
@@ -70,6 +81,7 @@ type NvidiaStressOptions struct {
 	Loader            string
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	StaggerSeconds    int
 }
 func New() *System {
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -15,13 +15,17 @@ type HardwareIngestRequest struct {
 }
 type RuntimeHealth struct {
-	Status        string                 `json:"status"`
+	Status        string `json:"status"`
-	CheckedAt     string                 `json:"checked_at"`
+	CheckedAt     string `json:"checked_at"`
-	ExportDir     string                 `json:"export_dir,omitempty"`
+	ExportDir     string `json:"export_dir,omitempty"`
-	DriverReady   bool                   `json:"driver_ready,omitempty"`
+	DriverReady   bool   `json:"driver_ready,omitempty"`
-	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
+	CUDAReady     bool   `json:"cuda_ready,omitempty"`
-	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
-	NetworkStatus string                 `json:"network_status,omitempty"`
+	NetworkStatus string `json:"network_status,omitempty"`
 	// ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
 	ToRAMStatus string `json:"toram_status,omitempty"`
 	// USBExportPath: mount point of the first writable USB drive found, empty if none.
 	USBExportPath string                 `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
@@ -183,6 +187,13 @@ type HardwarePCIeDevice struct {
 	BatteryTemperatureC    *float64       `json:"battery_temperature_c,omitempty"`
 	BatteryVoltageV        *float64       `json:"battery_voltage_v,omitempty"`
 	BatteryReplaceRequired *bool          `json:"battery_replace_required,omitempty"`
 	SFPPresent             *bool          `json:"sfp_present,omitempty"`
 	SFPIdentifier          *string        `json:"sfp_identifier,omitempty"`
 	SFPConnector           *string        `json:"sfp_connector,omitempty"`
 	SFPVendor              *string        `json:"sfp_vendor,omitempty"`
 	SFPPartNumber          *string        `json:"sfp_part_number,omitempty"`
 	SFPSerialNumber        *string        `json:"sfp_serial_number,omitempty"`
 	SFPWavelengthNM        *float64       `json:"sfp_wavelength_nm,omitempty"`
 	SFPTemperatureC        *float64       `json:"sfp_temperature_c,omitempty"`
 	SFPTXPowerDBM          *float64       `json:"sfp_tx_power_dbm,omitempty"`
 	SFPRXPowerDBM          *float64       `json:"sfp_rx_power_dbm,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strconv"
 	"strings"
 	"sync/atomic"
 	"syscall"
@@ -28,6 +29,22 @@ var apiListNvidiaGPUs = func(a *app.App) ([]platform.NvidiaGPU, error) {
 	}
 	return a.ListNvidiaGPUs()
 }
 // apiListNvidiaGPUStatuses lists per-GPU status through the application
 // object. It is a package-level variable rather than a plain function so it
 // can be reassigned; a nil application yields an "app not configured" error.
 var apiListNvidiaGPUStatuses = func(application *app.App) ([]platform.NvidiaGPUStatus, error) {
 	if application != nil {
 		return application.ListNvidiaGPUStatuses()
 	}
 	return nil, fmt.Errorf("app not configured")
 }
 // Default queue priorities, one per task category. defaultTaskPriority maps
 // task targets onto these values.
 // NOTE(review): whether a larger value is scheduled earlier or later is
 // decided by the queue implementation, which is not visible here — confirm
 // before reordering or inserting new values.
 const (
 	taskPriorityBenchmark      = 10
 	taskPriorityBurn           = 20
 	taskPriorityValidateStress = 30
 	taskPriorityValidate       = 40
 	taskPriorityAudit          = 50
 	taskPriorityInstallToRAM   = 60
 	taskPriorityInstall        = 70
 )
 // ── Job ID counter ────────────────────────────────────────────────────────────
@@ -93,7 +110,7 @@ func writeTaskRunResponse(w http.ResponseWriter, tasks []*Task) {
 func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	switch strings.TrimSpace(target) {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute",
 		"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
 		"nvidia-bandwidth", "nvidia-stress":
 		return true
@@ -102,6 +119,30 @@ func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	}
 }
 // defaultTaskPriority returns the default queue priority for a task target.
 // Surrounding whitespace in target is ignored. Validation targets get
 // taskPriorityValidateStress when params.StressMode is set and
 // taskPriorityValidate otherwise; a target that is not listed yields 0.
 func defaultTaskPriority(target string, params taskParams) int {
 	switch kind := strings.TrimSpace(target); kind {
 	case "nvidia-bench-perf", "nvidia-bench-power", "nvidia-bench-autotune":
 		return taskPriorityBenchmark
 	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
 		return taskPriorityBurn
 	case "audit":
 		return taskPriorityAudit
 	case "install-to-ram":
 		return taskPriorityInstallToRAM
 	case "install":
 		return taskPriorityInstall
 	case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse",
 		"nvidia-interconnect", "nvidia-bandwidth", "memory", "storage", "cpu",
 		"amd", "amd-mem", "amd-bandwidth":
 		// Validation targets: the priority depends on the stress flag, which
 		// is resolved after the switch.
 	default:
 		return 0
 	}
 	if params.StressMode {
 		return taskPriorityValidateStress
 	}
 	return taskPriorityValidate
 }
 func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) {
 	if len(gpus) == 0 {
 		return nil, fmt.Errorf("no NVIDIA GPUs detected")
@@ -203,6 +244,14 @@ func joinTaskIndices(indices []int) string {
 	return strings.Join(parts, ",")
 }
 // formatGPUIndexList renders GPU indices as a comma-separated decimal list
 // without spaces, preserving the input order. An empty or nil slice yields "".
 func formatGPUIndexList(indices []int) string {
 	var list strings.Builder
 	for pos, gpu := range indices {
 		if pos > 0 {
 			list.WriteByte(',')
 		}
 		list.WriteString(strconv.Itoa(gpu))
 	}
 	return list.String()
 }
 func formatSplitTaskName(baseName, selectionLabel string) string {
 	baseName = strings.TrimSpace(baseName)
 	selectionLabel = strings.TrimSpace(selectionLabel)
@@ -216,7 +265,21 @@ func formatSplitTaskName(baseName, selectionLabel string) string {
 }
 func buildNvidiaTaskSet(target string, priority int, createdAt time.Time, params taskParams, baseName string, appRef *app.App, idPrefix string) ([]*Task, error) {
-	if !shouldSplitHomogeneousNvidiaTarget(target) {
+	if !shouldSplitHomogeneousNvidiaTarget(target) || params.ParallelGPUs {
 		// Parallel mode (or non-splittable target): one task for all selected GPUs.
 		if params.ParallelGPUs && shouldSplitHomogeneousNvidiaTarget(target) {
 			// Resolve the selected GPU indices so ExcludeGPUIndices is applied.
 			gpus, err := apiListNvidiaGPUs(appRef)
 			if err != nil {
 				return nil, err
 			}
 			resolved, err := expandSelectedGPUIndices(gpus, params.GPUIndices, params.ExcludeGPUIndices)
 			if err != nil {
 				return nil, err
 			}
 			params.GPUIndices = resolved
 			params.ExcludeGPUIndices = nil
 		}
 		t := &Task{
 			ID:        newJobID(idPrefix),
 			Name:      baseName,
@@ -256,6 +319,53 @@ func buildNvidiaTaskSet(target string, priority int, createdAt time.Time, params
 	return tasks, nil
 }
 // expandSelectedGPUIndices resolves which GPU indices a request refers to,
 // without grouping them by model.
 //
 // With an empty include list every detected GPU is a candidate. Otherwise
 // only the listed indices that belong to a detected GPU are kept, with
 // duplicates dropped. Indices named in exclude are then removed. The result
 // is in ascending order; an empty result is reported as an error.
 func expandSelectedGPUIndices(gpus []platform.NvidiaGPU, include, exclude []int) ([]int, error) {
 	known := make(map[int]struct{}, len(gpus))
 	candidates := make([]int, 0, len(gpus))
 	for i := range gpus {
 		known[gpus[i].Index] = struct{}{}
 		candidates = append(candidates, gpus[i].Index)
 	}
 	sort.Ints(candidates)
 	if len(include) > 0 {
 		picked := make(map[int]struct{}, len(include))
 		candidates = make([]int, 0, len(include))
 		for _, want := range include {
 			_, detected := known[want]
 			_, repeated := picked[want]
 			if !detected || repeated {
 				// Unknown and repeated indices are dropped silently.
 				continue
 			}
 			picked[want] = struct{}{}
 			candidates = append(candidates, want)
 		}
 		sort.Ints(candidates)
 	}
 	if len(exclude) > 0 {
 		dropped := make(map[int]struct{}, len(exclude))
 		for _, idx := range exclude {
 			dropped[idx] = struct{}{}
 		}
 		// Filter in place: candidates is local to this call, so reusing its
 		// backing array cannot affect the caller's slices.
 		kept := candidates[:0]
 		for _, idx := range candidates {
 			if _, gone := dropped[idx]; !gone {
 				kept = append(kept, idx)
 			}
 		}
 		candidates = kept
 	}
 	if len(candidates) == 0 {
 		return nil, fmt.Errorf("no NVIDIA GPUs selected")
 	}
 	return candidates, nil
 }
 // ── SSE helpers ───────────────────────────────────────────────────────────────
 func sseWrite(w http.ResponseWriter, event, data string) bool {
@@ -382,6 +492,7 @@ func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
 		ID:        newJobID("audit"),
 		Name:      "Audit",
 		Target:    "audit",
 		Priority:  defaultTaskPriority("audit", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -417,9 +528,11 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		var body struct {
 			Duration           int      `json:"duration"`
-			DiagLevel          int      `json:"diag_level"`
+			StressMode         bool     `json:"stress_mode"`
 			GPUIndices         []int    `json:"gpu_indices"`
 			ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
 			StaggerGPUStart    bool     `json:"stagger_gpu_start"`
 			ParallelGPUs       bool     `json:"parallel_gpus"`
 			Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
@@ -438,15 +551,17 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		}
 		params := taskParams{
 			Duration:           body.Duration,
-			DiagLevel:          body.DiagLevel,
+			StressMode:         body.StressMode,
 			GPUIndices:         body.GPUIndices,
 			ExcludeGPUIndices:  body.ExcludeGPUIndices,
 			StaggerGPUStart:    body.StaggerGPUStart,
 			ParallelGPUs:       body.ParallelGPUs,
 			Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
 			PlatformComponents: body.PlatformComponents,
 		}
-		tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target)
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "sat-"+target)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
 			return
@@ -458,51 +573,208 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 	}
 }
-func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
+func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		if h.opts.App == nil {
 			writeError(w, http.StatusServiceUnavailable, "app not configured")
 			return
 		}
 		var body struct {
 			Profile           string `json:"profile"`
 			SizeMB            int    `json:"size_mb"`
 			GPUIndices        []int  `json:"gpu_indices"`
 			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
 			RunNCCL           *bool  `json:"run_nccl"`
 			ParallelGPUs      *bool  `json:"parallel_gpus"`
 			RampUp            *bool  `json:"ramp_up"`
 			DisplayName       string `json:"display_name"`
 		}
 		if r.Body != nil {
 			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
 				writeError(w, http.StatusBadRequest, "invalid request body")
 				return
 			}
 		}
 		runNCCL := true
 		if body.RunNCCL != nil {
 			runNCCL = *body.RunNCCL
 		}
 		parallelGPUs := false
 		if body.ParallelGPUs != nil {
 			parallelGPUs = *body.ParallelGPUs
 		}
 		rampUp := false
 		if body.RampUp != nil {
 			rampUp = *body.RampUp
 		}
 		// Build a descriptive base name that includes profile and mode so the task
 		// list is self-explanatory without opening individual task detail pages.
 		profile := strings.TrimSpace(body.Profile)
 		if profile == "" {
 			profile = "standard"
 		}
 		name := taskDisplayName(target, "", "")
 		if strings.TrimSpace(body.DisplayName) != "" {
 			name = body.DisplayName
 		}
 		// Append profile tag.
 		name = fmt.Sprintf("%s · %s", name, profile)
 		if target == "nvidia-bench-power" && parallelGPUs {
 			writeError(w, http.StatusBadRequest, "power / thermal fit benchmark uses sequential or ramp-up modes only")
 			return
 		}
 		if rampUp && len(body.GPUIndices) > 1 {
 			// Ramp-up mode: RunNvidiaPowerBench internally ramps from 1 to N GPUs
 			// in Phase 2 (one additional GPU per step). A single task with all
 			// selected GPUs is sufficient — spawning N tasks with growing subsets
 			// would repeat all earlier steps redundantly.
 			gpus, err := apiListNvidiaGPUs(h.opts.App)
 			if err != nil {
 				writeError(w, http.StatusBadRequest, err.Error())
 				return
 			}
 			resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
 			if err != nil {
 				writeError(w, http.StatusBadRequest, err.Error())
 				return
 			}
 			if len(resolved) < 2 {
 				// Fall through to normal single-task path.
 				rampUp = false
 			} else {
 				now := time.Now()
 				rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
 				taskName := fmt.Sprintf("%s · ramp 1–%d · GPU %s", name, len(resolved), formatGPUIndexList(resolved))
 				t := &Task{
 					ID:        newJobID("bee-bench-nvidia"),
 					Name:      taskName,
 					Target:    target,
 					Priority:  defaultTaskPriority(target, taskParams{}),
 					Status:    TaskPending,
 					CreatedAt: now,
 					params: taskParams{
 						GPUIndices:       append([]int(nil), resolved...),
 						SizeMB:           body.SizeMB,
 						BenchmarkProfile: body.Profile,
 						RunNCCL:          runNCCL,
 						ParallelGPUs:     true,
 						RampTotal:        len(resolved),
 						RampRunID:        rampRunID,
 						DisplayName:      taskName,
 					},
 				}
 				globalQueue.enqueue(t)
 				writeTaskRunResponse(w, []*Task{t})
 				return
 			}
 		}
 		// For non-ramp tasks append mode tag.
 		if parallelGPUs {
 			name = fmt.Sprintf("%s · parallel", name)
 		} else {
 			name = fmt.Sprintf("%s · sequential", name)
 		}
 		params := taskParams{
 			GPUIndices:        body.GPUIndices,
 			ExcludeGPUIndices: body.ExcludeGPUIndices,
 			SizeMB:            body.SizeMB,
 			BenchmarkProfile:  body.Profile,
 			RunNCCL:           runNCCL,
 			ParallelGPUs:      parallelGPUs,
 			DisplayName:       body.DisplayName,
 		}
 		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "bee-bench-nvidia")
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
 			return
 		}
 		for _, t := range tasks {
 			globalQueue.enqueue(t)
 		}
 		writeTaskRunResponse(w, tasks)
 	}
 }
 // handleAPIBenchmarkAutotuneRun returns the handler that queues one
 // "nvidia-bench-autotune" task. The optional JSON body supplies profile,
 // benchmark_kind and size_mb; a blank profile becomes "standard" and a blank
 // benchmark kind becomes "power-fit". An empty body is accepted, a malformed
 // one is answered with 400, and a missing app with 503.
 func (h *handler) handleAPIBenchmarkAutotuneRun() http.HandlerFunc {
 	// orDefault trims value and substitutes fallback when nothing is left.
 	orDefault := func(value, fallback string) string {
 		if trimmed := strings.TrimSpace(value); trimmed != "" {
 			return trimmed
 		}
 		return fallback
 	}
 	return func(w http.ResponseWriter, r *http.Request) {
 		if h.opts.App == nil {
 			writeError(w, http.StatusServiceUnavailable, "app not configured")
 			return
 		}
 		var req struct {
 			Profile       string `json:"profile"`
 			BenchmarkKind string `json:"benchmark_kind"`
 			SizeMB        int    `json:"size_mb"`
 		}
 		if r.Body != nil {
 			// io.EOF only means the body was empty, which is allowed.
 			err := json.NewDecoder(r.Body).Decode(&req)
 			if err != nil && !errors.Is(err, io.EOF) {
 				writeError(w, http.StatusBadRequest, "invalid request body")
 				return
 			}
 		}
 		profile := orDefault(req.Profile, "standard")
 		kind := orDefault(req.BenchmarkKind, "power-fit")
 		queuedAt := time.Now()
 		label := fmt.Sprintf("NVIDIA Benchmark Autotune · %s · %s", profile, kind)
 		task := &Task{
 			ID:        newJobID("bee-bench-autotune"),
 			Name:      label,
 			Target:    "nvidia-bench-autotune",
 			Priority:  defaultTaskPriority("nvidia-bench-autotune", taskParams{}),
 			Status:    TaskPending,
 			CreatedAt: queuedAt,
 			params: taskParams{
 				BenchmarkProfile: profile,
 				BenchmarkKind:    kind,
 				SizeMB:           req.SizeMB,
 				DisplayName:      label,
 			},
 		}
 		globalQueue.enqueue(task)
 		writeTaskRunResponse(w, []*Task{task})
 	}
 }
 func (h *handler) handleAPIBenchmarkAutotuneStatus(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-
+	cfg, err := h.opts.App.LoadBenchmarkPowerAutotune()
-	var body struct {
+	if err != nil {
-		Profile           string `json:"profile"`
+		if os.IsNotExist(err) {
-		SizeMB            int    `json:"size_mb"`
+			w.WriteHeader(http.StatusOK)
-		GPUIndices        []int  `json:"gpu_indices"`
+			writeJSON(w, map[string]any{
-		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+				"configured": false,
-		RunNCCL           *bool  `json:"run_nccl"`
+				"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
-		DisplayName       string `json:"display_name"`
+			})
 	}
 	if r.Body != nil {
 		if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
 			writeError(w, http.StatusBadRequest, "invalid request body")
 			return
 		}
-	}
+		writeError(w, http.StatusInternalServerError, err.Error())
 	runNCCL := true
 	if body.RunNCCL != nil {
 		runNCCL = *body.RunNCCL
 	}
 	name := taskDisplayName("nvidia-benchmark", "", "")
 	if strings.TrimSpace(body.DisplayName) != "" {
 		name = body.DisplayName
 	}
 	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
 		GPUIndices:        body.GPUIndices,
 		ExcludeGPUIndices: body.ExcludeGPUIndices,
 		SizeMB:            body.SizeMB,
 		BenchmarkProfile:  body.Profile,
 		RunNCCL:           runNCCL,
 		DisplayName:       body.DisplayName,
 	}, name, h.opts.App, "benchmark-nvidia")
 	if err != nil {
 		writeError(w, http.StatusBadRequest, err.Error())
 		return
 	}
-	for _, t := range tasks {
+	w.WriteHeader(http.StatusOK)
-		globalQueue.enqueue(t)
+	writeJSON(w, map[string]any{
-	}
+		"configured": true,
-	writeTaskRunResponse(w, tasks)
+		"config":     cfg,
 		"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
 	})
 }
 // handleAPIBenchmarkNvidiaRun serves a benchmark run request using the
 // "nvidia-bench-perf" target of the kind-parameterized handler.
 func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
 	perfRun := h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf")
 	perfRun(w, r)
 }
 func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
@@ -534,12 +806,14 @@ func (h *handler) handleAPISATAbort(w http.ResponseWriter, r *http.Request) {
 			now := time.Now()
 			t.DoneAt = &now
 		case TaskRunning:
-			if t.job != nil {
+			if t.job == nil || !t.job.abort() {
-				t.job.abort()
+				globalQueue.mu.Unlock()
 				writeJSON(w, map[string]string{"status": "not_running"})
 				return
 			}
-			t.Status = TaskCancelled
+			globalQueue.mu.Unlock()
-			now := time.Now()
+			writeJSON(w, map[string]string{"status": "aborting"})
-			t.DoneAt = &now
+			return
 		}
 		globalQueue.mu.Unlock()
 		writeJSON(w, map[string]string{"status": "aborted"})
@@ -782,6 +1056,42 @@ func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
 	writeJSON(w, gpus)
 }
 // handleAPIGNVIDIAGPUStatuses writes the NVIDIA GPU status list as JSON.
 // It answers 503 when no app is configured and 500 when listing fails.
 func (h *handler) handleAPIGNVIDIAGPUStatuses(w http.ResponseWriter, _ *http.Request) {
 	application := h.opts.App
 	if application == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
 	statuses, err := apiListNvidiaGPUStatuses(application)
 	switch {
 	case err != nil:
 		writeError(w, http.StatusInternalServerError, err.Error())
 	case statuses == nil:
 		// Hand writeJSON an empty slice instead of a nil one.
 		writeJSON(w, []platform.NvidiaGPUStatus{})
 	default:
 		writeJSON(w, statuses)
 	}
 }
 // handleAPIGNVIDIAReset resets the NVIDIA GPU whose index is given in the
 // JSON request body ({"index": N}).
 //
 // It answers 503 when no app is configured and 400 when the body cannot be
 // decoded. Otherwise it writes a JSON object with "status" ("ok" or "error")
 // and "output" (the body of the reset result). When the reset fails, the
 // error text is also returned under "error" so the caller can see the cause
 // even if the result body is empty; existing "status" and "output" keys are
 // unchanged.
 func (h *handler) handleAPIGNVIDIAReset(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
 	var req struct {
 		Index int `json:"index"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 		writeError(w, http.StatusBadRequest, "invalid request body")
 		return
 	}
 	result, err := h.opts.App.ResetNvidiaGPU(req.Index)
 	resp := map[string]string{"status": "ok", "output": result.Body}
 	if err != nil {
 		resp["status"] = "error"
 		// Previously the error was collapsed into the status flag only.
 		resp["error"] = err.Error()
 	}
 	writeJSON(w, resp)
 }
 func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
@@ -841,25 +1151,62 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	status := h.opts.App.LiveBootSource()
+	status := h.currentRAMStatus()
 	w.Header().Set("Content-Type", "application/json")
 	_ = json.NewEncoder(w).Encode(status)
 }
 // ramStatusResponse is the JSON payload describing the RAM status. It embeds
 // platform.LiveMediaRAMState without a tag, so that state's fields appear
 // inline, and adds flags derived from the task queue. All added fields are
 // omitted from JSON when they hold their zero value.
 type ramStatusResponse struct {
 	platform.LiveMediaRAMState
 	// InstallTaskActive is set when an "install" task is active in the queue.
 	InstallTaskActive bool   `json:"install_task_active,omitempty"`
 	// CopyTaskActive is set when an "install-to-ram" task is active in the queue.
 	CopyTaskActive    bool   `json:"copy_task_active,omitempty"`
 	// CanStartTask reports whether a new install-to-RAM task may be queued.
 	CanStartTask      bool   `json:"can_start_task,omitempty"`
 	// BlockedReason explains why CanStartTask is false, when a reason is known.
 	BlockedReason     string `json:"blocked_reason,omitempty"`
 }
 // currentRAMStatus combines the live-media RAM state with the task queue to
 // decide whether an install-to-RAM task may be started. The checks run in
 // order and the first that applies sets the blocked reason: an active
 // "install" task, an active "install-to-ram" task, then the system already
 // running from RAM. If none applies, CanStartTask mirrors the state's
 // CanStartCopy flag and the state's message is the reason when it is false.
 //
 // h.opts.App is dereferenced without a nil check.
 func (h *handler) currentRAMStatus() ramStatusResponse {
 	media := h.opts.App.LiveMediaRAMState()
 	out := ramStatusResponse{LiveMediaRAMState: media}
 	switch {
 	case globalQueue.hasActiveTarget("install"):
 		out.InstallTaskActive = true
 		out.BlockedReason = "install to disk is already running"
 	case globalQueue.hasActiveTarget("install-to-ram"):
 		out.CopyTaskActive = true
 		out.BlockedReason = "install to RAM task is already pending or running"
 	case media.InRAM:
 		out.BlockedReason = "system is already running from RAM"
 	default:
 		out.CanStartTask = media.CanStartCopy
 		if !out.CanStartTask {
 			// No other reason was recorded on this path, so use the
 			// message carried by the media state.
 			out.BlockedReason = media.Message
 		}
 	}
 	return out
 }
 func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	if globalQueue.hasActiveTarget("install") {
+	status := h.currentRAMStatus()
-		writeError(w, http.StatusConflict, "install to disk is already running")
+	if !status.CanStartTask {
 		msg := strings.TrimSpace(status.BlockedReason)
 		if msg == "" {
 			msg = "install to RAM is not available"
 		}
 		writeError(w, http.StatusConflict, msg)
 		return
 	}
 	t := &Task{
 		ID:        newJobID("install-to-ram"),
 		Name:      "Install to RAM",
 		Target:    "install-to-ram",
-		Priority:  10,
+		Priority:  defaultTaskPriority("install-to-ram", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -974,7 +1321,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
 		ID:        newJobID("install"),
 		Name:      "Install to Disk",
 		Target:    "install",
-		Priority:  20,
+		Priority:  defaultTaskPriority("install", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 		params: taskParams{
@@ -1250,6 +1597,11 @@ func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Reques
 	writeJSON(w, map[string]string{"status": "rolled back"})
 }
 // handleAPIBenchmarkResults writes the benchmark results card, rendered from
 // the configured export directory, as a UTF-8 HTML response.
 func (h *handler) handleAPIBenchmarkResults(w http.ResponseWriter, r *http.Request) {
 	headers := w.Header()
 	headers.Set("Content-Type", "text/html; charset=utf-8")
 	card := renderBenchmarkResultsCard(h.opts.ExportDir)
 	fmt.Fprint(w, card)
 }
 func (h *handler) rollbackPendingNetworkChange() error {
 	h.pendingNetMu.Lock()
 	pnc := h.pendingNet
@@ -1266,108 +1618,3 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	}
 	return nil
 }
 // ── Display / Screen Resolution ───────────────────────────────────────────────
 // displayMode is one resolution offered by a display output, as parsed from
 // xrandr output.
 type displayMode struct {
 	// Output is the name of the output this mode belongs to.
 	Output  string `json:"output"`
 	// Mode is the resolution text in WIDTHxHEIGHT form.
 	Mode    string `json:"mode"`
 	// Current is true when the xrandr line for this mode contained a '*'.
 	Current bool   `json:"current"`
 }
 // displayInfo groups the modes parsed for one connected display output.
 type displayInfo struct {
 	// Output is the output name taken from the xrandr "connected" line.
 	Output  string        `json:"output"`
 	// Modes lists the parsed modes in the order xrandr printed them.
 	Modes   []displayMode `json:"modes"`
 	// Current is the resolution of the last mode marked current, or "" if none.
 	Current string        `json:"current"`
 }
 // xrandrOutputRE matches a line that starts with an output name followed by
 // whitespace and the word "connected"; group 1 is the output name.
 var xrandrOutputRE = regexp.MustCompile(`^(\S+)\s+connected`)
 // xrandrModeRE matches a mode line: exactly three leading whitespace
 // characters, then a WIDTHxHEIGHT resolution (group 1) followed by whitespace.
 var xrandrModeRE = regexp.MustCompile(`^\s{3}(\d+x\d+)\s`)
 // xrandrCurrentRE matches any line containing a literal '*'.
 var xrandrCurrentRE = regexp.MustCompile(`\*`)
 // parseXrandrOutput turns the text printed by xrandr into one displayInfo per
 // connected output, in the order the outputs appear. Mode lines are
 // attributed to the most recent connected output; mode lines seen before any
 // connected output are ignored. The result is nil when no output matched.
 func parseXrandrOutput(out string) []displayInfo {
 	var displays []displayInfo
 	for _, row := range strings.Split(out, "\n") {
 		if header := xrandrOutputRE.FindStringSubmatch(row); header != nil {
 			displays = append(displays, displayInfo{Output: header[1]})
 			continue
 		}
 		if len(displays) == 0 {
 			continue
 		}
 		match := xrandrModeRE.FindStringSubmatch(row)
 		if match == nil {
 			continue
 		}
 		// Modes always belong to the output that was opened last.
 		active := &displays[len(displays)-1]
 		selected := xrandrCurrentRE.MatchString(row)
 		active.Modes = append(active.Modes, displayMode{
 			Output:  active.Output,
 			Mode:    match[1],
 			Current: selected,
 		})
 		if selected {
 			active.Current = match[1]
 		}
 	}
 	return displays
 }
 // xrandrCommand builds an "xrandr" command with the given arguments. Its
 // environment is a copy of the current process environment; when DISPLAY or
 // XAUTHORITY is missing or empty there, the defaults "DISPLAY=:0" and
 // "XAUTHORITY=/home/bee/.Xauthority" are appended, in that order.
 func xrandrCommand(args ...string) *exec.Cmd {
 	environ := append([]string{}, os.Environ()...)
 	// isSet reports whether key is present in environ with a non-empty value.
 	isSet := func(key string) bool {
 		prefix := key + "="
 		for _, entry := range environ {
 			if strings.HasPrefix(entry, prefix) && len(entry) > len(prefix) {
 				return true
 			}
 		}
 		return false
 	}
 	displaySet, xauthSet := isSet("DISPLAY"), isSet("XAUTHORITY")
 	if !displaySet {
 		environ = append(environ, "DISPLAY=:0")
 	}
 	if !xauthSet {
 		environ = append(environ, "XAUTHORITY=/home/bee/.Xauthority")
 	}
 	command := exec.Command("xrandr", args...)
 	command.Env = environ
 	return command
 }
 // handleAPIDisplayResolutions runs xrandr without arguments and writes the
 // parsed outputs and modes as JSON. A failing xrandr run is answered with
 // 500 and the error text prefixed by "xrandr: ".
 func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
 	raw, err := xrandrCommand().Output()
 	if err == nil {
 		writeJSON(w, parseXrandrOutput(string(raw)))
 		return
 	}
 	writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
 }
 // Validation patterns for handleAPIDisplaySet, compiled once at package
 // initialization instead of on every request.
 var (
 	// displaySetModeRE accepts only WIDTHxHEIGHT made of decimal digits.
 	displaySetModeRE = regexp.MustCompile(`^\d+x\d+$`)
 	// displaySetOutputRE accepts letters, digits, underscore and hyphen only.
 	displaySetOutputRE = regexp.MustCompile(`^[A-Za-z0-9_\-]+$`)
 )
 // handleAPIDisplaySet switches a display output to the requested mode by
 // running "xrandr --output <output> --mode <mode>".
 //
 // The JSON body must carry non-empty "output" and "mode" fields. Both are
 // checked against strict patterns before being passed to xrandr; anything
 // else is answered with 400. A failing xrandr run is answered with 500 and
 // its trimmed combined output. On success the applied output and mode are
 // echoed back with status "ok".
 func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
 	var req struct {
 		Output string `json:"output"`
 		Mode   string `json:"mode"`
 	}
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Output == "" || req.Mode == "" {
 		writeError(w, http.StatusBadRequest, "output and mode are required")
 		return
 	}
 	// Validate mode looks like WxH to prevent injection
 	if !displaySetModeRE.MatchString(req.Mode) {
 		writeError(w, http.StatusBadRequest, "invalid mode format")
 		return
 	}
 	// Validate output name (no special chars)
 	if !displaySetOutputRE.MatchString(req.Output) {
 		writeError(w, http.StatusBadRequest, "invalid output name")
 		return
 	}
 	if out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
 		writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
 		return
 	}
 	writeJSON(w, map[string]string{"status": "ok", "output": req.Output, "mode": req.Mode})
 }
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -10,30 +10,6 @@ import (
 	"bee/audit/internal/platform"
 )
 // TestXrandrCommandAddsDefaultX11Env verifies that xrandrCommand injects the
 // default DISPLAY and XAUTHORITY entries when both variables are empty in
 // the process environment.
 func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
 	t.Setenv("DISPLAY", "")
 	t.Setenv("XAUTHORITY", "")
 	env := xrandrCommand("--query").Env
 	present := make(map[string]bool, len(env))
 	for _, entry := range env {
 		present[entry] = true
 	}
 	if !present["DISPLAY=:0"] {
 		t.Fatalf("DISPLAY not injected: %v", env)
 	}
 	if !present["XAUTHORITY=/home/bee/.Xauthority"] {
 		t.Fatalf("XAUTHORITY not injected: %v", env)
 	}
 }
 func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	globalQueue.mu.Lock()
 	originalTasks := globalQueue.tasks
@@ -63,6 +39,9 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
 		t.Fatalf("burn profile=%q want smoke", got)
 	}
 	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
 		t.Fatalf("priority=%d want %d", got, taskPriorityValidate)
 	}
 }
 func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
@@ -85,7 +64,7 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })
 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
 	rec := httptest.NewRecorder()
 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -99,8 +78,8 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
 	}
 	task := globalQueue.tasks[0]
-	if task.Target != "nvidia-benchmark" {
+	if task.Target != "nvidia-bench-perf" {
-		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
+		t.Fatalf("target=%q want nvidia-bench-perf", task.Target)
 	}
 	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
 		t.Fatalf("gpu indices=%v want [1 3]", got)
@@ -108,6 +87,9 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	if task.params.RunNCCL {
 		t.Fatal("RunNCCL should reflect explicit false from request")
 	}
 	if task.Priority != taskPriorityBenchmark {
 		t.Fatalf("priority=%d want %d", task.Priority, taskPriorityBenchmark)
 	}
 }
 func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
@@ -131,7 +113,7 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })
 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
 	rec := httptest.NewRecorder()
 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -157,6 +139,94 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
 	if got := globalQueue.tasks[0].Priority; got != taskPriorityBenchmark {
 		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityBenchmark)
 	}
 	if got := globalQueue.tasks[1].Priority; got != taskPriorityBenchmark {
 		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityBenchmark)
 	}
 }
 // TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks posts a
 // ramp-up power benchmark request for three identical GPUs and checks that the
 // handler queues exactly one nvidia-bench-power task with benchmark priority
 // and RampTotal set to the number of selected GPUs.
 func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T) {
 	// Run against an empty queue and put the previous contents back afterwards.
 	globalQueue.mu.Lock()
 	savedTasks := globalQueue.tasks
 	globalQueue.tasks = nil
 	globalQueue.mu.Unlock()
 	t.Cleanup(func() {
 		globalQueue.mu.Lock()
 		defer globalQueue.mu.Unlock()
 		globalQueue.tasks = savedTasks
 	})
 	// Replace GPU discovery with a fixed three-GPU inventory.
 	savedLister := apiListNvidiaGPUs
 	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
 		inventory := []platform.NvidiaGPU{
 			{Index: 0, Name: "NVIDIA H100 PCIe"},
 			{Index: 1, Name: "NVIDIA H100 PCIe"},
 			{Index: 2, Name: "NVIDIA H100 PCIe"},
 		}
 		return inventory, nil
 	}
 	t.Cleanup(func() { apiListNvidiaGPUs = savedLister })
 	payload := strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"ramp_up":true}`)
 	request := httptest.NewRequest("POST", "/api/bee-bench/nvidia/power/run", payload)
 	recorder := httptest.NewRecorder()
 	api := &handler{opts: HandlerOptions{App: &app.App{}}}
 	api.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power").ServeHTTP(recorder, request)
 	if recorder.Code != 200 {
 		t.Fatalf("status=%d body=%s", recorder.Code, recorder.Body.String())
 	}
 	globalQueue.mu.Lock()
 	defer globalQueue.mu.Unlock()
 	// Ramp-up mode creates a single task that handles the 1→N GPU ramp internally
 	// (spawning N separate tasks would redundantly repeat all earlier ramp steps).
 	if queued := len(globalQueue.tasks); queued != 1 {
 		t.Fatalf("tasks=%d want 1 (ramp-up uses single task)", queued)
 	}
 	// t.Fatalf stops the test, so only the first mismatching property is reported.
 	task := globalQueue.tasks[0]
 	switch {
 	case task.Target != "nvidia-bench-power":
 		t.Fatalf("task target=%q want nvidia-bench-power", task.Target)
 	case task.Priority != taskPriorityBenchmark:
 		t.Fatalf("task priority=%d want %d", task.Priority, taskPriorityBenchmark)
 	case task.params.RampTotal != 3:
 		t.Fatalf("task RampTotal=%d want 3", task.params.RampTotal)
 	}
 }
 // TestHandleAPIBenchmarkAutotuneRunQueuesTask posts an autotune request and
 // checks that exactly one nvidia-bench-autotune task is queued and that it
 // carries the benchmark kind from the request body.
 func TestHandleAPIBenchmarkAutotuneRunQueuesTask(t *testing.T) {
 	// Run against an empty queue and put the previous contents back afterwards.
 	globalQueue.mu.Lock()
 	savedTasks := globalQueue.tasks
 	globalQueue.tasks = nil
 	globalQueue.mu.Unlock()
 	t.Cleanup(func() {
 		globalQueue.mu.Lock()
 		defer globalQueue.mu.Unlock()
 		globalQueue.tasks = savedTasks
 	})
 	payload := strings.NewReader(`{"profile":"standard","benchmark_kind":"power-fit"}`)
 	request := httptest.NewRequest("POST", "/api/bee-bench/nvidia/autotune/run", payload)
 	recorder := httptest.NewRecorder()
 	api := &handler{opts: HandlerOptions{App: &app.App{}}}
 	api.handleAPIBenchmarkAutotuneRun().ServeHTTP(recorder, request)
 	if recorder.Code != 200 {
 		t.Fatalf("status=%d body=%s", recorder.Code, recorder.Body.String())
 	}
 	globalQueue.mu.Lock()
 	defer globalQueue.mu.Unlock()
 	if queued := len(globalQueue.tasks); queued != 1 {
 		t.Fatalf("tasks=%d want 1", queued)
 	}
 	// t.Fatalf stops the test, so only the first mismatching property is reported.
 	task := globalQueue.tasks[0]
 	switch {
 	case task.Target != "nvidia-bench-autotune":
 		t.Fatalf("task target=%q want nvidia-bench-autotune", task.Target)
 	case task.params.BenchmarkKind != "power-fit":
 		t.Fatalf("task benchmark kind=%q want power-fit", task.params.BenchmarkKind)
 	}
 }
 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
@@ -199,6 +269,41 @@ func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
 	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
 		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityValidate)
 	}
 	if got := globalQueue.tasks[1].Priority; got != taskPriorityValidate {
 		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityValidate)
 	}
 }
 func TestDefaultTaskPriorityOrder(t *testing.T) {
 	got := []int{
 		defaultTaskPriority("install-to-ram", taskParams{}),
 		defaultTaskPriority("audit", taskParams{}),
 		defaultTaskPriority("cpu", taskParams{}),
 		defaultTaskPriority("cpu", taskParams{StressMode: true}),
 		defaultTaskPriority("nvidia-stress", taskParams{}),
 		defaultTaskPriority("nvidia-bench-perf", taskParams{}),
 		defaultTaskPriority("nvidia-bench-power", taskParams{}),
 	}
 	want := []int{
 		taskPriorityInstallToRAM,
 		taskPriorityAudit,
 		taskPriorityValidate,
 		taskPriorityValidateStress,
 		taskPriorityBurn,
 		taskPriorityBenchmark,
 		taskPriorityBenchmark,
 	}
 	for i := range want {
 		if got[i] != want[i] {
 			t.Fatalf("priority[%d]=%d want %d", i, got[i], want[i])
 		}
 	}
 	if !(got[0] > got[1] && got[1] > got[2] && got[2] > got[3] && got[3] > got[4] && got[4] > got[5] && got[5] == got[6]) {
 		t.Fatalf("priority order=%v", got)
 	}
 }
 func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -83,6 +83,10 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
 		}
 	}
 	// Downsample to at most ~1400 points (one per pixel) before building SVG.
 	times, datasets = downsampleTimeSeries(times, datasets, 1400)
 	pointCount = len(times)
 	statsLabel := chartStatsLabel(datasets)
 	legendItems := []metricChartSeries{}
@@ -196,6 +200,19 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
 		}
 	}
 	// Downsample to at most ~1400 points before building SVG.
 	{
 		datasets := make([][]float64, len(series))
 		for i := range series {
 			datasets[i] = series[i].Values
 		}
 		times, datasets = downsampleTimeSeries(times, datasets, 1400)
 		pointCount = len(times)
 		for i := range series {
 			series[i].Values = datasets[i]
 		}
 	}
 	scales := make([]chartScale, len(series))
 	for i := range series {
 		min, max := chartSeriesBounds(series[i].Values)
@@ -445,6 +462,127 @@ func synthesizeChartTimes(times []time.Time, count int) []time.Time {
 	return out
 }
 // renderStackedMetricChartSVG renders a stacked area chart where each dataset
 // is visually "stacked" on top of the previous one. Intended for multi-PSU
 // power charts where the filled area of each PSU shows its individual
 // contribution and the total height equals the combined draw.
 //
 // labels and times describe the X axis; the longer of the two decides the
 // point count and the shorter one is padded. Every dataset is copied and
 // zero-padded or truncated to that point count, so ragged input can neither
 // index past the cumulative rows nor drop a layer below its baseline, and the
 // caller's datasets slice is left untouched. names supplies the legend text per
 // dataset: a missing name is left empty and surplus names are ignored. A nil
 // yMax selects an automatic maximum derived from the stacked total. The
 // returned error is always nil.
 func renderStackedMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
 	pointCount := len(labels)
 	if len(times) > pointCount {
 		pointCount = len(times)
 	}
 	if pointCount == 0 {
 		// Nothing to plot: fall back to a single placeholder point.
 		pointCount = 1
 		labels = []string{""}
 		times = []time.Time{{}}
 	}
 	if len(labels) < pointCount {
 		padded := make([]string, pointCount)
 		copy(padded, labels)
 		labels = padded
 	}
 	if len(times) < pointCount {
 		times = synthesizeChartTimes(times, pointCount)
 	}
 	// Normalise every series to exactly pointCount samples in a fresh slice.
 	// copy transfers min(len) elements, which pads short series with zeros and
 	// truncates long ones; equal lengths also guarantee that downsampling
 	// resamples all series at the same indices.
 	aligned := make([][]float64, len(datasets))
 	for i, ds := range datasets {
 		row := make([]float64, pointCount)
 		copy(row, ds)
 		aligned[i] = row
 	}
 	datasets = aligned
 	times, datasets = downsampleTimeSeries(times, datasets, 1400)
 	pointCount = len(times)
 	// Build cumulative sums per time point.
 	cumulative := make([][]float64, len(datasets)+1)
 	for i := range cumulative {
 		cumulative[i] = make([]float64, pointCount)
 	}
 	for i, ds := range datasets {
 		for j := 0; j < pointCount; j++ {
 			// A sample missing from this series contributes nothing, so the
 			// layer collapses onto its baseline instead of dropping to zero.
 			v := 0.0
 			if j < len(ds) {
 				v = ds[j]
 			}
 			cumulative[i+1][j] = cumulative[i][j] + v
 		}
 	}
 	// Scale is based on the total (top cumulative row).
 	total := cumulative[len(cumulative)-1]
 	yMin := floatPtr(0)
 	if yMax == nil {
 		yMax = autoMax120(total)
 	}
 	scale := singleAxisChartScale([][]float64{total}, yMin, yMax)
 	// One legend entry (and colour) per dataset, regardless of how many names
 	// the caller supplied.
 	legendItems := make([]metricChartSeries, len(datasets))
 	for i := range datasets {
 		name := ""
 		if i < len(names) {
 			name = names[i]
 		}
 		color := metricChartPalette[i%len(metricChartPalette)]
 		legendItems[i] = metricChartSeries{Name: name, Color: color, Values: datasets[i]}
 	}
 	// Stats label from totals.
 	statsLabel := chartStatsLabel([][]float64{total})
 	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
 	start, end := chartTimeBounds(times)
 	var b strings.Builder
 	writeSVGOpen(&b, layout.Width, layout.Height)
 	writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
 	writeTimelineIdleSpans(&b, layout, start, end, timeline)
 	writeVerticalGrid(&b, layout, times, pointCount, 8)
 	writeHorizontalGrid(&b, layout, scale)
 	writeTimelineBoundaries(&b, layout, start, end, timeline)
 	writePlotBorder(&b, layout)
 	writeSingleAxisY(&b, layout, scale)
 	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
 	// Draw stacked areas from top to bottom so lower layers are visible.
 	for i := len(datasets) - 1; i >= 0; i-- {
 		writeStackedArea(&b, layout, times, start, end, cumulative[i], cumulative[i+1], scale, legendItems[i].Color)
 	}
 	// Draw border polylines on top.
 	for i := len(datasets) - 1; i >= 0; i-- {
 		writeSeriesPolyline(&b, layout, times, start, end, cumulative[i+1], scale, legendItems[i].Color)
 	}
 	writeLegend(&b, layout, legendItems)
 	writeSVGClose(&b)
 	return []byte(b.String()), nil
 }
 // writeStackedArea emits one filled <polygon> covering the band between two
 // cumulative series: the outline runs left-to-right along top and then
 // right-to-left along baseline. The polygon is filled with color at 55%
 // opacity and has no stroke. Nothing is written when top is empty; a baseline
 // shorter than top is replaced by an all-zero baseline.
 func writeStackedArea(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, baseline, top []float64, scale chartScale, color string) {
 	count := len(top)
 	if count == 0 {
 		return
 	}
 	if len(baseline) < count {
 		baseline = make([]float64, count)
 	}
 	var outline strings.Builder
 	// vertex appends the "x,y" pair for sample idx at the given value,
 	// space-separated from any pair already written.
 	vertex := func(idx int, value float64) {
 		px := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
 		py := chartYForValue(valueClamp(value, scale), scale, layout.PlotTop, layout.PlotBottom)
 		if outline.Len() > 0 {
 			outline.WriteByte(' ')
 		}
 		outline.WriteString(strconv.FormatFloat(px, 'f', 1, 64))
 		outline.WriteByte(',')
 		outline.WriteString(strconv.FormatFloat(py, 'f', 1, 64))
 	}
 	// Forward along the upper edge.
 	for idx := 0; idx < count; idx++ {
 		vertex(idx, top[idx])
 	}
 	// Back along the lower edge to close the shape.
 	for idx := count - 1; idx >= 0; idx-- {
 		vertex(idx, baseline[idx])
 	}
 	fmt.Fprintf(b, `<polygon points="%s" fill="%s" fill-opacity="0.55" stroke="none"/>`+"\n", outline.String(), color)
 }
 // writeSVGOpen appends the opening <svg> element to b, using width and height
 // both as the element size and as the viewBox extent, followed by a newline.
 func writeSVGOpen(b *strings.Builder, width, height int) {
 	openTag := fmt.Sprintf(`<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
 	b.WriteString(openTag)
 }
@@ -626,6 +764,87 @@ func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end
 	b.WriteString(`</g>` + "\n")
 }
 // downsampleTimeSeries reduces the time series to at most maxPts points using
 // min-max bucketing. The input is cut into maxPts/2 contiguous buckets and each
 // bucket contributes the indices of its minimum and maximum value, in
 // chronological order, measured on the reference series (the first dataset
 // whose length equals len(times)). With no reference series the first and last
 // index of each bucket are kept instead. When maxPts is 1 there is room for a
 // single point only, so one bucket is used and only the earlier of its two
 // indices is kept.
 //
 // Every dataset whose length equals len(times) is sampled at the selected
 // indices so the series stay aligned; datasets of any other length are
 // returned as-is. If len(times) <= maxPts, or maxPts <= 0, the inputs are
 // returned unchanged. The input slices are never modified.
 func downsampleTimeSeries(times []time.Time, datasets [][]float64, maxPts int) ([]time.Time, [][]float64) {
 	n := len(times)
 	if maxPts <= 0 || n <= maxPts {
 		return times, datasets
 	}
 	// Two samples per bucket normally; a budget of one point allows only one.
 	perBucket := 2
 	if maxPts < 2 {
 		perBucket = 1
 	}
 	buckets := maxPts / perBucket
 	// Use the first dataset that has the same length as times as the reference
 	// for deciding which indices to keep per bucket.
 	var ref []float64
 	for _, ds := range datasets {
 		if len(ds) == n {
 			ref = ds
 			break
 		}
 	}
 	selected := make([]int, 0, maxPts)
 	bucketSize := float64(n) / float64(buckets)
 	for b := 0; b < buckets; b++ {
 		lo := int(math.Round(float64(b) * bucketSize))
 		hi := int(math.Round(float64(b+1) * bucketSize))
 		if hi > n {
 			hi = n
 		}
 		if lo >= hi {
 			continue
 		}
 		// Default (no reference series): keep the bucket's edges.
 		first, second := lo, hi-1
 		if ref != nil {
 			minIdx, maxIdx := lo, lo
 			for i := lo + 1; i < hi; i++ {
 				if ref[i] < ref[minIdx] {
 					minIdx = i
 				}
 				if ref[i] > ref[maxIdx] {
 					maxIdx = i
 				}
 			}
 			// Emit the two extremes in chronological order.
 			first, second = minIdx, maxIdx
 			if second < first {
 				first, second = second, first
 			}
 		}
 		selected = append(selected, first)
 		if perBucket > 1 && second != first {
 			selected = append(selected, second)
 		}
 	}
 	outTimes := make([]time.Time, len(selected))
 	for i, idx := range selected {
 		outTimes[i] = times[idx]
 	}
 	outDatasets := make([][]float64, len(datasets))
 	for d, ds := range datasets {
 		if len(ds) != n {
 			// Not parallel to times: cannot be resampled, pass through.
 			outDatasets[d] = ds
 			continue
 		}
 		out := make([]float64, len(selected))
 		for i, idx := range selected {
 			out[i] = ds[idx]
 		}
 		outDatasets[d] = out
 	}
 	return outTimes, outDatasets
 }
 func chartXForTime(ts, start, end time.Time, left, right int) float64 {
 	if !end.After(start) {
 		return float64(left+right) / 2
--- a/audit/internal/webui/jobs.go
+++ b/audit/internal/webui/jobs.go
@@ -1,6 +1,9 @@
 package webui
 import (
 	"bufio"
 	"fmt"
 	"io"
 	"os"
 	"strings"
 	"sync"
@@ -17,6 +20,25 @@ type jobState struct {
 	cancel       func() // optional cancel function; nil if job is not cancellable
 	logPath      string
 	serialPrefix string
 	logFile      *os.File // kept open for the task lifetime to avoid per-line open/close
 	logBuf       *bufio.Writer
 }
 // readTaskLogFile reads a task log, refusing files over 50 MB.
 //
 // A regular file whose reported size already exceeds the limit is rejected
 // before any content is read, so an oversized log does not cost a 50 MB
 // allocation just to be refused. The read itself is still capped at one byte
 // past the limit, which covers files that grow while being read and sources
 // that do not report a size. Errors from opening or reading the file are
 // returned unwrapped.
 func readTaskLogFile(path string) ([]byte, error) {
 	const maxTaskLogBytes = 50 << 20
 	f, err := os.Open(path)
 	if err != nil {
 		return nil, err
 	}
 	defer f.Close()
 	// Best-effort early rejection; a Stat failure just falls through to the
 	// capped read below.
 	if info, statErr := f.Stat(); statErr == nil && info.Mode().IsRegular() && info.Size() > maxTaskLogBytes {
 		return nil, fmt.Errorf("task log %s too large (exceeds 50 MB)", path)
 	}
 	// Read one byte past the limit so "exactly at the limit" and "over the
 	// limit" can be told apart.
 	data, err := io.ReadAll(io.LimitReader(f, maxTaskLogBytes+1))
 	if err != nil {
 		return nil, err
 	}
 	if int64(len(data)) > maxTaskLogBytes {
 		return nil, fmt.Errorf("task log %s too large (exceeds 50 MB)", path)
 	}
 	return data, nil
 }
 // abort cancels the job if it has a cancel function and is not yet done.
@@ -31,13 +53,21 @@ func (j *jobState) abort() bool {
 }
 // append records line via appendWithOptions with both persistLog and
 // serialMirror enabled.
 func (j *jobState) append(line string) {
 	j.appendWithOptions(line, true, true)
 }
 // appendFromLog records line via appendWithOptions with both persistLog and
 // serialMirror disabled.
 // NOTE(review): presumably used when replaying lines that already exist in the
 // persisted log — confirm against callers.
 func (j *jobState) appendFromLog(line string) {
 	j.appendWithOptions(line, false, false)
 }
 func (j *jobState) appendWithOptions(line string, persistLog, serialMirror bool) {
 	j.mu.Lock()
 	defer j.mu.Unlock()
 	j.lines = append(j.lines, line)
-	if j.logPath != "" {
+	if persistLog && j.logPath != "" {
-		appendJobLog(j.logPath, line)
+		j.writeLogLineLocked(line)
 	}
-	if j.serialPrefix != "" {
+	if serialMirror && j.serialPrefix != "" {
 		taskSerialWriteLine(j.serialPrefix + line)
 	}
 	for _, ch := range j.subs {
@@ -48,6 +78,35 @@ func (j *jobState) append(line string) {
 	}
 }
 // writeLogLineLocked appends line plus a trailing newline to the job's
 // persistent log through a 64 KiB buffered writer. The file at j.logPath is
 // opened (created if needed, append-only) on first use and kept in j.logFile
 // for later calls; if it cannot be opened the line is silently dropped. Write
 // errors are ignored. The caller must hold j.mu.
 func (j *jobState) writeLogLineLocked(line string) {
 	if j.logFile == nil {
 		file, openErr := os.OpenFile(j.logPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
 		if openErr != nil {
 			return
 		}
 		j.logFile, j.logBuf = file, bufio.NewWriterSize(file, 64*1024)
 	}
 	_, _ = j.logBuf.WriteString(line + "\n")
 }
 // closeLog flushes any buffered log output and, if a log file is open, closes
 // it and clears both the file and the buffer so a later write reopens the file.
 // It takes j.mu itself. Flush and close errors are ignored.
 func (j *jobState) closeLog() {
 	j.mu.Lock()
 	defer j.mu.Unlock()
 	if buf := j.logBuf; buf != nil {
 		_ = buf.Flush()
 	}
 	if j.logFile == nil {
 		return
 	}
 	_ = j.logFile.Close()
 	j.logFile, j.logBuf = nil, nil
 }
 func (j *jobState) finish(errMsg string) {
 	j.mu.Lock()
 	defer j.mu.Unlock()
@@ -119,7 +178,7 @@ func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
 	if logPath == "" {
 		return j
 	}
-	data, err := os.ReadFile(logPath)
+	data, err := readTaskLogFile(logPath)
 	if err != nil || len(data) == 0 {
 		return j
 	}
--- a/audit/internal/webui/kmsg_watcher.go
+++ b/audit/internal/webui/kmsg_watcher.go
@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
 // isSATTarget returns true for task targets that run hardware acceptance tests.
 func isSATTarget(target string) bool {
 	switch target {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
 		"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
 		"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
 		"platform-stress":
--- a/audit/internal/webui/layout.go
+++ b/audit/internal/webui/layout.go
@@ -0,0 +1,137 @@
 package webui
 import (
 	"fmt"
 	"html"
 	"os"
 	"strings"
 )
 // layoutHead returns the start of an HTML page: the doctype, the <head> with
 // the HTML-escaped title and the inline stylesheet, and the opening <body> tag.
 // The caller is expected to append the body content and the closing tags.
 func layoutHead(title string) string {
 	return `<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width,initial-scale=1">
 <title>` + html.EscapeString(title) + `</title>
 <style>
 :root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6);--accent:#2185d0;--accent-dark:#1678c2;--crit-bg:#fff6f6;--crit-fg:#9f3a38;--crit-border:#e0b4b4;--ok-bg:#fcfff5;--ok-fg:#2c662d;--warn-bg:#fffaf3;--warn-fg:#573a08}
 *{box-sizing:border-box;margin:0;padding:0}
 body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);display:flex;min-height:100vh}
 a{color:var(--accent);text-decoration:none}
 /* Sidebar */
 .sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
 .sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
 .sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
 .sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)}
 .sidebar-badge{margin:0 12px 12px;padding:5px 8px;border-radius:4px;font-size:11px;font-weight:600;text-align:center}
 .sidebar-badge-warn{background:#7a4f00;color:#f6c90e}
 .sidebar-badge-crit{background:#5c1a1a;color:#ff6b6b}
 .nav{flex:1}
 .nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
 .nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
 .nav-item.active{color:#fff;background:rgba(33,133,208,.25);border-left-color:var(--accent)}
 /* Content */
 .main{flex:1;display:flex;flex-direction:column;overflow:auto}
 .topbar{padding:13px 24px;background:#1b1c1d;display:flex;align-items:center;gap:12px}
 .topbar h1{font-size:16px;font-weight:700;color:rgba(255,255,255,.9)}
 .content{padding:24px;flex:1}
 /* Cards */
 .card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);margin-bottom:16px;overflow:hidden}
 .card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px;display:flex;align-items:center;gap:8px}
 .card-head-actions{justify-content:space-between}
 .card-head-buttons{display:flex;align-items:center;gap:8px;margin-left:auto;flex-wrap:wrap}
 .card-body{padding:16px}
 /* Buttons */
 .btn{display:inline-flex;align-items:center;gap:6px;padding:8px 16px;border-radius:4px;font-size:13px;font-weight:700;cursor:pointer;border:none;transition:background .1s;font-family:inherit}
 .btn-primary{background:var(--accent);color:#fff}.btn-primary:hover{background:var(--accent-dark)}
 .btn-danger{background:#db2828;color:#fff}.btn-danger:hover{background:#b91c1c}
 .btn-secondary{background:var(--surface-2);color:var(--ink);border:1px solid var(--border)}.btn-secondary:hover{background:#eee}
 .btn-sm{padding:5px 10px;font-size:12px}
 /* Tables */
 table{width:100%;border-collapse:collapse;font-size:13px;background:var(--surface)}
 th{text-align:left;padding:9px 14px;color:var(--ink);font-weight:700;background:var(--surface-2);border-bottom:1px solid var(--border-lite)}
 td{padding:9px 14px;border-top:1px solid var(--border-lite)}
 tr:first-child td{border-top:0}
 tbody tr:hover td{background:rgba(0,0,0,.03)}
 /* Status badges */
 .badge{display:inline-block;padding:2px 9px;border-radius:4px;font-size:11px;font-weight:700}
 .badge-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
 .badge-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
 .badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
 .badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
 /* Component chips — one small square per device */
 .chips{display:inline-flex;flex-wrap:wrap;gap:3px;align-items:center;vertical-align:middle}
 .chip{display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:3px;font-size:10px;font-weight:800;cursor:default;font-family:monospace;letter-spacing:0;user-select:none}
 .chip-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
 .chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
 .chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
 .chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
 /* Output terminal */
 .terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
 .terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
 /* Forms */
 .form-row{margin-bottom:14px}
 .form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
 .form-row input,.form-row select{width:100%;padding:8px 10px;background:var(--surface);border:1px solid var(--border);border-radius:4px;color:var(--ink);font-size:13px;outline:none;font-family:inherit}
 .form-row input:focus,.form-row select:focus{border-color:var(--accent);box-shadow:0 0 0 2px rgba(33,133,208,.2)}
 /* Grid */
 .grid2{display:grid;grid-template-columns:1fr 1fr;gap:16px}
 .grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px}
@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}.card-head-actions{align-items:flex-start;flex-direction:column}.card-head-buttons{margin-left:0}}
 /* iframe viewer */
 .viewer-frame{width:100%;height:calc(100vh - 160px);border:0;border-radius:4px;background:var(--surface-2)}
 /* Alerts */
 .alert{padding:10px 14px;border-radius:4px;font-size:13px;margin-bottom:14px}
 .alert-info{background:#dff0ff;border:1px solid #a9d4f5;color:#1e3a5f}
 .alert-warn{background:var(--warn-bg);border:1px solid #c9ba9b;color:var(--warn-fg)}
 </style>
 </head>
 <body>
 `
 }
 // layoutNav renders the sidebar: logo, build version, an optional NVIDIA GSP
 // status badge, and the navigation links. active is the id of the entry to
 // highlight; a blank buildLabel is shown as "dev". The badge is driven by the
 // content of /run/bee-nvidia-mode and is omitted when that file cannot be read
 // or holds any value other than "gsp-off" or "gsp-stuck".
 func layoutNav(active string, buildLabel string) string {
 	type navEntry struct{ id, label, href, onclick string }
 	entries := []navEntry{
 		{"dashboard", "Dashboard", "/", ""},
 		{"audit", "Audit", "/audit", ""},
 		{"validate", "Validate", "/validate", ""},
 		{"burn", "Burn", "/burn", ""},
 		{"benchmark", "Benchmark", "/benchmark", ""},
 		{"tasks", "Tasks", "/tasks", ""},
 		{"tools", "Tools", "/tools", ""},
 	}
 	if strings.TrimSpace(buildLabel) == "" {
 		buildLabel = "dev"
 	}
 	var out strings.Builder
 	out.WriteString(`<aside class="sidebar">`)
 	out.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
 	out.WriteString(`<div class="sidebar-version">Version ` + html.EscapeString(buildLabel) + `</div>`)
 	if modeBytes, readErr := os.ReadFile("/run/bee-nvidia-mode"); readErr == nil {
 		if mode := strings.TrimSpace(string(modeBytes)); mode == "gsp-off" {
 			out.WriteString(`<div class="sidebar-badge sidebar-badge-warn">NVIDIA GSP=off</div>`)
 		} else if mode == "gsp-stuck" {
 			out.WriteString(`<div class="sidebar-badge sidebar-badge-crit">NVIDIA GSP stuck — reboot</div>`)
 		}
 	}
 	out.WriteString(`<nav class="nav">`)
 	for _, entry := range entries {
 		class := "nav-item"
 		if entry.id == active {
 			class += " active"
 		}
 		// Entries without a click handler get a plain link.
 		if entry.onclick == "" {
 			fmt.Fprintf(&out, `<a class="%s" href="%s">%s</a>`, class, entry.href, entry.label)
 			continue
 		}
 		fmt.Fprintf(&out, `<a class="%s" href="%s" onclick="%s">%s</a>`, class, entry.href, entry.onclick, entry.label)
 	}
 	out.WriteString(`</nav>`)
 	out.WriteString(`</aside>`)
 	return out.String()
 }
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -53,6 +53,9 @@ CREATE TABLE IF NOT EXISTS sys_metrics (
  cpu_load_pct REAL,
  mem_load_pct REAL,
  power_w      REAL,
  power_source TEXT,
  power_mode   TEXT,
  power_reason TEXT,
  PRIMARY KEY (ts)
 );
 CREATE TABLE IF NOT EXISTS gpu_metrics (
@@ -86,7 +89,16 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
 	if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
 		return err
 	}
-	return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
+	if err := ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_source", "TEXT"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_mode", "TEXT"); err != nil {
 		return err
 	}
 	return ensureMetricsColumn(db, "sys_metrics", "power_reason", "TEXT")
 }
 func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
@@ -125,8 +137,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	defer func() { _ = tx.Rollback() }()
 	_, err = tx.Exec(
-		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason) VALUES(?,?,?,?,?,?,?)`,
-		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW, s.PowerSource, s.PowerMode, s.PowerReason,
 	)
 	if err != nil {
 		return err
@@ -161,14 +173,64 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	return tx.Commit()
 }
 // Downsample thins old metrics rows down to one timestamp per minute.
 // It touches only rows in the half-open window
 // [deleteOlderThan, downsampleBefore); anything at or after downsampleBefore
 // keeps its full 5-second resolution. Within each 60-second bucket the rows
 // carrying the smallest ts survive and all others are deleted, which removes
 // roughly 92 % of the rows in the window while keeping the overall shape of
 // every chart.
 //
 // A nil receiver, a nil database handle, or an empty window is a no-op. The
 // first failing DELETE aborts the run and its error is returned.
 //
 // Called hourly by the metrics collector background goroutine.
 func (m *MetricsDB) Downsample(downsampleBefore, deleteOlderThan time.Time) error {
 	if m == nil || m.db == nil {
 		return nil
 	}
 	start, end := deleteOlderThan.Unix(), downsampleBefore.Unix()
 	if end <= start {
 		return nil
 	}
 	// Per table: drop every row in [start, end) whose ts is not the minimum ts
 	// of its 60-second bucket (integer division ts/60 is the bucket ID).
 	tables := [...]string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"}
 	for i := range tables {
 		table := tables[i]
 		stmt := `
 DELETE FROM ` + table + ` WHERE ts >= ? AND ts < ?
  AND ts NOT IN (
    SELECT MIN(ts) FROM ` + table + `
    WHERE ts >= ? AND ts < ?
    GROUP BY ts / 60
  )`
 		if _, err := m.db.Exec(stmt, start, end, start, end); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 // Prune removes every row with a timestamp before the cutoff from all four
 // metrics tables and then asks SQLite to truncate the WAL; the result of that
 // checkpoint is ignored. A nil receiver or nil database handle is a no-op. The
 // first failing DELETE aborts the run and its error is returned.
 // Called hourly by the metrics collector to keep the DB size bounded.
 func (m *MetricsDB) Prune(before time.Time) error {
 	if m == nil || m.db == nil {
 		return nil
 	}
 	cutoff := before.Unix()
 	tables := [...]string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"}
 	for i := range tables {
 		_, err := m.db.Exec("DELETE FROM "+tables[i]+" WHERE ts < ?", cutoff)
 		if err != nil {
 			return err
 		}
 	}
 	_, _ = m.db.Exec("PRAGMA wal_checkpoint(TRUNCATE)")
 	return nil
 }
 // LoadRecent returns up to n samples in chronological order (oldest first).
 func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
 }
 // LoadAll returns all persisted samples in chronological order (oldest first).
 func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics ORDER BY ts`, nil)
 }
 // LoadBetween returns samples in chronological order within the given time window.
@@ -183,7 +245,7 @@ func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSamp
 		start, end = end, start
 	}
 	return m.loadSamples(
-		`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
+		`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
 		start.Unix(), end.Unix(),
 	)
 }
@@ -199,11 +261,14 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	type sysRow struct {
 		ts            int64
 		cpu, mem, pwr float64
 		powerSource   string
 		powerMode     string
 		powerReason   string
 	}
 	var sysRows []sysRow
 	for rows.Next() {
 		var r sysRow
-		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr, &r.powerSource, &r.powerMode, &r.powerReason); err != nil {
 			continue
 		}
 		sysRows = append(sysRows, r)
@@ -313,10 +378,13 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	samples := make([]platform.LiveMetricSample, len(sysRows))
 	for i, r := range sysRows {
 		s := platform.LiveMetricSample{
-			Timestamp:  time.Unix(r.ts, 0).UTC(),
+			Timestamp:   time.Unix(r.ts, 0).UTC(),
-			CPULoadPct: r.cpu,
+			CPULoadPct:  r.cpu,
-			MemLoadPct: r.mem,
+			MemLoadPct:  r.mem,
-			PowerW:     r.pwr,
+			PowerW:      r.pwr,
 			PowerSource: r.powerSource,
 			PowerMode:   r.powerMode,
 			PowerReason: r.powerReason,
 		}
 		for _, idx := range gpuIndices {
 			if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
--- a/audit/internal/webui/page_benchmark.go
+++ b/audit/internal/webui/page_benchmark.go
@@ -0,0 +1,613 @@
 package webui
 import (
 	"encoding/json"
 	"fmt"
 	"html"
 	"os"
 	"path/filepath"
 	"sort"
 	"strconv"
 	"strings"
 	"time"
 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 )
 // benchmarkHistoryRun is the condensed view of one saved performance benchmark
 // result that the results table renders as a single row.
 type benchmarkHistoryRun struct {
 	// generatedAt is the result's timestamp; runs are ordered newest-first by it.
 	generatedAt   time.Time
 	// displayTime is generatedAt formatted in local time for the table.
 	displayTime   string
 	// gpuScores maps GPU index to that GPU's composite score.
 	gpuScores     map[int]float64
 	// gpuStatuses maps GPU index to that GPU's status string.
 	gpuStatuses   map[int]string
 	// overallStatus is the run-level status; the table shows an empty value as "OK".
 	overallStatus string
 }
 // renderBenchmark returns the HTML, CSS and JavaScript for the benchmark page:
 // the setup card (profile, GPU selection, run mode, run/autotune buttons), the
 // "Method Split" explanation card, the saved-results section rendered from
 // opts.ExportDir, and the client script that queues tasks and streams their logs.
 //
 // Values received from the API (GPU names, error messages) are HTML-escaped in
 // the script before being placed into innerHTML. The results section is
 // refreshed whenever a task chain finishes, including when the last stream
 // ends with a disconnect, and the autotune stream stops on disconnect instead
 // of reconnecting indefinitely.
 func renderBenchmark(opts HandlerOptions) string {
 	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 <div class="grid2">
  <div class="card">
    <div class="card-head">Benchmark Setup</div>
    <div class="card-body">
      <div class="form-row">
        <label>Profile</label>
        <select id="benchmark-profile">
          <option value="standard" selected>Standard — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</option>
          <option value="stability">Stability — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</option>
          <option value="overnight">Overnight — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfOvernightSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerOvernightSec) + `</option>
        </select>
      </div>
      <div class="form-row">
        <label>GPU Selection</label>
        <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectAll()">Select All</button>
          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectNone()">Clear</button>
        </div>
        <div id="benchmark-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
          <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
        </div>
      </div>
      <label class="benchmark-cb-row">
        <input type="radio" name="benchmark-mode" value="sequential" onchange="benchmarkUpdateSelectionNote()">
        <span>Sequential — one GPU at a time</span>
      </label>
      <label class="benchmark-cb-row" id="benchmark-parallel-label">
        <input type="radio" name="benchmark-mode" value="parallel" onchange="benchmarkUpdateSelectionNote()">
        <span>Parallel — all selected GPUs simultaneously</span>
      </label>
      <label class="benchmark-cb-row" id="benchmark-ramp-label">
        <input type="radio" name="benchmark-mode" value="ramp-up" checked onchange="benchmarkUpdateSelectionNote()">
        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
      </label>
      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
      <div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center">
        <button id="benchmark-run-performance-btn" class="btn btn-primary" onclick="runNvidiaBenchmark('performance')" disabled>&#9654; Run Performance Benchmark</button>
        <button id="benchmark-run-power-fit-btn" class="btn btn-secondary" onclick="runNvidiaBenchmark('power-fit')" disabled>&#9654; Run Power / Thermal Fit</button>
        <button id="benchmark-run-autotune-btn" class="btn btn-secondary" onclick="runBenchmarkAutotune()">Autotune</button>
      </div>
      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
      <div id="benchmark-autotune-status" style="margin-top:10px;font-size:12px;color:var(--muted)">Autotune status: loading…</div>
      <div style="margin-top:6px;font-size:12px;color:var(--muted)">Autotune overwrites the saved system-power source and applies it to all new power charts and tests.</div>
    </div>
  </div>
  <div class="card">
    <div class="card-head">Method Split</div>
    <div class="card-body">
      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
      <table>
        <tr><th>Run Type</th><th>Engine</th><th>Question</th><th>Standard</th><th>Stability</th></tr>
        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + `</td></tr>
        <tr><td>Power / Thermal Fit</td><td><code>dcgmproftester</code> + <code>nvidia-smi -pl</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</td></tr>
      </table>
      <p style="font-size:12px;color:var(--muted);margin-top:10px">Timings are per full ramp-up run (1 GPU → all selected), measured on 4–8 GPU servers. Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
    </div>
  </div>
 </div>
 ` + `<div id="benchmark-results-section">` + renderBenchmarkResultsCard(opts.ExportDir) + `</div>` + `
 <div id="benchmark-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
  <div class="card-body"><div id="benchmark-terminal" class="terminal"></div></div>
 </div>
 <style>
 .benchmark-cb-row { display:flex; align-items:flex-start; gap:8px; cursor:pointer; font-size:13px; }
 .benchmark-cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .benchmark-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .benchmark-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 </style>
 <script>
 let benchmarkES = null;
 // Escapes a value for safe insertion into HTML text or attribute context.
 function benchmarkEscapeHTML(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
 }
 function benchmarkTaskIDs(payload) {
  if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
  if (payload && payload.task_id) return [payload.task_id];
  return [];
 }
 function benchmarkSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.benchmark-gpu-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function benchmarkMode() {
  const el = document.querySelector('input[name="benchmark-mode"]:checked');
  return el ? el.value : 'sequential';
 }
 function benchmarkUpdateSelectionNote() {
  const selected = benchmarkSelectedGPUIndices();
  const perfBtn = document.getElementById('benchmark-run-performance-btn');
  const fitBtn = document.getElementById('benchmark-run-power-fit-btn');
  const note = document.getElementById('benchmark-selection-note');
  if (!selected.length) {
    perfBtn.disabled = true;
    fitBtn.disabled = true;
    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
    return;
  }
  perfBtn.disabled = false;
  fitBtn.disabled = false;
  const mode = benchmarkMode();
  if (mode === 'ramp-up') {
    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). Performance uses compute benchmark; Power / Thermal Fit uses dcgmproftester load with nvidia-smi power-limit search per step.';
  } else if (mode === 'parallel') {
    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously. Only the performance benchmark supports this mode.';
  } else {
    note.textContent = 'Sequential: each selected GPU benchmarked separately.';
  }
 }
 function benchmarkRenderGPUList(gpus) {
  const root = document.getElementById('benchmark-gpu-list');
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    benchmarkUpdateSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    const idx = benchmarkEscapeHTML(gpu.index);
    const mem = gpu.memory_mb > 0 ? ' · ' + benchmarkEscapeHTML(gpu.memory_mb) + ' MiB' : '';
    return '<label class="benchmark-gpu-row">'
      + '<input class="benchmark-gpu-checkbox" type="checkbox" value="' + idx + '" checked onchange="benchmarkUpdateSelectionNote()">'
      + '<span><strong>GPU ' + idx + '</strong> — ' + benchmarkEscapeHTML(gpu.name) + mem + '</span>'
      + '</label>';
  }).join('');
  benchmarkApplyMultiGPUState(gpus.length);
  benchmarkUpdateSelectionNote();
 }
 function benchmarkApplyMultiGPUState(gpuCount) {
  var multiValues = ['parallel', 'ramp-up'];
  var radios = document.querySelectorAll('input[name="benchmark-mode"]');
  radios.forEach(function(el) {
    var isMulti = multiValues.indexOf(el.value) >= 0;
    if (gpuCount < 2 && isMulti) {
      el.disabled = true;
      if (el.checked) {
        var seq = document.querySelector('input[name="benchmark-mode"][value="sequential"]');
        if (seq) seq.checked = true;
      }
      var label = el.closest('label');
      if (label) label.style.opacity = '0.4';
    } else {
      el.disabled = false;
      if (gpuCount >= 2 && el.value === 'ramp-up') el.checked = true;
      var label = el.closest('label');
      if (label) label.style.opacity = '';
    }
  });
  benchmarkUpdateSelectionNote();
 }
 function benchmarkLoadGPUs() {
  const status = document.getElementById('benchmark-run-status');
  status.textContent = '';
  fetch('/api/gpu/nvidia').then(function(r) {
    return r.json().then(function(body) {
      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
      return body;
    });
  }).then(function(gpus) {
    benchmarkRenderGPUList(gpus);
  }).catch(function(err) {
    document.getElementById('benchmark-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + benchmarkEscapeHTML(err.message) + '</p>';
    benchmarkUpdateSelectionNote();
  });
 }
 function benchmarkSelectAll() {
  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = true; });
  benchmarkUpdateSelectionNote();
 }
 function benchmarkSelectNone() {
  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = false; });
  benchmarkUpdateSelectionNote();
 }
 function runNvidiaBenchmark(kind) {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  if (!selected.length) {
    status.textContent = 'Select at least one GPU.';
    return;
  }
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  const mode = benchmarkMode();
  const rampUp = mode === 'ramp-up' && selected.length > 1;
  const parallelGPUs = mode === 'parallel' && kind === 'performance';
  if (kind === 'power-fit' && mode === 'parallel') {
    status.textContent = 'Power / Thermal Fit supports sequential or ramp-up only.';
    return;
  }
  const body = {
    profile: document.getElementById('benchmark-profile').value || 'standard',
    gpu_indices: selected,
    run_nccl: kind === 'performance' && selected.length > 1,
    parallel_gpus: parallelGPUs,
    ramp_up: rampUp,
    display_name: kind === 'power-fit' ? 'NVIDIA Power / Thermal Fit' : 'NVIDIA Performance Benchmark'
  };
  document.getElementById('benchmark-output').style.display = 'block';
  document.getElementById('benchmark-title').textContent = '— ' + body.display_name + ' · ' + body.profile + ' [' + selected.join(', ') + ']';
  const term = document.getElementById('benchmark-terminal');
  term.textContent = 'Enqueuing ' + body.display_name + ' for GPUs ' + selected.join(', ') + '...\n';
  status.textContent = 'Queueing...';
  const endpoint = kind === 'power-fit' ? '/api/bee-bench/nvidia/power/run' : '/api/bee-bench/nvidia/perf/run';
  fetch(endpoint, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  }).then(function(d) {
    const taskIds = benchmarkTaskIDs(d);
    if (!taskIds.length) throw new Error('No benchmark task was queued.');
    status.textContent = taskIds.length === 1 ? ('Task ' + taskIds[0] + ' queued.') : ('Queued ' + taskIds.length + ' tasks.');
    const streamNext = function(idx, failures) {
      if (idx >= taskIds.length) {
        status.textContent = failures ? 'Completed with failures.' : 'Completed.';
        // Refresh saved results whenever the chain ends, even if the last stream dropped.
        benchmarkRefreshResults();
        return;
      }
      const taskId = taskIds[idx];
      term.textContent += '\n[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming log...\n';
      benchmarkES = new EventSource('/api/tasks/' + taskId + '/stream');
      benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
      benchmarkES.addEventListener('done', function(e) {
        if (benchmarkES) {
          benchmarkES.close();
          benchmarkES = null;
        }
        if (e.data) failures += 1;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
        streamNext(idx + 1, failures);
      });
      benchmarkES.onerror = function() {
        if (benchmarkES) {
          benchmarkES.close();
          benchmarkES = null;
        }
        term.textContent += '\nERROR: stream disconnected.\n';
        term.scrollTop = term.scrollHeight;
        streamNext(idx + 1, failures + 1);
      };
    };
    streamNext(0, 0);
  }).catch(function(err) {
    status.textContent = 'Error.';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
 }
 function benchmarkRenderAutotuneStatus(payload) {
  const el = document.getElementById('benchmark-autotune-status');
  if (!el) return;
  if (!payload || !payload.configured || !payload.config) {
    el.textContent = 'Autotune status: not configured. Temporary fallback source is used until autotune completes.';
    return;
  }
  const cfg = payload.config || {};
  const decision = payload.decision || {};
  const updated = cfg.updated_at ? new Date(cfg.updated_at).toLocaleString() : 'unknown time';
  const confidence = typeof cfg.confidence === 'number' ? (' · confidence ' + Math.round(cfg.confidence * 100) + '%') : '';
  const effective = decision.effective_source ? (' · effective ' + decision.effective_source) : '';
  const mode = decision.mode ? (' · mode ' + decision.mode) : '';
  el.textContent = 'Autotune status: ' + cfg.selected_source + effective + mode + ' · updated ' + updated + confidence;
 }
 function loadBenchmarkAutotuneStatus() {
  fetch('/api/bee-bench/nvidia/autotune/status')
    .then(function(r) {
      return r.json().then(function(body) {
        if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
        return body;
      });
    })
    .then(function(body) { benchmarkRenderAutotuneStatus(body); })
    .catch(function(err) {
      const el = document.getElementById('benchmark-autotune-status');
      if (el) el.textContent = 'Autotune status error: ' + err.message;
    });
 }
 function runBenchmarkAutotune() {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  const term = document.getElementById('benchmark-terminal');
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  document.getElementById('benchmark-output').style.display = 'block';
  document.getElementById('benchmark-title').textContent = '— NVIDIA Benchmark Autotune';
  term.textContent = 'Enqueuing benchmark autotune...\n';
  status.textContent = 'Queueing autotune...';
  fetch('/api/bee-bench/nvidia/autotune/run', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify({
      profile: document.getElementById('benchmark-profile').value || 'standard',
      benchmark_kind: benchmarkMode() === 'parallel' ? 'performance' : 'power-fit',
      gpu_indices: selected
    })
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  }).then(function(d) {
    const taskIds = benchmarkTaskIDs(d);
    if (!taskIds.length) throw new Error('No autotune task was queued.');
    const taskId = taskIds[0];
    status.textContent = 'Autotune queued: ' + taskId;
    benchmarkES = new EventSource('/api/tasks/' + taskId + '/stream');
    benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
    benchmarkES.addEventListener('done', function(e) {
      if (benchmarkES) {
        benchmarkES.close();
        benchmarkES = null;
      }
      term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
      status.textContent = e.data ? 'Autotune failed.' : 'Autotune completed.';
      loadBenchmarkAutotuneStatus();
    });
    // Without this handler a dropped stream would silently reconnect forever.
    benchmarkES.onerror = function() {
      if (benchmarkES) {
        benchmarkES.close();
        benchmarkES = null;
      }
      term.textContent += '\nERROR: stream disconnected.\n';
      term.scrollTop = term.scrollHeight;
      status.textContent = 'Autotune stream disconnected.';
      loadBenchmarkAutotuneStatus();
    };
  }).catch(function(err) {
    status.textContent = 'Autotune error.';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
 }
 benchmarkLoadGPUs();
 loadBenchmarkAutotuneStatus();
 function benchmarkRefreshResults() {
  fetch('/api/benchmark/results')
    .then(function(r) { return r.text(); })
    .then(function(html) {
      const el = document.getElementById('benchmark-results-section');
      if (el) el.innerHTML = html;
    })
    .catch(function() {});
 }
 </script>`
 }
 // renderBenchmarkResultsCard builds the saved-results section of the benchmark
 // page: the performance history card followed by the power / thermal fit card,
 // separated by a newline. Both are loaded from exportDir.
 func renderBenchmarkResultsCard(exportDir string) string {
 	highestGPU, history := loadBenchmarkHistory(exportDir)
 	cards := []string{
 		renderBenchmarkResultsCardFromRuns(
 			"Perf Results",
 			"Composite score by saved benchmark run and GPU.",
 			"No saved performance benchmark runs yet.",
 			highestGPU,
 			history,
 		),
 		renderPowerBenchmarkResultsCard(exportDir),
 	}
 	return strings.Join(cards, "\n")
 }
 // renderBenchmarkResultsCardFromRuns renders a results card with one table row
 // per run and one score column for every GPU index from 0 to maxGPUIndex.
 //
 // With no runs the card contains only emptyMessage. A blank description is
 // omitted. An empty overall status is shown as "OK"; "FAILED" uses the critical
 // colour and any other non-OK status the warning colour. GPUs without a score
 // in a run are shown as a muted "-". Title, description, messages, times and
 // status labels are HTML-escaped.
 func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
 	const (
 		okColor   = "var(--ok)"
 		warnColor = "var(--warn)"
 		critColor = "var(--crit-fg,#9f3a38)"
 	)
 	cardOpen := `<div class="card"><div class="card-head">` + html.EscapeString(title) + `</div><div class="card-body">`
 	if len(runs) == 0 {
 		return cardOpen + `<p style="color:var(--muted);font-size:13px">` + html.EscapeString(emptyMessage) + `</p></div></div>`
 	}
 	var out strings.Builder
 	out.WriteString(cardOpen)
 	if strings.TrimSpace(description) != "" {
 		out.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">` + html.EscapeString(description) + `</p>`)
 	}
 	// Table header: fixed columns followed by one column per GPU index.
 	out.WriteString(`<div style="overflow-x:auto"><table><thead><tr><th>Run</th><th>Time</th><th>Status</th>`)
 	for col := 0; col <= maxGPUIndex; col++ {
 		out.WriteString(`<th>GPU ` + strconv.Itoa(col) + `</th>`)
 	}
 	out.WriteString(`</tr></thead><tbody>`)
 	for rowNum, entry := range runs {
 		label := entry.overallStatus
 		if label == "" {
 			label = "OK"
 		}
 		var labelColor string
 		switch label {
 		case "OK":
 			labelColor = okColor
 		case "FAILED":
 			labelColor = critColor
 		default:
 			labelColor = warnColor
 		}
 		out.WriteString(`<tr><td>#` + strconv.Itoa(rowNum+1) + `</td>`)
 		out.WriteString(`<td>` + html.EscapeString(entry.displayTime) + `</td>`)
 		out.WriteString(`<td style="color:` + labelColor + `;font-weight:600">` + html.EscapeString(label) + `</td>`)
 		for col := 0; col <= maxGPUIndex; col++ {
 			score, present := entry.gpuScores[col]
 			if !present {
 				out.WriteString(`<td style="color:var(--muted)">-</td>`)
 				continue
 			}
 			cellAttr := ""
 			switch entry.gpuStatuses[col] {
 			case "", "OK":
 				// Healthy GPUs keep the default cell styling.
 			case "FAILED":
 				cellAttr = ` style="color:` + critColor + `;font-weight:600"`
 			default:
 				// WARNING, PARTIAL and any unrecognised status share the warning colour.
 				cellAttr = ` style="color:` + warnColor + `;font-weight:600"`
 			}
 			fmt.Fprintf(&out, `<td%s>%.2f</td>`, cellAttr, score)
 		}
 		out.WriteString(`</tr>`)
 	}
 	out.WriteString(`</tbody></table></div></div></div>`)
 	return out.String()
 }
 // loadBenchmarkHistory finds every perf-*/result.json under the performance
 // benchmark directory and loads them as history runs. The directory is
 // <exportDir>/bee-bench/perf, or app.DefaultBeeBenchPerfDir when exportDir is
 // blank. It returns (-1, nil) when the glob fails or matches nothing.
 func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
 	var perfDir string
 	switch strings.TrimSpace(exportDir) {
 	case "":
 		perfDir = app.DefaultBeeBenchPerfDir
 	default:
 		perfDir = filepath.Join(exportDir, "bee-bench", "perf")
 	}
 	pattern := filepath.Join(perfDir, "perf-*", "result.json")
 	matches, globErr := filepath.Glob(pattern)
 	if globErr != nil || len(matches) == 0 {
 		return -1, nil
 	}
 	sort.Strings(matches)
 	return loadBenchmarkHistoryFromPaths(matches)
 }
 // loadBenchmarkHistoryFromPaths reads each result file in paths and converts it
 // into a benchmarkHistoryRun. Files that cannot be read or decoded are skipped.
 // It returns the highest GPU index seen across all runs (-1 if none) and the
 // runs ordered newest-first by their generation time.
 func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
 	highestIndex := -1
 	history := make([]benchmarkHistoryRun, 0, len(paths))
 	for _, resultPath := range paths {
 		data, readErr := os.ReadFile(resultPath)
 		if readErr != nil {
 			continue
 		}
 		var parsed platform.NvidiaBenchmarkResult
 		if decodeErr := json.Unmarshal(data, &parsed); decodeErr != nil {
 			continue
 		}
 		scores := make(map[int]float64)
 		statuses := make(map[int]string)
 		for _, g := range parsed.GPUs {
 			scores[g.Index] = g.Scores.CompositeScore
 			statuses[g.Index] = g.Status
 			if highestIndex < g.Index {
 				highestIndex = g.Index
 			}
 		}
 		history = append(history, benchmarkHistoryRun{
 			generatedAt:   parsed.GeneratedAt,
 			displayTime:   parsed.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
 			gpuScores:     scores,
 			gpuStatuses:   statuses,
 			overallStatus: parsed.OverallStatus,
 		})
 	}
 	// Newest run first.
 	sort.Slice(history, func(a, b int) bool {
 		return history[a].generatedAt.After(history[b].generatedAt)
 	})
 	return highestIndex, history
 }
 // renderPowerBenchmarkResultsCard renders the "Power / Thermal Fit Results"
 // card from every power-*/result.json under the power benchmark directory
 // (<exportDir>/bee-bench/power, or app.DefaultBeeBenchPowerDir when exportDir
 // is blank).
 //
 // The card shows a per-GPU table for the newest run and, when more than one
 // run exists, a collapsible list of all runs. Unreadable or undecodable result
 // files are skipped; if no usable result remains the "no saved runs" card is
 // returned instead of indexing into an empty slice.
 func renderPowerBenchmarkResultsCard(exportDir string) string {
 	const emptyCard = `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
 	// statusColor maps a non-empty status label to its display colour, matching
 	// the colours used for per-GPU rows and the performance results card.
 	statusColor := func(label string) string {
 		switch label {
 		case "OK":
 			return "var(--ok)"
 		case "FAILED":
 			return "var(--crit-fg,#9f3a38)"
 		default:
 			return "var(--warn)"
 		}
 	}
 	baseDir := app.DefaultBeeBenchPowerDir
 	if strings.TrimSpace(exportDir) != "" {
 		baseDir = filepath.Join(exportDir, "bee-bench", "power")
 	}
 	paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
 	if err != nil || len(paths) == 0 {
 		return emptyCard
 	}
 	sort.Strings(paths)
 	type powerRun struct {
 		generatedAt time.Time
 		displayTime string
 		result      platform.NvidiaPowerBenchResult
 	}
 	runs := make([]powerRun, 0, len(paths))
 	for _, path := range paths {
 		raw, err := os.ReadFile(path)
 		if err != nil {
 			continue
 		}
 		var r platform.NvidiaPowerBenchResult
 		if err := json.Unmarshal(raw, &r); err != nil {
 			continue
 		}
 		runs = append(runs, powerRun{
 			generatedAt: r.GeneratedAt,
 			displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
 			result:      r,
 		})
 	}
 	// Every matched file may have been unreadable or invalid; without this guard
 	// runs[0] below would panic.
 	if len(runs) == 0 {
 		return emptyCard
 	}
 	// Newest run first.
 	sort.Slice(runs, func(i, j int) bool {
 		return runs[i].generatedAt.After(runs[j].generatedAt)
 	})
 	var b strings.Builder
 	b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
 	latest := runs[0].result
 	b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
 	if latest.Hostname != "" {
 		b.WriteString(` — ` + html.EscapeString(latest.Hostname))
 	}
 	if latest.OverallStatus != "" {
 		b.WriteString(` — <span style="color:` + statusColor(latest.OverallStatus) + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
 	}
 	b.WriteString(`</p>`)
 	if len(latest.GPUs) > 0 {
 		b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
 		b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Single-card W</th><th>Multi-GPU W</th><th>P95 Observed W</th><th>Status</th>`)
 		b.WriteString(`</tr></thead><tbody>`)
 		// wattCell formats a wattage for display, using "-" for unset values.
 		wattCell := func(w float64) string {
 			if w > 0 {
 				return fmt.Sprintf("%.0f", w)
 			}
 			return "-"
 		}
 		for _, gpu := range latest.GPUs {
 			// The final limit is the stable limit, falling back to the applied one.
 			finalLimitW := gpu.StablePowerLimitW
 			if finalLimitW <= 0 {
 				finalLimitW = gpu.AppliedPowerLimitW
 			}
 			// Highlight GPUs flagged as derated or whose final limit is more than
 			// 1 W below the default limit.
 			derated := gpu.Derated ||
 				(gpu.DefaultPowerLimitW > 0 && finalLimitW > 0 && finalLimitW < gpu.DefaultPowerLimitW-1)
 			rowStyle := ""
 			finalStyle := ""
 			if derated {
 				rowStyle = ` style="background:rgba(255,180,0,0.08)"`
 				finalStyle = ` style="color:#e6a000;font-weight:600"`
 			}
 			statusLabel := gpu.Status
 			if statusLabel == "" {
 				statusLabel = "OK"
 			}
 			b.WriteString(`<tr` + rowStyle + `>`)
 			b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
 			b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
 			b.WriteString(`<td>` + wattCell(gpu.DefaultPowerLimitW) + `</td>`)
 			b.WriteString(`<td>` + wattCell(gpu.AppliedPowerLimitW) + `</td>`)
 			b.WriteString(`<td` + finalStyle + `>` + wattCell(gpu.StablePowerLimitW) + `</td>`)
 			// NOTE(review): the column header says "P95 Observed W" but the value
 			// comes from MaxObservedPowerW — confirm which one is intended.
 			b.WriteString(`<td>` + wattCell(gpu.MaxObservedPowerW) + `</td>`)
 			b.WriteString(`<td style="color:` + statusColor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
 			b.WriteString(`</tr>`)
 		}
 		b.WriteString(`</tbody></table></div>`)
 	}
 	if len(runs) > 1 {
 		b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
 		b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
 		for i, run := range runs {
 			// An empty status is shown as "OK", as in the per-GPU rows above.
 			statusLabel := run.result.OverallStatus
 			if statusLabel == "" {
 				statusLabel = "OK"
 			}
 			b.WriteString(`<tr>`)
 			b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
 			b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
 			b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
 			b.WriteString(`<td style="color:` + statusColor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
 			b.WriteString(`</tr>`)
 		}
 		b.WriteString(`</tbody></table></div></details>`)
 	}
 	b.WriteString(`</div></div>`)
 	return b.String()
 }
--- a/audit/internal/webui/page_burn.go
+++ b/audit/internal/webui/page_burn.go
@@ -0,0 +1,383 @@
 package webui
 func renderBurn() string {
 	return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
 <div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
 <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Burn Profile</div>
  <div class="card-body burn-profile-body">
    <div class="burn-profile-col">
      <div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
      <label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
      <label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
      <label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
    </div>
    <div class="burn-profile-col burn-profile-action">
      <button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
      <p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
    </div>
    <div class="burn-profile-col burn-profile-action">
      <button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>
      <p>Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.</p>
    </div>
  </div>
  <div class="card-body" style="padding-top:0;display:flex;justify-content:center">
    <span id="burn-all-status" style="font-size:12px;color:var(--muted)"></span>
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">NVIDIA GPU Selection</div>
  <div class="card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.</p>
    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectAll()">Select All</button>
      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectNone()">Clear</button>
    </div>
 	    <div id="burn-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
 	      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
 	    </div>
 	    <p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
 	    <div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
 	      <label class="cb-row">
 	        <input type="radio" name="burn-nvidia-mode" value="sequential" checked>
 	        <span>Sequential — selected GPUs one at a time</span>
 	      </label>
 	      <label class="cb-row" id="burn-parallel-label">
 	        <input type="radio" name="burn-nvidia-mode" value="parallel">
 	        <span>Parallel — all selected GPUs simultaneously</span>
 	      </label>
 	      <label class="cb-row" id="burn-ramp-label">
 	        <input type="radio" name="burn-nvidia-mode" value="ramp-up">
 	        <span>Ramp-up — add one GPU at a time</span>
 	      </label>
 	    </div>
 	  </div>
 	</div>
 <div class="burn-section">Core Burn Paths</div>
 <div class="grid2 burn-grid" style="margin-bottom:16px">
 <div class="card burn-card">
  <div class="card-head card-head-actions"><span>GPU Max Load</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'}])">Run</button></div>
  <div class="card-body burn-card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.</p>
    <label class="cb-row"><input type="checkbox" id="burn-nvidia-compute" checked disabled><span>NVIDIA Max Compute Load (dcgmproftester) <span class="cb-note" id="note-nvidia-compute"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-bee" checked disabled><span>GPU Burn (bee-gpu-burn) <span class="cb-note" id="note-bee"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-john" disabled><span>John GPU Stress (john/OpenCL) <span class="cb-note" id="note-john"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" disabled><span>AMD GPU Stress (rvs gst) <span class="cb-note" id="note-rvs"></span></span></label>
  </div>
 </div>
 <div class="card burn-card">
  <div class="card-head card-head-actions"><span>Compute Stress</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'}])">Run</button></div>
  <div class="card-body burn-card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Select which subsystems to stress. Each checked item runs as a separate task.</p>
    <label class="cb-row"><input type="checkbox" id="burn-cpu" checked><span>CPU stress (stress-ng)</span></label>
    <label class="cb-row"><input type="checkbox" id="burn-mem-stress" checked><span>Memory stress (stress-ng --vm)</span></label>
    <label class="cb-row"><input type="checkbox" id="burn-sat-stress"><span>stressapptest (CPU + memory bus)</span></label>
  </div>
 </div>
 </div>
 <div id="bi-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Output <span id="bi-title"></span></div>
  <div class="card-body"><div id="bi-terminal" class="terminal"></div></div>
 </div>
 <style>
 .cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
 .cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .cb-row input[type=checkbox]:disabled { opacity:0.4; cursor:not-allowed; }
 .cb-row input[type=checkbox]:disabled ~ span { opacity:0.45; cursor:not-allowed; }
 .cb-note { font-size:11px; color:var(--muted); font-style:italic; }
 .burn-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .burn-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .burn-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
 .burn-profile-col { min-width:0; }
 .burn-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:flex-start; gap:8px; }
 .burn-profile-action p { font-size:12px; color:var(--muted); margin:0; width:100%; text-align:left; }
 .burn-section { font-size:12px; font-weight:700; letter-spacing:.06em; text-transform:uppercase; color:var(--muted); margin:0 0 10px; padding-top:4px; }
 .burn-grid { align-items:stretch; }
 .burn-card { height:100%; display:flex; flex-direction:column; }
 .burn-card-body { flex:1; display:flex; flex-direction:column; }
 .card-head-actions { justify-content:space-between; }
 .card-head-buttons { display:flex; align-items:center; gap:8px; margin-left:auto; }
@media(max-width:900px){ .card-head-actions { align-items:flex-start; flex-direction:column; } .card-head-buttons { margin-left:0; } .burn-profile-body { grid-template-columns:1fr; } }
 </style>
 <script>
 let biES = null;
 function burnTaskIDs(payload) {
  if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
  if (payload && payload.task_id) return [payload.task_id];
  return [];
 }
 function burnProfile() {
  const selected = document.querySelector('input[name="burn-profile"]:checked');
  return selected ? selected.value : 'smoke';
 }
 function burnSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.burn-gpu-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function burnNvidiaMode() {
  const el = document.querySelector('input[name="burn-nvidia-mode"]:checked');
  return el ? el.value : 'sequential';
 }
 function burnApplyMultiGPUState(gpuCount) {
  var multiValues = ['parallel', 'ramp-up'];
  var radios = document.querySelectorAll('input[name="burn-nvidia-mode"]');
  radios.forEach(function(el) {
    var isMulti = multiValues.indexOf(el.value) >= 0;
    if (gpuCount < 2 && isMulti) {
      el.disabled = true;
      if (el.checked) {
        var seq = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
        if (seq) seq.checked = true;
      }
      var label = el.closest('label');
      if (label) label.style.opacity = '0.4';
    } else {
      el.disabled = false;
      var label = el.closest('label');
      if (label) label.style.opacity = '';
    }
  });
 }
 function burnUpdateSelectionNote() {
  const note = document.getElementById('burn-selection-note');
  const selected = burnSelectedGPUIndices();
  if (!selected.length) {
    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA burn recipes.';
    return;
  }
  note.textContent = 'Selected NVIDIA GPUs: ' + selected.join(', ') + '. Official and custom NVIDIA tasks will use only these GPUs.';
 }
 function burnRenderGPUList(gpus) {
  const root = document.getElementById('burn-gpu-list');
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    burnUpdateSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
    return '<label class="burn-gpu-row">'
      + '<input class="burn-gpu-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="burnUpdateSelectionNote()">'
      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
      + '</label>';
  }).join('');
  burnApplyMultiGPUState(gpus.length);
  burnUpdateSelectionNote();
 }
 function burnSelectAll() {
  document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = true; });
  burnUpdateSelectionNote();
 }
 function burnSelectNone() {
  document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = false; });
  burnUpdateSelectionNote();
 }
 function burnLoadGPUs() {
  fetch('/api/gpu/nvidia').then(function(r) {
    return r.json().then(function(body) {
      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
      return body;
    });
  }).then(function(gpus) {
    burnRenderGPUList(gpus);
  }).catch(function(err) {
    document.getElementById('burn-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
    burnUpdateSelectionNote();
  });
 }
 function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
  const body = Object.assign({ profile: burnProfile(), display_name: label }, extra || {});
  if (useSelectedNvidia) {
    const selected = burnSelectedGPUIndices();
    if (!selected.length) {
      return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
    }
    body.gpu_indices = selected;
    const bMode = burnNvidiaMode();
    if (bMode === 'ramp-up' && selected.length > 1) {
      body.stagger_gpu_start = true;
    } else if (bMode === 'parallel' && selected.length > 1) {
      body.parallel_gpus = true;
    }
  }
  return fetch('/api/sat/' + target + '/run', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  });
 }
 function streamTask(taskId, label) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  term.textContent = 'Task ' + taskId + ' queued. Streaming...\n';
  biES = new EventSource('/api/tasks/' + taskId + '/stream');
  biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
  biES.addEventListener('done', function(e) {
    biES.close();
    biES = null;
    term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
    term.scrollTop = term.scrollHeight;
  });
 }
 function streamBurnTask(taskId, label, resetTerminal) {
  return streamBurnTaskSet([taskId], label, resetTerminal);
 }
 function streamBurnTaskSet(taskIds, label, resetTerminal) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  if (resetTerminal) {
    term.textContent = '';
  }
  if (!Array.isArray(taskIds) || !taskIds.length) {
    term.textContent += 'ERROR: no tasks queued.\n';
    return Promise.resolve({ok:false, error:'no tasks queued'});
  }
  const streamNext = function(idx, failures) {
    if (idx >= taskIds.length) {
      return Promise.resolve({ok: failures === 0, error: failures ? (failures + ' task(s) failed') : ''});
    }
    const taskId = taskIds[idx];
    term.textContent += '[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming...\n';
    return new Promise(function(resolve) {
      biES = new EventSource('/api/tasks/' + taskId + '/stream');
      biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
      biES.addEventListener('done', function(e) {
        biES.close();
        biES = null;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
        resolve(failures + (e.data ? 1 : 0));
      });
      biES.onerror = function() {
        if (biES) {
          biES.close();
          biES = null;
        }
        term.textContent += '\nERROR: stream disconnected.\n';
        term.scrollTop = term.scrollHeight;
        resolve(failures + 1);
      };
    }).then(function(nextFailures) {
      return streamNext(idx + 1, nextFailures);
    });
  };
  return streamNext(0, 0);
 }
 function runBurnTaskSet(tasks, statusElId) {
  const enabled = tasks.filter(function(t) {
    const el = document.getElementById(t.id);
    return el && el.checked && !el.disabled;
  });
  const status = statusElId ? document.getElementById(statusElId) : null;
  if (status) status.textContent = '';
  if (!enabled.length) {
    if (status) status.textContent = 'No tasks selected.';
    return;
  }
  const term = document.getElementById('bi-terminal');
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— Burn one by one [' + burnProfile() + ']';
  term.textContent = '';
  const runNext = function(idx) {
    if (idx >= enabled.length) {
      if (status) status.textContent = 'Completed ' + enabled.length + ' task(s).';
      return Promise.resolve();
    }
    const t = enabled[idx];
    term.textContent += '\n[' + (idx + 1) + '/' + enabled.length + '] ' + t.label + '\n';
    if (status) status.textContent = 'Running ' + (idx + 1) + '/' + enabled.length + '...';
    return enqueueBurnTask(t.target, t.label, t.extra, !!t.nvidia)
      .then(function(d) {
        return streamBurnTaskSet(burnTaskIDs(d), t.label, false);
      })
      .then(function() {
        return runNext(idx + 1);
      })
      .catch(function(err) {
        if (status) status.textContent = 'Error: ' + err.message;
        document.getElementById('bi-output').style.display = 'block';
        term.textContent += 'ERROR: ' + err.message + '\n';
        return Promise.reject(err);
      });
  };
  return runNext(0);
 }
 function runPlatformStress() {
  const comps = [];
  const computeIDs = ['burn-cpu', 'burn-mem-stress', 'burn-sat-stress'];
  const gpuIDs = ['burn-nvidia-compute', 'burn-gpu-bee', 'burn-gpu-john', 'burn-gpu-rvs'];
  const hasChecked = function(ids) {
    return ids.some(function(id) {
      const el = document.getElementById(id);
      return el && el.checked && !el.disabled;
    });
  };
  if (hasChecked(computeIDs)) comps.push('cpu');
  if (hasChecked(gpuIDs)) comps.push('gpu');
  if (!comps.length) {
    const status = document.getElementById('burn-all-status');
    if (status) status.textContent = 'Select at least one test in GPU Max Load or Compute Stress.';
    return;
  }
  const extra = comps.length > 0 ? {platform_components: comps} : {};
  enqueueBurnTask('platform-stress', 'Platform Thermal Cycling', extra, false).then(function(d) {
    streamTask(d.task_id, 'Platform Thermal Cycling');
  });
 }
 function runAllBurnTasks() {
  const status = document.getElementById('burn-all-status');
  const all = [
    {id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},
    {id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},
    {id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},
    {id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'},
    {id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},
    {id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},
    {id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'},
  ];
  status.textContent = 'Enqueuing...';
  runBurnTaskSet(all, 'burn-all-status');
 }
 fetch('/api/gpu/tools').then(function(r) { return r.json(); }).then(function(tools) {
  const map = {
    'nvidia-compute': {cb:'burn-nvidia-compute', note:'note-nvidia-compute', reason:'dcgmproftester not available or NVIDIA driver not running'},
    'bee-gpu-burn': {cb:'burn-gpu-bee', note:'note-bee', reason:'bee-gpu-burn not available or NVIDIA driver not running'},
    'john': {cb:'burn-gpu-john', note:'note-john', reason:'bee-john-gpu-stress not available or NVIDIA driver not running'},
    'rvs': {cb:'burn-gpu-rvs', note:'note-rvs', reason:'AMD driver not running'},
  };
  tools.forEach(function(t) {
    const spec = map[t.id];
    if (!spec) return;
    const cb = document.getElementById(spec.cb);
    const note = document.getElementById(spec.note);
    if (!cb) return;
    if (t.available) {
      cb.disabled = false;
    } else if (note) {
      note.textContent = '— ' + spec.reason;
    }
  });
 }).catch(function() {});
 burnLoadGPUs();
 </script>`
 }
--- a/audit/internal/webui/page_export_tools.go
+++ b/audit/internal/webui/page_export_tools.go
@@ -0,0 +1,434 @@
 package webui
 import (
 	"fmt"
 	"html"
 	"net/url"
 	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 )
 // renderExport builds the body of the Export page: a support-bundle card,
 // a table linking every file found under exportDir, and the USB export card.
 //
 // File names are query-escaped in the link target and HTML-escaped in the
 // link text. If the directory listing fails, the failure is shown in the
 // table instead of being reported as an empty export directory.
 func renderExport(exportDir string) string {
 	entries, err := listExportFiles(exportDir)
 	var rows strings.Builder
 	for _, e := range entries {
 		fmt.Fprintf(&rows, `<tr><td><a href="/export/file?path=%s" target="_blank">%s</a></td></tr>`,
 			url.QueryEscape(e), html.EscapeString(e))
 	}
 	switch {
 	case err != nil:
 		// A listing error must not masquerade as "no files": tell the operator.
 		fmt.Fprintf(&rows, `<tr><td style="color:var(--crit-fg)">Failed to list export files: %s</td></tr>`,
 			html.EscapeString(err.Error()))
 	case len(entries) == 0:
 		rows.WriteString(`<tr><td style="color:var(--muted)">No export files found.</td></tr>`)
 	}
 	return `<div class="grid2">
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
 ` + renderSupportBundleInline() + `
 </div></div>
 <div class="card"><div class="card-head">Export Files</div><div class="card-body">
 <table><tr><th>File</th></tr>` + rows.String() + `</table>
 </div></div>
 </div>
 ` + renderUSBExportCard()
 }
 // listExportFiles returns the sorted paths, relative to exportDir, of every
 // non-directory entry found by walking exportDir recursively.
 //
 // Surrounding whitespace in exportDir is trimmed once and the trimmed root is
 // used both for the walk and for computing relative paths, so the two always
 // agree. A walk error that satisfies os.IsNotExist (for example a missing
 // export directory) is not reported as an error; any other walk error is
 // returned with a nil slice.
 func listExportFiles(exportDir string) ([]string, error) {
 	root := strings.TrimSpace(exportDir)
 	var entries []string
 	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return err
 		}
 		if info.IsDir() {
 			return nil
 		}
 		// Relativize against the same root the walk started from.
 		rel, err := filepath.Rel(root, path)
 		if err != nil {
 			return err
 		}
 		entries = append(entries, rel)
 		return nil
 	})
 	if err != nil && !os.IsNotExist(err) {
 		return nil, err
 	}
 	sort.Strings(entries)
 	return entries, nil
 }
 // renderSupportBundleInline returns the markup and script for the
 // "Download Support Bundle" button.
 //
 // The script fetches /export/support.tar.gz, takes the file name from the
 // Content-Disposition header when present (falling back to
 // bee-support.tar.gz), and triggers a browser download through a temporary
 // object URL. The button is disabled while the request is in flight and is
 // restored afterwards with the same down-arrow label it is rendered with.
 func renderSupportBundleInline() string {
 	return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleDownload()">&#8595; Download Support Bundle</button>
 <div id="support-bundle-status" style="margin-top:10px;font-size:13px;color:var(--muted)"></div>
 <script>
 window.supportBundleDownload = function() {
  var btn = document.getElementById('support-bundle-btn');
  var status = document.getElementById('support-bundle-status');
  btn.disabled = true;
  btn.textContent = 'Building...';
  status.textContent = 'Collecting logs and export data\u2026';
  status.style.color = 'var(--muted)';
  var filename = 'bee-support.tar.gz';
  fetch('/export/support.tar.gz')
    .then(function(r) {
      if (!r.ok) throw new Error('HTTP ' + r.status);
      var cd = r.headers.get('Content-Disposition') || '';
      var m = cd.match(/filename="?([^";]+)"?/);
      if (m) filename = m[1];
      return r.blob();
    })
    .then(function(blob) {
      var url = URL.createObjectURL(blob);
      var a = document.createElement('a');
      a.href = url;
      a.download = filename;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
      URL.revokeObjectURL(url);
      status.textContent = 'Download started.';
      status.style.color = 'var(--ok-fg)';
    })
    .catch(function(e) {
      status.textContent = 'Error: ' + e.message;
      status.style.color = 'var(--crit-fg)';
    })
    .finally(function() {
      btn.disabled = false;
      btn.textContent = '\u2193 Download Support Bundle';
    });
 };
 </script>`
 }
 // renderUSBExportCard wraps the inline USB export widget in its own card.
 // The card header carries a Refresh button that invokes usbRefresh().
 func renderUSBExportCard() string {
 	const cardOpen = `<div class="card" style="margin-top:16px">
  <div class="card-head">Export to USB
    <button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
  </div>
  <div class="card-body">`
 	const cardClose = `</div>
 </div>`
 	widget := renderUSBExportInline()
 	return cardOpen + widget + cardClose
 }
 // renderUSBExportInline returns the markup and script of the USB export
 // widget: a status line, a table of removable targets loaded from
 // /api/export/usb, and per-target buttons that POST the chosen target to
 // /api/export/usb/audit or /api/export/usb/bundle.
 //
 // Device-provided strings (device path, filesystem, size, label, model) are
 // HTML-escaped before being placed into innerHTML, because a volume label is
 // chosen by whoever formatted the drive. The script exposes window.usbRefresh
 // and window.usbExport and runs an initial scan when it loads.
 func renderUSBExportInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
 <div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
 <div id="usb-targets" style="margin-top:12px"></div>
 <div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
 <script>
 (function(){
 // Escape a value for safe interpolation into innerHTML markup.
 function usbEsc(v) {
  return String(v == null ? '' : v)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
 }
 function usbRefresh() {
  document.getElementById('usb-status').textContent = 'Scanning...';
  document.getElementById('usb-targets').innerHTML = '';
  document.getElementById('usb-msg').textContent = '';
  fetch('/api/export/usb').then(r=>r.json()).then(targets => {
    // A non-array, non-empty payload is an error object, not a device list.
    if (targets && !Array.isArray(targets)) {
      window._usbTargets = [];
      throw new Error(targets.error || 'unexpected response');
    }
    const list = targets || [];
    window._usbTargets = list;
    const st = document.getElementById('usb-status');
    const ct = document.getElementById('usb-targets');
    if (list.length === 0) {
      st.textContent = 'No removable USB devices found.';
      return;
    }
    st.textContent = list.length + ' device(s) found:';
    ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
      list.map((t, idx) => {
        const dev = usbEsc(t.device);
        const label = usbEsc(t.label);
        const model = usbEsc(t.model);
        return '<tr>' +
          '<td style="font-family:monospace">'+dev+'</td>' +
          '<td>'+usbEsc(t.fs_type)+'</td>' +
          '<td>'+usbEsc(t.size)+'</td>' +
          '<td>'+label+'</td>' +
          '<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
          '<td style="white-space:nowrap">' +
            '<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+idx+',this)">Audit JSON</button> ' +
            '<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+idx+',this)">Support Bundle</button>' +
            '<div class="usb-row-msg" style="margin-top:6px;font-size:12px;color:var(--muted)"></div>' +
          '</td></tr>';
      }).join('') + '</table>';
  }).catch(e => {
    document.getElementById('usb-status').textContent = 'Error: ' + (e.message || e);
  });
 }
 window.usbExport = function(type, targetIndex, btn) {
  const target = (window._usbTargets || [])[targetIndex];
  if (!target) {
    const msg = document.getElementById('usb-msg');
    msg.style.color = 'var(--err,red)';
    msg.textContent = 'Error: USB target not found. Refresh and try again.';
    return;
  }
  const msg = document.getElementById('usb-msg');
  const row = btn ? btn.closest('td') : null;
  const rowMsg = row ? row.querySelector('.usb-row-msg') : null;
  const originalText = btn ? btn.textContent : '';
  if (btn) {
    btn.disabled = true;
    btn.textContent = 'Exporting...';
  }
  if (rowMsg) {
    rowMsg.style.color = 'var(--muted)';
    rowMsg.textContent = 'Working...';
  }
  msg.style.color = 'var(--muted)';
  msg.textContent = 'Exporting ' + (type === 'bundle' ? 'support bundle' : 'audit JSON') + ' to ' + (target.device||'') + '...';
  fetch('/api/export/usb/'+type, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(target)
  }).then(async r => {
    const d = await r.json();
    if (!r.ok) throw new Error(d.error || ('HTTP ' + r.status));
    return d;
  }).then(d => {
    msg.style.color = 'var(--ok,green)';
    msg.textContent = d.message || 'Done.';
    if (rowMsg) {
      rowMsg.style.color = 'var(--ok,green)';
      rowMsg.textContent = d.message || 'Done.';
    }
  }).catch(e => {
    const text = 'Error: ' + (e.message || e);
    msg.style.color = 'var(--err,red)';
    msg.textContent = text;
    if (rowMsg) {
      rowMsg.style.color = 'var(--err,red)';
      rowMsg.textContent = text;
    }
  }).finally(() => {
    if (btn) {
      btn.disabled = false;
      btn.textContent = originalText;
    }
  });
 };
 window.usbRefresh = usbRefresh;
 usbRefresh();
 })();
 </script>`
 }
 // renderNvidiaSelfHealInline returns the markup and script of the NVIDIA
 // self-heal widget.
 //
 // The script loads GPU state from /api/gpu/nvidia-status into a table,
 // restarts the bee-nvidia service through /api/services/action, and resets a
 // single GPU through /api/gpu/nvidia-reset. The result of each action is
 // shown in a small terminal with a status marker: "running..." while the
 // request is in flight, then done or failed. Values taken from the status
 // payload are HTML-escaped before being inserted with innerHTML.
 func renderNvidiaSelfHealInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Inspect NVIDIA GPU health, restart the bee-nvidia driver service, and issue a per-GPU reset when the driver reports reset required.</p>
 <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
  <button id="nvidia-restart-btn" class="btn btn-secondary" onclick="nvidiaRestartDrivers()">Restart GPU Drivers</button>
  <button class="btn btn-sm btn-secondary" onclick="loadNvidiaSelfHeal()">&#8635; Refresh</button>
 </div>
 <div id="nvidia-self-heal-status" style="font-size:13px;color:var(--muted);margin-bottom:12px">Loading NVIDIA GPU status...</div>
 <div id="nvidia-self-heal-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div id="nvidia-self-heal-out" style="display:none;margin-top:12px">
  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
    <span id="nvidia-self-heal-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
    <span id="nvidia-self-heal-out-status" style="font-size:12px"></span>
  </div>
  <div id="nvidia-self-heal-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
 </div>
 <script>
 // Escape a value for safe interpolation into innerHTML markup.
 function nvidiaSelfHealEsc(v) {
  return String(v == null ? '' : v)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
 }
 function nvidiaSelfHealShowResult(label, status, output) {
  var out = document.getElementById('nvidia-self-heal-out');
  var term = document.getElementById('nvidia-self-heal-terminal');
  var statusEl = document.getElementById('nvidia-self-heal-out-status');
  var labelEl = document.getElementById('nvidia-self-heal-out-label');
  out.style.display = 'block';
  labelEl.textContent = label;
  term.textContent = output || '(no output)';
  term.scrollTop = term.scrollHeight;
  if (status === 'running') {
    // In-flight requests must not be presented as already finished.
    statusEl.textContent = 'running...';
    statusEl.style.color = 'var(--muted)';
  } else if (status === 'ok') {
    statusEl.textContent = '✓ done';
    statusEl.style.color = 'var(--ok-fg, #2c662d)';
  } else {
    statusEl.textContent = '✗ failed';
    statusEl.style.color = 'var(--crit-fg, #9f3a38)';
  }
 }
 function nvidiaRestartDrivers() {
  var btn = document.getElementById('nvidia-restart-btn');
  var original = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Restarting...';
  nvidiaSelfHealShowResult('restart bee-nvidia', 'running', 'Running...');
  fetch('/api/services/action', {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({name:'bee-nvidia', action:'restart'})
  }).then(r=>r.json()).then(d => {
    nvidiaSelfHealShowResult('restart bee-nvidia', d.status || 'error', d.output || d.error || '(no output)');
    setTimeout(function() {
      // loadServices is provided by the services widget; it may be absent.
      if (typeof loadServices === 'function') loadServices();
      loadNvidiaSelfHeal();
    }, 800);
  }).catch(e => {
    nvidiaSelfHealShowResult('restart bee-nvidia', 'error', 'Request failed: ' + e);
  }).finally(() => {
    btn.disabled = false;
    btn.textContent = original;
  });
 }
 function nvidiaResetGPU(index, btn) {
  var original = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Resetting...';
  nvidiaSelfHealShowResult('reset gpu ' + index, 'running', 'Running...');
  fetch('/api/gpu/nvidia-reset', {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({index:index})
  }).then(r=>r.json()).then(d => {
    nvidiaSelfHealShowResult('reset gpu ' + index, d.status || 'error', d.output || d.error || '(no output)');
    setTimeout(loadNvidiaSelfHeal, 1000);
  }).catch(e => {
    nvidiaSelfHealShowResult('reset gpu ' + index, 'error', 'Request failed: ' + e);
  }).finally(() => {
    btn.disabled = false;
    btn.textContent = original;
  });
 }
 function loadNvidiaSelfHeal() {
  var status = document.getElementById('nvidia-self-heal-status');
  var table = document.getElementById('nvidia-self-heal-table');
  status.textContent = 'Loading NVIDIA GPU status...';
  status.style.color = 'var(--muted)';
  table.innerHTML = '<p style="color:var(--muted);font-size:13px">Loading...</p>';
  fetch('/api/gpu/nvidia-status').then(r=>r.json()).then(gpus => {
    if (!Array.isArray(gpus) || gpus.length === 0) {
      status.textContent = 'No NVIDIA GPUs detected or nvidia-smi is unavailable.';
      table.innerHTML = '';
      return;
    }
    status.textContent = gpus.length + ' NVIDIA GPU(s) detected.';
    const esc = nvidiaSelfHealEsc;
    const rows = gpus.map(g => {
      // Coerce to a number so the value is safe inside the onclick handler.
      const idx = Number(g.index);
      const serial = g.serial || '';
      const bdf = g.bdf || '';
      const id = serial || bdf || ('gpu-' + idx);
      const badge = g.status === 'OK' ? 'badge-ok' : g.status === 'RESET_REQUIRED' ? 'badge-err' : 'badge-warn';
      const details = [];
      if (serial) details.push('serial ' + serial);
      if (bdf) details.push('bdf ' + bdf);
      if (g.parse_failure && g.raw_line) details.push(g.raw_line);
      return '<tr>'
        + '<td style="white-space:nowrap">' + idx + '</td>'
        + '<td>' + esc(g.name || 'unknown') + '</td>'
        + '<td style="font-family:monospace">' + esc(id) + '</td>'
        + '<td><span class="badge ' + badge + '">' + esc(g.status || 'UNKNOWN') + '</span>'
        + (details.length ? '<div style="margin-top:4px;font-size:12px;color:var(--muted)">' + details.map(esc).join(' | ') + '</div>' : '')
        + '</td>'
        + '<td style="white-space:nowrap"><button class="btn btn-sm btn-secondary" onclick="nvidiaResetGPU(' + idx + ', this)">Reset GPU</button></td>'
        + '</tr>';
    }).join('');
    table.innerHTML = '<table><tr><th>GPU</th><th>Model</th><th>ID</th><th>Status</th><th>Action</th></tr>' + rows + '</table>';
  }).catch(e => {
    status.textContent = 'Error loading NVIDIA GPU status: ' + e;
    status.style.color = 'var(--crit-fg, #9f3a38)';
    table.innerHTML = '';
  });
 }
 loadNvidiaSelfHeal();
 </script>`
 }
 // renderTools builds the body of the Tools page. It stacks these cards:
 // System Install (install to RAM, install to disk), Support Bundle with
 // inline USB export, Tool Check, NVIDIA Self Heal, Network, and Services.
 //
 // The embedded scripts load /api/system/ram-status to describe the boot
 // source and decide whether the "Copy to RAM" button is shown, start the
 // install-to-RAM task and redirect to its entry on /tasks, and fill the tool
 // table from /api/tools/check. Each request reports failures in the page
 // instead of leaving its placeholder text or a disabled button behind.
 func renderTools() string {
 	return `<div class="card" style="margin-bottom:16px">
  <div class="card-head">System Install</div>
  <div class="card-body">
    <div style="margin-bottom:20px">
    <div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
    <p id="boot-source-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Detecting boot source...</p>
    <p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
    <button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">&#9654; Copy to RAM</button>
    </div>
    <div style="border-top:1px solid var(--line);padding-top:20px">
    <div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
 		renderInstallInline() + `
    </div>
  </div>
 </div>
 <script>
 fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
  const boot = document.getElementById('boot-source-text');
  const txt = document.getElementById('ram-status-text');
  const btn = document.getElementById('ram-install-btn');
  let source = d.device || d.source || 'unknown source';
  let kind = d.kind || 'unknown';
  let label = source;
  if (kind === 'ram') label = 'RAM';
  else if (kind === 'usb') label = 'USB (' + source + ')';
  else if (kind === 'cdrom') label = 'CD-ROM (' + source + ')';
  else if (kind === 'disk') label = 'disk (' + source + ')';
  boot.textContent = 'Current boot source: ' + label + '.';
  txt.textContent = d.message || 'Checking...';
  if (d.status === 'ok' || d.in_ram) {
    txt.style.color = 'var(--ok, green)';
  } else if (d.status === 'failed') {
    txt.style.color = 'var(--err, #b91c1c)';
  } else {
    txt.style.color = 'var(--muted)';
  }
  if (d.can_start_task) {
    btn.style.display = '';
    btn.disabled = false;
  } else {
    btn.style.display = 'none';
  }
 }).catch(e => {
  // Replace the placeholders so the card does not look like it is still loading.
  document.getElementById('boot-source-text').textContent = 'Boot source unavailable.';
  const txt = document.getElementById('ram-status-text');
  txt.textContent = 'Error: ' + (e.message || e);
  txt.style.color = 'var(--err, #b91c1c)';
 });
 function installToRAM() {
  const btn = document.getElementById('ram-install-btn');
  const txt = document.getElementById('ram-status-text');
  btn.disabled = true;
  fetch('/api/system/install-to-ram', {method:'POST'}).then(r => r.json().then(d => {
    if (!r.ok || !d.task_id) {
      throw new Error(d.error || (r.ok ? 'no task id returned' : 'HTTP ' + r.status));
    }
    window.location.href = '/tasks#' + d.task_id;
  })).catch(e => {
    // Let the operator retry and show why the task was not started.
    btn.disabled = false;
    txt.textContent = 'Error: ' + (e.message || e);
    txt.style.color = 'var(--err, #b91c1c)';
  });
 }
 </script>
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
 ` + renderSupportBundleInline() + `
 <div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
  <div style="font-weight:600;margin-bottom:8px">Export to USB</div>
  ` + renderUSBExportInline() + `
 </div>
 </div></div>
 <div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
 <div class="card-body"><div id="tools-table"><p style="color:var(--muted);font-size:13px">Checking...</p></div></div></div>
 <div class="card"><div class="card-head">NVIDIA Self Heal</div><div class="card-body">` +
 		renderNvidiaSelfHealInline() + `</div></div>
 <div class="card"><div class="card-head">Network</div><div class="card-body">` +
 		renderNetworkInline() + `</div></div>
 <div class="card"><div class="card-head">Services</div><div class="card-body">` +
 		renderServicesInline() + `</div></div>
 <script>
 function checkTools() {
  document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
  fetch('/api/tools/check').then(r=>r.json()).then(tools => {
    const rows = tools.map(t =>
      '<tr><td>'+t.Name+'</td><td><span class="badge '+(t.OK ? 'badge-ok' : 'badge-err')+'">'+(t.OK ? '&#10003; '+t.Path : '&#10007; missing')+'</span></td></tr>'
    ).join('');
    document.getElementById('tools-table').innerHTML =
      '<table><tr><th>Tool</th><th>Status</th></tr>'+rows+'</table>';
  }).catch(e => {
    // Build the message with textContent so error text is never parsed as HTML.
    const box = document.getElementById('tools-table');
    const p = document.createElement('p');
    p.style.cssText = 'color:var(--crit-fg);font-size:13px';
    p.textContent = 'Error: ' + (e.message || e);
    box.innerHTML = '';
    box.appendChild(p);
  });
 }
 checkTools();
 </script>`
 }
 // renderExportIndex produces a minimal standalone HTML page listing every
 // file under exportDir as a link to /export/file. The link target is
 // query-escaped and the link text is HTML-escaped. An error from
 // listExportFiles is returned unchanged together with an empty string.
 func renderExportIndex(exportDir string) (string, error) {
 	files, err := listExportFiles(exportDir)
 	if err != nil {
 		return "", err
 	}
 	var page strings.Builder
 	page.WriteString(`<!DOCTYPE html><html><head><meta charset="utf-8"><title>Bee Export Files</title></head><body>`)
 	page.WriteString(`<h1>Bee Export Files</h1><ul>`)
 	if len(files) == 0 {
 		page.WriteString(`<li>No export files found.</li>`)
 	}
 	for _, name := range files {
 		page.WriteString(`<li><a href="/export/file?path=`)
 		page.WriteString(url.QueryEscape(name))
 		page.WriteString(`">`)
 		page.WriteString(html.EscapeString(name))
 		page.WriteString(`</a></li>`)
 	}
 	page.WriteString(`</ul></body></html>`)
 	return page.String(), nil
 }
--- a/audit/internal/webui/page_install_tasks.go
+++ b/audit/internal/webui/page_install_tasks.go
@@ -0,0 +1,314 @@
 package webui
 // renderInstallInline returns the install-to-disk UI (markup, styles and inline
 // script) without a wrapping card, so it can be embedded in another page.
 //
 // The script loads candidate disks from /api/install/disks, requires the user
 // to type the selected device name before enabling the install button, POSTs
 // the device to /api/install/run, and then streams the resulting task's log
 // from /api/tasks/<id>/stream via EventSource.
 //
 // Every value that originates from the disks API (device, model, size,
 // warnings) is passed through installEsc before being placed in innerHTML, and
 // the task id is URL-encoded before being used in the stream path.
 func renderInstallInline() string {
 	return `
     <div class="alert alert-warn" style="margin-bottom:16px">
       <strong>Warning:</strong> Installing will <strong>completely erase</strong> the selected
       disk and write the live system onto it. All existing data on the target disk will be lost.
       This operation cannot be undone.
     </div>
     <div id="install-loading" style="color:var(--muted);font-size:13px">Loading disk list…</div>
     <div id="install-disk-section" style="display:none">
       <div class="card" style="margin-bottom:0">
         <table id="install-disk-table">
           <thead><tr><th></th><th>Device</th><th>Model</th><th>Size</th><th>Status</th></tr></thead>
           <tbody id="install-disk-tbody"></tbody>
         </table>
       </div>
       <div style="margin-top:12px">
         <button class="btn btn-secondary btn-sm" onclick="installRefreshDisks()">↻ Refresh</button>
       </div>
     </div>
     <div id="install-confirm-section" style="display:none;margin-top:20px">
       <div id="install-confirm-warn" class="alert" style="background:#fff6f6;border:1px solid #e0b4b4;color:#9f3a38;font-size:13px"></div>
       <div class="form-row" style="max-width:360px">
         <label>Type the device name to confirm (e.g. /dev/sda)</label>
         <input type="text" id="install-confirm-input" placeholder="/dev/..." oninput="installCheckConfirm()" autocomplete="off" spellcheck="false">
       </div>
       <button class="btn btn-danger" id="install-start-btn" disabled onclick="installStart()">Install to Disk</button>
       <button class="btn btn-secondary" style="margin-left:8px" onclick="installDeselect()">Cancel</button>
     </div>
     <div id="install-progress-section" style="display:none;margin-top:20px">
       <div class="card-head" style="margin-bottom:8px">Installation Progress</div>
       <div id="install-terminal" class="terminal" style="max-height:500px"></div>
       <div id="install-status" style="margin-top:12px;font-size:13px"></div>
     </div>
 <style>
 #install-disk-tbody tr{cursor:pointer}
 #install-disk-tbody tr.selected td{background:rgba(33,133,208,.1)}
 #install-disk-tbody tr:hover td{background:rgba(33,133,208,.07)}
 </style>
 <script>
 var _installSelected = null;
 // Escape a value for safe use in HTML text and quoted attribute values.
 function installEsc(v) {
   return String(v == null ? '' : v).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
 }
 function installRefreshDisks() {
   document.getElementById('install-loading').style.display = '';
   document.getElementById('install-disk-section').style.display = 'none';
   document.getElementById('install-confirm-section').style.display = 'none';
   _installSelected = null;
   fetch('/api/install/disks').then(function(r){ return r.json(); }).then(function(disks){
     document.getElementById('install-loading').style.display = 'none';
     var tbody = document.getElementById('install-disk-tbody');
     tbody.innerHTML = '';
     if (!disks || disks.length === 0) {
       tbody.innerHTML = '<tr><td colspan="5" style="color:var(--muted);text-align:center">No installable disks found</td></tr>';
     } else {
       disks.forEach(function(d) {
         var warnings = (d.warnings || []);
         var statusHtml;
         if (warnings.length === 0) {
           statusHtml = '<span class="badge badge-ok">OK</span>';
         } else {
           var hasSmall = warnings.some(function(w){ return String(w).indexOf('too small') >= 0; });
           statusHtml = warnings.map(function(w){
             var cls = hasSmall ? 'badge-err' : 'badge-warn';
             var text = String(w);
             return '<span class="badge ' + cls + '" title="' + installEsc(text) + '">' +
               installEsc(text.length > 40 ? text.substring(0,38)+'…' : text) + '</span>';
           }).join(' ');
         }
         var mountedNote = (d.mounted_parts && d.mounted_parts.length > 0)
           ? ' <span style="color:var(--warn-fg);font-size:11px">(mounted)</span>' : '';
         var tr = document.createElement('tr');
         tr.dataset.device = d.device;
         tr.dataset.model = d.model || 'Unknown';
         tr.dataset.size = d.size;
         tr.dataset.warnings = JSON.stringify(warnings);
         tr.innerHTML =
           '<td><input type="radio" name="install-disk" value="' + installEsc(d.device) + '"></td>' +
           '<td><code>' + installEsc(d.device) + '</code>' + mountedNote + '</td>' +
           '<td>' + installEsc(d.model || '—') + '</td>' +
           '<td>' + installEsc(d.size) + '</td>' +
           '<td>' + statusHtml + '</td>';
         tr.addEventListener('click', function(){ installSelectDisk(this); });
         tbody.appendChild(tr);
       });
     }
     document.getElementById('install-disk-section').style.display = '';
   }).catch(function(e){
     document.getElementById('install-loading').textContent = 'Failed to load disk list: ' + e;
   });
 }
 function installSelectDisk(tr) {
   document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
   tr.classList.add('selected');
   var radio = tr.querySelector('input[type=radio]');
   if (radio) radio.checked = true;
   _installSelected = {
     device: tr.dataset.device,
     model: tr.dataset.model,
     size: tr.dataset.size,
     warnings: JSON.parse(tr.dataset.warnings || '[]')
   };
   var warnBox = document.getElementById('install-confirm-warn');
   var warnLines = '<strong>⚠ DANGER:</strong> ' + installEsc(_installSelected.device) +
     ' (' + installEsc(_installSelected.model) + ', ' + installEsc(_installSelected.size) + ')' +
     ' will be <strong>completely erased</strong> and repartitioned. All data will be lost.<br>';
   if (_installSelected.warnings.length > 0) {
     warnLines += '<br>' + _installSelected.warnings.map(function(w){ return '• ' + installEsc(w); }).join('<br>');
   }
   warnBox.innerHTML = warnLines;
   document.getElementById('install-confirm-input').value = '';
   document.getElementById('install-start-btn').disabled = true;
   document.getElementById('install-confirm-section').style.display = '';
   document.getElementById('install-progress-section').style.display = 'none';
 }
 function installDeselect() {
   _installSelected = null;
   document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
   document.querySelectorAll('#install-disk-tbody input[type=radio]').forEach(function(r){ r.checked = false; });
   document.getElementById('install-confirm-section').style.display = 'none';
 }
 function installCheckConfirm() {
   var val = document.getElementById('install-confirm-input').value.trim();
   var ok = _installSelected && val === _installSelected.device;
   document.getElementById('install-start-btn').disabled = !ok;
 }
 function installStart() {
   if (!_installSelected) return;
   document.getElementById('install-confirm-section').style.display = 'none';
   document.getElementById('install-disk-section').style.display = 'none';
   document.getElementById('install-loading').style.display = 'none';
   var prog = document.getElementById('install-progress-section');
   var term = document.getElementById('install-terminal');
   var status = document.getElementById('install-status');
   prog.style.display = '';
   term.textContent = '';
   status.textContent = 'Starting installation…';
   status.style.color = 'var(--muted)';
   fetch('/api/install/run', {
     method: 'POST',
     headers: {'Content-Type': 'application/json'},
     body: JSON.stringify({device: _installSelected.device})
   }).then(function(r){
     return r.json().then(function(j){
       if (!r.ok) throw new Error(j.error || r.statusText);
       return j;
     });
   }).then(function(j){
     if (!j.task_id) throw new Error('missing task id');
     installStreamLog(j.task_id);
   }).catch(function(e){
     status.textContent = 'Error: ' + e;
     status.style.color = 'var(--crit-fg)';
   });
 }
 function installStreamLog(taskId) {
   var term = document.getElementById('install-terminal');
   var status = document.getElementById('install-status');
   var es = new EventSource('/api/tasks/' + encodeURIComponent(taskId) + '/stream');
   es.onmessage = function(e) {
     term.textContent += e.data + '\n';
     term.scrollTop = term.scrollHeight;
   };
   es.addEventListener('done', function(e) {
     es.close();
     if (!e.data) {
       status.innerHTML = '<span style="color:var(--ok-fg);font-weight:700">✓ Installation complete.</span> Remove the ISO and reboot.';
       var rebootBtn = document.createElement('button');
       rebootBtn.className = 'btn btn-primary btn-sm';
       rebootBtn.style.marginLeft = '12px';
       rebootBtn.textContent = 'Reboot now';
       rebootBtn.onclick = function(){
         fetch('/api/services/action', {method:'POST',headers:{'Content-Type':'application/json'},
           body: JSON.stringify({name:'', action:'reboot'})});
       };
       status.appendChild(rebootBtn);
     } else {
       status.textContent = '✗ Installation failed: ' + e.data;
       status.style.color = 'var(--crit-fg)';
     }
   });
   es.onerror = function() {
     es.close();
     status.textContent = '✗ Stream disconnected.';
     status.style.color = 'var(--crit-fg)';
   };
 }
 installRefreshDisks();
 </script>
 `
 }
 // renderInstall returns the installer UI from renderInstallInline wrapped in
 // a card titled "Install Live System to Disk".
 func renderInstall() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Install Live System to Disk</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderInstallInline() + cardClose
 }
 // renderTasks returns the Tasks page: a toolbar (cancel all / abort and kill
 // orphans), a paginated task table, and the inline script that drives them.
 //
 // The script polls /api/tasks every 2000 ms and renders 50 tasks per page.
 // Task ids are carried in data-id attributes and read back through
 // this.dataset.id, so they are never spliced into inline JavaScript. They are
 // URL-encoded when used in request paths. Values rendered into the table are
 // HTML-escaped, and failed requests are handled instead of surfacing as
 // unhandled promise rejections on every poll.
 func renderTasks() string {
 	return `<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">
 <button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
 <button class="btn btn-sm" style="background:#b45309;color:#fff" onclick="killWorkers()" title="Abort running tasks and kill orphaned test processes (bee-gpu-burn, dcgmi, nvvs, nvbandwidth, stress-ng, stressapptest, memtester)">Abort Tasks And Kill Orphans</button>
 <span id="kill-toast" style="font-size:12px;color:var(--muted);display:none"></span>
 <span style="font-size:12px;color:var(--muted)">Open a task to view its saved logs and charts.</span>
 </div>
 <div class="card">
 <div id="tasks-table"><p style="color:var(--muted);font-size:13px;padding:16px">Loading...</p></div>
 </div>
 <script>
 var _taskRefreshTimer = null;
 var _tasksAll = [];
 var _taskPage = 1;
 var _taskPageSize = 50;
 function loadTasks() {
   fetch('/api/tasks').then(r=>r.json()).then(tasks => {
     _tasksAll = Array.isArray(tasks) ? tasks : [];
     if (_tasksAll.length === 0) {
       _taskPage = 1;
       document.getElementById('tasks-table').innerHTML = '<p style="color:var(--muted);font-size:13px;padding:16px">No tasks.</p>';
       return;
     }
     const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
     if (_taskPage > totalPages) _taskPage = totalPages;
     if (_taskPage < 1) _taskPage = 1;
     const start = (_taskPage - 1) * _taskPageSize;
     const pageTasks = _tasksAll.slice(start, start + _taskPageSize);
     const rows = pageTasks.map(t => {
       const dur = t.elapsed_sec ? formatDurSec(t.elapsed_sec) : '';
       const statusClass = {running:'badge-ok',pending:'badge-unknown',done:'badge-ok',failed:'badge-err',cancelled:'badge-unknown'}[t.status]||'badge-unknown';
       const statusLabel = {running:'&#9654; running',pending:'pending',done:'&#10003; done',failed:'&#10007; failed',cancelled:'cancelled'}[t.status]||escHtml(String(t.status));
       const idAttr = escHtml(String(t.id));
       let actions = '<a class="btn btn-sm btn-secondary" href="/tasks/'+encodeURIComponent(t.id)+'">Open</a>';
       if (t.status === 'running' || t.status === 'pending') {
         actions += ' <button class="btn btn-sm btn-danger" data-id="'+idAttr+'" onclick="cancelTask(this.dataset.id)">Cancel</button>';
       }
       if (t.status === 'pending') {
         actions += ' <button class="btn btn-sm btn-secondary" data-id="'+idAttr+'" onclick="setPriority(this.dataset.id,1)" title="Increase priority">&#8679;</button>';
         actions += ' <button class="btn btn-sm btn-secondary" data-id="'+idAttr+'" onclick="setPriority(this.dataset.id,-1)" title="Decrease priority">&#8681;</button>';
       }
       return '<tr><td><a href="/tasks/'+encodeURIComponent(t.id)+'">'+escHtml(t.name)+'</a></td>' +
         '<td><span class="badge '+statusClass+'">'+statusLabel+'</span></td>' +
         '<td style="font-size:12px;color:var(--muted)">'+escHtml(fmtTime(t.created_at))+'</td>' +
         '<td style="font-size:12px;color:var(--muted)">'+dur+'</td>' +
         '<td>'+escHtml(String(t.priority))+'</td>' +
         '<td>'+actions+'</td></tr>';
     }).join('');
     const showingFrom = start + 1;
     const showingTo = Math.min(start + pageTasks.length, _tasksAll.length);
     const pager =
       '<div style="display:flex;align-items:center;justify-content:space-between;gap:12px;flex-wrap:wrap;padding:12px 14px;border-top:1px solid var(--border-lite);background:var(--surface-2)">' +
         '<div style="font-size:12px;color:var(--muted)">Showing '+showingFrom+'-'+showingTo+' of '+_tasksAll.length+' tasks</div>' +
         '<div style="display:flex;align-items:center;gap:8px">' +
           '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage-1)+')" '+(_taskPage <= 1 ? 'disabled' : '')+'>Previous</button>' +
           '<span style="font-size:12px;color:var(--muted)">Page '+_taskPage+' / '+totalPages+'</span>' +
           '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage+1)+')" '+(_taskPage >= totalPages ? 'disabled' : '')+'>Next</button>' +
         '</div>' +
       '</div>';
     document.getElementById('tasks-table').innerHTML =
       '<table><tr><th>Name</th><th>Status</th><th>Created</th><th>Duration</th><th>Priority</th><th>Actions</th></tr>'+rows+'</table>' + pager;
   }).catch(function() { /* keep the last rendered table; the next poll retries */ });
 }
 function escHtml(s) { return String(s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;'); }
 function fmtTime(s) { if (!s) return ''; try { return new Date(s).toLocaleTimeString(); } catch(e){ return s; } }
 function formatDurSec(sec) {
   sec = Math.max(0, Math.round(sec||0));
   if (sec < 60) return sec+'s';
   const m = Math.floor(sec/60), ss = sec%60;
   return m+'m '+ss+'s';
 }
 function setTaskPage(page) {
   const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
   _taskPage = Math.min(totalPages, Math.max(1, page));
   loadTasks();
 }
 function cancelTask(id) {
   fetch('/api/tasks/'+encodeURIComponent(id)+'/cancel',{method:'POST'}).then(()=>loadTasks()).catch(()=>{});
 }
 function cancelAll() {
   fetch('/api/tasks/cancel-all',{method:'POST'}).then(()=>loadTasks()).catch(()=>{});
 }
 function killWorkers() {
   if (!confirm('Abort all queued/running tasks and kill orphaned test workers (bee-gpu-burn, dcgmi, nvvs, nvbandwidth, stress-ng, stressapptest, memtester)?\n\nRunning bee-worker processes will first be asked to stop gracefully; orphaned test processes will then be killed.')) return;
   fetch('/api/tasks/kill-workers',{method:'POST'})
     .then(r=>r.json())
     .then(d=>{
       loadTasks();
       var toast = document.getElementById('kill-toast');
       var parts = [];
       if (d.cancelled > 0) parts.push(d.cancelled+' task'+(d.cancelled===1?'':'s')+' cancelled');
       if (d.killed > 0) parts.push(d.killed+' process'+(d.killed===1?'':'es')+' killed');
       toast.textContent = parts.length ? parts.join(', ')+'.' : 'No processes found.';
       toast.style.display = '';
       setTimeout(()=>{ toast.style.display='none'; }, 5000);
     })
     .catch(e=>{
       var toast = document.getElementById('kill-toast');
       toast.textContent = 'Request failed: ' + e;
       toast.style.display = '';
       setTimeout(()=>{ toast.style.display='none'; }, 5000);
     });
 }
 function setPriority(id, delta) {
   fetch('/api/tasks/'+encodeURIComponent(id)+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
     .then(()=>loadTasks()).catch(()=>{});
 }
 loadTasks();
 _taskRefreshTimer = setInterval(loadTasks, 2000);
 </script>`
 }
--- a/audit/internal/webui/page_metrics.go
+++ b/audit/internal/webui/page_metrics.go
@@ -0,0 +1,238 @@
 package webui
 // renderMetrics returns the live metrics page: chart cards for server load,
 // CPU temperature, ambient sensors, power and fan RPM, plus a GPU section that
 // is hidden until /api/metrics/latest reports at least one GPU index.
 //
 // The inline script:
 //   - re-requests every visible <img data-chart-refresh="1"> every 3000 ms,
 //     preloading through a probe Image and swapping src only after it loads;
 //     images whose offsetParent is null (not displayed) are skipped;
 //   - re-reads /api/metrics/latest every 5000 ms to show or hide the fan card
 //     and GPU section and to rebuild the per-GPU overview cards when the set
 //     of GPU indices or names changes;
 //   - fetches GPU names from /api/gpu/nvidia once and reuses the promise
 //     (a failed fetch resolves to an empty list);
 //   - stores the "One chart per GPU" toggle in sessionStorage under
 //     bee.metrics.gpuChartMode.
 //
 // NOTE(review): the intro text says "updated every 2 seconds" while the chart
 // refresh timer below is 3000 ms; confirm which value is intended.
 func renderMetrics() string {
 	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds.</p>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Server — Load</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-load" data-chart-refresh="1" src="/api/metrics/chart/server-load.svg" style="width:100%;display:block;border-radius:6px" alt="CPU/Mem load">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Temperature — CPU</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-temp-cpu" data-chart-refresh="1" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Temperature — Ambient Sensors</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-temp-ambient" data-chart-refresh="1" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Server — Power</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-power" data-chart-refresh="1" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
  </div>
 </div>
 <div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
  <div class="card-head">Server — Fan RPM</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-fans" data-chart-refresh="1" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
  </div>
 </div>
 <section id="gpu-metrics-section" style="display:none;margin-top:24px;padding:16px 16px 4px;border:1px solid #d7e0ea;border-radius:10px;background:linear-gradient(180deg,#f7fafc 0%,#eef4f8 100%)">
  <div style="display:flex;align-items:center;justify-content:space-between;gap:16px;flex-wrap:wrap;margin-bottom:14px">
    <div>
      <div style="font-size:12px;font-weight:700;letter-spacing:.08em;text-transform:uppercase;color:#486581">GPU Metrics</div>
      <div id="gpu-metrics-summary" style="font-size:13px;color:var(--muted);margin-top:4px">Detected GPUs are rendered in a dedicated section.</div>
    </div>
    <label style="display:inline-flex;align-items:center;gap:8px;font-size:13px;color:var(--ink);font-weight:700;cursor:pointer">
      <input id="gpu-chart-toggle" type="checkbox">
      <span>One chart per GPU</span>
    </label>
  </div>
  <div id="gpu-metrics-by-metric">
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Compute Load</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-load" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Memory Load</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-memload" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Core Clock</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-clock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-clock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU core clock">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Power</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-power" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Temperature</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-temp" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
      </div>
    </div>
  </div>
  <div id="gpu-metrics-by-gpu" style="display:none"></div>
 </section>
 <script>
 let gpuChartKey = '';
 const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode';
 let metricsNvidiaGPUsPromise = null;
 function loadMetricsNvidiaGPUs() {
  if (!metricsNvidiaGPUsPromise) {
    metricsNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
      .then(function(r) {
        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
        return r.json();
      })
      .then(function(list) { return Array.isArray(list) ? list : []; })
      .catch(function() { return []; });
  }
  return metricsNvidiaGPUsPromise;
 }
 function metricsGPUNameMap(list) {
  const out = {};
  (list || []).forEach(function(gpu) {
    const idx = Number(gpu.index);
    if (!Number.isFinite(idx) || !gpu.name) return;
    out[idx] = gpu.name;
  });
  return out;
 }
 function metricsGPUDisplayLabel(idx, names) {
  const name = names && names[idx];
  return name ? ('GPU ' + idx + ' — ' + name) : ('GPU ' + idx);
 }
 function loadGPUChartModePreference() {
  try {
    return sessionStorage.getItem(gpuChartModeStorageKey) === 'per-gpu';
  } catch (_) {
    return false;
  }
 }
 function saveGPUChartModePreference(perGPU) {
  try {
    sessionStorage.setItem(gpuChartModeStorageKey, perGPU ? 'per-gpu' : 'per-metric');
  } catch (_) {}
 }
 function refreshChartImage(el) {
  if (!el || el.dataset.loading === '1') return;
  if (el.offsetParent === null) return;
  const baseSrc = el.dataset.baseSrc || el.src.split('?')[0];
  const nextSrc = baseSrc + '?t=' + Date.now();
  const probe = new Image();
  el.dataset.baseSrc = baseSrc;
  el.dataset.loading = '1';
  probe.onload = function() {
    el.src = nextSrc;
    el.dataset.loading = '0';
  };
  probe.onerror = function() {
    el.dataset.loading = '0';
  };
  probe.src = nextSrc;
 }
 function refreshCharts() {
  document.querySelectorAll('img[data-chart-refresh="1"]').forEach(refreshChartImage);
 }
 function gpuIndices(rows) {
  const seen = {};
  const out = [];
  (rows || []).forEach(function(row) {
    const idx = Number(row.index);
    if (!Number.isFinite(idx) || seen[idx]) return;
    seen[idx] = true;
    out.push(idx);
  });
  return out.sort(function(a, b) { return a - b; });
 }
 function renderGPUOverviewCards(indices, names) {
  const host = document.getElementById('gpu-metrics-by-gpu');
  if (!host) return;
  host.innerHTML = indices.map(function(idx) {
    const label = metricsGPUDisplayLabel(idx, names);
    return '<div class="card" style="margin-bottom:16px">' +
      '<div class="card-head">' + label + ' — Overview</div>' +
      '<div class="card-body" style="padding:8px">' +
      '<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="' + label + ' overview">' +
      '</div></div>';
  }).join('');
 }
 function applyGPUChartMode() {
  const perMetric = document.getElementById('gpu-metrics-by-metric');
  const perGPU = document.getElementById('gpu-metrics-by-gpu');
  const toggle = document.getElementById('gpu-chart-toggle');
  const gpuModePerGPU = !!(toggle && toggle.checked);
  if (perMetric) perMetric.style.display = gpuModePerGPU ? 'none' : '';
  if (perGPU) perGPU.style.display = gpuModePerGPU ? '' : 'none';
 }
 function syncMetricsLayout(d) {
  const fanCard = document.getElementById('card-server-fans');
  if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
  const section = document.getElementById('gpu-metrics-section');
  const summary = document.getElementById('gpu-metrics-summary');
  const indices = gpuIndices(d.gpus);
  loadMetricsNvidiaGPUs().then(function(gpus) {
    const names = metricsGPUNameMap(gpus);
    if (section) section.style.display = indices.length > 0 ? '' : 'none';
    if (summary) {
      summary.textContent = indices.length > 0
        ? ('Detected GPUs: ' + indices.map(function(idx) { return metricsGPUDisplayLabel(idx, names); }).join(', '))
        : 'No GPUs detected in live metrics.';
    }
    const nextKey = indices.join(',') + '|' + indices.map(function(idx) { return names[idx] || ''; }).join(',');
    if (nextKey !== gpuChartKey) {
      renderGPUOverviewCards(indices, names);
      gpuChartKey = nextKey;
    }
    applyGPUChartMode();
  });
 }
 function loadMetricsLayout() {
  fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
 }
 const gpuChartToggle = document.getElementById('gpu-chart-toggle');
 if (gpuChartToggle) {
  gpuChartToggle.checked = loadGPUChartModePreference();
 }
 applyGPUChartMode();
 if (gpuChartToggle) {
  gpuChartToggle.addEventListener('change', function() {
    saveGPUChartModePreference(!!gpuChartToggle.checked);
    applyGPUChartMode();
    refreshCharts();
  });
 }
 loadMetricsLayout();
 setInterval(refreshCharts, 3000);
 setInterval(loadMetricsLayout, 5000);
 </script>`
 }
--- a/audit/internal/webui/page_network_services.go
+++ b/audit/internal/webui/page_network_services.go
@@ -0,0 +1,213 @@
 package webui
 import "html"
 // renderNetworkInline returns the network UI without a wrapping card (for embedding in Tools).
 //
 // The markup contains a pending-change banner with Confirm/Rollback buttons,
 // the interface table placeholder, and DHCP / static IPv4 forms. The inline
 // script:
 //   - loads /api/network immediately and then every 5000 ms, rebuilding the
 //     interface table and showing or hiding the pending banner from the
 //     response's pending_change / rollback_in fields;
 //   - POSTs JSON to /api/network/toggle, /api/network/dhcp and
 //     /api/network/static, showing the banner before the request is sent and
 //     hiding it again if the response carries an error;
 //   - POSTs to /api/network/confirm and /api/network/rollback from the banner;
 //   - counts the banner down once per second, starting from rollback_in or
 //     NET_ROLLBACK_SECS (60) when the server gives no value, and reloads the
 //     network state when the countdown reaches zero.
 //
 // An empty DHCP interface field is sent as "all".
 func renderNetworkInline() string {
 	return `<div id="net-pending" style="display:none" class="alert alert-warn">
 <strong>&#9888; Network change applied.</strong> Reverting in <span id="net-countdown">60</span>s unless confirmed.
 <button class="btn btn-primary btn-sm" style="margin-left:8px" onclick="confirmNetChange()">Confirm</button>
 <button class="btn btn-secondary btn-sm" style="margin-left:4px" onclick="rollbackNetChange()">Rollback</button>
 </div>
 <div id="iface-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div class="grid2" style="margin-top:16px">
 <div><div style="font-weight:700;font-size:13px;margin-bottom:8px">DHCP</div>
 <div class="form-row"><label>Interface (leave empty for all)</label><input type="text" id="dhcp-iface" placeholder="eth0"></div>
 <button class="btn btn-primary" onclick="runDHCP()">&#9654; Run DHCP</button>
 <div id="dhcp-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
 </div>
 <div><div style="font-weight:700;font-size:13px;margin-bottom:8px">Static IPv4</div>
 <div class="form-row"><label>Interface</label><input type="text" id="st-iface" placeholder="eth0"></div>
 <div class="form-row"><label>Address</label><input type="text" id="st-addr" placeholder="192.168.1.100"></div>
 <div class="form-row"><label>Prefix length</label><input type="text" id="st-prefix" placeholder="24"></div>
 <div class="form-row"><label>Gateway</label><input type="text" id="st-gw" placeholder="192.168.1.1"></div>
 <div class="form-row"><label>DNS (comma-separated)</label><input type="text" id="st-dns" placeholder="8.8.8.8,8.8.4.4"></div>
 <button class="btn btn-primary" onclick="setStatic()">Apply Static IP</button>
 <div id="static-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
 </div>
 </div>
 <script>
 var _netCountdownTimer = null;
 var _netRefreshTimer = null;
 const NET_ROLLBACK_SECS = 60;
 function loadNetwork() {
  fetch('/api/network').then(r=>r.json()).then(d => {
    const rows = (d.interfaces||[]).map(i =>
      '<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
      '<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
      '<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
    ).join('');
    document.getElementById('iface-table').innerHTML =
      '<table><tr><th>Interface</th><th>State (click to toggle)</th><th>Addresses</th></tr>'+rows+'</table>' +
      (d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
    if (d.pending_change) showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    else hideNetPending();
  }).catch(function() {});
 }
 function selectIface(iface) {
  document.getElementById('dhcp-iface').value = iface;
  document.getElementById('st-iface').value = iface;
 }
 function toggleIface(iface, currentState) {
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
    .then(r=>r.json()).then(d => {
      if (d.error) { hideNetPending(); alert('Error: '+d.error); return; }
      loadNetwork();
      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    }).catch(function() {
      setTimeout(loadNetwork, 1500);
    });
 }
 function hideNetPending() {
  const el = document.getElementById('net-pending');
  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
  _netCountdownTimer = null;
  el.style.display = 'none';
 }
 function showNetPending(secs) {
  if (!secs || secs < 1) { hideNetPending(); return; }
  const el = document.getElementById('net-pending');
  el.style.display = 'block';
  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
  let remaining = secs;
  document.getElementById('net-countdown').textContent = remaining;
  _netCountdownTimer = setInterval(function() {
    remaining--;
    document.getElementById('net-countdown').textContent = remaining;
    if (remaining <= 0) { hideNetPending(); loadNetwork(); }
  }, 1000);
 }
 function confirmNetChange() {
  hideNetPending();
  fetch('/api/network/confirm',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
 }
 function rollbackNetChange() {
  hideNetPending();
  fetch('/api/network/rollback',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
 }
 function runDHCP() {
  const iface = document.getElementById('dhcp-iface').value.trim();
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
    .then(r=>r.json()).then(d => {
      document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
      if (d.error) { hideNetPending(); return; }
      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
      loadNetwork();
    }).catch(function() {
      setTimeout(loadNetwork, 1500);
    });
 }
 function setStatic() {
  const dns = document.getElementById('st-dns').value.split(',').map(s=>s.trim()).filter(Boolean);
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/static',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({
    interface: document.getElementById('st-iface').value,
    address: document.getElementById('st-addr').value,
    prefix: document.getElementById('st-prefix').value,
    gateway: document.getElementById('st-gw').value,
    dns: dns,
  })}).then(r=>r.json()).then(d => {
    document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
    if (d.error) { hideNetPending(); return; }
    showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    loadNetwork();
  }).catch(function() {
    setTimeout(loadNetwork, 1500);
  });
 }
 loadNetwork();
 if (_netRefreshTimer) clearInterval(_netRefreshTimer);
 _netRefreshTimer = setInterval(loadNetwork, 5000);
 </script>`
 }
 // renderNetwork returns the network UI from renderNetworkInline wrapped in a
 // card titled "Network Interfaces".
 func renderNetwork() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Network Interfaces</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderNetworkInline() + cardClose
 }
 // renderServicesInline returns the services UI (unit table, action output
 // panel and inline script) without a wrapping card, so it can be embedded in
 // another page.
 //
 // The script loads units from /api/services and POSTs {name, action} to
 // /api/services/action for Start/Stop/Restart, showing the returned output in
 // the terminal panel and reloading the table 800 ms after a response.
 //
 // Unit names, states and status bodies are HTML-escaped (including "&") before
 // being placed in innerHTML. The unit name reaches svcAction through a
 // data-svc attribute rather than being spliced into inline JavaScript. A
 // non-array response is rendered as an empty table, and a failed load shows an
 // error message instead of leaving "Loading..." in place.
 func renderServicesInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:10px">` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `</p>
 <div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="loadServices()">&#8635; Refresh</button></div>
 <div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div id="svc-out" style="display:none;margin-top:12px">
  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
    <span id="svc-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
    <span id="svc-out-status" style="font-size:12px"></span>
  </div>
  <div id="svc-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
 </div>
 <script>
 // Escape a value for safe use in HTML text and quoted attribute values.
 function svcEsc(v) {
  return String(v == null ? '' : v).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
 }
 function loadServices() {
  fetch('/api/services').then(r=>r.json()).then(svcs => {
    const rows = (Array.isArray(svcs) ? svcs : []).map(s => {
      const name = String(s.name||'');
      const nameHtml = svcEsc(name);
      const st = s.state||'unknown';
      const badge = st==='active' ? 'badge-ok' : st==='failed' ? 'badge-err' : 'badge-warn';
      const id = 'svc-body-'+name.replace(/[^a-z0-9]/g,'-');
      const body = svcEsc(s.body||'');
      return '<tr>' +
        '<td style="white-space:nowrap">'+nameHtml+'</td>' +
        '<td style="white-space:nowrap"><span class="badge '+badge+'" style="cursor:pointer" onclick="toggleBody(\''+id+'\')">'+svcEsc(st)+' ▾</span>' +
        '<div id="'+id+'" style="display:none;margin-top:6px"><pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;max-height:200px;overflow-y:auto;background:#1b1c1d;padding:8px;border-radius:4px;color:#b5cea8">'+body+'</pre></div>' +
        '</td>' +
        '<td style="white-space:nowrap">' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+nameHtml+'-start"   data-svc="'+nameHtml+'" onclick="svcAction(this,this.dataset.svc,\'start\')">Start</button> ' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+nameHtml+'-stop"    data-svc="'+nameHtml+'" onclick="svcAction(this,this.dataset.svc,\'stop\')">Stop</button> ' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+nameHtml+'-restart" data-svc="'+nameHtml+'" onclick="svcAction(this,this.dataset.svc,\'restart\')">Restart</button>' +
        '</td></tr>';
    }).join('');
    document.getElementById('svc-table').innerHTML =
      '<table><tr><th>Unit</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
  }).catch(function(e) {
    document.getElementById('svc-table').innerHTML =
      '<p style="color:var(--crit-fg, #9f3a38);font-size:13px">Failed to load services: '+svcEsc(e)+'</p>';
  });
 }
 function toggleBody(id) {
  const el = document.getElementById(id);
  if (el) el.style.display = el.style.display==='none' ? 'block' : 'none';
 }
 function svcAction(btn, name, action) {
  var label = btn.textContent;
  btn.disabled = true;
  btn.textContent = '...';
  var out = document.getElementById('svc-out');
  var term = document.getElementById('svc-terminal');
  var statusEl = document.getElementById('svc-out-status');
  var labelEl = document.getElementById('svc-out-label');
  out.style.display = 'block';
  labelEl.textContent = action + ' ' + name;
  term.textContent = 'Running...';
  statusEl.textContent = '';
  statusEl.style.color = '';
  fetch('/api/services/action',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({name,action})})
    .then(r=>r.json()).then(d => {
      term.textContent = d.output || d.error || '(no output)';
      term.scrollTop = term.scrollHeight;
      if (d.status === 'ok') {
        statusEl.textContent = '✓ done';
        statusEl.style.color = 'var(--ok-fg, #2c662d)';
      } else {
        statusEl.textContent = '✗ failed';
        statusEl.style.color = 'var(--crit-fg, #9f3a38)';
      }
      btn.textContent = label;
      btn.disabled = false;
      setTimeout(loadServices, 800);
    }).catch(e => {
      term.textContent = 'Request failed: ' + e;
      statusEl.textContent = '✗ error';
      statusEl.style.color = 'var(--crit-fg, #9f3a38)';
      btn.textContent = label;
      btn.disabled = false;
    });
 }
 loadServices();
 </script>`
 }
 // renderServices returns the services UI from renderServicesInline wrapped in
 // a card titled "Bee Services".
 func renderServices() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Bee Services</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderServicesInline() + cardClose
 }
--- a/audit/internal/webui/page_validate.go
+++ b/audit/internal/webui/page_validate.go
@@ -0,0 +1,663 @@
 package webui
 import (
 	"encoding/json"
 	"fmt"
 	"html"
 	"sort"
 	"strings"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/schema"
 )
 // validateInventory carries the per-category device summaries and the GPU
 // counts that the Validate page displays.
 type validateInventory struct {
 	// Human-readable summary text, one entry per hardware category.
 	CPU, Memory, Storage string
 	NVIDIA, AMD          string
 	// Number of GPUs counted for each vendor.
 	NvidiaGPUCount, AMDGPUCount int
 }
 // validateFmtDur renders an estimated duration for display. Values below
 // two minutes are shown in seconds ("~90 s"); anything longer is shown in
 // minutes ("~5 min"), computed as (secs+29)/60 with integer division.
 func validateFmtDur(secs int) string {
 	const minuteThreshold = 120
 	if secs >= minuteThreshold {
 		return fmt.Sprintf("~%d min", (secs+29)/60)
 	}
 	return fmt.Sprintf("~%d s", secs)
 }
 // validateTotalValidateSec sums the estimated durations of the modules run
 // in Validate mode. CPU, memory, NVIDIA interconnect and NVIDIA bandwidth
 // estimates are always included; the NVIDIA GPU validate estimate is added
 // only when n (the NVIDIA GPU count) is positive. The result does not grow
 // further with larger n.
 func validateTotalValidateSec(n int) int {
 	sum := platform.SATEstimatedCPUValidateSec
 	sum += platform.SATEstimatedMemoryValidateSec
 	sum += platform.SATEstimatedNvidiaInterconnectSec
 	sum += platform.SATEstimatedNvidiaBandwidthSec
 	// Zero and negative counts both mean "no GPU-specific pass".
 	if n >= 1 {
 		sum += platform.SATEstimatedNvidiaGPUValidateSec
 	}
 	return sum
 }
 // validateTotalStressSec sums the estimated durations of the modules run in
 // Stress mode. CPU, memory, NVIDIA pulse test, interconnect and bandwidth
 // estimates are always included; the NVIDIA GPU stress, targeted stress and
 // targeted power estimates are added only when n (the NVIDIA GPU count) is
 // positive. The result does not grow further with larger n.
 func validateTotalStressSec(n int) int {
 	sum := platform.SATEstimatedCPUStressSec
 	sum += platform.SATEstimatedMemoryStressSec
 	sum += platform.SATEstimatedNvidiaPulseTestSec
 	sum += platform.SATEstimatedNvidiaInterconnectSec
 	sum += platform.SATEstimatedNvidiaBandwidthSec
 	// Zero and negative counts both mean "no GPU-specific passes".
 	if n >= 1 {
 		sum += platform.SATEstimatedNvidiaGPUStressSec
 		sum += platform.SATEstimatedNvidiaTargetedStressSec
 		sum += platform.SATEstimatedNvidiaTargetedPowerSec
 	}
 	return sum
 }
 // renderValidate returns the HTML body of the Validate page.
 //
 // The markup is built from the inventory returned by loadValidateInventory
 // and contains, in order: a "non-destructive" notice, the Validate/Stress
 // profile card with estimated total durations, SAT cards for CPU, memory and
 // storage, the NVIDIA GPU selection list, the NVIDIA diagnostic cards, the
 // AMD GPU card, a terminal-style output card, and the page's inline CSS and
 // JavaScript.
 //
 // The embedded script enqueues tasks with POST /api/sat/<target>/run, streams
 // task logs from /api/tasks/<id>/stream, loads the GPU list from
 // /api/gpu/nvidia and disables cards based on /api/gpu/presence.
 //
 // NOTE(review): the script concatenates gpu.name and err.message into
 // innerHTML without escaping — confirm these values are always trusted.
 // NOTE(review): the profile text says estimates "scale with GPU count", but
 // the totals computed here only depend on whether n > 0 — confirm wording.
 func renderValidate(opts HandlerOptions) string {
 	inv := loadValidateInventory(opts)
 	n := inv.NvidiaGPUCount // NVIDIA GPUs counted in the audit snapshot (0 when none or snapshot not loaded)
 	validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
 	stressTotalStr := validateFmtDur(validateTotalStressSec(n))
 	// gpuNote is appended after each estimate; it stays empty when no NVIDIA GPU was counted.
 	gpuNote := ""
 	if n > 0 {
 		gpuNote = fmt.Sprintf(" (%d GPU)", n)
 	}
 	return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>
 <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 	<div class="card" style="margin-bottom:16px">
 	  <div class="card-head">Validate Profile</div>
 	  <div class="card-body validate-profile-body">
 	    <div class="validate-profile-col">
 	      <div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
 	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
 	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
 	    </div>
 	    <div class="validate-profile-col validate-profile-action">
 	      <p style="color:var(--muted);font-size:12px;margin:0 0 10px">Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.</p>
 	      <button type="button" class="btn btn-primary" onclick="runAllSAT()">Validate one by one</button>
 	      <div style="margin-top:12px">
 	        <span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
 	      </div>
 	    </div>
 	  </div>
 	</div>
 <div class="grid3">
 ` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
 		inv.CPU,
 		`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
 		`<code>lscpu</code>, <code>sensors</code>, <code>stress-ng</code>`,
 		validateFmtDur(platform.SATEstimatedCPUValidateSec)+` in Validate (stress-ng 60 s). `+validateFmtDur(platform.SATEstimatedCPUStressSec)+` in Stress (stress-ng 30 min).`,
 	)) +
 		renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
 			inv.Memory,
 			`Runs a RAM validation pass and records memory state around the test.`,
 			`<code>free</code>, <code>memtester</code>`,
 			validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` in Validate (256 MB × 1 pass). `+validateFmtDur(platform.SATEstimatedMemoryStressSec)+` in Stress (512 MB × 1 pass).`,
 		)) +
 		renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
 			inv.Storage,
 			`Scans all storage devices and runs the matching health or self-test path for each device type.`,
 			`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
 			`Seconds in Validate (NVMe: instant device query; SATA/SAS: short self-test). Up to ~1 h per device in Stress (extended self-test, device-dependent).`,
 		)) +
 		`</div>
 <div style="height:1px;background:var(--border);margin:16px 0"></div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">NVIDIA GPU Selection</div>
  <div class="card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.</p>
    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
    </div>
    <div id="sat-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
    </div>
    <p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA validate tasks.</p>
  </div>
 </div>
 <div class="grid3">
 ` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
 		inv.NVIDIA,
 		`Runs NVIDIA diagnostics and board inventory checks.`,
 		`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
 		fmt.Sprintf("Validate: %s (Level 2, all GPUs simultaneously). Stress: %s (Level 3, all GPUs simultaneously).",
 			validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec),
 			validateFmtDur(platform.SATEstimatedNvidiaGPUStressSec)),
 	)) +
 		`<div id="sat-card-nvidia-targeted-stress">` +
 		renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
 			`<code>dcgmi diag targeted_stress</code>`,
 		"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedStressSec) + ` (all GPUs simultaneously).<p id="sat-ts-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-targeted-power">` +
 		renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
 			`<code>dcgmi diag targeted_power</code>`,
 		"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedPowerSec) + ` (all GPUs simultaneously).<p id="sat-tp-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-pulse">` +
 		renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
 			`<code>dcgmi diag pulse_test</code>`,
 			`Skipped in Validate. Stress: `+validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`+`<p id="sat-pt-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-interconnect">` +
 		renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
 			`<code>all_reduce_perf</code> (NCCL tests)`,
 			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-bandwidth">` +
 		renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
 			`<code>nvbandwidth</code>`,
 			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
 		)) +
 		`</div>` +
 		`</div>
 <div class="grid3" style="margin-top:16px">
 ` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
 		inv.AMD,
 		`Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`,
 		`GPU Validate: <code>rocm-smi</code>, <code>dmidecode</code>; MEM Integrity: <code>rvs mem</code>; MEM Bandwidth: <code>rocm-bandwidth-test</code>, <code>rvs babel</code>`,
 		`<div style="display:flex;flex-direction:column;gap:4px"><label class="cb-row"><input type="checkbox" id="sat-amd-target" checked><span>GPU Validate</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-mem-target" checked><span>MEM Integrity</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-bandwidth-target" checked><span>MEM Bandwidth</span></label></div>`,
 	)) +
 		`</div>
 <div id="sat-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Test Output <span id="sat-title"></span></div>
  <div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
 </div>
 <style>
 .validate-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
 .validate-profile-col { min-width:0; display:flex; flex-direction:column; }
 .validate-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:center; }
 .validate-card-body { padding:0; }
 .validate-card-section { padding:12px 16px 0; }
 .validate-card-section:last-child { padding-bottom:16px; }
 .sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
@media(max-width:900px){ .validate-profile-body { grid-template-columns:1fr; } }
 </style>
 <script>
 let satES = null;
 function satStressMode() {
  return document.querySelector('input[name="sat-mode"]:checked')?.value === 'stress';
 }
 function satModeChanged() {
  const stress = satStressMode();
  [
    {card: 'sat-card-nvidia-targeted-stress', hint: 'sat-ts-mode-hint'},
    {card: 'sat-card-nvidia-targeted-power',  hint: 'sat-tp-mode-hint'},
    {card: 'sat-card-nvidia-pulse',           hint: 'sat-pt-mode-hint'},
  ].forEach(function(item) {
    const card = document.getElementById(item.card);
    if (card) {
      card.style.opacity = stress ? '1' : '0.5';
      const hint = document.getElementById(item.hint);
      if (hint) hint.style.display = stress ? 'none' : '';
    }
  });
 }
 function satLabels() {
  return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
 }
 let satNvidiaGPUsPromise = null;
 function loadSatNvidiaGPUs() {
  if (!satNvidiaGPUsPromise) {
    satNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
      .then(r => {
        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
        return r.json();
      })
      .then(list => Array.isArray(list) ? list : []);
  }
  return satNvidiaGPUsPromise;
 }
 function satSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.sat-nvidia-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function satUpdateGPUSelectionNote() {
  const note = document.getElementById('sat-gpu-selection-note');
  if (!note) return;
  const selected = satSelectedGPUIndices();
  if (!selected.length) {
    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA validate tasks.';
    return;
  }
  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '. Multi-GPU tests will use all selected GPUs.';
 }
 function satRenderGPUList(gpus) {
  const root = document.getElementById('sat-gpu-list');
  if (!root) return;
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    satUpdateGPUSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
    return '<label class="sat-gpu-row">'
      + '<input class="sat-nvidia-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="satUpdateGPUSelectionNote()">'
      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
      + '</label>';
  }).join('');
  satUpdateGPUSelectionNote();
 }
 function satSelectAllGPUs() {
  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = true; });
  satUpdateGPUSelectionNote();
 }
 function satSelectNoGPUs() {
  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = false; });
  satUpdateGPUSelectionNote();
 }
 function satLoadGPUs() {
  loadSatNvidiaGPUs().then(function(gpus) {
    satRenderGPUList(gpus);
  }).catch(function(err) {
    const root = document.getElementById('sat-gpu-list');
    if (root) {
      root.innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
    }
    satUpdateGPUSelectionNote();
  });
 }
 function satGPUDisplayName(gpu) {
  const idx = (gpu && Number.isFinite(Number(gpu.index))) ? Number(gpu.index) : 0;
  const name = gpu && gpu.name ? gpu.name : ('GPU ' + idx);
  return 'GPU ' + idx + ' — ' + name;
 }
 function satRequestBody(target, overrides) {
  const body = {};
  const labels = satLabels();
  body.display_name = labels[target] || ('Validate ' + target);
  body.stress_mode = satStressMode();
  if (target === 'cpu') body.duration = satStressMode() ? 1800 : 60;
  if (overrides) {
    Object.keys(overrides).forEach(key => { body[key] = overrides[key]; });
  }
  return body;
 }
 function enqueueSATTarget(target, overrides) {
  return fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(satRequestBody(target, overrides))})
    .then(r => r.json());
 }
 function streamSATTask(taskId, title, resetTerminal) {
  if (satES) { satES.close(); satES = null; }
  document.getElementById('sat-output').style.display='block';
  document.getElementById('sat-title').textContent = '— ' + title;
  const term = document.getElementById('sat-terminal');
  if (resetTerminal) {
    term.textContent = '';
  }
  term.textContent += 'Task ' + taskId + ' queued. Streaming log...\n';
  return new Promise(function(resolve) {
    satES = new EventSource('/api/tasks/' + taskId + '/stream');
    satES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
    satES.addEventListener('done', function(e) {
      satES.close();
      satES = null;
      term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
      term.scrollTop = term.scrollHeight;
      resolve({ok: !e.data, error: e.data || ''});
    });
    satES.onerror = function() {
      if (satES) {
        satES.close();
        satES = null;
      }
      term.textContent += '\nERROR: stream disconnected.\n';
      term.scrollTop = term.scrollHeight;
      resolve({ok: false, error: 'stream disconnected'});
    };
  });
 }
 function selectedAMDValidateTargets() {
  const targets = [];
  const gpu = document.getElementById('sat-amd-target');
  const mem = document.getElementById('sat-amd-mem-target');
  const bw = document.getElementById('sat-amd-bandwidth-target');
  if (gpu && gpu.checked && !gpu.disabled) targets.push('amd');
  if (mem && mem.checked && !mem.disabled) targets.push('amd-mem');
  if (bw && bw.checked && !bw.disabled) targets.push('amd-bandwidth');
  return targets;
 }
 function runSAT(target) {
  return runSATWithOverrides(target, null);
 }
 function runSATWithOverrides(target, overrides) {
  const title = (overrides && overrides.display_name) || target;
  const term = document.getElementById('sat-terminal');
  document.getElementById('sat-output').style.display='block';
  document.getElementById('sat-title').textContent = '— ' + title;
  term.textContent = 'Enqueuing ' + title + ' test...\n';
  return enqueueSATTarget(target, overrides)
    .then(d => streamSATTask(d.task_id, title, false));
 }
 const nvidiaPerGPUTargets = [];
 const nvidiaAllGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'];
 function satAllGPUIndicesForMulti() {
  return Promise.resolve(satSelectedGPUIndices());
 }
 function expandSATTarget(target) {
  if (nvidiaAllGPUTargets.indexOf(target) >= 0) {
    return satAllGPUIndicesForMulti().then(function(indices) {
      if (!indices.length) return Promise.reject(new Error('No NVIDIA GPUs available.'));
      return [{target: target, overrides: {gpu_indices: indices, display_name: satLabels()[target] || target}}];
    });
  }
  if (nvidiaPerGPUTargets.indexOf(target) < 0) {
    return Promise.resolve([{target: target}]);
  }
  const selected = satSelectedGPUIndices();
  if (!selected.length) {
    return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
  }
  return loadSatNvidiaGPUs().then(gpus => gpus.filter(gpu => selected.indexOf(Number(gpu.index)) >= 0).map(gpu => ({
    target: target,
    overrides: {
      gpu_indices: [Number(gpu.index)],
      display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(gpu) + ')'
    },
    label: satGPUDisplayName(gpu),
  })));
 }
 function runNvidiaFabricValidate(target) {
  satAllGPUIndicesForMulti().then(function(indices) {
    if (!indices.length) { alert('No NVIDIA GPUs available.'); return; }
    runSATWithOverrides(target, {gpu_indices: indices, display_name: satLabels()[target] || target});
  });
 }
 function runNvidiaValidateSet(target) {
  const selected = satSelectedGPUIndices();
  if (!selected.length) { alert('Select at least one NVIDIA GPU.'); return; }
  return runSATWithOverrides(target, {gpu_indices: selected, display_name: satLabels()[target] || target});
 }
 function runAMDValidateSet() {
  const targets = selectedAMDValidateTargets();
  if (!targets.length) return;
  if (targets.length === 1) return runSAT(targets[0]);
  document.getElementById('sat-output').style.display='block';
  document.getElementById('sat-title').textContent = '— amd';
  const term = document.getElementById('sat-terminal');
  term.textContent = 'Running AMD validate set one by one...\n';
  const labels = satLabels();
  const runNext = (idx) => {
    if (idx >= targets.length) return Promise.resolve();
    const target = targets[idx];
    term.textContent += '\n[' + (idx + 1) + '/' + targets.length + '] ' + labels[target] + '\n';
    return enqueueSATTarget(target)
      .then(d => {
        return streamSATTask(d.task_id, labels[target], false);
      }).then(function() {
        return runNext(idx + 1);
      });
  };
  return runNext(0);
 }
 function runAllSAT() {
  const cycles = 1;
  const status = document.getElementById('sat-all-status');
  status.textContent = 'Enqueuing...';
  const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse'];
  const baseTargets = ['nvidia','nvidia-targeted-stress','nvidia-targeted-power','nvidia-pulse','nvidia-interconnect','nvidia-bandwidth','memory','storage','cpu'].concat(selectedAMDValidateTargets());
  const activeTargets = baseTargets.filter(target => {
    if (stressOnlyTargets.indexOf(target) >= 0 && !satStressMode()) return false;
    const btn = document.getElementById('sat-btn-' + target);
    return !(btn && btn.disabled);
  });
  Promise.all(activeTargets.map(expandSATTarget)).then(groups => {
    const expanded = [];
    for (let cycle = 0; cycle < cycles; cycle++) {
      groups.forEach(group => group.forEach(item => expanded.push(item)));
    }
    const total = expanded.length;
    let enqueued = 0;
    if (!total) {
      status.textContent = 'No tasks selected.';
      return;
    }
    const runNext = (idx) => {
      if (idx >= expanded.length) { status.textContent = 'Completed ' + total + ' task(s).'; return Promise.resolve(); }
      const item = expanded[idx];
      status.textContent = 'Running ' + (idx + 1) + '/' + total + '...';
      return enqueueSATTarget(item.target, item.overrides)
        .then(() => {
          enqueued++;
          return runNext(idx + 1);
        });
    };
    return runNext(0);
  }).catch(err => {
    status.textContent = 'Error: ' + err.message;
  });
 }
 </script>
 <script>
 fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
    if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
    if (!gp.nvidia) disableSATCard('nvidia-targeted-stress', 'No NVIDIA GPU detected');
    if (!gp.nvidia) disableSATCard('nvidia-targeted-power', 'No NVIDIA GPU detected');
    if (!gp.nvidia) disableSATCard('nvidia-pulse', 'No NVIDIA GPU detected');
    if (!gp.nvidia) disableSATCard('nvidia-interconnect', 'No NVIDIA GPU detected');
    if (!gp.nvidia) disableSATCard('nvidia-bandwidth', 'No NVIDIA GPU detected');
    if (!gp.amd) disableSATCard('amd', 'No AMD GPU detected');
    if (!gp.amd) disableSATAMDOptions('No AMD GPU detected');
 });
 satLoadGPUs();
 function disableSATAMDOptions(reason) {
    ['sat-amd-target','sat-amd-mem-target','sat-amd-bandwidth-target'].forEach(function(id) {
        const cb = document.getElementById(id);
        if (!cb) return;
        cb.disabled = true;
        cb.checked = false;
        cb.title = reason;
    });
 }
 function disableSATCard(id, reason) {
    const btn = document.getElementById('sat-btn-' + id);
    if (!btn) return;
    btn.disabled = true;
    btn.title = reason;
    btn.style.opacity = '0.4';
    const card = btn.closest('.card');
    if (card) {
        let note = card.querySelector('.sat-unavail');
        if (!note) {
            note = document.createElement('p');
            note.className = 'sat-unavail';
            note.style.cssText = 'color:var(--muted);font-size:12px;margin:0 0 8px';
            const body = card.querySelector('.card-body');
            if (body) body.insertBefore(note, body.firstChild);
        }
        note.textContent = reason;
    }
 }
 </script>`
 }
 // loadValidateInventory builds the device summaries shown on the Validate
 // page from the audit snapshot at opts.AuditPath.
 //
 // When the snapshot cannot be loaded or decoded, every summary reads
 // "Audit snapshot not loaded." and both GPU counts stay zero. Devices whose
 // Present flag is explicitly false are ignored; devices without a flag are
 // counted.
 func loadValidateInventory(opts HandlerOptions) validateInventory {
 	const notLoaded = "Audit snapshot not loaded."
 	inv := validateInventory{
 		CPU:     notLoaded,
 		Memory:  notLoaded,
 		Storage: notLoaded,
 		NVIDIA:  notLoaded,
 		AMD:     notLoaded,
 	}

 	raw, err := loadSnapshot(opts.AuditPath)
 	if err != nil {
 		return inv
 	}
 	var snapshot schema.HardwareIngestRequest
 	if json.Unmarshal(raw, &snapshot) != nil {
 		return inv
 	}

 	// modelName returns the first non-empty candidate after trimming, or
 	// "unknown" when every candidate is nil or blank.
 	modelName := func(candidates ...*string) string {
 		names := make([]string, 0, len(candidates)+1)
 		for _, candidate := range candidates {
 			names = append(names, validateTrimPtr(candidate))
 		}
 		names = append(names, "unknown")
 		return validateFirstNonEmpty(names...)
 	}

 	cpuModels, cpuCount := map[string]int{}, 0
 	for _, cpu := range snapshot.Hardware.CPUs {
 		if cpu.Present == nil || *cpu.Present {
 			cpuCount++
 			addValidateModel(cpuModels, modelName(cpu.Model, cpu.Manufacturer))
 		}
 	}

 	memModels, memCount := map[string]int{}, 0
 	for _, dimm := range snapshot.Hardware.Memory {
 		if dimm.Present == nil || *dimm.Present {
 			memCount++
 			addValidateModel(memModels, modelName(dimm.PartNumber, dimm.Type, dimm.Manufacturer))
 		}
 	}

 	storageModels, storageCount := map[string]int{}, 0
 	for _, disk := range snapshot.Hardware.Storage {
 		if disk.Present == nil || *disk.Present {
 			storageCount++
 			addValidateModel(storageModels, modelName(disk.Model, disk.Manufacturer))
 		}
 	}

 	// A PCIe device is tested against both vendors independently.
 	nvidiaModels, nvidiaCount := map[string]int{}, 0
 	amdModels, amdCount := map[string]int{}, 0
 	for _, dev := range snapshot.Hardware.PCIeDevices {
 		if dev.Present != nil && !*dev.Present {
 			continue
 		}
 		name := modelName(dev.Model, dev.Manufacturer)
 		if validateIsVendorGPU(dev, "nvidia") {
 			nvidiaCount++
 			addValidateModel(nvidiaModels, name)
 		}
 		if validateIsVendorGPU(dev, "amd") {
 			amdCount++
 			addValidateModel(amdModels, name)
 		}
 	}

 	inv.CPU = formatValidateDeviceSummary(cpuCount, cpuModels, "CPU")
 	inv.Memory = formatValidateDeviceSummary(memCount, memModels, "module")
 	inv.Storage = formatValidateDeviceSummary(storageCount, storageModels, "device")
 	inv.NVIDIA = formatValidateDeviceSummary(nvidiaCount, nvidiaModels, "GPU")
 	inv.AMD = formatValidateDeviceSummary(amdCount, amdModels, "GPU")
 	inv.NvidiaGPUCount = nvidiaCount
 	inv.AMDGPUCount = amdCount
 	return inv
 }
 // renderValidateCardBody lays out the four stacked sections of a validate
 // card: detected devices (muted), description, commands, and settings
 // (muted). All arguments are inserted as-is, so callers must pass HTML that
 // is already safe to embed.
 func renderValidateCardBody(devices, description, commands, settings string) string {
 	const (
 		mutedOpen    = `<div class="validate-card-section"><div style="font-size:13px;color:var(--muted)">`
 		plainOpen    = `<div class="validate-card-section"><div style="font-size:13px">`
 		sectionClose = `</div></div>`
 	)
 	html := mutedOpen + devices + sectionClose
 	html += plainOpen + description + sectionClose
 	html += plainOpen + commands + sectionClose
 	html += mutedOpen + settings + sectionClose
 	return html
 }
 // formatValidateDeviceSummary turns a per-model count map into one summary
 // line. unit is the singular noun ("GPU", "module"); an "s" is appended
 // unless total is exactly 1.
 //
 //   - total == 0:      "0 <unit>s detected."
 //   - a single model:  "<count> x <model> <unit(s)>"
 //   - otherwise:       "<total> <unit(s)>: <count> x <model>, ..."
 //
 // Models are listed in sorted order and HTML-escaped.
 func formatValidateDeviceSummary(total int, models map[string]int, unit string) string {
 	if total == 0 {
 		return "0 " + unit + "s detected."
 	}

 	// Map iteration order is random; sort names for stable output.
 	names := make([]string, 0, len(models))
 	for name := range models {
 		names = append(names, name)
 	}
 	sort.Strings(names)

 	entries := make([]string, len(names))
 	for i, name := range names {
 		entries[i] = fmt.Sprintf("%d x %s", models[name], html.EscapeString(name))
 	}

 	noun := unit
 	if total != 1 {
 		noun = unit + "s"
 	}

 	switch len(entries) {
 	case 1:
 		return entries[0] + " " + noun
 	default:
 		return fmt.Sprintf("%d %s: %s", total, noun, strings.Join(entries, ", "))
 	}
 }
 // addValidateModel increments the counter for name in counts. The name is
 // trimmed first; a blank name is tallied under "unknown".
 func addValidateModel(counts map[string]int, name string) {
 	if trimmed := strings.TrimSpace(name); trimmed != "" {
 		counts[trimmed]++
 		return
 	}
 	counts["unknown"]++
 }
 // validateTrimPtr dereferences value and strips surrounding whitespace.
 // A nil pointer yields the empty string.
 func validateTrimPtr(value *string) string {
 	var raw string
 	if value != nil {
 		raw = *value
 	}
 	return strings.TrimSpace(raw)
 }
 // validateFirstNonEmpty returns the first argument that is non-blank after
 // trimming whitespace (the trimmed form is returned), or "" when none is.
 func validateFirstNonEmpty(values ...string) string {
 	for i := range values {
 		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
 			return trimmed
 		}
 	}
 	return ""
 }
 // validateIsVendorGPU reports whether dev looks like a GPU from the given
 // vendor. Supported vendors are "nvidia" and "amd"; anything else yields
 // false. Matching is case-insensitive.
 //
 // Devices whose model or manufacturer mentions "aspeed" are never treated
 // as GPUs. NVIDIA devices are matched on model or manufacturer name only.
 // AMD devices must have a GPU-like device class and an AMD/ATI vendor or an
 // AMD-branded model name.
 func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
 	model := strings.ToLower(validateTrimPtr(dev.Model))
 	manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer))
 	class := strings.ToLower(validateTrimPtr(dev.DeviceClass))
 	if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") {
 		return false
 	}
 	switch vendor {
 	case "nvidia":
 		return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia")
 	case "amd":
 		isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller"
 		// "ati" must match as a whole word: a plain substring test also
 		// hits "corporation", which would classify e.g. "NVIDIA Corporation"
 		// or "Intel Corporation" display devices as AMD GPUs.
 		isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") ||
 			strings.Contains(manufacturer, "amd") ||
 			validateHasWord(manufacturer, "ati")
 		isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd")
 		return isGPUClass && (isAMDVendor || isAMDModel)
 	default:
 		return false
 	}
 }

 // validateHasWord reports whether s contains word as a complete token, where
 // tokens are maximal runs of ASCII lowercase letters and digits. Both
 // arguments are expected to be lower-case already.
 func validateHasWord(s, word string) bool {
 	tokens := strings.FieldsFunc(s, func(r rune) bool {
 		isLetter := r >= 'a' && r <= 'z'
 		isDigit := r >= '0' && r <= '9'
 		return !isLetter && !isDigit
 	})
 	for _, token := range tokens {
 		if token == word {
 			return true
 		}
 	}
 	return false
 }
 // renderSATCard renders one validate/SAT card. The header shows label and a
 // "Run" button with id "sat-btn-<id>" whose onclick is runAction, followed
 // by headerActions when it is not blank. body becomes the card body. All
 // arguments are embedded verbatim, so they must already be safe HTML/JS.
 func renderSATCard(id, label, runAction, headerActions, body string) string {
 	const cardTemplate = `<div class="card"><div class="card-head card-head-actions"><span>%s</span><div class="card-head-buttons">%s</div></div><div class="card-body validate-card-body">%s</div></div>`

 	runButton := `<button id="sat-btn-` + id + `" class="btn btn-primary btn-sm" onclick="` + runAction + `">Run</button>`
 	extra := ""
 	if strings.TrimSpace(headerActions) != "" {
 		extra = headerActions
 	}
 	return fmt.Sprintf(cardTemplate, label, runButton+extra, body)
 }
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -135,6 +135,14 @@ type namedMetricsRing struct {
 // At metricsCollectInterval = 5 s this covers 30 minutes of live history.
 const metricsChartWindow = 360
 // metricsDownsampleAge is the age after which old metrics rows are downsampled
 // to 1 sample per minute. Data fresher than this is kept at full resolution.
 const metricsDownsampleAge = 2 * time.Hour
 // metricsRetainWindow is the total retention period for metrics rows.
 // Rows older than this are deleted entirely by the background compactor.
 const metricsRetainWindow = 48 * time.Hour
 var metricsCollectInterval = 5 * time.Second
 // pendingNetChange tracks a network state change awaiting confirmation.
@@ -261,7 +269,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
-	mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
+	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/autotune/run", h.handleAPIBenchmarkAutotuneRun())
 	mux.HandleFunc("GET /api/bee-bench/nvidia/autotune/status", h.handleAPIBenchmarkAutotuneStatus)
 	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)
 	// Tasks
 	mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
@@ -295,13 +307,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	// Tools
 	mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)
 	// Display
 	mux.HandleFunc("GET /api/display/resolutions", h.handleAPIDisplayResolutions)
 	mux.HandleFunc("POST /api/display/set", h.handleAPIDisplaySet)
 	// GPU presence / tools
 	mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
 	mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
 	mux.HandleFunc("GET /api/gpu/nvidia-status", h.handleAPIGNVIDIAGPUStatuses)
 	mux.HandleFunc("POST /api/gpu/nvidia-reset", h.handleAPIGNVIDIAReset)
 	mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)
 	// System
@@ -335,13 +345,24 @@ func (h *handler) startMetricsCollector() {
 	goRecoverLoop("metrics collector", 2*time.Second, func() {
 		ticker := time.NewTicker(metricsCollectInterval)
 		defer ticker.Stop()
-		for range ticker.C {
+		pruneTicker := time.NewTicker(time.Hour)
-			sample := platform.SampleLiveMetrics()
+		defer pruneTicker.Stop()
-			if h.metricsDB != nil {
+		for {
-				_ = h.metricsDB.Write(sample)
+			select {
 			case <-ticker.C:
 				sample := platform.SampleLiveMetrics()
 				if h.metricsDB != nil {
 					_ = h.metricsDB.Write(sample)
 				}
 				h.feedRings(sample)
 				h.setLatestMetric(sample)
 			case <-pruneTicker.C:
 				if h.metricsDB != nil {
 					now := time.Now().UTC()
 					_ = h.metricsDB.Downsample(now.Add(-metricsDownsampleAge), now.Add(-metricsRetainWindow))
 					_ = h.metricsDB.Prune(now.Add(-metricsRetainWindow))
 				}
 			}
 			h.feedRings(sample)
 			h.setLatestMetric(sample)
 		}
 	})
 }
@@ -575,12 +596,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	}
 	timeline := metricsTimelineSegments(samples, time.Now())
 	if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
-		buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
+		var overviewOk bool
 		var buf []byte
 		buf, overviewOk, err = renderGPUOverviewChartSVG(idx, samples, timeline)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
-		if !ok {
+		if !overviewOk {
 			http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 			return
 		}
@@ -589,23 +612,37 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		_, _ = w.Write(buf)
 		return
 	}
-	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
+	datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples)
 	if !ok {
 		http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 		return
 	}
-	buf, err := renderMetricChartSVG(
+	var buf []byte
-		title,
+	if stacked {
-		labels,
+		buf, err = renderStackedMetricChartSVG(
-		sampleTimes(samples),
+			title,
-		datasets,
+			labels,
-		names,
+			sampleTimes(samples),
-		yMin,
+			datasets,
-		yMax,
+			names,
-		chartCanvasHeightForPath(path, len(names)),
+			yMax,
-		timeline,
+			chartCanvasHeightForPath(path, len(names)),
-	)
+			timeline,
 		)
 	} else {
 		buf, err = renderMetricChartSVG(
 			title,
 			labels,
 			sampleTimes(samples),
 			datasets,
 			names,
 			yMin,
 			yMax,
 			chartCanvasHeightForPath(path, len(names)),
 			timeline,
 		)
 	}
 	if err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
@@ -615,12 +652,8 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	_, _ = w.Write(buf)
 }
-func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (datasets [][]float64, names []string, labels []string, title string, yMin, yMax *float64, stacked bool, ok bool) {
-	var datasets [][]float64
+	labels = sampleTimeLabels(samples)
 	var names []string
 	var title string
 	var yMin, yMax *float64
 	labels := sampleTimeLabels(samples)
 	switch {
 	case path == "server-load":
@@ -657,12 +690,19 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 	case path == "server-power":
 		title = "System Power"
 		power := make([]float64, len(samples))
 		label := "Power W"
 		for i, s := range samples {
 			power[i] = s.PowerW
 			if strings.TrimSpace(s.PowerSource) != "" {
 				label = fmt.Sprintf("Power W · %s", s.PowerSource)
 				if strings.TrimSpace(s.PowerMode) != "" {
 					label += fmt.Sprintf(" (%s)", s.PowerMode)
 				}
 			}
 		}
 		power = normalizePowerSeries(power)
 		datasets = [][]float64{power}
-		names = []string{"Power W"}
+		names = []string{label}
 		yMin = floatPtr(0)
 		yMax = autoMax120(power)
@@ -707,7 +747,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 	case strings.HasPrefix(path, "gpu/"):
 		idx, sub, ok := parseGPUChartPath(path)
 		if !ok {
-			return nil, nil, nil, "", nil, nil, false
+			return nil, nil, nil, "", nil, nil, false, false
 		}
 		switch sub {
 		case "load":
@@ -715,7 +755,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
 			mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
 			if util == nil && mem == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
 			names = []string{"Load %", "Mem %"}
@@ -725,7 +765,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Temperature"
 			temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
 			if temp == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{temp}
 			names = []string{"Temp °C"}
@@ -735,7 +775,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Core Clock"
 			clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
 			if clock == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{clock}
 			names = []string{"Core Clock MHz"}
@@ -744,7 +784,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Memory Clock"
 			clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
 			if clock == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{clock}
 			names = []string{"Memory Clock MHz"}
@@ -753,7 +793,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Power"
 			power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
 			if power == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{power}
 			names = []string{"Power W"}
@@ -761,10 +801,10 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 		}
 	default:
-		return nil, nil, nil, "", nil, nil, false
+		return nil, nil, nil, "", nil, nil, false, false
 	}
-	return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
+	return datasets, names, labels, title, yMin, yMax, stacked, len(datasets) > 0
 }
 func parseGPUChartPath(path string) (idx int, sub string, ok bool) {
@@ -930,6 +970,37 @@ func normalizePowerSeries(ds []float64) []float64 {
 	return out
 }
 // psuSlotsFromSamples collects every distinct PSU slot number that appears in
 // any sample and returns them in ascending order. The result is empty (but
 // non-nil) when no sample carries PSU readings.
 func psuSlotsFromSamples(samples []platform.LiveMetricSample) []int {
 	known := make(map[int]struct{})
 	unique := make([]int, 0)
 	for i := range samples {
 		for _, psu := range samples[i].PSUs {
 			if _, dup := known[psu.Slot]; dup {
 				continue
 			}
 			known[psu.Slot] = struct{}{}
 			unique = append(unique, psu.Slot)
 		}
 	}
 	sort.Ints(unique)
 	return unique
 }
 // psuStackedTotal returns the point-by-point sum of all PSU datasets (for scale calculation).
 //
 // The result is as long as the longest dataset; a shorter dataset simply
 // contributes nothing past its own end. It returns nil when datasets is empty.
 func psuStackedTotal(datasets [][]float64) []float64 {
 	if len(datasets) == 0 {
 		return nil
 	}
 	// Size the accumulator by the longest series, not by the first one, so a
 	// later dataset that happens to be longer cannot index out of range.
 	n := 0
 	for _, ds := range datasets {
 		if len(ds) > n {
 			n = len(ds)
 		}
 	}
 	total := make([]float64, n)
 	for _, ds := range datasets {
 		for i, v := range ds {
 			total[i] += v
 		}
 	}
 	return total
 }
 func normalizeFanSeries(ds []float64) []float64 {
 	if len(ds) == 0 {
 		return nil
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -11,6 +11,7 @@ import (
 	"time"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/schema"
 )
 func TestChartLegendNumber(t *testing.T) {
@@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
 	}
 }
 func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) {
 	row := buildRuntimeToRAMRow(schema.RuntimeHealth{ToRAMStatus: "partial"})
 	if row.Status != "WARNING" {
 		t.Fatalf("status=%q want WARNING", row.Status)
 	}
 	if !strings.Contains(row.Issue, "Partial or staged RAM copy detected") {
 		t.Fatalf("issue=%q", row.Issue)
 	}
 }
 func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 	samples := []platform.LiveMetricSample{
 		{
@@ -109,7 +120,7 @@ func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 		},
 	}
-	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, labels, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -153,7 +164,7 @@ func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
 		},
 	}
-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, _, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -198,7 +209,7 @@ func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
 		},
 	}
-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
+	datasets, names, _, title, _, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
 	if !ok {
 		t.Fatal("gpu-all-clock returned ok=false")
 	}
@@ -409,6 +420,49 @@ func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
 	}
 }
 // TestChartDataFromSamplesServerPowerUsesResolvedSystemPower feeds two samples
 // that carry both per-PSU readings and a resolved system power value, and
 // expects the "server-power" chart to be a single, non-stacked series whose
 // legend name includes the power source and mode.
 func TestChartDataFromSamplesServerPowerUsesResolvedSystemPower(t *testing.T) {
 	t0 := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
 	t1 := t0.Add(time.Minute)
 	history := []platform.LiveMetricSample{
 		{
 			Timestamp: t0,
 			PSUs: []platform.PSUReading{
 				{Slot: 1, PowerW: 120},
 				{Slot: 2, PowerW: 130},
 			},
 			PowerW:      250,
 			PowerSource: "sdr_psu_input",
 			PowerMode:   "autotuned",
 		},
 		{
 			Timestamp: t1,
 			PSUs: []platform.PSUReading{
 				{Slot: 1, PowerW: 140},
 				{Slot: 2, PowerW: 135},
 			},
 			PowerW:      275,
 			PowerSource: "sdr_psu_input",
 			PowerMode:   "autotuned",
 		},
 	}
 	series, legend, _, chartTitle, _, _, isStacked, found := chartDataFromSamples("server-power", history)
 	if !found {
 		t.Fatal("expected server-power chart data")
 	}
 	if chartTitle != "System Power" {
 		t.Fatalf("title=%q", chartTitle)
 	}
 	if isStacked {
 		t.Fatal("server-power should use resolved system power, not stacked PSU inputs")
 	}
 	if len(series) != 1 || len(legend) != 1 {
 		t.Fatalf("datasets=%d names=%d want 1/1", len(series), len(legend))
 	}
 	if legend[0] != "Power W · sdr_psu_input (autotuned)" {
 		t.Fatalf("names=%v", legend)
 	}
 }
 func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
 	got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
 	want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -591,7 +645,7 @@ func TestTasksPageRendersOpenLinksAndPaginationControls(t *testing.T) {
 	}
 }
-func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
+func TestToolsPageRendersNvidiaSelfHealSection(t *testing.T) {
 	handler := NewHandler(HandlerOptions{})
 	rec := httptest.NewRecorder()
 	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/tools", nil))
@@ -599,11 +653,20 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
 		t.Fatalf("status=%d", rec.Code)
 	}
 	body := rec.Body.String()
 	if !strings.Contains(body, `NVIDIA Self Heal`) {
 		t.Fatalf("tools page missing nvidia self heal section: %s", body)
 	}
 	if !strings.Contains(body, `Restart GPU Drivers`) {
 		t.Fatalf("tools page missing restart gpu drivers button: %s", body)
 	}
-	if !strings.Contains(body, `restartGPUDrivers()`) {
+	if !strings.Contains(body, `nvidiaRestartDrivers()`) {
-		t.Fatalf("tools page missing restartGPUDrivers action: %s", body)
+		t.Fatalf("tools page missing nvidiaRestartDrivers action: %s", body)
 	}
 	if !strings.Contains(body, `/api/gpu/nvidia-status`) {
 		t.Fatalf("tools page missing nvidia status api usage: %s", body)
 	}
 	if !strings.Contains(body, `nvidiaResetGPU(`) {
 		t.Fatalf("tools page missing nvidiaResetGPU action: %s", body)
 	}
 	if !strings.Contains(body, `id="boot-source-text"`) {
 		t.Fatalf("tools page missing boot source field: %s", body)
@@ -628,8 +691,14 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`href="/benchmark"`,
 		`id="benchmark-gpu-list"`,
 		`/api/gpu/nvidia`,
-		`/api/benchmark/nvidia/run`,
+		`/api/bee-bench/nvidia/perf/run`,
 		`/api/bee-bench/nvidia/power/run`,
 		`/api/bee-bench/nvidia/autotune/run`,
 		`/api/bee-bench/nvidia/autotune/status`,
 		`benchmark-run-nccl`,
 		`Run Performance Benchmark`,
 		`Run Power / Thermal Fit`,
 		`Autotune`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
@@ -640,7 +709,7 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	dir := t.TempDir()
 	exportDir := filepath.Join(dir, "export")
-	runDir := filepath.Join(exportDir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	runDir := filepath.Join(exportDir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -682,10 +751,10 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	body := rec.Body.String()
 	wantTime := result.GeneratedAt.Local().Format("2006-01-02 15:04:05")
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score by saved benchmark run and GPU.`,
-		`NVIDIA H100 PCIe / GPU 0`,
+		`GPU 0`,
-		`NVIDIA H100 PCIe / GPU 1`,
+		`GPU 1`,
 		`#1`,
 		wantTime,
 		`1176.25`,
@@ -711,6 +780,8 @@ func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
 		`controlled NVIDIA DCGM load`,
 		`<code>dcgmi diag targeted_stress</code>`,
 		`NVIDIA GPU Selection`,
 		`All NVIDIA validate tasks use only the GPUs selected here.`,
 		`Select All`,
 		`id="sat-gpu-list"`,
 	} {
 		if !strings.Contains(body, needle) {
@@ -719,6 +790,26 @@ func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
 	}
 }
 // TestValidatePageRendersNvidiaFabricCardsInValidateMode requests /validate
 // from a default handler and checks that the NCCL and NVBandwidth cards are
 // present in the rendered page.
 func TestValidatePageRendersNvidiaFabricCardsInValidateMode(t *testing.T) {
 	srv := NewHandler(HandlerOptions{})
 	resp := httptest.NewRecorder()
 	req := httptest.NewRequest(http.MethodGet, "/validate", nil)
 	srv.ServeHTTP(resp, req)
 	if resp.Code != http.StatusOK {
 		t.Fatalf("status=%d", resp.Code)
 	}
 	page := resp.Body.String()
 	expected := []string{
 		`NVIDIA Interconnect (NCCL)`,
 		`Validate and Stress:`,
 		`NVIDIA Bandwidth (NVBandwidth)`,
 		`nvbandwidth runs all built-in tests without a time limit`,
 	}
 	for i := range expected {
 		if strings.Contains(page, expected[i]) {
 			continue
 		}
 		t.Fatalf("validate page missing %q: %s", expected[i], page)
 	}
 }
 func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
 	handler := NewHandler(HandlerOptions{})
 	rec := httptest.NewRecorder()
@@ -730,8 +821,8 @@ func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
 	for _, needle := range []string{
 		`NVIDIA Max Compute Load`,
 		`dcgmproftester`,
-		`targeted_stress remain in <a href="/validate">Validate</a>`,
+		`NCCL`,
-		`NVIDIA Interconnect Test (NCCL all_reduce_perf)`,
+		`Validate → Stress mode`,
 		`id="burn-gpu-list"`,
 	} {
 		if !strings.Contains(body, needle) {
@@ -1083,6 +1174,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 	}
 	body := rec.Body.String()
 	for _, needle := range []string{
 		// Runtime Health card — LiveCD checks only
 		`Runtime Health`,
 		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
 		`Export Directory`,
@@ -1091,16 +1183,18 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 		`CUDA / ROCm`,
 		`Required Utilities`,
 		`Bee Services`,
 		`<td>CPU</td>`,
 		`<td>Memory</td>`,
 		`<td>Storage</td>`,
 		`<td>GPU</td>`,
 		`CUDA runtime is not ready for GPU SAT.`,
 		`Missing: nvidia-smi`,
 		`bee-nvidia=inactive`,
-		`cpu SAT: FAILED`,
+		// Hardware Summary card — component health badges
-		`storage SAT: FAILED`,
+		`Hardware Summary`,
-		`sat:nvidia`,
+		`>CPU<`,
 		`>Memory<`,
 		`>Storage<`,
 		`>GPU<`,
 		`>PSU<`,
 		`badge-warn`, // cpu Warning badge
 		`badge-err`,  // storage Critical badge
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("dashboard missing %q: %s", needle, body)
--- a/audit/internal/webui/stability.go
+++ b/audit/internal/webui/stability.go
@@ -7,14 +7,43 @@ import (
 	"time"
 )
 const (
 	recoverLoopMaxDelay   = 60 * time.Second
 	recoverLoopResetAfter = 30 * time.Second
 )
 // goRecoverLoop starts fn in a goroutine, restarting after panics.
 // restartDelay is the initial delay; successive panics double it up to
 // recoverLoopMaxDelay. The delay resets to restartDelay once fn runs
 // successfully for recoverLoopResetAfter without panicking.
 func goRecoverLoop(name string, restartDelay time.Duration, fn func()) {
 	go func() {
 		delay := restartDelay
 		consecutive := 0
 		for {
-			if !runRecoverable(name, fn) {
+			start := time.Now()
 			panicked := runRecoverable(name, fn)
 			if !panicked {
 				return
 			}
-			if restartDelay > 0 {
+			consecutive++
-				time.Sleep(restartDelay)
+			if time.Since(start) >= recoverLoopResetAfter {
 				delay = restartDelay
 				consecutive = 1
 			}
 			slog.Warn("goroutine restarting after panic",
 				"component", name,
 				"consecutive_panics", consecutive,
 				"next_delay", delay,
 			)
 			if delay > 0 {
 				time.Sleep(delay)
 			}
 			if delay < recoverLoopMaxDelay {
 				delay *= 2
 				if delay > recoverLoopMaxDelay {
 					delay = recoverLoopMaxDelay
 				}
 			}
 		}
 	}()
--- a/audit/internal/webui/task_report.go
+++ b/audit/internal/webui/task_report.go
@@ -171,21 +171,17 @@ func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeli
 		}
 		return gpuDisplayLabel(idx) + " Overview", buf, true
 	}
-	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
+	datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples)
 	if !ok {
 		return "", nil, false
 	}
-	buf, err := renderMetricChartSVG(
+	var buf []byte
-		title,
+	var err error
-		labels,
+	if stacked {
-		sampleTimes(samples),
+		buf, err = renderStackedMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMax, chartCanvasHeightForPath(path, len(names)), timeline)
-		datasets,
+	} else {
-		names,
+		buf, err = renderMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMin, yMax, chartCanvasHeightForPath(path, len(names)), timeline)
-		yMin,
+	}
 		yMax,
 		chartCanvasHeightForPath(path, len(names)),
 		timeline,
 	)
 	if err != nil {
 		return "", nil, false
 	}
@@ -233,6 +229,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 	if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
 		b.WriteString(benchmarkCard)
 	}
 	if powerCard := renderTaskPowerResultsCard(report.Target, logText); powerCard != "" {
 		b.WriteString(powerCard)
 	}
 	if len(report.Charts) > 0 {
 		for _, chart := range report.Charts {
@@ -251,7 +250,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 }
 func renderTaskBenchmarkResultsCard(target, logText string) string {
-	if strings.TrimSpace(target) != "nvidia-benchmark" {
+	switch strings.TrimSpace(target) {
 	case "nvidia-bench-perf":
 	default:
 		return ""
 	}
 	resultPath := taskBenchmarkResultPath(logText)
@@ -263,7 +264,7 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 		return ""
 	}
 	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+		"Perf Results",
 		"Composite score for this benchmark task.",
 		"No benchmark results were saved for this task.",
 		columns,
@@ -271,15 +272,42 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 	)
 }
 // renderTaskPowerResultsCard builds the "Power Results" HTML card for a
 // "nvidia-bench-power" task. It locates result.json through the archive path
 // found in logText, decodes it as a platform.NvidiaPowerBenchResult and renders
 // one table row per GPU. It returns "" for any other target, or when the result
 // file cannot be located, read or decoded.
 func renderTaskPowerResultsCard(target, logText string) string {
 	if strings.TrimSpace(target) != "nvidia-bench-power" {
 		return ""
 	}
 	jsonPath := taskBenchmarkResultPath(logText)
 	if strings.TrimSpace(jsonPath) == "" {
 		return ""
 	}
 	payload, readErr := os.ReadFile(jsonPath)
 	if readErr != nil {
 		return ""
 	}
 	var bench platform.NvidiaPowerBenchResult
 	if json.Unmarshal(payload, &bench) != nil {
 		return ""
 	}
 	var card strings.Builder
 	card.WriteString(`<div class="card"><div class="card-head">Power Results</div><div class="card-body">`)
 	if order := bench.RecommendedSlotOrder; len(order) > 0 {
 		// The slot order is escaped before it is embedded in the markup.
 		card.WriteString(`<p style="margin-bottom:10px"><strong>Recommended slot order:</strong> `)
 		card.WriteString(html.EscapeString(joinTaskIndices(order)))
 		card.WriteString(`</p>`)
 	}
 	card.WriteString(`<table><tr><th>GPU</th><th>Status</th><th>Max Power</th><th>Applied Limit</th></tr>`)
 	for _, row := range bench.GPUs {
 		status := html.EscapeString(row.Status)
 		fmt.Fprintf(&card, `<tr><td>GPU %d</td><td>%s</td><td>%.0f W</td><td>%.0f W</td></tr>`,
 			row.Index, status, row.MaxObservedPowerW, row.AppliedPowerLimitW)
 	}
 	card.WriteString(`</table></div></div>`)
 	return card.String()
 }
 // taskBenchmarkResultPath derives the result.json location for a task from the
 // archive path recorded in its log: the ".tar.gz" suffix is stripped to get the
 // run directory and "result.json" is joined onto it. It returns "" when the log
 // has no archive path or the path does not end in ".tar.gz".
 func taskBenchmarkResultPath(logText string) string {
 	const archiveExt = ".tar.gz"
 	archive := taskArchivePathFromLog(logText)
 	if archive == "" || !strings.HasSuffix(archive, archiveExt) {
 		return ""
 	}
 	return filepath.Join(archive[:len(archive)-len(archiveExt)], "result.json")
 }
--- a/audit/internal/webui/task_runner.go
+++ b/audit/internal/webui/task_runner.go
@@ -0,0 +1,505 @@
 package webui
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
 	"os/signal"
 	"path/filepath"
 	"strings"
 	"syscall"
 	"time"
 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/runtimeenv"
 )
 // taskRunnerState is the JSON document that writeTaskRunnerState persists and
 // readTaskRunnerState loads from the path given by taskRunnerStatePath.
 type taskRunnerState struct {
 	// PID is serialized as "pid".
 	// NOTE(review): presumably the OS process ID of the task runner — confirm against the writer.
 	PID       int       `json:"pid"`
 	// Status is serialized as "status"; the set of valid values is not visible here.
 	Status    string    `json:"status"`
 	// Error is serialized as "error" and omitted from the JSON when empty.
 	Error     string    `json:"error,omitempty"`
 	// UpdatedAt is serialized as "updated_at".
 	UpdatedAt time.Time `json:"updated_at"`
 }
 // taskRunnerStatePath returns the location of runner-state.json inside the
 // task's artifacts directory, or "" when t is nil or has no artifacts directory.
 func taskRunnerStatePath(t *Task) string {
 	if t == nil {
 		return ""
 	}
 	if dir := strings.TrimSpace(t.ArtifactsDir); dir == "" {
 		return ""
 	}
 	return filepath.Join(t.ArtifactsDir, "runner-state.json")
 }
 // writeTaskRunnerState persists state as indented JSON at taskRunnerStatePath(t).
 //
 // The data is written to "<path>.tmp" first and then renamed over the final
 // path, so readers do not observe a half-written file. When the task has no
 // state path the call is a no-op and returns nil. On any write or rename
 // failure the temporary file is removed (best effort) and the original error
 // is returned unchanged.
 func writeTaskRunnerState(t *Task, state taskRunnerState) error {
 	path := taskRunnerStatePath(t)
 	if path == "" {
 		return nil
 	}
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return err
 	}
 	data, err := json.MarshalIndent(state, "", "  ")
 	if err != nil {
 		return err
 	}
 	tmp := path + ".tmp"
 	if err := os.WriteFile(tmp, data, 0644); err != nil {
 		// Best effort: a failed write may leave a truncated temp file behind.
 		_ = os.Remove(tmp)
 		return err
 	}
 	if err := os.Rename(tmp, path); err != nil {
 		// Best effort: do not leave a stale temp file next to the state file.
 		_ = os.Remove(tmp)
 		return err
 	}
 	return nil
 }
 // readTaskRunnerState loads the runner state saved for t. The boolean is false,
 // and the returned state is the zero value, when the task has no state path or
 // the file is missing, unreadable, empty or not valid JSON.
 func readTaskRunnerState(t *Task) (taskRunnerState, bool) {
 	statePath := taskRunnerStatePath(t)
 	if statePath == "" {
 		return taskRunnerState{}, false
 	}
 	raw, readErr := os.ReadFile(statePath)
 	switch {
 	case readErr != nil, len(raw) == 0:
 		return taskRunnerState{}, false
 	}
 	var loaded taskRunnerState
 	if json.Unmarshal(raw, &loaded) != nil {
 		// Discard anything a failed decode may have partially filled in.
 		return taskRunnerState{}, false
 	}
 	return loaded, true
 }
 // processAlive reports whether a process with the given pid exists. It probes
 // with signal 0, which delivers nothing; EPERM still counts as alive because
 // the probe was refused rather than finding no process. Non-positive pids are
 // reported as not alive without probing.
 func processAlive(pid int) bool {
 	if pid <= 0 {
 		return false
 	}
 	switch syscall.Kill(pid, 0) {
 	case nil, syscall.EPERM:
 		return true
 	default:
 		return false
 	}
 }
 // finalizeTaskForResult stamps t.DoneAt with the current time and sets the
 // terminal status. Cancellation takes precedence (TaskCancelled, "aborted"),
 // then a non-blank errMsg (TaskFailed, errMsg stored verbatim), otherwise
 // TaskDone with an empty error message.
 func finalizeTaskForResult(t *Task, errMsg string, cancelled bool) {
 	finishedAt := time.Now()
 	t.DoneAt = &finishedAt
 	if cancelled {
 		t.Status = TaskCancelled
 		t.ErrMsg = "aborted"
 		return
 	}
 	if strings.TrimSpace(errMsg) == "" {
 		t.Status = TaskDone
 		t.ErrMsg = ""
 		return
 	}
 	t.Status = TaskFailed
 	t.ErrMsg = errMsg
 }
 // executeTaskWithOptions runs the task selected by t.Target, writes progress
 // lines to j via j.append and reports the outcome via j.finish (an empty
 // string on success, otherwise an error message).
 //
 // Most targets need opts.App and fail with "app not configured" when it is nil.
 // A run that produces an archive has the archive's SAT overall status checked
 // and, when a status DB is configured, recorded. If the run ends with an error
 // while ctx is already cancelled, the job is finished as "aborted".
 func executeTaskWithOptions(opts *HandlerOptions, t *Task, j *jobState, ctx context.Context) {
 	if opts == nil {
 		j.append("ERROR: handler options not configured")
 		j.finish("handler options not configured")
 		return
 	}
 	a := opts.App
 	// Log lines already present on the job before this run are treated as the
 	// sign of a recovered job; this is sampled before the first append below.
 	recovered := len(j.lines) > 0
 	j.append(fmt.Sprintf("Starting %s...", t.Name))
 	if recovered {
 		j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
 	}
 	var (
 		archive string
 		err     error
 	)
 	// Each case sets err and, for targets that produce one, archive. A break
 	// inside a case leaves the switch with err set and falls through to the
 	// shared result handling below.
 	switch t.Target {
 	case "nvidia":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		diagLevel := 2
 		if t.params.StressMode {
 			diagLevel = 3
 		}
 		// NOTE(review): diagLevel is always 2 or 3 here, so this condition is
 		// always true and the else branch below is unreachable.
 		if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
 			result, e := a.RunNvidiaAcceptancePackWithOptions(ctx, "", diagLevel, t.params.GPUIndices, j.append)
 			if e != nil {
 				err = e
 			} else {
 				archive = result.Body
 			}
 		} else {
 			archive, err = a.RunNvidiaAcceptancePack("", j.append)
 		}
 	case "nvidia-targeted-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		// Fall back to 300 when no positive duration was supplied.
 		if dur <= 0 {
 			dur = 300
 		}
 		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
 	case "nvidia-bench-perf":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
 			Profile:           t.params.BenchmarkProfile,
 			SizeMB:            t.params.SizeMB,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RunNCCL:           t.params.RunNCCL,
 			ParallelGPUs:      t.params.ParallelGPUs,
 			RampStep:          t.params.RampStep,
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerBenchCtx(ctx, app.DefaultBeeBenchPowerDir, platform.NvidiaBenchmarkOptions{
 			Profile:           t.params.BenchmarkProfile,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RampStep:          t.params.RampStep,
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-autotune":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
 			Profile: t.params.BenchmarkProfile,
 			SizeMB:  t.params.SizeMB,
 		}, t.params.BenchmarkKind, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		// Without an explicit duration, a named burn profile supplies it.
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
 		if planErr != nil {
 			err = planErr
 			break
 		}
 		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
 			dur = rampPlan.DurationSec
 		}
 		if rampPlan.StaggerSeconds > 0 {
 			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
 		}
 		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, rampPlan.StaggerSeconds, j.append)
 	case "nvidia-targeted-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = a.RunNvidiaTargetedPowerPack(ctx, "", dur, t.params.GPUIndices, j.append)
 	case "nvidia-pulse":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = a.RunNvidiaPulseTestPack(ctx, "", dur, t.params.GPUIndices, j.append)
 	case "nvidia-bandwidth":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaBandwidthPack(ctx, "", t.params.GPUIndices, j.append)
 	case "nvidia-interconnect":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNCCLTests(ctx, "", t.params.GPUIndices, j.append)
 	case "nvidia-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
 		if planErr != nil {
 			err = planErr
 			break
 		}
 		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
 			dur = rampPlan.DurationSec
 		}
 		if rampPlan.StaggerSeconds > 0 {
 			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
 		}
 		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
 			DurationSec:       dur,
 			Loader:            t.params.Loader,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			StaggerSeconds:    rampPlan.StaggerSeconds,
 		}, j.append)
 	case "memory":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		sizeMB, passes := resolveMemoryValidatePreset(t.params.BurnProfile, t.params.StressMode)
 		j.append(fmt.Sprintf("Memory validate preset: %d MB x %d pass(es)", sizeMB, passes))
 		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", sizeMB, passes, j.append)
 	case "storage":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = runStorageAcceptancePackCtx(a, ctx, "", t.params.StressMode, j.append)
 	case "cpu":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		// Still no duration: use 1800 in stress mode, 60 otherwise.
 		if dur <= 0 {
 			if t.params.StressMode {
 				dur = 1800
 			} else {
 				dur = 60
 			}
 		}
 		j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
 		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
 	case "amd":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
 	case "amd-mem":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
 	case "amd-bandwidth":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
 	case "amd-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runAMDStressPackCtx(a, ctx, "", dur, j.append)
 	case "memory-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runMemoryStressPackCtx(a, ctx, "", dur, j.append)
 	case "sat-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
 	case "platform-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		runOpts := resolvePlatformStressPreset(t.params.BurnProfile)
 		runOpts.Components = t.params.PlatformComponents
 		archive, err = a.RunPlatformStress(ctx, "", runOpts, j.append)
 	case "audit":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		result, e := a.RunAuditNow(opts.RuntimeMode)
 		if e != nil {
 			err = e
 		} else {
 			// The audit produces no archive; its body is replayed into the job log.
 			for _, line := range splitLines(result.Body) {
 				j.append(line)
 			}
 		}
 	case "support-bundle":
 		// This target does not use opts.App, so there is no nil check on a.
 		j.append("Building support bundle...")
 		archive, err = buildSupportBundle(opts.ExportDir)
 	case "install":
 		if strings.TrimSpace(t.params.Device) == "" {
 			err = fmt.Errorf("device is required")
 			break
 		}
 		installLogPath := platform.InstallLogPath(t.params.Device)
 		j.append("Install log: " + installLogPath)
 		err = streamCmdJob(j, installCommand(ctx, t.params.Device, installLogPath))
 	case "install-to-ram":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		err = a.RunInstallToRAM(ctx, j.append)
 	default:
 		// Unknown targets finish immediately and skip the result handling below.
 		j.append("ERROR: unknown target: " + t.Target)
 		j.finish("unknown target")
 		return
 	}
 	if archive != "" {
 		archivePath := app.ExtractArchivePath(archive)
 		// A run that returned no error is still failed when the archive reports FAILED.
 		if err == nil && app.ReadSATOverallStatus(archivePath) == "FAILED" {
 			err = fmt.Errorf("SAT overall_status=FAILED (see summary.txt)")
 		}
 		// The status DB update is not gated on err: it runs whenever an archive exists.
 		if opts.App != nil && opts.App.StatusDB != nil {
 			app.ApplySATResultToDB(opts.App.StatusDB, t.Target, archivePath)
 		}
 	}
 	if err != nil {
 		// An error seen while ctx is cancelled is reported as an abort, not as a failure.
 		if ctx.Err() != nil {
 			j.append("Aborted.")
 			j.finish("aborted")
 		} else {
 			j.append("ERROR: " + err.Error())
 			j.finish(err.Error())
 		}
 		return
 	}
 	if archive != "" {
 		j.append("Archive: " + archive)
 	}
 	j.finish("")
 }
 // loadPersistedTask reads the JSON task list stored at statePath and rebuilds
 // the Task whose ID equals taskID.
 //
 // Read and decode errors are returned unchanged; when no persisted entry
 // matches, a "task <id> not found" error is returned.
 func loadPersistedTask(statePath, taskID string) (*Task, error) {
 	raw, readErr := os.ReadFile(statePath)
 	if readErr != nil {
 		return nil, readErr
 	}
 	var entries []persistedTask
 	if decodeErr := json.Unmarshal(raw, &entries); decodeErr != nil {
 		return nil, decodeErr
 	}
 	for i := range entries {
 		entry := entries[i]
 		if entry.ID == taskID {
 			restored := new(Task)
 			restored.ID = entry.ID
 			restored.Name = entry.Name
 			restored.Target = entry.Target
 			restored.Priority = entry.Priority
 			restored.Status = entry.Status
 			restored.CreatedAt = entry.CreatedAt
 			restored.StartedAt = entry.StartedAt
 			restored.DoneAt = entry.DoneAt
 			restored.ErrMsg = entry.ErrMsg
 			restored.LogPath = entry.LogPath
 			restored.ArtifactsDir = entry.ArtifactsDir
 			restored.ReportJSONPath = entry.ReportJSONPath
 			restored.ReportHTMLPath = entry.ReportHTMLPath
 			restored.params = entry.Params
 			// Report paths are normalised by the shared helper before the task is handed out.
 			ensureTaskReportPaths(restored)
 			return restored, nil
 		}
 	}
 	return nil, fmt.Errorf("task %s not found", taskID)
 }
 // RunPersistedTask loads the task identified by taskID from
 // <exportDir>/tasks-state.json, executes it in the current process, and
 // records progress in the task runner state so a supervising process can pick
 // up the final status.
 //
 // SIGINT and SIGTERM cancel the task context. The return value is a process
 // exit code: 2 when a required argument is blank, 1 when the task cannot be
 // loaded, the initial runner state cannot be written, or the task finished
 // with an error message, and 0 otherwise.
 func RunPersistedTask(exportDir, taskID string, stdout, stderr io.Writer) int {
 	if strings.TrimSpace(exportDir) == "" || strings.TrimSpace(taskID) == "" {
 		// The CLI exposes this entry point as the "bee-worker" subcommand, so
 		// the hint must name that command rather than a non-existent one.
 		fmt.Fprintln(stderr, "bee bee-worker: --export-dir and --task-id are required")
 		return 2
 	}
 	// A detection failure is only logged; the zero-value runtime info is used.
 	runtimeInfo, err := runtimeenv.Detect("auto")
 	if err != nil {
 		slog.Warn("resolve runtime for bee-worker", "err", err)
 	}
 	opts := &HandlerOptions{
 		ExportDir:   exportDir,
 		App:         app.New(platform.New()),
 		RuntimeMode: runtimeInfo.Mode,
 	}
 	statePath := filepath.Join(exportDir, "tasks-state.json")
 	task, err := loadPersistedTask(statePath, taskID)
 	if err != nil {
 		fmt.Fprintln(stderr, err.Error())
 		return 1
 	}
 	// Fill in start time and status when the persisted record lacks them.
 	if task.StartedAt == nil || task.StartedAt.IsZero() {
 		now := time.Now()
 		task.StartedAt = &now
 	}
 	if task.Status == "" {
 		task.Status = TaskRunning
 	}
 	// Publish PID and "running" before starting work so the task can be
 	// signalled by PID while it runs.
 	if err := writeTaskRunnerState(task, taskRunnerState{
 		PID:       os.Getpid(),
 		Status:    TaskRunning,
 		UpdatedAt: time.Now().UTC(),
 	}); err != nil {
 		fmt.Fprintln(stderr, err.Error())
 		return 1
 	}
 	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
 	defer cancel()
 	j := newTaskJobState(task.LogPath, taskSerialPrefix(task))
 	executeTaskWithOptions(opts, task, j, ctx)
 	// A cancelled context means the run was aborted by a signal.
 	finalizeTaskForResult(task, j.err, ctx.Err() != nil)
 	if err := writeTaskReportArtifacts(task); err != nil {
 		appendJobLog(task.LogPath, "WARN: task report generation failed: "+err.Error())
 	}
 	j.closeLog()
 	// Failure to write the final state is reported but does not change the exit code.
 	if err := writeTaskRunnerState(task, taskRunnerState{
 		PID:       os.Getpid(),
 		Status:    task.Status,
 		Error:     task.ErrMsg,
 		UpdatedAt: time.Now().UTC(),
 	}); err != nil {
 		fmt.Fprintln(stderr, err.Error())
 	}
 	if task.ErrMsg != "" {
 		return 1
 	}
 	return 0
 }
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"log/slog"
 	"net/http"
 	"os"
@@ -13,6 +14,7 @@ import (
 	"sort"
 	"strings"
 	"sync"
 	"syscall"
 	"time"
 	"bee/audit/internal/app"
@@ -32,7 +34,9 @@ const (
 var taskNames = map[string]string{
 	"nvidia":                 "NVIDIA SAT",
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
-	"nvidia-benchmark":       "NVIDIA Benchmark",
+	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
 	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
 	"nvidia-bench-autotune":  "NVIDIA Bee Bench Power Source Autotune",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -108,21 +112,29 @@ type Task struct {
 	ReportHTMLPath string     `json:"report_html_path,omitempty"`
 	// runtime fields (not serialised)
-	job    *jobState
+	job       *jobState
-	params taskParams
+	runnerPID int
 	params    taskParams
 }
 // taskParams holds optional parameters parsed from the run request.
 type taskParams struct {
 	Duration           int      `json:"duration,omitempty"`
-	DiagLevel          int      `json:"diag_level,omitempty"`
+	StressMode         bool     `json:"stress_mode,omitempty"`
 	GPUIndices         []int    `json:"gpu_indices,omitempty"`
 	ExcludeGPUIndices  []int    `json:"exclude_gpu_indices,omitempty"`
 	StaggerGPUStart    bool     `json:"stagger_gpu_start,omitempty"`
 	SizeMB             int      `json:"size_mb,omitempty"`
 	Passes             int      `json:"passes,omitempty"`
 	Loader             string   `json:"loader,omitempty"`
 	BurnProfile        string   `json:"burn_profile,omitempty"`
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
 	BenchmarkKind      string   `json:"benchmark_kind,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
 	RampStep           int      `json:"ramp_step,omitempty"`
 	RampTotal          int      `json:"ramp_total,omitempty"`
 	RampRunID          string   `json:"ramp_run_id,omitempty"`
 	DisplayName        string   `json:"display_name,omitempty"`
 	Device             string   `json:"device,omitempty"` // for install
 	PlatformComponents []string `json:"platform_components,omitempty"`
@@ -149,6 +161,38 @@ type burnPreset struct {
 	DurationSec int
 }
 // nvidiaRampSpec describes the timing of an NVIDIA burn run as produced by
 // resolveNvidiaRampPlan. All values are in seconds.
 type nvidiaRampSpec struct {
 	// DurationSec is the run duration handed to the burn; it starts out as the
 	// burn preset's duration.
 	DurationSec int
 	// StaggerSeconds is the delay applied per additional GPU; it stays zero
 	// when no staggered start is planned.
 	StaggerSeconds int
 	// TotalDurationSec is the overall runtime including the staggered ramp.
 	TotalDurationSec int
 }
 // resolveMemoryValidatePreset maps a burn profile name to the memory validate
 // test size (in MB) and pass count.
 //
 // The profile is matched case-insensitively with surrounding whitespace
 // ignored. Unrecognised or empty profiles fall back to 512 MB x 1 when stress
 // is set and to 256 MB x 1 otherwise.
 func resolveMemoryValidatePreset(profile string, stress bool) (sizeMB, passes int) {
 	key := strings.ToLower(profile)
 	key = strings.TrimSpace(key)
 	if key == "overnight" {
 		return 1024, 2
 	}
 	if key == "acceptance" {
 		return 1024, 1
 	}
 	// "smoke" always gets the small preset, regardless of the stress flag.
 	if key != "smoke" && stress {
 		return 512, 1
 	}
 	return 256, 1
 }
 // taskMayLeaveOrphanWorkers reports whether target (compared
 // case-insensitively, surrounding whitespace ignored) is one of the task
 // targets listed below. Callers use the result to decide whether test worker
 // processes should be killed for that task.
 func taskMayLeaveOrphanWorkers(target string) bool {
 	normalized := strings.ToLower(target)
 	normalized = strings.TrimSpace(normalized)
 	orphanProne := [...]string{
 		"nvidia",
 		"nvidia-targeted-stress",
 		"nvidia-targeted-power",
 		"nvidia-pulse",
 		"nvidia-bandwidth",
 		"nvidia-stress",
 		"nvidia-compute",
 		"nvidia-bench-perf",
 		"memory",
 		"memory-stress",
 		"cpu",
 		"sat-stress",
 		"platform-stress",
 	}
 	for _, name := range orphanProne {
 		if name == normalized {
 			return true
 		}
 	}
 	return false
 }
 func resolveBurnPreset(profile string) burnPreset {
 	switch profile {
 	case "overnight":
@@ -160,6 +204,45 @@ func resolveBurnPreset(profile string) burnPreset {
 	}
 }
 // resolveNvidiaRampPlan computes burn timing for the given burn profile.
 //
 // With staggering disabled, or with exactly one selected GPU, the plan is the
 // burn preset duration with no stagger. With staggering enabled an explicit
 // GPU selection is required. For multiple GPUs the step is 10 minutes for
 // "acceptance", 60 minutes for "overnight", and 2 minutes for any other
 // profile. "overnight" keeps the total at eight hours, extends it to one hour
 // per GPU when that is longer, and rejects totals above ten hours.
 func resolveNvidiaRampPlan(profile string, enabled bool, selected []int) (nvidiaRampSpec, error) {
 	const (
 		minute = 60
 		hour   = 60 * minute
 	)
 	baseSec := resolveBurnPreset(profile).DurationSec
 	spec := nvidiaRampSpec{DurationSec: baseSec, TotalDurationSec: baseSec}
 	if !enabled {
 		return spec, nil
 	}
 	gpus := len(selected)
 	if gpus == 0 {
 		return nvidiaRampSpec{}, fmt.Errorf("staggered NVIDIA burn requires explicit GPU selection")
 	}
 	if gpus == 1 {
 		return spec, nil
 	}
 	// Every GPU after the first adds one stagger step.
 	steps := gpus - 1
 	if profile == "overnight" {
 		spec.StaggerSeconds = hour
 		total := 8 * hour
 		if floor := gpus * hour; floor > total {
 			total = floor
 		}
 		if total > 10*hour {
 			return nvidiaRampSpec{}, fmt.Errorf("overnight staggered NVIDIA burn supports at most 10 GPUs")
 		}
 		spec.TotalDurationSec = total
 		// The total is fixed here, so the per-run duration is what remains after the ramp.
 		spec.DurationSec = total - spec.StaggerSeconds*steps
 		return spec, nil
 	}
 	if profile == "acceptance" {
 		spec.StaggerSeconds = 10 * minute
 	} else {
 		spec.StaggerSeconds = 2 * minute
 	}
 	spec.TotalDurationSec = spec.DurationSec + spec.StaggerSeconds*steps
 	return spec, nil
 }
 func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
 	acceptanceCycles := []platform.PlatformStressCycle{
 		{LoadSec: 85, IdleSec: 5},
@@ -214,11 +297,11 @@ var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
 const maxTaskHistory = 50
 var (
-	runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, sizeMB, passes int, logFunc func(string)) (string, error) {
-		return a.RunMemoryAcceptancePackCtx(ctx, baseDir, logFunc)
+		return a.RunMemoryAcceptancePackCtx(ctx, baseDir, sizeMB, passes, logFunc)
 	}
-	runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, extended bool, logFunc func(string)) (string, error) {
-		return a.RunStorageAcceptancePackCtx(ctx, baseDir, logFunc)
+		return a.RunStorageAcceptancePackCtx(ctx, baseDir, extended, logFunc)
 	}
 	runCPUAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 		return a.RunCPUAcceptancePackCtx(ctx, baseDir, durationSec, logFunc)
@@ -248,6 +331,13 @@ var (
 	installCommand     = func(ctx context.Context, device string, logPath string) *exec.Cmd {
 		return exec.CommandContext(ctx, "bee-install", device, logPath)
 	}
 	externalTaskRunnerCommand = func(exportDir, taskID string) (*exec.Cmd, error) {
 		exe, err := os.Executable()
 		if err != nil {
 			return nil, err
 		}
 		return exec.Command(exe, "bee-worker", "--export-dir", exportDir, "--task-id", taskID), nil
 	}
 )
 // enqueue adds a task to the queue and notifies the worker.
@@ -285,6 +375,11 @@ func (q *taskQueue) prune() {
 // nextPending returns the highest-priority pending task (nil if none).
 func (q *taskQueue) nextPending() *Task {
 	for _, t := range q.tasks {
 		if t.Status == TaskRunning {
 			return nil
 		}
 	}
 	var best *Task
 	for _, t := range q.tasks {
 		if t.Status != TaskPending {
@@ -404,6 +499,7 @@ func (q *taskQueue) startWorker(opts *HandlerOptions) {
 	if !q.started {
 		q.loadLocked()
 		q.started = true
 		q.resumeRunningTasksLocked()
 		goRecoverLoop("task worker", 2*time.Second, q.worker)
 	}
 	hasPending := q.nextPending() != nil
@@ -437,15 +533,12 @@ func (q *taskQueue) worker() {
 				t.StartedAt = &now
 				t.DoneAt = nil
 				t.ErrMsg = ""
-				j := newTaskJobState(t.LogPath, taskSerialPrefix(t))
+				j := newTaskJobState(t.LogPath)
 				t.job = j
 				q.persistLocked()
 				q.mu.Unlock()
-				taskCtx, taskCancel := context.WithCancel(context.Background())
+				q.runTaskExternal(t, j)
 				j.cancel = taskCancel
 				q.executeTask(t, j, taskCtx)
 				taskCancel()
 				q.mu.Lock()
 				q.prune()
@@ -457,6 +550,207 @@ func (q *taskQueue) worker() {
 	}
 }
 // resumeRunningTasksLocked re-attaches control and monitoring to every task
 // in the queue whose status is still TaskRunning, creating a job state for
 // tasks that do not have one yet.
 func (q *taskQueue) resumeRunningTasksLocked() {
 	for _, task := range q.tasks {
 		if task.Status == TaskRunning {
 			if task.job == nil {
 				task.job = newTaskJobState(task.LogPath)
 			}
 			job := task.job
 			q.attachExternalTaskControlsLocked(task, job)
 			q.startRecoveredTaskMonitorLocked(task, job)
 		}
 	}
 }
 // attachExternalTaskControlsLocked installs a cancel callback on j that sends
 // SIGTERM to the task's runner process. It does nothing when t or j is nil.
 func (q *taskQueue) attachExternalTaskControlsLocked(t *Task, j *jobState) {
 	if t != nil && j != nil {
 		j.cancel = func() {
 			// Prefer the PID tracked on the task; otherwise fall back to the
 			// PID recorded in the task runner state.
 			target := t.runnerPID
 			if target <= 0 {
 				state, found := readTaskRunnerState(t)
 				if found {
 					target = state.PID
 				}
 			}
 			if target <= 0 {
 				return
 			}
 			// Best effort: a failed signal delivery is ignored.
 			_ = syscall.Kill(target, syscall.SIGTERM)
 		}
 	}
 }
 // startRecoveredTaskMonitorLocked starts a background monitor for a task
 // whose runner process is already known by PID. The monitor follows the task
 // log, polls until the runner process is gone, drains the log follower, and
 // then finalises the task through finishExternalTask.
 //
 // It does nothing when t or j is nil or when no runner PID is known.
 func (q *taskQueue) startRecoveredTaskMonitorLocked(t *Task, j *jobState) {
 	if t == nil || j == nil || t.runnerPID <= 0 {
 		return
 	}
 	// Snapshot the PID before the goroutine starts. t.runnerPID is reset under
 	// q.mu by finishExternalTask, so polling the field from the goroutine
 	// without the lock would be an unsynchronised read of shared state.
 	pid := t.runnerPID
 	goRecoverOnce("task runner monitor", func() {
 		stopTail := make(chan struct{})
 		doneTail := make(chan struct{})
 		go q.followTaskLog(t, j, stopTail, doneTail)
 		for processAlive(pid) {
 			time.Sleep(500 * time.Millisecond)
 		}
 		// Stop the follower and wait for its final flush before finishing.
 		close(stopTail)
 		<-doneTail
 		q.finishExternalTask(t, j, nil)
 	})
 }
 // runTaskExternal runs task t in a separate runner process and blocks until
 // that process exits. While the runner is alive its log is followed into j;
 // once it exits the task is finalised via finishExternalTask.
 //
 // The log follower is always stopped and drained before the task is
 // finalised, matching the recovered-task monitor. The runner has already
 // exited when cmd.Wait returns, so the final flush reads the log as written
 // and no timing-based grace period is needed.
 func (q *taskQueue) runTaskExternal(t *Task, j *jobState) {
 	stopTail := make(chan struct{})
 	doneTail := make(chan struct{})
 	go q.followTaskLog(t, j, stopTail, doneTail)
 	// stopFollower stops the log follower exactly once and waits for its last
 	// flush. The deferred call keeps the follower from leaking on a panic.
 	followerStopped := false
 	stopFollower := func() {
 		if followerStopped {
 			return
 		}
 		followerStopped = true
 		close(stopTail)
 		<-doneTail
 	}
 	defer stopFollower()
 	// finish drains the follower first so trailing log lines reach j before it
 	// is marked finished.
 	finish := func(runErr error) {
 		stopFollower()
 		q.finishExternalTask(t, j, runErr)
 	}
 	cmd, err := externalTaskRunnerCommand(q.opts.ExportDir, t.ID)
 	if err != nil {
 		j.appendFromLog("ERROR: " + err.Error())
 		finish(err)
 		return
 	}
 	if err := cmd.Start(); err != nil {
 		j.appendFromLog("ERROR: " + err.Error())
 		finish(err)
 		return
 	}
 	// Record the runner PID and install the cancel hook under the queue lock.
 	q.mu.Lock()
 	t.runnerPID = cmd.Process.Pid
 	q.attachExternalTaskControlsLocked(t, j)
 	q.persistLocked()
 	q.mu.Unlock()
 	finish(cmd.Wait())
 }
 // followTaskLog tails the task's log file and forwards each complete,
 // non-empty line to j.appendFromLog until stop is closed. done is closed when
 // the follower returns.
 //
 // Following starts at the file's current size, so content already present is
 // not replayed. The file is polled every 250ms. When stop is closed the
 // remaining content is drained and any unterminated trailing text is
 // forwarded as a last line.
 func (q *taskQueue) followTaskLog(t *Task, j *jobState, stop <-chan struct{}, done chan<- struct{}) {
 	defer close(done)
 	path := ""
 	if t != nil {
 		path = t.LogPath
 	}
 	if strings.TrimSpace(path) == "" {
 		return
 	}
 	// maxFinalChunks bounds the final drain so a file that keeps growing
 	// cannot hold the follower open forever.
 	const maxFinalChunks = 64
 	offset := int64(0)
 	if info, err := os.Stat(path); err == nil {
 		offset = info.Size()
 	}
 	var partial string
 	ticker := time.NewTicker(250 * time.Millisecond)
 	defer ticker.Stop()
 	// flush forwards newly written lines and reports whether another read may
 	// yield more data (bytes were read and no error occurred).
 	flush := func() bool {
 		data, newOffset, err := readTaskLogDelta(path, offset)
 		offset = newOffset
 		if len(data) == 0 {
 			return false
 		}
 		// The offset has already moved past these bytes, so they are processed
 		// even when the read also reported an error. Normalising after joining
 		// with the carried-over fragment handles a CRLF split across two reads.
 		text := strings.ReplaceAll(partial+string(data), "\r\n", "\n")
 		lines := strings.Split(text, "\n")
 		partial = lines[len(lines)-1]
 		for _, line := range lines[:len(lines)-1] {
 			if line == "" {
 				continue
 			}
 			j.appendFromLog(line)
 		}
 		return err == nil
 	}
 	for {
 		select {
 		case <-ticker.C:
 			flush()
 		case <-stop:
 			// A single read is capped in size, so keep reading until the file
 			// is exhausted rather than dropping the tail of a large burst.
 			for i := 0; i < maxFinalChunks && flush(); i++ {
 			}
 			if strings.TrimSpace(partial) != "" {
 				j.appendFromLog(partial)
 			}
 			return
 		}
 	}
 }
 // readTaskLogDelta returns up to 1 MiB of the file at path starting at
 // offset, together with the offset just past the returned bytes.
 //
 // If the file is shorter than offset, reading restarts from the beginning.
 // On open or stat failure the incoming offset is returned unchanged.
 func readTaskLogDelta(path string, offset int64) ([]byte, int64, error) {
 	const maxChunk = 1 << 20
 	file, openErr := os.Open(path)
 	if openErr != nil {
 		return nil, offset, openErr
 	}
 	defer file.Close()
 	stat, statErr := file.Stat()
 	if statErr != nil {
 		return nil, offset, statErr
 	}
 	start := offset
 	if start > stat.Size() {
 		// The file shrank below the last known position; start over.
 		start = 0
 	}
 	if _, seekErr := file.Seek(start, io.SeekStart); seekErr != nil {
 		return nil, start, seekErr
 	}
 	limited := &io.LimitedReader{R: file, N: maxChunk}
 	chunk, readErr := io.ReadAll(limited)
 	return chunk, start + int64(len(chunk)), readErr
 }
 // finishExternalTask records the final outcome of an externally run task,
 // persists it, closes the job, and nudges the queue worker.
 //
 // If the task already carries a final status, only the job is closed and the
 // worker is nudged. Otherwise the outcome is taken from the task runner state
 // when it reports a non-running status; failing that the task is marked
 // failed, using waitErr's message or a generic "exited without final state"
 // message.
 func (q *taskQueue) finishExternalTask(t *Task, j *jobState, waitErr error) {
 	q.mu.Lock()
 	defer q.mu.Unlock()
 	// wake performs a non-blocking send on the worker trigger channel.
 	wake := func() {
 		select {
 		case q.trigger <- struct{}{}:
 		default:
 		}
 	}
 	// settleJob finishes and closes the job unless that has already happened.
 	settleJob := func() {
 		if j == nil || j.isDone() {
 			return
 		}
 		j.finish(t.ErrMsg)
 		j.closeLog()
 	}
 	switch t.Status {
 	case TaskDone, TaskFailed, TaskCancelled:
 		settleJob()
 		wake()
 		return
 	}
 	var doneAt time.Time
 	if state, ok := readTaskRunnerState(t); ok && state.Status != TaskRunning {
 		t.Status, t.ErrMsg = state.Status, state.Error
 		doneAt = state.UpdatedAt
 		if doneAt.IsZero() {
 			doneAt = time.Now()
 		}
 	} else {
 		doneAt = time.Now()
 		t.Status = TaskFailed
 		if waitErr != nil {
 			t.ErrMsg = waitErr.Error()
 		} else {
 			t.ErrMsg = "task runner exited without final state"
 		}
 	}
 	t.DoneAt = &doneAt
 	t.runnerPID = 0
 	q.finalizeTaskArtifactPathsLocked(t)
 	q.persistLocked()
 	settleJob()
 	summary := "finished with status=" + t.Status
 	if t.ErrMsg != "" {
 		summary += " error=" + t.ErrMsg
 	}
 	taskSerialEvent(t, summary)
 	wake()
 }
 func (q *taskQueue) executeTask(t *Task, j *jobState, ctx context.Context) {
 	startedKmsgWatch := false
 	defer q.finalizeTaskRun(t, j)
@@ -507,6 +801,7 @@ func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
 	if err := writeTaskReportArtifacts(t); err != nil {
 		appendJobLog(t.LogPath, "WARN: task report generation failed: "+err.Error())
 	}
 	j.closeLog()
 	if t.ErrMsg != "" {
 		taskSerialEvent(t, "finished with status="+t.Status+" error="+t.ErrMsg)
 		return
@@ -535,8 +830,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 	}
 	a := q.opts.App
 	recovered := len(j.lines) > 0
 	j.append(fmt.Sprintf("Starting %s...", t.Name))
-	if len(j.lines) > 0 {
+	if recovered {
 		j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
 	}
@@ -551,7 +847,10 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		diagLevel := t.params.DiagLevel
+		diagLevel := 2
 		if t.params.StressMode {
 			diagLevel = 3
 		}
 		if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
 			result, e := a.RunNvidiaAcceptancePackWithOptions(
 				ctx, "", diagLevel, t.params.GPUIndices, j.append,
@@ -574,7 +873,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = 300
 		}
 		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
-	case "nvidia-benchmark":
+	case "nvidia-bench-perf":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
@@ -585,7 +884,33 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RunNCCL:           t.params.RunNCCL,
 			ParallelGPUs:      t.params.ParallelGPUs,
 			RampStep:          t.params.RampStep,
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerBenchCtx(ctx, app.DefaultBeeBenchPowerDir, platform.NvidiaBenchmarkOptions{
 			Profile:           t.params.BenchmarkProfile,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RampStep:          t.params.RampStep,
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-autotune":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
 			Profile: t.params.BenchmarkProfile,
 			SizeMB:  t.params.SizeMB,
 		}, t.params.BenchmarkKind, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -595,7 +920,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
-		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
 		if planErr != nil {
 			err = planErr
 			break
 		}
 		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
 			dur = rampPlan.DurationSec
 		}
 		if rampPlan.StaggerSeconds > 0 {
 			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
 		}
 		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, rampPlan.StaggerSeconds, j.append)
 	case "nvidia-targeted-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -627,15 +963,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		dur := t.params.Duration
+		archive, err = a.RunNCCLTests(ctx, "", t.params.GPUIndices, j.append)
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
 			DurationSec: dur,
 			Loader:      platform.NvidiaStressLoaderNCCL,
 			GPUIndices:  t.params.GPUIndices,
 		}, j.append)
 	case "nvidia-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -645,24 +973,38 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
 		if planErr != nil {
 			err = planErr
 			break
 		}
 		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
 			dur = rampPlan.DurationSec
 		}
 		if rampPlan.StaggerSeconds > 0 {
 			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
 		}
 		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
 			DurationSec:       dur,
 			Loader:            t.params.Loader,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			StaggerSeconds:    rampPlan.StaggerSeconds,
 		}, j.append)
 	case "memory":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
+		sizeMB, passes := resolveMemoryValidatePreset(t.params.BurnProfile, t.params.StressMode)
 		j.append(fmt.Sprintf("Memory validate preset: %d MB x %d pass(es)", sizeMB, passes))
 		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", sizeMB, passes, j.append)
 	case "storage":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
+		archive, err = runStorageAcceptancePackCtx(a, ctx, "", t.params.StressMode, j.append)
 	case "cpu":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -673,7 +1015,11 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		if dur <= 0 {
-			dur = 60
+			if t.params.StressMode {
 				dur = 1800
 			} else {
 				dur = 60
 			}
 		}
 		j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
 		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
@@ -853,15 +1199,11 @@ func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
 		taskSerialEvent(t, "finished with status="+t.Status)
 		writeJSON(w, map[string]string{"status": "cancelled"})
 	case TaskRunning:
-		if t.job != nil {
+		if t.job == nil || !t.job.abort() {
-			t.job.abort()
+			writeError(w, http.StatusConflict, "task is not cancellable")
 			return
 		}
-		t.Status = TaskCancelled
+		writeJSON(w, map[string]string{"status": "aborting"})
 		now := time.Now()
 		t.DoneAt = &now
 		globalQueue.persistLocked()
 		taskSerialEvent(t, "finished with status="+t.Status)
 		writeJSON(w, map[string]string{"status": "cancelled"})
 	default:
 		writeError(w, http.StatusConflict, "task is not running or pending")
 	}
@@ -907,9 +1249,6 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
 			if t.job != nil {
 				t.job.abort()
 			}
 			t.Status = TaskCancelled
 			t.DoneAt = &now
 			taskSerialEvent(t, "finished with status="+t.Status)
 			n++
 		}
 	}
@@ -934,6 +1273,9 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
 			if t.job != nil {
 				t.job.abort()
 			}
 			if taskMayLeaveOrphanWorkers(t.Target) {
 				platform.KillTestWorkers()
 			}
 			t.Status = TaskCancelled
 			t.DoneAt = &now
 			taskSerialEvent(t, "finished with status="+t.Status)
@@ -1037,15 +1379,29 @@ func (q *taskQueue) loadLocked() {
 		}
 		q.assignTaskLogPathLocked(t)
 		if t.Status == TaskRunning {
-			// The task was interrupted by a bee-web restart. Child processes
+			state, ok := readTaskRunnerState(t)
-			// (e.g. bee-gpu-burn-worker) survive the restart in their own
+			switch {
-			// process groups and cannot be cancelled retroactively. Mark the
+			case ok && state.Status == TaskRunning && processAlive(state.PID):
-			// task as failed so the user can decide whether to re-run it
+				t.runnerPID = state.PID
-			// rather than blindly re-launching duplicate workers.
+				t.job = newTaskJobState(t.LogPath)
-			now := time.Now()
+			case ok && state.Status != TaskRunning:
-			t.Status = TaskFailed
+				t.runnerPID = state.PID
-			t.DoneAt = &now
+				t.Status = state.Status
-			t.ErrMsg = "interrupted by bee-web restart"
+				t.ErrMsg = state.Error
 				now := state.UpdatedAt
 				if now.IsZero() {
 					now = time.Now()
 				}
 				t.DoneAt = &now
 			default:
 				if taskMayLeaveOrphanWorkers(t.Target) {
 					_ = platform.KillTestWorkers()
 				}
 				now := time.Now()
 				t.Status = TaskFailed
 				t.DoneAt = &now
 				t.ErrMsg = "interrupted by bee-web restart"
 			}
 		} else if t.Status == TaskPending {
 			t.StartedAt = nil
 			t.DoneAt = nil
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -366,7 +366,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	taskReportMetricsDBPath = metricsPath
 	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })
-	benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	benchmarkDir := filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -398,14 +398,14 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	task := &Task{
 		ID:           "task-bench",
-		Name:         "NVIDIA Benchmark",
+		Name:         "NVIDIA Bee Bench Perf",
-		Target:       "nvidia-benchmark",
+		Target:       "nvidia-bench-perf",
 		Status:       TaskDone,
 		CreatedAt:    time.Now().UTC().Add(-time.Minute),
 		ArtifactsDir: artifactsDir,
 	}
 	ensureTaskReportPaths(task)
-	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
+	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000.tar.gz") + "\n"
 	if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -420,9 +420,9 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	html := string(body)
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score for this benchmark task.`,
-		`NVIDIA H100 PCIe / GPU 0`,
+		`GPU 0`,
 		`1176.25`,
 	} {
 		if !strings.Contains(html, needle) {
@@ -491,6 +491,83 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }
 // TestResolveNvidiaRampPlan checks the ramp plan produced for each burn
 // profile, with and without staggering, including both error cases.
 func TestResolveNvidiaRampPlan(t *testing.T) {
 	const (
 		minute = 60
 		hour   = 60 * minute
 	)
 	type rampCase struct {
 		name     string
 		profile  string
 		enabled  bool
 		selected []int
 		want     nvidiaRampSpec
 		wantErr  string
 	}
 	cases := []rampCase{
 		{
 			name:     "disabled uses base preset",
 			profile:  "acceptance",
 			selected: []int{0, 1},
 			want:     nvidiaRampSpec{DurationSec: hour, TotalDurationSec: hour},
 		},
 		{
 			name:     "smoke ramp uses two minute steps",
 			profile:  "smoke",
 			enabled:  true,
 			selected: []int{0, 1, 2},
 			want:     nvidiaRampSpec{DurationSec: 5 * minute, StaggerSeconds: 2 * minute, TotalDurationSec: 9 * minute},
 		},
 		{
 			name:     "acceptance ramp uses ten minute steps",
 			profile:  "acceptance",
 			enabled:  true,
 			selected: []int{0, 1, 2},
 			want:     nvidiaRampSpec{DurationSec: hour, StaggerSeconds: 10 * minute, TotalDurationSec: 80 * minute},
 		},
 		{
 			name:     "overnight stays at eight hours when possible",
 			profile:  "overnight",
 			enabled:  true,
 			selected: []int{0, 1, 2},
 			want:     nvidiaRampSpec{DurationSec: 6 * hour, StaggerSeconds: hour, TotalDurationSec: 8 * hour},
 		},
 		{
 			name:     "overnight extends to keep one hour after final gpu",
 			profile:  "overnight",
 			enabled:  true,
 			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8},
 			want:     nvidiaRampSpec{DurationSec: hour, StaggerSeconds: hour, TotalDurationSec: 9 * hour},
 		},
 		{
 			name:     "overnight rejects impossible gpu count",
 			profile:  "overnight",
 			enabled:  true,
 			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
 			wantErr:  "at most 10 GPUs",
 		},
 		{
 			name:    "enabled requires explicit selection",
 			profile: "smoke",
 			enabled: true,
 			wantErr: "requires explicit GPU selection",
 		},
 	}
 	// check runs one table entry against resolveNvidiaRampPlan.
 	check := func(t *testing.T, c rampCase) {
 		got, err := resolveNvidiaRampPlan(c.profile, c.enabled, c.selected)
 		switch {
 		case c.wantErr != "":
 			if err == nil || !strings.Contains(err.Error(), c.wantErr) {
 				t.Fatalf("err=%v want substring %q", err, c.wantErr)
 			}
 		case err != nil:
 			t.Fatalf("resolveNvidiaRampPlan error: %v", err)
 		case got != c.want:
 			t.Fatalf("resolveNvidiaRampPlan(%q, %t, %v)=%+v want %+v", c.profile, c.enabled, c.selected, got, c.want)
 		}
 	}
 	for _, c := range cases {
 		c := c
 		t.Run(c.name, func(t *testing.T) { check(t, c) })
 	}
 }
 func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
 	tests := []struct {
 		loader string
@@ -595,6 +672,36 @@ func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
 	}
 }
 func TestRunTaskUsesQuickPresetForMemoryValidate(t *testing.T) {
 	var gotSizeMB, gotPasses int
 	q := &taskQueue{
 		opts: &HandlerOptions{App: &app.App{}},
 	}
 	tk := &Task{
 		ID:        "mem-validate-1",
 		Name:      "Memory SAT",
 		Target:    "memory",
 		Status:    TaskRunning,
 		CreatedAt: time.Now(),
 		params:    taskParams{StressMode: true},
 	}
 	j := &jobState{}
 	orig := runMemoryAcceptancePackCtx
 	runMemoryAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, sizeMB, passes int, _ func(string)) (string, error) {
 		gotSizeMB = sizeMB
 		gotPasses = passes
 		return "/tmp/memory-validate.tar.gz", nil
 	}
 	defer func() { runMemoryAcceptancePackCtx = orig }()
 	q.runTask(tk, j, context.Background())
 	if gotSizeMB != 512 || gotPasses != 1 {
 		t.Fatalf("memory validate preset=%dMB x%d want 512MB x1", gotSizeMB, gotPasses)
 	}
 }
 func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
 	dir := t.TempDir()
 	q := &taskQueue{
--- a/bible-local/docs/benchmark-clock-calibration.md
+++ b/bible-local/docs/benchmark-clock-calibration.md
@@ -0,0 +1,277 @@
 # Benchmark clock calibration research
 ## Benchmark methodology versioning
 Every benchmark methodology change must bump the benchmark version constant in
 source code by exactly `+1`.
 Methodology change means any change that affects comparability of benchmark
 results, including for example:
 - phase durations or phase order
 - enabled/disabled precisions
 - fallback rules
 - normalization rules
 - score formulas or weights
 - degradation thresholds
 - power calibration logic
 - thermal/power penalty logic
 Requirements:
 - benchmark version must be stored in source code as an explicit version
  constant, not inferred from git tag or build metadata
 - benchmark report must always print the benchmark version
 - `result.json` must always include the benchmark version
 - results from different benchmark versions must be treated as non-comparable by
  default
 Purpose:
 - prevent accidental comparison of runs produced by different methodologies
 - make historical benchmark archives self-describing even when detached from git
 - force deliberate version bumps whenever scoring or execution semantics change
 ## Status
 In progress. Baseline data from production servers pending.
 ## Background
 The benchmark locks GPU clocks to `MaxGraphicsClockMHz` (boost) via `nvidia-smi -lgc`
 before the steady-state phase. The metric `low_sm_clock_vs_target` fires when
 `avg_steady_clock < locked_target * 0.90`.
 Problem: boost clock is the theoretical maximum under ideal cooling. In practice,
 even a healthy GPU in a non-ideal server will sustain clocks well below boost.
 The 90% threshold has no empirical basis.
 ## Key observations (2026-04-06)
 ### H100 PCIe — new card, server not designed for it
 - avg clock 1384 MHz, P95 1560 MHz (unstable, probable boost 1755 MHz)
 - Thermal sustain: 0.0 (sw_thermal covers entire steady window)
 - Stability: 70.0 — clocks erratic, no equilibrium found
 - Degradation: power_capped, thermal_limited, low_sm_clock_vs_target, variance_too_high
 ### H200 NVL — new card, server not designed for it
 - avg clock = P95 = 1635 MHz (perfectly stable)
 - Thermal sustain: 0.0 (sw_thermal + sw_power cover entire steady window)
 - Stability: 92.0 — found stable thermal equilibrium at 1635 MHz
 - Degradation: power_capped, thermal_limited
 - Compute: 989 TOPS — card is computing correctly for its frequency
 ### Key insight
 The meaningful distinction is not *whether* the card throttles but *how stably*
 it throttles. H200 found a thermal equilibrium (avg == P95, Stability 92),
 H100 did not (avg << P95, Stability 70). Both are new cards; the H100's
 instability may reflect a more severe thermal mismatch or a card issue.
 `sw_power ≈ sw_thermal` pattern = server cooling constraint, card likely OK.
 `hw_thermal >> sw_thermal` pattern = card itself overheating, investigate.
 ## Hypothesis for baseline
 After testing on servers designed for their GPUs (proper cooling):
 - Healthy GPU under sustained load will run at a stable fraction of boost
 - Expected: avg_steady ≈ 80–95% of boost depending on model and TDP class
 - Base clock (`clocks.base.gr`) may be a better reference than boost:
  a healthy card under real workload should comfortably exceed base clock
 ## Baseline: H100 PCIe HBM2e — designed server (2026-04-06, 10 samples)
 Source: external stress test tool, ~90s runs, designed server, adequate power.
 ### Healthy fingerprint
 - **Power**: hits cap ~340–360W immediately, stays flat throughout — HEALTHY
 - **Clock**: starts ~1750 MHz, oscillates and declines to ~1540–1600 MHz by 90s
  - Avg steady (visual): **~1580–1620 MHz**
  - vs boost 1755 MHz: **~91–92%**
  - Oscillation is NORMAL — this is the boost algorithm balancing under power cap
  - Stable power + oscillating clocks = healthy power-cap behavior
 - **Temperature**: linear rise ~38°C → 75–80°C over 90s (no runaway)
 - **Consistency**: all 10 samples within ±20 MHz — very repeatable
 ### Characteristic pattern
 Flat power line + oscillating/declining clock line = GPU correctly managed by
 power cap algorithm. Do NOT flag this as instability.
 ### Clock CV implication
 The healthy oscillation WILL produce moderate ClockCVPct (~5–10%).
 The current `variance_too_high` threshold (StabilityScore < 85) may fire on
 healthy HBM2e PCIe cards. Needs recalibration.
 ---
 ## Baseline: H100 HBM3 OEM SXM Custom (restored) — 2 confirmed samples
 Source: pytorch_training_loop stress test, 120s (90s stress + 30s cooldown).
 Confirmed GPU: NVIDIA H100 80GB HBM3, GH100 rev a1.
 ### GPU clock reference (from nvidia-smi, idle):
 - base_clock_mhz: **1095**
 - boost_clock_mhz: **1755** (nvidia-smi `clocks.max.graphics` at idle)
 - achieved_max_clock_mhz: **1980** (actual burst max observed by tool)
 - Our benchmark locks to `clocks.max.graphics` = likely 1980 MHz for this chip
 ### Observed under 700W sustained load (both samples nearly identical):
 - Power: ~700W flat — SXM slot, adequate power confirmed
 - Clock steady range: **~1380–1480 MHz**, avg **~1420–1460 MHz**
 - vs 1980 MHz (lock target): **72–74%** — severely below
 - vs 1755 MHz (nvidia-smi boost): **81–83%**
 - vs 1095 MHz (base): 130% — above base but far below expected for SXM
 - Clock/Watt: ~2.1 MHz/W vs HBM2e ~4.6 MHz/W — 2× worse efficiency
 - Temperature: 38°C → 79–80°C (same rate as HBM2e)
 - Oscillation: present, similar character to HBM2e but at much lower frequency
 ### Diagnosis
 These restored cards are degraded. A healthy H100 SXM in a designed server
 (DGX H100, HGX H100) should sustain ~1800–1900 MHz at 700W (~91–96% of 1980).
 The 72–74% result is a clear signal of silicon or VRM degradation from the
 refurbishment process.
 ### Clock pattern note
 Images 8/9 (previously marked as "HBM3 restored") are now confirmed identical
 to images 19/20. Both sample sets show same degraded pattern — same batch.
 ---
 ## Baseline matrix (filled where data available)
 | GPU model | Config | Avg clock steady | vs boost | Clock/Watt | Notes |
 |---|---|---|---|---|---|
 | H100 PCIe HBM2e | designed server | 1580–1620 MHz | 91–92% | ~4.6 MHz/W | 10 samples, healthy |
 | H100 SXM HBM3 restored | 700W full | 1420–1460 MHz | 72–74% of 1980 | ~2.1 MHz/W | 4 samples confirmed, degraded |
 | H100 SXM HBM3 healthy | designed | ~1800–1900 MHz est. | ~91–96% est. | ~2.7 MHz/W est. | need real baseline |
 | H200 NVL | designed | TBD | TBD | TBD | need baseline |
 ---
 ## H100 official spec (from NVIDIA datasheet)
 Source: NVIDIA H100 Tensor Core GPU Datasheet (image 23, 2026-04-06).
 All TOPS marked * are with structural sparsity enabled. Divide by 2 for dense.
 | Model | FP16 Tensor (dense) | TF32 (dense) | FP8 (dense) | TDP | Memory |
 |---|---|---|---|---|---|
 | H100 80GB PCIe | 756 TFLOPS | 378 TFLOPS | 1,513 TFLOPS | 350W | HBM2e |
 | H100 NVL 94GB PCIe | 990 TFLOPS | 495 TFLOPS | 1,980 TFLOPS | 400W | HBM3 |
 | H100 80GB SXM (BQQV) | 989 TFLOPS | 494 TFLOPS | — | 700W | HBM3 |
 | H100 94GB SXM (BUBB) | 989 TFLOPS | 494 TFLOPS | — | 700W | HBM2e |
 Notes:
 - SXM boards do NOT list FP8 peak in this table (field empty)
 - fp8_e5m2 is unsupported on H100 PCIe HBM2e — confirmed in our tests
 - Tensor Cores: PCIe = 456, SXM = 528 (16% more on SXM)
 ## Observed efficiency (H100 80GB PCIe, throttled server)
 From the report in this session (power+thermal throttle throughout steady):
 | Precision | Measured | Spec (dense) | % of spec |
 |---|---|---|---|
 | fp16_tensor | 329 TOPS | 756 TFLOPS | 44% |
 | fp32_tf32 | 115 TOPS | 378 TFLOPS | 30% |
 | fp8_e4m3 | 505 TOPS | 1,513 TFLOPS | 33% |
 33–44% of spec is expected given sustained power+thermal throttle (avg clock
 1384 MHz vs boost 1755 MHz = 79%). The GPU is computing correctly for its
 actual frequency — the low TOPS comes from throttle, not silicon defect.
 ## H200 official spec (from NVIDIA datasheet, image 24, 2026-04-06)
 Format: without sparsity / with sparsity.
 | Model | FP16 Tensor (dense) | TF32 (dense) | FP8 (dense) | TDP | Memory |
 |---|---|---|---|---|---|
 | H200 NVL PCIe | 836 TFLOPS | 418 TFLOPS | 1,570 TFLOPS | 600W | HBM3e 141GB |
 | H200 SXM | 990 TFLOPS | 495 TFLOPS | 1,979 TFLOPS | 700W | HBM3e 141GB |
 ## Observed efficiency (H200 NVL PCIe, throttled non-designed server)
 Avg clock 1635 MHz (62% of boost ~2619 MHz). Entire steady in thermal throttle.
 | Precision | Measured | Spec (dense) | % of spec |
 |---|---|---|---|
 | fp16_tensor | 340 TOPS | 836 TFLOPS | 41% |
 | fp32_tf32 | 120 TOPS | 418 TFLOPS | 29% |
 | fp8_e4m3 | 529 TOPS | 1,570 TFLOPS | 34% |
 Comparable to H100 PCIe efficiency (33–44%) despite different architecture —
 both are throttle-limited. Confirms that % of spec is not a quality signal,
 it reflects the thermal environment. tops_per_sm_per_ghz is the right metric.
 ## Real-world GEMM efficiency reference (2026-04-06, web research)
 Sources: SemiAnalysis MI300X vs H100 vs H200 training benchmark; cuBLAS optimization
 worklog (hamzaelshafie.bearblog.dev); Lambda AI H100 performance analysis.
 ### What healthy systems actually achieve:
 - H100 SXM in designed server: **~720 TFLOPS FP16 = ~73% of spec**
 - cuBLAS large square GEMM (8192³): up to **~83% flop utilization**
 - H200 NVL PCIe: no public data, extrapolating ~73% → ~610 TFLOPS FP16
 ### Our results vs expectation:
 | GPU | Our FP16 | Expected (73%) | Our % of spec | Gap |
 |---|---|---|---|---|
 | H100 PCIe HBM2e | 329 TOPS | ~552 TFLOPS | 44% | ~1.7× below |
 | H200 NVL PCIe | 340 TOPS | ~610 TFLOPS | 41% | ~1.8× below |
 Our results are roughly **half** of what a healthy system achieves even under throttle.
 This is NOT normal — 30-44% is not the industry baseline.
 ### Likely causes of the gap (in order of probability):
 1. **Thermal throttle** — confirmed, sw_thermal covers entire steady window
 2. **Power limit below TDP** — GPU may be software-limited below 350W/600W.
   Previous user may have set a lower limit via nvidia-smi -pl and it was not
   reset. Our normalization sets clock locks but does NOT reset power limit.
   Key check: `nvidia-smi -q | grep "Power Limit"` — default vs enforced.
 3. **Matrix size** — ruled out. bee-gpu-burn uses 4096×4096×4096 for fp16,
   8192×8192×4096 for fp8. These are large enough for peak tensor utilization.
 ### Power limit gap analysis (H100 PCIe):
 - Avg clock 1384 MHz = 79% of boost 1755 MHz
 - Expected TOPS at 79% clock: 756 × 0.79 ≈ 597 TFLOPS
 - Actually measured: 329 TOPS = 55% of that estimate
 - Remaining gap after accounting for clock throttle: ~45%
 - Most likely explanation: enforced power limit < 350W TDP, further reducing
  sustainable clock beyond what sw_thermal alone would cause.
 ### Action item:
 Add `power.limit` (enforced) AND `power.default_limit` to queryBenchmarkGPUInfo
 so result.json shows if the card was pre-configured with a non-default limit.
 If enforced < default × 0.95 → add finding "GPU power limit is below default TDP".
 ### CPU/RAM impact on GPU FLOPS:
 None. Pure on-GPU GEMM is fully compute-bound once data is in VRAM.
 CPU core count and host RAM are irrelevant.
 ## Compute efficiency metric (proposed, no hardcode)
 Instead of comparing TOPS to a hardcoded spec, compute:
  tops_per_sm_per_ghz = measured_tops / (sm_count × avg_clock_ghz)
 This is model-agnostic. A GPU computing correctly at its actual frequency
 will show a consistent tops_per_sm_per_ghz regardless of throttle level.
 A GPU with degraded silicon will show low tops_per_sm_per_ghz even at
 normal clocks.
 SM count is queryable: nvidia-smi --query-gpu=attribute.multiprocessor_count
 (needs to be added to queryBenchmarkGPUInfo).
 Reference values to establish after baseline runs:
 - H100 PCIe fp16_tensor: TBD tops/SM/GHz
 - H100 SXM fp16_tensor: TBD tops/SM/GHz
 ## Proposed threshold changes (pending more data)
 1. **`low_sm_clock_vs_target`**: lower threshold from 90% to 85% based on observed
   91–92% on healthy HBM2e. Or remove entirely — sw_power/sw_thermal already
   capture the root cause.
 2. **`variance_too_high`** (StabilityScore < 85): healthy HBM2e WILL oscillate
   under power cap. Consider suppressing this flag when power is flat and usage
   is 100% (oscillation is expected). Or lower threshold to 70.
 3. **New signal: MHz/Watt efficiency**: if base_graphics_clock_mhz is available,
   ratio avg_clock / power_w could identify degraded silicon (HBM3 restored S1
   would have been caught by this).
 Decision deferred until baseline on SXM designed servers collected.
--- a/bible-local/docs/gpu-model-propagation.md
+++ b/bible-local/docs/gpu-model-propagation.md
@@ -0,0 +1,121 @@
 # GPU Model Name Propagation
 How GPU model names are detected, stored, and displayed throughout the project.
 ---
 ## Detection Sources
 There are **three separate pipelines** for GPU model names — they use different structs and don't share state.
 ### Pipeline A — Live / SAT (nvidia-smi query at runtime)
 **File:** `audit/internal/platform/sat.go`
 - `ListNvidiaGPUs()` → `NvidiaGPU.Name` (field: `name`, from `nvidia-smi --query-gpu=index,name,...`)
 - `ListNvidiaGPUStatuses()` → `NvidiaGPUStatus.Name`
 - Used by: GPU selection UI, live metrics labels, burn/stress test logic
 ### Pipeline B — Benchmark results
 **File:** `audit/internal/platform/benchmark.go`, line 124
 - `queryBenchmarkGPUInfo(selected)` → `benchmarkGPUInfo.Name`
 - Stored in `BenchmarkGPUResult.Name` (`json:"name,omitempty"`)
 - Used by: benchmark history table, benchmark report
 ### Pipeline C — Hardware audit JSON (PCIe schema)
 **File:** `audit/internal/schema/hardware.go`
 - `HardwarePCIeDevice.Model *string` (field name is **Model**, not Name)
 - For AMD GPUs: populated by `audit/internal/collector/amdgpu.go` from `info.Product`
 - For NVIDIA GPUs: **NOT populated** by `audit/internal/collector/nvidia.go` — the NVIDIA enricher sets telemetry/status but skips the Model field
 - Used by: hardware summary page (`hwDescribeGPU` in `pages.go:487`)
 ---
 ## Key Inconsistency: NVIDIA PCIe Model is Never Set
 `audit/internal/collector/nvidia.go` — `enrichPCIeWithNVIDIAData()` enriches NVIDIA PCIe devices with telemetry and status but does **not** populate `HardwarePCIeDevice.Model`.
 This means:
 - Hardware summary page shows "Unknown GPU" for all NVIDIA devices (falls back at `pages.go:486`)
 - AMD GPUs do have their model populated
 The fix would be: copy `gpu.Name` from the SAT pipeline into `dev.Model` inside `enrichPCIeWithNVIDIAData`.
 ---
 ## Benchmark History "Unknown GPU" Issue
 **Symptom:** Benchmark history table shows "GPU #N — Unknown GPU" columns instead of real GPU model names.
 **Root cause:** `BenchmarkGPUResult.Name` has tag `json:"name,omitempty"`. If `queryBenchmarkGPUInfo()` fails (warns at `benchmark.go:126`) or returns empty names, the Name field is never set and is omitted from JSON. Loaded results have empty Name → falls back to "Unknown GPU" at `pages.go:2226, 2237`.
 This happens for:
 - Older result files saved before the `Name` field was added
 - Runs where nvidia-smi query failed before the benchmark started
 ---
 ## Fallback Strings — Current State
 | Location | File | Fallback string |
 |---|---|---|
 | Hardware summary (PCIe) | `pages.go:486` | `"Unknown GPU"` |
 | Benchmark report summary | `benchmark_report.go:43` | `"Unknown GPU"` |
 | Benchmark report scorecard | `benchmark_report.go:93` | `"Unknown"` ← inconsistent |
 | Benchmark report detail | `benchmark_report.go:122` | `"Unknown GPU"` |
 | Benchmark history per-GPU col | `pages.go:2226` | `"Unknown GPU"` |
 | Benchmark history parallel col | `pages.go:2237` | `"Unknown GPU"` |
 | SAT status file write | `sat.go:922` | `"unknown"` ← lowercase, inconsistent |
 | GPU selection API | `api.go:163` | `"GPU N"` (no "Unknown") |
 **Rule:** all UI fallbacks should use `"Unknown GPU"`. The two outliers are `benchmark_report.go:93` (`"Unknown"`) and `sat.go:922` (`"unknown"`).
 ---
 ## GPU Selection UI
 **File:** `audit/internal/webui/pages.go`
 - Source: `GET /api/gpus` → `api.go` → `ListNvidiaGPUs()` → live nvidia-smi
 - Render: `'GPU ' + gpu.index + ' — ' + gpu.name + ' · ' + mem`
 - Fallback: `gpu.name || 'GPU ' + idx` (JS, line ~1432)
 This always shows the correct model because it queries nvidia-smi live. It is **not** connected to benchmark result data.
 ---
 ## Data Flow Summary
 ```
 nvidia-smi (live)
  └─ ListNvidiaGPUs() → NvidiaGPU.Name
       ├─ GPU selection UI (always correct)
       ├─ Live metrics labels (charts_svg.go)
       └─ SAT/burn status file (sat.go)
 nvidia-smi (at benchmark start)
  └─ queryBenchmarkGPUInfo() → benchmarkGPUInfo.Name
       └─ BenchmarkGPUResult.Name (json:"name,omitempty")
            ├─ Benchmark report
            └─ Benchmark history table columns
 nvidia-smi / lspci (audit collection)
  └─ HardwarePCIeDevice.Model (NVIDIA: NOT populated; AMD: populated)
       └─ Hardware summary page hwDescribeGPU()
 ```
 ---
 ## Fixed Issues
 All previously open items are resolved:
 1. **NVIDIA PCIe Model** — `enrichPCIeWithNVIDIAData()` sets `dev.Model = &v` (`nvidia.go:78`).
 2. **Fallback consistency** — `sat.go` and `benchmark_report.go` both use `"Unknown GPU"`.
 3. **`tops_per_sm_per_ghz`** — computed in `benchmark.go` and stored in `BenchmarkGPUScore.TOPSPerSMPerGHz`.
 4. **`MultiprocessorCount`, `PowerLimitW`, `DefaultPowerLimitW`** — present in `benchmark_types.go`.
 5. **Old benchmark JSONs** — no fix possible for already-saved results with missing names (display-only issue).
--- a/bible-local/docs/iso-build-rules.md
+++ b/bible-local/docs/iso-build-rules.md
@@ -15,6 +15,41 @@ This applies to:
 - `iso/builder/config/package-lists/*.list.chroot`
 - Any package referenced in bootloader configs, hooks, or overlay scripts
 ## Bootloader sync rule
 The ISO has two independent bootloader configs that must be kept in sync manually:
 | File | Used by |
 |------|---------|
 | `config/bootloaders/grub-efi/grub.cfg` | UEFI (all modern servers) |
 | `config/bootloaders/isolinux/live.cfg.in` | CSM / legacy BIOS (syslinux) |
 live-build does NOT derive one from the other. Any new boot entry, kernel parameter
 change, or new mode added to one file must be manually mirrored in the other.
 **Canonical entry list** (both files must have all of these):
 | Label | Key params |
 |-------|-----------|
 | normal (default) | `nomodeset bee.nvidia.mode=normal` + full param set |
 | load to RAM | `toram nomodeset bee.nvidia.mode=normal` + full param set |
 | GSP=off | `nomodeset bee.nvidia.mode=gsp-off` + full param set |
 | KMS | no `nomodeset`, `bee.nvidia.mode=normal` + full param set |
 | KMS + GSP=off | no `nomodeset`, `bee.nvidia.mode=gsp-off` + full param set |
 | fail-safe | `nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp` |
 **Full standard param set** (append after `@APPEND_LIVE@` / `nomodeset` flags):
 ```
 net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always
 numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1
 nowatchdog nosoftlockup
 ```
 (fail-safe is the exception — it deliberately uses minimal params.)
 **Historical note:** `grub-pc/` was mistakenly used instead of `grub-efi/` until v8.25.
 live-build reads `config/bootloaders/grub-efi/` for UEFI because the build is
 configured with `--bootloaders "grub-efi,syslinux"`. Directory `grub-pc` is ignored.
 ## Memtest rule
 Do not assume live-build's built-in memtest integration is sufficient for `bee`.
--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -1,12 +1,13 @@
 DEBIAN_VERSION=12
 DEBIAN_KERNEL_ABI=auto
 NVIDIA_DRIVER_VERSION=590.48.01
 NVIDIA_FABRICMANAGER_VERSION=590.48.01-1
 NCCL_VERSION=2.28.9-1
 NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
 NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
-CUBLAS_VERSION=13.0.2.14-1
+CUBLAS_VERSION=13.1.1.3-1
 CUDA_USERSPACE_VERSION=13.0.96-1
 DCGM_VERSION=4.5.3-1
 JOHN_JUMBO_COMMIT=67fcf9fe5a
@@ -21,3 +22,4 @@ HIPBLASLT_VERSION=0.10.0.60304-76~22.04
 COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
 MEMTEST_VERSION=6.10-4
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -23,16 +23,17 @@ lb config noauto \
    --bootloaders "grub-efi,syslinux" \
    --debian-installer none \
    --archive-areas "main contrib non-free non-free-firmware" \
-    --mirror-bootstrap "https://deb.debian.org/debian" \
+    --mirror-bootstrap "http://mirror.mephi.ru/debian/" \
-    --mirror-chroot "https://deb.debian.org/debian" \
+    --mirror-chroot "http://mirror.mephi.ru/debian/" \
-    --mirror-binary "https://deb.debian.org/debian" \
+    --mirror-binary "http://mirror.mephi.ru/debian/" \
    --security true \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
    --memtest memtest86+ \
    --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
    --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
-    --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
+    --bootappend-live "boot=live components video=1920x1080 console=ttyS0,115200n8 console=tty0 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
    --debootstrap-options "--include=ca-certificates" \
    --apt-recommends false \
    --chroot-squashfs-compression-type zstd \
    "${@}"
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -33,10 +33,10 @@ typedef void *CUstream;
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
 #define MAX_STRESS_STREAMS 16
 #define MAX_CUBLAS_PROFILES 5
 #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
 #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
-#define STRESS_LAUNCH_DEPTH 8
+#define MAX_SINGLE_PRECISION_STREAMS 4
 #define MAX_SINGLE_PRECISION_PROFILE_BUDGET_BYTES ((size_t)2u * 1024u * 1024u * 1024u)
 static const char *ptx_source =
    ".version 6.0\n"
@@ -298,6 +298,13 @@ static int choose_stream_count(int mp_count, int planned_profiles, size_t total_
    return stream_count;
 }
 /* Caps a profile budget at MAX_SINGLE_PRECISION_PROFILE_BUDGET_BYTES.
  * Budgets at or below the cap are returned unchanged. */
 static size_t clamp_single_precision_profile_budget(size_t profile_budget_bytes) {
    const size_t cap = MAX_SINGLE_PRECISION_PROFILE_BUDGET_BYTES;
    return profile_budget_bytes > cap ? cap : profile_budget_bytes;
 }
 static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
    if (!api->cuStreamDestroy) {
        return;
@@ -344,7 +351,6 @@ static int run_ptx_fallback(struct cuda_api *api,
    unsigned long iterations = 0;
    int mp_count = 0;
    int stream_count = 1;
    int launches_per_wave = 0;
    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "driver-ptx");
@@ -419,44 +425,42 @@ static int run_ptx_fallback(struct cuda_api *api,
    unsigned int threads = 256;
-    double start = now_seconds();
+    double deadline = now_seconds() + (double)seconds;
-    double deadline = start + (double)seconds;
+    double next_sync = now_seconds() + 1.0;
    while (now_seconds() < deadline) {
-        launches_per_wave = 0;
+        int launched = 0;
-        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+        for (int lane = 0; lane < stream_count; lane++) {
-            int launched_this_batch = 0;
+            unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
-            for (int lane = 0; lane < stream_count; lane++) {
+            if (!check_rc(api,
-                unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
+                          "cuLaunchKernel",
-                if (!check_rc(api,
+                          api->cuLaunchKernel(kernel,
-                              "cuLaunchKernel",
+                                              blocks,
-                              api->cuLaunchKernel(kernel,
+                                              1,
-                                                  blocks,
+                                              1,
-                                                  1,
+                                              threads,
-                                                  1,
+                                              1,
-                                                  threads,
+                                              1,
-                                                  1,
+                                              0,
-                                                  1,
+                                              streams[lane],
-                                                  0,
+                                              params[lane],
-                                                  streams[lane],
+                                              NULL))) {
-                                                  params[lane],
+                goto fail;
                                                  NULL))) {
                    goto fail;
                }
                launches_per_wave++;
                launched_this_batch++;
            }
            if (launched_this_batch <= 0) {
                break;
            }
            launched++;
            iterations++;
        }
-        if (launches_per_wave <= 0) {
+        if (launched <= 0) {
            goto fail;
        }
-        if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
+        double now = now_seconds();
-            goto fail;
+        if (now >= next_sync || now >= deadline) {
            if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
                goto fail;
            }
            next_sync = now + 1.0;
        }
        iterations += (unsigned long)launches_per_wave;
    }
    api->cuCtxSynchronize();
    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem[0], sizeof(sample)))) {
        goto fail;
@@ -468,11 +472,10 @@ static int run_ptx_fallback(struct cuda_api *api,
    report->iterations = iterations;
    snprintf(report->details,
             sizeof(report->details),
-             "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
+             "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d per_stream_mb=%zu iterations=%lu\n",
             size_mb,
             report->buffer_mb,
             report->stream_count,
             STRESS_LAUNCH_DEPTH,
             bytes_per_stream[0] / (1024u * 1024u),
             iterations);
@@ -606,6 +609,20 @@ struct prepared_profile {
 };
 static const struct profile_desc k_profiles[] = {
    {
        "fp64",
        "fp64",
        80,
        1,
        0,
        0,
        8,
        CUDA_R_64F,
        CUDA_R_64F,
        CUDA_R_64F,
        CUDA_R_64F,
        CUBLAS_COMPUTE_64F,
    },
    {
        "fp32_tf32",
        "fp32",
@@ -634,6 +651,20 @@ static const struct profile_desc k_profiles[] = {
        CUDA_R_16F,
        CUBLAS_COMPUTE_32F_FAST_16F,
    },
    {
        "int8_tensor",
        "int8",
        75,
        1,
        0,
        0,
        128,
        CUDA_R_8I,
        CUDA_R_8I,
        CUDA_R_32I,
        CUDA_R_32I,
        CUBLAS_COMPUTE_32I,
    },
    {
        "fp8_e4m3",
        "fp8",
@@ -680,6 +711,21 @@ static const struct profile_desc k_profiles[] = {
 #endif
 };
 #define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
 /* Decides whether a profile takes part in the current run.
  *
  * desc             - profile descriptor; must be enabled and have
  *                    min_cc <= cc to be considered at all.
  * cc               - value compared against desc->min_cc.
  * precision_filter - when non-NULL, only the profile whose block_label
  *                    equals this string is allowed; when NULL, the
  *                    mixed/all selection rule below applies.
  *
  * Returns 1 when the profile may run, 0 otherwise. */
 static int profile_allowed_for_run(const struct profile_desc *desc, int cc, const char *precision_filter) {
    const char *label = desc->block_label;
    if (!desc->enabled || cc < desc->min_cc) {
        return 0;
    }
    if (precision_filter == NULL) {
        /* Mixed/all phases deliberately leave out fp64 and fp4 for now: both
         * paths are unstable on the current benchmark fleet and can abort the
         * whole mixed pass after earlier phases already collected useful
         * telemetry. */
        if (strcmp(label, "fp64") == 0) {
            return 0;
        }
        return strcmp(label, "fp4") != 0;
    }
    return strcmp(label, precision_filter) == 0;
 }
 static int load_cublaslt(struct cublaslt_api *api) {
    memset(api, 0, sizeof(*api));
    api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -750,10 +796,12 @@ static int check_cublas(const char *step, cublasStatus_t status) {
 static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    switch (type) {
        case CUDA_R_32F:
        case CUDA_R_32I:
            return (size_t)(elements * 4u);
        case CUDA_R_16F:
        case CUDA_R_16BF:
            return (size_t)(elements * 2u);
        case CUDA_R_8I:
        case CUDA_R_8F_E4M3:
        case CUDA_R_8F_E5M2:
            return (size_t)(elements);
@@ -766,6 +814,16 @@ static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    }
 }
 /* Maps a profile's compute type to the scale data type used for it:
  * CUBLAS_COMPUTE_32I -> CUDA_R_32I, CUBLAS_COMPUTE_64F -> CUDA_R_64F,
  * anything else -> CUDA_R_32F. */
 static cudaDataType_t matmul_scale_type(const struct profile_desc *desc) {
    cudaDataType_t scale = CUDA_R_32F;
    if (desc->compute_type == CUBLAS_COMPUTE_32I) {
        scale = CUDA_R_32I;
    } else if (desc->compute_type == CUBLAS_COMPUTE_64F) {
        scale = CUDA_R_64F;
    }
    return scale;
 }
 static size_t fp4_scale_bytes(uint64_t rows, uint64_t cols) {
    uint64_t row_tiles = (rows + 127u) / 128u;
    uint64_t col_tiles = (cols + 63u) / 64u;
@@ -872,11 +930,9 @@ static int prepare_profile(struct cublaslt_api *cublas,
                           CUstream stream,
                           size_t profile_budget_bytes,
                           struct prepared_profile *out) {
    memset(out, 0, sizeof(*out));
    out->desc = *desc;
    out->stream = stream;
    size_t bytes_per_cell = 0;
    size_t attempt_budget = profile_budget_bytes;
    bytes_per_cell += bytes_for_elements(desc->a_type, 1);
    bytes_per_cell += bytes_for_elements(desc->b_type, 1);
    bytes_per_cell += bytes_for_elements(desc->c_type, 1);
@@ -885,105 +941,115 @@ static int prepare_profile(struct cublaslt_api *cublas,
        return 0;
    }
-    uint64_t dim = choose_square_dim(profile_budget_bytes, bytes_per_cell, desc->min_multiple);
+    while (attempt_budget >= MIN_PROFILE_BUDGET_BYTES) {
-    out->m = dim;
+        memset(out, 0, sizeof(*out));
-    out->n = dim;
+        out->desc = *desc;
-    out->k = dim;
+        out->stream = stream;
-    size_t desired_workspace = profile_budget_bytes / 8u;
+        uint64_t dim = choose_square_dim(attempt_budget, bytes_per_cell, desc->min_multiple);
-    if (desired_workspace > 32u * 1024u * 1024u) {
+        out->m = dim;
-        desired_workspace = 32u * 1024u * 1024u;
+        out->n = dim;
-    }
+        out->k = dim;
    desired_workspace = round_down_size(desired_workspace, 256u);
-    size_t a_bytes = 0;
+        size_t desired_workspace = attempt_budget / 8u;
-    size_t b_bytes = 0;
+        if (desired_workspace > 32u * 1024u * 1024u) {
-    size_t c_bytes = 0;
+            desired_workspace = 32u * 1024u * 1024u;
-    size_t d_bytes = 0;
+        }
-    size_t scale_bytes = 0;
+        desired_workspace = round_down_size(desired_workspace, 256u);
    while (1) {
        a_bytes = bytes_for_elements(desc->a_type, out->k * out->m);
        b_bytes = bytes_for_elements(desc->b_type, out->k * out->n);
        c_bytes = bytes_for_elements(desc->c_type, out->m * out->n);
        d_bytes = bytes_for_elements(desc->d_type, out->m * out->n);
        scale_bytes = profile_scale_bytes(desc, out->m, out->n, out->k);
-        size_t matrix_bytes = a_bytes + b_bytes + c_bytes + d_bytes + scale_bytes;
+        size_t a_bytes = 0;
-        if (matrix_bytes <= profile_budget_bytes) {
+        size_t b_bytes = 0;
-            size_t remaining = profile_budget_bytes - matrix_bytes;
+        size_t c_bytes = 0;
-            out->workspace_size = desired_workspace;
+        size_t d_bytes = 0;
-            if (out->workspace_size > remaining) {
+        size_t scale_bytes = 0;
-                out->workspace_size = round_down_size(remaining, 256u);
+        while (1) {
            a_bytes = bytes_for_elements(desc->a_type, out->k * out->m);
            b_bytes = bytes_for_elements(desc->b_type, out->k * out->n);
            c_bytes = bytes_for_elements(desc->c_type, out->m * out->n);
            d_bytes = bytes_for_elements(desc->d_type, out->m * out->n);
            scale_bytes = profile_scale_bytes(desc, out->m, out->n, out->k);
            size_t matrix_bytes = a_bytes + b_bytes + c_bytes + d_bytes + scale_bytes;
            if (matrix_bytes <= attempt_budget) {
                size_t remaining = attempt_budget - matrix_bytes;
                out->workspace_size = desired_workspace;
                if (out->workspace_size > remaining) {
                    out->workspace_size = round_down_size(remaining, 256u);
                }
                break;
            }
-            break;
+
            if (out->m <= (uint64_t)desc->min_multiple) {
                break;
            }
            out->m -= (uint64_t)desc->min_multiple;
            out->n = out->m;
            out->k = out->m;
        }
        if (out->m < (uint64_t)desc->min_multiple) {
            attempt_budget /= 2u;
            continue;
        }
-        if (out->m <= (uint64_t)desc->min_multiple) {
+        if (!alloc_filled(cuda, &out->a_dev, a_bytes, 0x11) ||
-            return 0;
+            !alloc_filled(cuda, &out->b_dev, b_bytes, 0x11) ||
-        }
+            !alloc_filled(cuda, &out->c_dev, c_bytes, 0x00) ||
-        out->m -= (uint64_t)desc->min_multiple;
+            !alloc_filled(cuda, &out->d_dev, d_bytes, 0x00)) {
        out->n = out->m;
        out->k = out->m;
    }
    if (!alloc_filled(cuda, &out->a_dev, a_bytes, 0x11) ||
        !alloc_filled(cuda, &out->b_dev, b_bytes, 0x11) ||
        !alloc_filled(cuda, &out->c_dev, c_bytes, 0x00) ||
        !alloc_filled(cuda, &out->d_dev, d_bytes, 0x00)) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    if (!check_cublas("cublasLtMatmulDescCreate",
                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, CUDA_R_32F))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    cublasOperation_t transa = CUBLAS_OP_T;
    cublasOperation_t transb = CUBLAS_OP_N;
    if (!check_cublas("set TRANSA",
                      cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
                                                             CUBLASLT_MATMUL_DESC_TRANSA,
                                                             &transa,
                                                             sizeof(transa))) ||
        !check_cublas("set TRANSB",
                      cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
                                                             CUBLASLT_MATMUL_DESC_TRANSB,
                                                             &transb,
                                                             sizeof(transb)))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    if (desc->needs_scalar_scale) {
        float one = 1.0f;
        if (!alloc_filled(cuda, &out->a_scale_dev, sizeof(one), 0x00) ||
            !alloc_filled(cuda, &out->b_scale_dev, sizeof(one), 0x00)) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-        if (!device_upload(cuda, out->a_scale_dev, &one, sizeof(one)) ||
+
-            !device_upload(cuda, out->b_scale_dev, &one, sizeof(one))) {
+        cudaDataType_t scale_type = matmul_scale_type(desc);
        if (!check_cublas("cublasLtMatmulDescCreate",
                          cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, scale_type))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-        void *a_scale_ptr = (void *)(uintptr_t)out->a_scale_dev;
+
-        void *b_scale_ptr = (void *)(uintptr_t)out->b_scale_dev;
+        cublasOperation_t transa = CUBLAS_OP_T;
-        if (!check_cublas("set A scale ptr",
+        cublasOperation_t transb = CUBLAS_OP_N;
        if (!check_cublas("set TRANSA",
                          cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                                 CUBLASLT_MATMUL_DESC_A_SCALE_POINTER,
+                                                                 CUBLASLT_MATMUL_DESC_TRANSA,
-                                                                 &a_scale_ptr,
+                                                                 &transa,
-                                                                 sizeof(a_scale_ptr))) ||
+                                                                 sizeof(transa))) ||
-            !check_cublas("set B scale ptr",
+            !check_cublas("set TRANSB",
                          cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                                 CUBLASLT_MATMUL_DESC_B_SCALE_POINTER,
+                                                                 CUBLASLT_MATMUL_DESC_TRANSB,
-                                                                 &b_scale_ptr,
+                                                                 &transb,
-                                                                 sizeof(b_scale_ptr)))) {
+                                                                 sizeof(transb)))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-    }
+
        if (desc->needs_scalar_scale) {
            float one = 1.0f;
            if (!alloc_filled(cuda, &out->a_scale_dev, sizeof(one), 0x00) ||
                !alloc_filled(cuda, &out->b_scale_dev, sizeof(one), 0x00)) {
                destroy_profile(cublas, cuda, out);
                return 0;
            }
            if (!device_upload(cuda, out->a_scale_dev, &one, sizeof(one)) ||
                !device_upload(cuda, out->b_scale_dev, &one, sizeof(one))) {
                destroy_profile(cublas, cuda, out);
                return 0;
            }
            void *a_scale_ptr = (void *)(uintptr_t)out->a_scale_dev;
            void *b_scale_ptr = (void *)(uintptr_t)out->b_scale_dev;
            if (!check_cublas("set A scale ptr",
                              cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
                                                                     CUBLASLT_MATMUL_DESC_A_SCALE_POINTER,
                                                                     &a_scale_ptr,
                                                                     sizeof(a_scale_ptr))) ||
                !check_cublas("set B scale ptr",
                              cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
                                                                     CUBLASLT_MATMUL_DESC_B_SCALE_POINTER,
                                                                     &b_scale_ptr,
                                                                     sizeof(b_scale_ptr)))) {
                destroy_profile(cublas, cuda, out);
                return 0;
            }
        }
 #if defined(CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3)
    if (desc->needs_block_scale) {
@@ -1023,78 +1089,94 @@ static int prepare_profile(struct cublaslt_api *cublas,
    }
 #endif
-    if (!check_cublas("create A layout",
+        if (!check_cublas("create A layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->a_layout, desc->a_type, out->k, out->m, out->k)) ||
+                          cublas->cublasLtMatrixLayoutCreate(&out->a_layout, desc->a_type, out->k, out->m, out->k)) ||
-        !check_cublas("create B layout",
+            !check_cublas("create B layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->b_layout, desc->b_type, out->k, out->n, out->k)) ||
+                          cublas->cublasLtMatrixLayoutCreate(&out->b_layout, desc->b_type, out->k, out->n, out->k)) ||
-        !check_cublas("create C layout",
+            !check_cublas("create C layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->c_layout, desc->c_type, out->m, out->n, out->m)) ||
+                          cublas->cublasLtMatrixLayoutCreate(&out->c_layout, desc->c_type, out->m, out->n, out->m)) ||
-        !check_cublas("create D layout",
+            !check_cublas("create D layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->d_layout, desc->d_type, out->m, out->n, out->m))) {
+                          cublas->cublasLtMatrixLayoutCreate(&out->d_layout, desc->d_type, out->m, out->n, out->m))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    if (!check_cublas("create preference", cublas->cublasLtMatmulPreferenceCreate(&out->preference))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    if (out->workspace_size > 0) {
        if (!alloc_filled(cuda, &out->workspace_dev, out->workspace_size, 0x00)) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
        if (!check_cublas("create preference", cublas->cublasLtMatmulPreferenceCreate(&out->preference))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
        if (out->workspace_size > 0) {
            if (!alloc_filled(cuda, &out->workspace_dev, out->workspace_size, 0x00)) {
                destroy_profile(cublas, cuda, out);
                return 0;
            }
        }
        if (!check_cublas("set workspace",
                          cublas->cublasLtMatmulPreferenceSetAttribute(
                              out->preference,
                              CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
                              &out->workspace_size,
                              sizeof(out->workspace_size)))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
        int found = 0;
        if (check_cublas("heuristic",
                         cublas->cublasLtMatmulAlgoGetHeuristic(handle,
                                                                out->op_desc,
                                                                out->a_layout,
                                                                out->b_layout,
                                                                out->c_layout,
                                                                out->d_layout,
                                                                out->preference,
                                                                1,
                                                                &out->heuristic,
                                                                &found)) &&
            found > 0) {
            out->ready = 1;
            return 1;
        }
        destroy_profile(cublas, cuda, out);
        attempt_budget = round_down_size(attempt_budget * 3u / 4u, 256u);
        if (attempt_budget < MIN_PROFILE_BUDGET_BYTES) {
            break;
        }
    }
-    if (!check_cublas("set workspace",
+    return 0;
                      cublas->cublasLtMatmulPreferenceSetAttribute(
                          out->preference,
                          CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
                          &out->workspace_size,
                          sizeof(out->workspace_size)))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    int found = 0;
    if (!check_cublas("heuristic",
                      cublas->cublasLtMatmulAlgoGetHeuristic(handle,
                                                             out->op_desc,
                                                             out->a_layout,
                                                             out->b_layout,
                                                             out->c_layout,
                                                             out->d_layout,
                                                             out->preference,
                                                             1,
                                                             &out->heuristic,
                                                             &found))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    if (found <= 0) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
    out->ready = 1;
    return 1;
 }
 static int run_cublas_profile(cublasLtHandle_t handle,
                              struct cublaslt_api *cublas,
                              struct prepared_profile *profile) {
    int32_t alpha_i32 = 1;
    int32_t beta_i32 = 0;
    double alpha_f64 = 1.0;
    double beta_f64 = 0.0;
    float alpha = 1.0f;
    float beta = 0.0f;
    const void *alpha_ptr = &alpha;
    const void *beta_ptr = &beta;
    if (profile->desc.compute_type == CUBLAS_COMPUTE_32I) {
        alpha_ptr = &alpha_i32;
        beta_ptr = &beta_i32;
    } else if (profile->desc.compute_type == CUBLAS_COMPUTE_64F) {
        alpha_ptr = &alpha_f64;
        beta_ptr = &beta_f64;
    }
    return check_cublas(profile->desc.name,
                        cublas->cublasLtMatmul(handle,
                                               profile->op_desc,
-                                               &alpha,
+                                               alpha_ptr,
                                               (const void *)(uintptr_t)profile->a_dev,
                                               profile->a_layout,
                                               (const void *)(uintptr_t)profile->b_dev,
                                               profile->b_layout,
-                                               &beta,
+                                               beta_ptr,
                                               (const void *)(uintptr_t)profile->c_dev,
                                               profile->c_layout,
                                               (void *)(uintptr_t)profile->d_dev,
@@ -1112,9 +1194,10 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int cc_minor,
                               int seconds,
                               int size_mb,
                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
    CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1124,12 +1207,12 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    int active = 0;
    int mp_count = 0;
    int stream_count = 1;
-    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int profile_count = PROFILE_COUNT;
    int prepared_count = 0;
    int wave_launches = 0;
    size_t requested_budget = 0;
    size_t total_budget = 0;
    size_t per_profile_budget = 0;
    int budget_profiles = 0;
    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "cublasLt");
@@ -1150,8 +1233,9 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }
    /* Count profiles matching the filter (for deciding what to run). */
    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
-        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+        if (profile_allowed_for_run(&k_profiles[i], cc, precision_filter)) {
            planned++;
        }
    }
@@ -1162,18 +1246,42 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }
    /* Count all profiles active on this GPU regardless of filter.
     * Mixed phases still divide budget across the full precision set, while
     * single-precision benchmark phases dedicate budget only to active
     * profiles matching precision_filter. */
    int planned_total = 0;
    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
        if (profile_allowed_for_run(&k_profiles[i], cc, precision_filter)) {
            planned_total++;
        }
    }
    if (planned_total < planned) {
        planned_total = planned;
    }
    budget_profiles = planned_total;
    if (precision_filter != NULL) {
        budget_profiles = planned;
    }
    if (budget_profiles <= 0) {
        budget_profiles = planned_total;
    }
    requested_budget = (size_t)size_mb * 1024u * 1024u;
-    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+    if (requested_budget < (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES) {
-        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+        requested_budget = (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES;
    }
    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
-    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+    if (total_budget < (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES) {
-        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+        total_budget = (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES;
    }
    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
        cuda->cuStreamCreate &&
        cuda->cuStreamDestroy) {
-        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+        stream_count = choose_stream_count(mp_count, budget_profiles, total_budget, 1);
    }
    if (precision_filter != NULL && stream_count > MAX_SINGLE_PRECISION_STREAMS) {
        stream_count = MAX_SINGLE_PRECISION_STREAMS;
    }
    if (stream_count > 1) {
        int created = 0;
@@ -1186,19 +1294,22 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }
    report->stream_count = stream_count;
-    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    per_profile_budget = total_budget / ((size_t)budget_profiles * (size_t)stream_count);
    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
    }
    if (precision_filter != NULL) {
        per_profile_budget = clamp_single_precision_profile_budget(per_profile_budget);
    }
    report->buffer_mb = (int)(total_budget / (1024u * 1024u));
    append_detail(report->details,
                  sizeof(report->details),
-                  "requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
+                  "requested_mb=%d actual_mb=%d streams=%d mp_count=%d budget_profiles=%d per_worker_mb=%zu\n",
                  size_mb,
                  report->buffer_mb,
                  report->stream_count,
                  STRESS_LAUNCH_DEPTH,
                  mp_count,
                  budget_profiles,
                  per_profile_budget / (1024u * 1024u));
    for (int i = 0; i < profile_count; i++) {
@@ -1211,6 +1322,13 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
        if (!profile_allowed_for_run(desc, cc, precision_filter)) {
            append_detail(report->details,
                          sizeof(report->details),
                          "%s=SKIPPED benchmark_disabled\n",
                          desc->name);
            continue;
        }
        for (int lane = 0; lane < stream_count; lane++) {
            CUstream stream = streams[lane];
            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
@@ -1246,50 +1364,55 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }
    /* Keep the GPU queue continuously full by submitting kernels without
     * synchronizing after every wave.  A sync barrier after each small batch
     * creates CPU↔GPU ping-pong gaps that prevent full TDP utilisation,
     * especially when individual kernels are short.  Instead we sync at most
     * once per second (for error detection) and once at the very end. */
    double deadline = now_seconds() + (double)seconds;
    double next_sync = now_seconds() + 1.0;
    while (now_seconds() < deadline) {
-        wave_launches = 0;
+        int launched = 0;
-        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+        for (int i = 0; i < prepared_count; i++) {
-            int launched_this_batch = 0;
+            if (!prepared[i].ready) {
-            for (int i = 0; i < prepared_count; i++) {
+                continue;
                if (!prepared[i].ready) {
                    continue;
                }
                if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
                    append_detail(report->details,
                                  sizeof(report->details),
                                  "%s=FAILED runtime\n",
                                  prepared[i].desc.name);
                    for (int j = 0; j < prepared_count; j++) {
                        destroy_profile(&cublas, cuda, &prepared[j]);
                    }
                    cublas.cublasLtDestroy(handle);
                    destroy_streams(cuda, streams, stream_count);
                    cuda->cuCtxDestroy(ctx);
                    return 0;
                }
                prepared[i].iterations++;
                report->iterations++;
                wave_launches++;
                launched_this_batch++;
            }
-            if (launched_this_batch <= 0) {
+            if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
-                break;
+                append_detail(report->details,
                              sizeof(report->details),
                              "%s=FAILED runtime\n",
                              prepared[i].desc.name);
                for (int j = 0; j < prepared_count; j++) {
                    destroy_profile(&cublas, cuda, &prepared[j]);
                }
                cublas.cublasLtDestroy(handle);
                destroy_streams(cuda, streams, stream_count);
                cuda->cuCtxDestroy(ctx);
                return 0;
            }
            prepared[i].iterations++;
            report->iterations++;
            launched++;
        }
-        if (wave_launches <= 0) {
+        if (launched <= 0) {
            break;
        }
-        if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
+        double now = now_seconds();
-            for (int i = 0; i < prepared_count; i++) {
+        if (now >= next_sync || now >= deadline) {
-                destroy_profile(&cublas, cuda, &prepared[i]);
+            if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
                for (int i = 0; i < prepared_count; i++) {
                    destroy_profile(&cublas, cuda, &prepared[i]);
                }
                cublas.cublasLtDestroy(handle);
                destroy_streams(cuda, streams, stream_count);
                cuda->cuCtxDestroy(ctx);
                return 0;
            }
-            cublas.cublasLtDestroy(handle);
+            next_sync = now + 1.0;
            destroy_streams(cuda, streams, stream_count);
            cuda->cuCtxDestroy(ctx);
            return 0;
        }
    }
    /* Final drain — ensure all queued work finishes before we read results. */
    cuda->cuCtxSynchronize();
    for (int i = 0; i < prepared_count; i++) {
        if (!prepared[i].ready) {
@@ -1323,10 +1446,29 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 }
 #endif
 /* Print one stress report to stdout as key=value lines.
  *
  * report:       filled-in report to print.
  * device_index: device index echoed as device_index=.
  * seconds:      duration echoed as duration_s=.
  *
  * The free-form details text, when non-empty, is written verbatim just
  * before the final status=OK line. */
 static void print_stress_report(const struct stress_report *report, int device_index, int seconds) {
    const char *extra = report->details;
    unsigned long long checksum = (unsigned long long)report->checksum;

    printf("device=%s\n", report->device);
    printf("device_index=%d\n", device_index);
    printf("compute_capability=%d.%d\n", report->cc_major, report->cc_minor);
    printf("backend=%s\n", report->backend);
    printf("duration_s=%d\n", seconds);
    printf("buffer_mb=%d\n", report->buffer_mb);
    printf("streams=%d\n", report->stream_count);
    printf("iterations=%lu\n", report->iterations);
    printf("checksum=%llu\n", checksum);
    if (*extra != '\0') {
        /* details is emitted as-is; no newline is appended here. */
        printf("%s", extra);
    }
    printf("status=OK\n");
 }
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
    int device_index = 0;
    const char *precision_filter = NULL; /* NULL = all; else block_label to match */
    const char *precision_plan = NULL;
    const char *precision_plan_seconds = NULL;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
@@ -1334,8 +1476,16 @@ int main(int argc, char **argv) {
            size_mb = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
            device_index = atoi(argv[++i]);
        } else if (strcmp(argv[i], "--precision") == 0 && i + 1 < argc) {
            precision_filter = argv[++i];
        } else if (strcmp(argv[i], "--precision-plan") == 0 && i + 1 < argc) {
            precision_plan = argv[++i];
        } else if (strcmp(argv[i], "--precision-plan-seconds") == 0 && i + 1 < argc) {
            precision_plan_seconds = argv[++i];
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
+            fprintf(stderr,
                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]\n",
                    argv[0]);
            return 2;
        }
    }
@@ -1395,26 +1545,94 @@ int main(int argc, char **argv) {
    int ok = 0;
 #if HAVE_CUBLASLT_HEADERS
-    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report);
+    if (precision_plan != NULL && precision_plan[0] != '\0') {
        char *plan_copy = strdup(precision_plan);
        char *plan_seconds_copy = NULL;
        int phase_seconds[32] = {0};
        int phase_seconds_count = 0;
        int phase_ok = 0;
        if (plan_copy == NULL) {
            fprintf(stderr, "failed to allocate precision plan buffer\n");
            return 1;
        }
        if (precision_plan_seconds != NULL && precision_plan_seconds[0] != '\0') {
            plan_seconds_copy = strdup(precision_plan_seconds);
            if (plan_seconds_copy == NULL) {
                free(plan_copy);
                fprintf(stderr, "failed to allocate precision plan seconds buffer\n");
                return 1;
            }
            for (char *sec_token = strtok(plan_seconds_copy, ",");
                 sec_token != NULL && phase_seconds_count < (int)(sizeof(phase_seconds) / sizeof(phase_seconds[0]));
                 sec_token = strtok(NULL, ",")) {
                while (*sec_token == ' ' || *sec_token == '\t') {
                    sec_token++;
                }
                if (*sec_token == '\0') {
                    continue;
                }
                phase_seconds[phase_seconds_count++] = atoi(sec_token);
            }
        }
        int phase_idx = 0;
        for (char *token = strtok(plan_copy, ","); token != NULL; token = strtok(NULL, ","), phase_idx++) {
            while (*token == ' ' || *token == '\t') {
                token++;
            }
            if (*token == '\0') {
                continue;
            }
            const char *phase_name = token;
            const char *phase_filter = token;
            if (strcmp(token, "mixed") == 0 || strcmp(token, "all") == 0) {
                phase_filter = NULL;
            }
            int phase_duration = seconds;
            if (phase_idx < phase_seconds_count && phase_seconds[phase_idx] > 0) {
                phase_duration = phase_seconds[phase_idx];
            }
            printf("phase_begin=%s\n", phase_name);
            fflush(stdout);
            memset(&report, 0, sizeof(report));
            ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, phase_duration, size_mb, phase_filter, &report);
            if (ok) {
                print_stress_report(&report, device_index, phase_duration);
                phase_ok = 1;
            } else {
                printf("phase_error=%s\n", phase_name);
                if (report.details[0] != '\0') {
                    printf("%s", report.details);
                    if (report.details[strlen(report.details) - 1] != '\n') {
                        printf("\n");
                    }
                }
                printf("status=FAILED\n");
            }
            printf("phase_end=%s\n", phase_name);
            fflush(stdout);
        }
        free(plan_seconds_copy);
        free(plan_copy);
        return phase_ok ? 0 : 1;
    }
    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, precision_filter, &report);
 #endif
    if (!ok) {
-        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report)) {
+        if (precision_filter != NULL) {
            fprintf(stderr,
                    "requested precision path unavailable: precision=%s device=%s cc=%d.%d\n",
                    precision_filter,
                    name,
                    cc_major,
                    cc_minor);
            return 1;
        }
        int ptx_mb = size_mb;
        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, ptx_mb, &report)) {
            return 1;
        }
    }
-    printf("device=%s\n", report.device);
+    print_stress_report(&report, device_index, seconds);
    printf("device_index=%d\n", device_index);
    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
    printf("backend=%s\n", report.backend);
    printf("duration_s=%d\n", seconds);
    printf("buffer_mb=%d\n", report.buffer_mb);
    printf("streams=%d\n", report.stream_count);
    printf("iterations=%lu\n", report.iterations);
    printf("checksum=%llu\n", (unsigned long long)report.checksum);
    if (report.details[0] != '\0') {
        printf("%s", report.details);
    }
    printf("status=OK\n");
    return 0;
 }
--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -161,6 +161,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}" \
@@ -175,6 +176,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}"
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -57,6 +57,7 @@ OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BUILD_VARIANT}"
 export BEE_GPU_VENDOR BEE_NVIDIA_MODULE_FLAVOR BUILD_VARIANT
 . "${BUILDER_DIR}/VERSIONS"
 export MEMTEST_VERSION
 export PATH="$PATH:/usr/local/go/bin"
 : "${BEE_REQUIRE_MEMTEST:=0}"
@@ -125,6 +126,37 @@ resolve_iso_version() {
    resolve_audit_version
 }
 # sync_builder_workdir SRC_DIR DST_DIR
 #
 # Mirror the builder tree SRC_DIR into the work directory DST_DIR with rsync
 # and then sanity-check the staged bootloader templates. The script exits
 # with status 1 when the grub-efi grub.cfg is absent from DST_DIR or when a
 # grub-pc directory is still present there after the sync.
 sync_builder_workdir() {
    src_dir="$1"
    dst_dir="$2"
    mkdir -p "$dst_dir"
    # Reused workdirs used to retain config/bootloaders/grub-pc after the
    # source tree switched to grub-efi only. Wipe the bootloaders directory
    # before syncing so a stale template can never reach a new ISO build.
    rm -rf "$dst_dir/config/bootloaders"
    # Build products and caches in DST_DIR are excluded from the mirror.
    rsync -a --delete \
        --exclude='cache/' \
        --exclude='chroot/' \
        --exclude='.build/' \
        --exclude='*.iso' \
        --exclude='*.packages' \
        --exclude='*.contents' \
        --exclude='*.files' \
        "$src_dir/" "$dst_dir/"
    # The grub-efi template must have been staged ...
    [ -f "$dst_dir/config/bootloaders/grub-efi/grub.cfg" ] || {
        echo "ERROR: staged workdir is missing config/bootloaders/grub-efi/grub.cfg" >&2
        exit 1
    }
    # ... and no grub-pc leftovers may remain.
    [ ! -e "$dst_dir/config/bootloaders/grub-pc" ] || {
        echo "ERROR: stale config/bootloaders/grub-pc remained in staged workdir" >&2
        exit 1
    }
 }
 iso_list_files() {
    iso_path="$1"
@@ -202,7 +234,7 @@ dump_memtest_debug() {
        echo "-- source bootloader templates --"
        for cfg in \
-            "${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
+            "${BUILDER_DIR}/config/bootloaders/grub-efi/grub.cfg" \
            "${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
            if [ -f "$cfg" ]; then
                echo "  file: $cfg"
@@ -465,6 +497,75 @@ validate_iso_memtest() {
    echo "=== memtest validation OK ==="
 }
 # validate_iso_live_boot_entries ISO_PATH
 #
 # Read boot/grub/grub.cfg and isolinux/live.cfg out of the ISO into temporary
 # files and check that both carry the expected EASY-BEE live entries:
 #   - no leftover @APPEND_LIVE@ / @KERNEL_LIVE@ / @INITRD_LIVE@ placeholders,
 #   - the default and toram GRUB menu entries exist,
 #   - the GRUB "linux" and isolinux "append" lines contain boot=live, and a
 #     toram variant of each exists.
 # Any failed check prints an ERROR to stderr, removes the temporary files (if
 # already created) and exits the script with status 1.
 validate_iso_live_boot_entries() {
    iso_path="$1"
    echo "=== validating live boot entries in ISO ==="
    if [ ! -f "$iso_path" ]; then
        echo "ERROR: ISO not found for live boot validation: $iso_path" >&2
        exit 1
    fi
    if ! require_iso_reader "$iso_path" >/dev/null 2>&1; then
        echo "ERROR: ISO reader unavailable for live boot validation" >&2
        exit 1
    fi
    # Scratch copies of the two bootloader configs extracted from the ISO.
    grub_cfg="$(mktemp)"
    isolinux_cfg="$(mktemp)"
    if ! iso_read_member "$iso_path" boot/grub/grub.cfg "$grub_cfg"; then
        echo "ERROR: failed to read boot/grub/grub.cfg from ISO" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    if ! iso_read_member "$iso_path" isolinux/live.cfg "$isolinux_cfg"; then
        echo "ERROR: failed to read isolinux/live.cfg from ISO" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    # Template placeholders must not survive into the final image.
    if grep -q '@APPEND_LIVE@\|@KERNEL_LIVE@\|@INITRD_LIVE@' "$grub_cfg" "$isolinux_cfg"; then
        echo "ERROR: unresolved live-build placeholders remain in ISO bootloader config" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    # GRUB: both menu entries and their kernel command lines.
    if ! grep -q 'menuentry "EASY-BEE"' "$grub_cfg"; then
        echo "ERROR: GRUB default EASY-BEE entry is missing" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    if ! grep -q 'menuentry "EASY-BEE -- load to RAM (toram)"' "$grub_cfg"; then
        echo "ERROR: GRUB toram entry is missing" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    if ! grep -q 'linux .*boot=live ' "$grub_cfg"; then
        echo "ERROR: GRUB live entry is missing boot=live" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    if ! grep -q 'linux .*boot=live .*toram ' "$grub_cfg"; then
        echo "ERROR: GRUB toram entry is missing boot=live or toram" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    # isolinux: the append lines of the live and toram labels.
    if ! grep -q 'append .*boot=live ' "$isolinux_cfg"; then
        echo "ERROR: isolinux live entry is missing boot=live" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    if ! grep -q 'append .*boot=live .*toram ' "$isolinux_cfg"; then
        echo "ERROR: isolinux toram entry is missing boot=live or toram" >&2
        rm -f "$grub_cfg" "$isolinux_cfg"
        exit 1
    fi
    rm -f "$grub_cfg" "$isolinux_cfg"
    echo "=== live boot validation OK ==="
 }
 validate_iso_nvidia_runtime() {
    iso_path="$1"
    [ "$BEE_GPU_VENDOR" = "nvidia" ] || return 0
@@ -541,6 +642,185 @@ label memtest
 EOF
 }
 # extract_live_grub_entry CFG
 #
 # Locate the first "linux /live/..." and "initrd /live/..." lines in the GRUB
 # config CFG and publish their parts through shell variables:
 #   grub_kernel  - kernel path (second word of the linux line)
 #   grub_append  - everything after the kernel path on the linux line
 #   grub_initrd  - initrd path (second word of the initrd line)
 # Returns 0 when all three values are non-empty, 1 otherwise.
 extract_live_grub_entry() {
    cfg="$1"
    live_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
    live_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
    [ -n "$live_linux" ] || return 1
    [ -n "$live_initrd" ] || return 1
    grub_kernel="$(printf '%s\n' "$live_linux" | awk '{print $2}')"
    # Strip the leading indentation, the "linux" directive and the kernel path
    # with a whitespace-aware pattern. Splitting on single spaces (cut -d' ')
    # miscounts fields when the line is indented or uses runs of spaces/tabs,
    # which would leave "linux <kernel>" inside the extracted arguments.
    grub_append="$(printf '%s\n' "$live_linux" | awk '{ sub(/^[[:space:]]*linux[[:space:]]+[^[:space:]]+[[:space:]]*/, ""); print }')"
    grub_initrd="$(printf '%s\n' "$live_initrd" | awk '{print $2}')"
    [ -n "$grub_kernel" ] || return 1
    [ -n "$grub_append" ] || return 1
    [ -n "$grub_initrd" ] || return 1
    return 0
 }
 # load_live_build_append LB_DIR
 #
 # Source LB_DIR/config/binary and copy its LB_BOOTAPPEND_LIVE value into
 # live_build_append. Returns 1 when the file does not exist or the variable
 # is unset/empty after sourcing; returns 0 otherwise.
 load_live_build_append() {
    lb_dir="$1"
    binary_cfg="$lb_dir/config/binary"
    if [ ! -f "$binary_cfg" ]; then
        return 1
    fi
    # live-build generates config/binary as a list of shell variable
    # assignments (for example LB_BOOTAPPEND_LIVE="boot=live ..."), so it can
    # be sourced directly.
    # shellcheck disable=SC1090
    . "$binary_cfg"
    if [ -z "${LB_BOOTAPPEND_LIVE:-}" ]; then
        return 1
    fi
    live_build_append="$LB_BOOTAPPEND_LIVE"
    return 0
 }
 # extract_live_isolinux_entry CFG
 #
 # Pull the first live entry out of the isolinux config CFG and publish it
 # through shell variables:
 #   isolinux_kernel       - path from the first "linux /live/..." line
 #   isolinux_initrd_path  - path from the first "initrd /live/..." line
 #   isolinux_append       - text following the first "append" keyword
 # Returns 1 if any of the lines or extracted paths is missing, 0 otherwise.
 extract_live_isolinux_entry() {
    cfg="$1"
    isolinux_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
    isolinux_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
    isolinux_append="$(awk '/^[[:space:]]*append[[:space:]]+/ { sub(/^[[:space:]]*append[[:space:]]+/, ""); print; exit }' "$cfg")"
    # All three directives must be present before the paths are split out.
    if [ -z "$isolinux_linux" ] || [ -z "$isolinux_initrd" ] || [ -z "$isolinux_append" ]; then
        return 1
    fi
    isolinux_kernel="$(printf '%s\n' "$isolinux_linux" | awk '{print $2}')"
    isolinux_initrd_path="$(printf '%s\n' "$isolinux_initrd" | awk '{print $2}')"
    if [ -z "$isolinux_kernel" ] || [ -z "$isolinux_initrd_path" ]; then
        return 1
    fi
    return 0
 }
 # write_canonical_grub_cfg CFG KERNEL APPEND_LIVE INITRD
 #
 # Overwrite CFG with the fixed EASY-BEE GRUB menu: an ASCII-art banner, the
 # default and toram live entries, a memtest entry and (on EFI) a firmware
 # settings entry.
 #   $1 CFG          - file to (re)write
 #   $2 KERNEL       - kernel path placed after "linux"
 #   $3 APPEND_LIVE  - kernel arguments placed before the fixed bee options
 #   $4 INITRD       - initrd path placed after "initrd"
 # Note the argument order (kernel, append, initrd) differs from
 # write_canonical_isolinux_cfg (kernel, initrd, append).
 write_canonical_grub_cfg() {
    cfg="$1"
    kernel="$2"
    append_live="$3"
    initrd="$4"
    # Unquoted heredoc: ${kernel}, ${append_live} and ${initrd} are expanded
    # while writing; \${grub_platform} is escaped so GRUB evaluates it at boot.
    cat > "$cfg" <<EOF
 source /boot/grub/config.cfg
 echo ""
 echo "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗"
 echo "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝"
 echo "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗"
 echo "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝"
 echo "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗"
 echo "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝"
 echo "  Hardware Audit LiveCD"
 echo ""
 menuentry "EASY-BEE" {
    linux   ${kernel} ${append_live} bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  ${initrd}
 }
 menuentry "EASY-BEE -- load to RAM (toram)" {
    linux   ${kernel} ${append_live} toram bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  ${initrd}
 }
 if [ "\${grub_platform}" = "efi" ]; then
    menuentry "Memory Test (memtest86+)" {
        chainloader /boot/memtest86+x64.efi
    }
 else
    menuentry "Memory Test (memtest86+)" {
        linux16 /boot/memtest86+x64.bin
    }
 fi
 if [ "\${grub_platform}" = "efi" ]; then
    menuentry "UEFI Firmware Settings" {
        fwsetup
    }
 fi
 EOF
 }
 # write_canonical_isolinux_cfg CFG KERNEL INITRD APPEND_LIVE
 #
 # Overwrite CFG with the fixed EASY-BEE isolinux menu: normal (default),
 # toram, GSP=off, KMS, KMS+GSP=off and fail-safe live labels plus a memtest
 # label.
 #   $1 CFG          - file to (re)write
 #   $2 KERNEL       - kernel path placed after "linux"
 #   $3 INITRD       - initrd path placed after "initrd"
 #   $4 APPEND_LIVE  - kernel arguments placed first on every "append" line
 # Note the argument order (kernel, initrd, append) differs from
 # write_canonical_grub_cfg (kernel, append, initrd).
 # NOTE(review): the labels contain the literal text @FLAVOUR@; nothing in
 # this function substitutes it, so it is written to CFG as-is — confirm that
 # is intended.
 write_canonical_isolinux_cfg() {
    cfg="$1"
    kernel="$2"
    initrd="$3"
    append_live="$4"
    # Unquoted heredoc: ${kernel}, ${initrd} and ${append_live} are expanded
    # while writing.
    cat > "$cfg" <<EOF
 label live-@FLAVOUR@-normal
    menu label ^EASY-BEE
    menu default
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms
    menu label EASY-BEE (^KMS, no nomodeset)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms-gsp-off
    menu label EASY-BEE (KMS, ^GSP=off)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
 label memtest
    menu label ^Memory Test (memtest86+)
    linux /boot/memtest86+x64.bin
 EOF
 }
 # enforce_live_build_bootloader_assets LB_DIR
 #
 # Replace the bootloader configs under LB_DIR/binary with the canonical
 # EASY-BEE menus. For GRUB this also copies config.cfg, theme.cfg and the
 # live-theme directory from the builder's grub-efi templates. The kernel
 # arguments come from LB_BOOTAPPEND_LIVE when it can be loaded; otherwise
 # the arguments extracted from the existing config are used. A config file
 # that is absent is skipped silently; one whose live entry cannot be
 # extracted is left untouched and a warning is printed to stderr.
 enforce_live_build_bootloader_assets() {
    lb_dir="$1"
    grub_dir="$lb_dir/binary/boot/grub"
    grub_cfg="$lb_dir/binary/boot/grub/grub.cfg"
    isolinux_cfg="$lb_dir/binary/isolinux/live.cfg"
    # An empty live_build_append makes the ${...:-...} fallbacks below pick
    # the arguments parsed from the existing config files.
    load_live_build_append "$lb_dir" || {
        echo "bootloader sync: WARNING: could not load LB_BOOTAPPEND_LIVE from $lb_dir/config/binary" >&2
        live_build_append=""
    }
    # GRUB: refresh the theme assets, then rewrite grub.cfg.
    if [ ! -f "$grub_cfg" ]; then
        :
    elif extract_live_grub_entry "$grub_cfg"; then
        mkdir -p "$grub_dir/live-theme"
        cp "${BUILDER_DIR}/config/bootloaders/grub-efi/config.cfg" "$grub_dir/config.cfg"
        cp "${BUILDER_DIR}/config/bootloaders/grub-efi/theme.cfg" "$grub_dir/theme.cfg"
        cp -R "${BUILDER_DIR}/config/bootloaders/grub-efi/live-theme/." "$grub_dir/live-theme/"
        write_canonical_grub_cfg "$grub_cfg" "$grub_kernel" "${live_build_append:-$grub_append}" "$grub_initrd"
        echo "bootloader sync: rewrote binary/boot/grub/grub.cfg with canonical EASY-BEE menu"
    else
        echo "bootloader sync: WARNING: could not extract live entry from $grub_cfg" >&2
    fi
    # isolinux: rewrite live.cfg.
    if [ ! -f "$isolinux_cfg" ]; then
        :
    elif extract_live_isolinux_entry "$isolinux_cfg"; then
        write_canonical_isolinux_cfg "$isolinux_cfg" "$isolinux_kernel" "$isolinux_initrd_path" "${live_build_append:-$isolinux_append}"
        echo "bootloader sync: rewrote binary/isolinux/live.cfg with canonical EASY-BEE menu"
    else
        echo "bootloader sync: WARNING: could not extract live entry from $isolinux_cfg" >&2
    fi
 }
 copy_memtest_from_deb() {
    deb="$1"
    dst_boot="$2"
@@ -775,6 +1055,7 @@ run_optional_step_sh() {
        return 0
    fi
    mkdir -p "${LOG_DIR}" 2>/dev/null || true
    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== optional step: ${step_name} ==="
@@ -798,13 +1079,14 @@ start_build_log
 # install them on the fly so NVIDIA modules and ISO kernel always match.
 if [ -z "${DEBIAN_KERNEL_ABI}" ] || [ "${DEBIAN_KERNEL_ABI}" = "auto" ]; then
    echo "=== refreshing apt index to detect current kernel ABI ==="
-    apt-get update -qq
+    apt-get update -qq || echo "WARNING: apt-get update failed, trying cached index"
    DEBIAN_KERNEL_ABI=$(apt-cache depends linux-image-amd64 2>/dev/null \
        | awk '/Depends:.*linux-image-[0-9]/{print $2}' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+-[0-9]+' \
        | head -1)
    if [ -z "${DEBIAN_KERNEL_ABI}" ]; then
        echo "ERROR: could not auto-detect kernel ABI from apt-cache" >&2
        echo "Hint: set DEBIAN_KERNEL_ABI=x.y.z-N in iso/builder/VERSIONS to skip auto-detection" >&2
        exit 1
    fi
    echo "=== kernel ABI: ${DEBIAN_KERNEL_ABI} ==="
@@ -873,9 +1155,37 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"
    echo "=== bee-gpu-burn FP4 header probe ==="
    fp4_type_match="$(grep -Rsnm 1 'CUDA_R_4F_E2M1' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
    fp4_scale_match="$(grep -Rsnm 1 'CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
    if [ -n "$fp4_type_match" ]; then
        echo "fp4_header_symbol=present"
        echo "$fp4_type_match"
    else
        echo "fp4_header_symbol=missing"
    fi
    if [ -n "$fp4_scale_match" ]; then
        echo "fp4_scale_mode_symbol=present"
        echo "$fp4_scale_match"
    else
        echo "fp4_scale_mode_symbol=missing"
    fi
    GPU_STRESS_NEED_BUILD=1
-    if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then
+    if [ -f "$GPU_BURN_WORKER_BIN" ]; then
        GPU_STRESS_NEED_BUILD=0
        for dep in \
            "${BUILDER_DIR}/bee-gpu-stress.c" \
            "${BUILDER_DIR}/VERSIONS"; do
            if [ "$dep" -nt "$GPU_BURN_WORKER_BIN" ]; then
                GPU_STRESS_NEED_BUILD=1
                break
            fi
        done
        if [ "$GPU_STRESS_NEED_BUILD" = "0" ] && \
            find "${CUBLAS_CACHE}/include" "${CUBLAS_CACHE}/lib" -type f -newer "$GPU_BURN_WORKER_BIN" | grep -q .; then
            GPU_STRESS_NEED_BUILD=1
        fi
    fi
    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
@@ -889,21 +1199,19 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    else
        echo "=== bee-gpu-burn worker up to date, skipping build ==="
    fi
    echo "=== bee-gpu-burn compiled profile probe ==="
    if grep -aq 'fp4_e2m1' "$GPU_BURN_WORKER_BIN"; then
        echo "fp4_profile_string=present"
    else
        echo "fp4_profile_string=missing"
    fi
 fi
 echo "=== preparing staged overlay (${BUILD_VARIANT}) ==="
 mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
 # Sync builder config into variant work dir, preserving lb cache.
-rsync -a --delete \
+sync_builder_workdir "${BUILDER_DIR}" "${BUILD_WORK_DIR}"
    --exclude='cache/' \
    --exclude='chroot/' \
    --exclude='.build/' \
    --exclude='*.iso' \
    --exclude='*.packages' \
    --exclude='*.contents' \
    --exclude='*.files' \
    "${BUILDER_DIR}/" "${BUILD_WORK_DIR}/"
 # Share deb package cache across variants.
 # Restore: populate work dir cache from shared cache before build.
@@ -1145,6 +1453,7 @@ fi
 # --- substitute version placeholders in package list and archive ---
 if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    sed -i \
        -e "s/%%NVIDIA_FABRICMANAGER_VERSION%%/${NVIDIA_FABRICMANAGER_VERSION}/g" \
        -e "s/%%DCGM_VERSION%%/${DCGM_VERSION}/g" \
        "${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
 elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
@@ -1187,10 +1496,18 @@ BEE_GPU_VENDOR_UPPER="$(echo "${BUILD_VARIANT}" | tr 'a-z-' 'A-Z_')"
 export BEE_GPU_VENDOR_UPPER
 cd "${LB_DIR}"
-run_step_sh "live-build clean" "80-lb-clean" "lb clean 2>&1 | tail -3"
+run_step_sh "live-build clean" "80-lb-clean" "lb clean --all 2>&1 | tail -3"
 run_step_sh "live-build config" "81-lb-config" "lb config 2>&1 | tail -5"
 dump_memtest_debug "pre-build" "${LB_DIR}"
 run_step_sh "live-build build" "90-lb-build" "lb build 2>&1"
 echo "=== enforcing canonical bootloader assets ==="
 enforce_live_build_bootloader_assets "${LB_DIR}"
 reset_live_build_stage "${LB_DIR}" "binary_checksums"
 reset_live_build_stage "${LB_DIR}" "binary_iso"
 reset_live_build_stage "${LB_DIR}" "binary_zsync"
 run_step_sh "rebuild live-build checksums after bootloader sync" "91b-lb-checksums" "lb binary_checksums 2>&1"
 run_step_sh "rebuild ISO after bootloader sync" "91c-lb-binary-iso" "lb binary_iso 2>&1"
 run_step_sh "rebuild zsync after bootloader sync" "91d-lb-zsync" "lb binary_zsync 2>&1"
 # --- persist deb package cache back to shared location ---
 # This allows the second variant to reuse all downloaded packages.
@@ -1215,6 +1532,7 @@ if [ -f "$ISO_RAW" ]; then
        fi
    fi
    validate_iso_memtest "$ISO_RAW"
    validate_iso_live_boot_entries "$ISO_RAW"
    validate_iso_nvidia_runtime "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
--- a/iso/builder/config/bootloaders/grub-efi/config.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/config.cfg
@@ -23,9 +23,9 @@ insmod serial
 serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1
 insmod gfxterm
 insmod png
 source /boot/grub/theme.cfg
 terminal_input console serial
 terminal_output gfxterm serial
 insmod png
 source /boot/grub/theme.cfg
--- a/iso/builder/config/bootloaders/grub-efi/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/grub.cfg
@@ -0,0 +1,28 @@
 # GRUB menu template for the EASY-BEE live image.
 # @KERNEL_LIVE@, @APPEND_LIVE@ and @INITRD_LIVE@ are placeholders that must
 # be substituted before the ISO is finalized.
 source /boot/grub/config.cfg
 # Default live entry.
 menuentry "EASY-BEE" {
    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }
 # Same kernel arguments as the default entry plus "toram".
 menuentry "EASY-BEE -- load to RAM (toram)" {
    linux   @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }
 # memtest86+: chainload the EFI binary on EFI, boot the .bin image otherwise.
 if [ "${grub_platform}" = "efi" ]; then
    menuentry "Memory Test (memtest86+)" {
        chainloader /boot/memtest86+x64.efi
    }
 else
    menuentry "Memory Test (memtest86+)" {
        linux16 /boot/memtest86+x64.bin
    }
 fi
 # Firmware setup entry is only offered on EFI.
 if [ "${grub_platform}" = "efi" ]; then
    menuentry "UEFI Firmware Settings" {
        fwsetup
    }
 fi
--- a/iso/builder/config/bootloaders/grub-efi/live-theme/bee-logo.png
+++ b/iso/builder/config/bootloaders/grub-efi/live-theme/bee-logo.png
--- a/iso/builder/config/bootloaders/grub-efi/live-theme/theme.txt
+++ b/iso/builder/config/bootloaders/grub-efi/live-theme/theme.txt
@@ -5,6 +5,13 @@ title-text: ""
 message-font: "Unifont Regular 16"
 terminal-font: "Unifont Regular 16"
 #bee logo - centered, upper third of screen
 + image {
        top = 4%
        left = 50%-200
        file = "bee-logo.png"
 }
 #help bar at the bottom
 + label {
        top = 100%-50
@@ -21,17 +28,17 @@ terminal-font: "Unifont Regular 16"
 + boot_menu {
        left = 20%
        width = 60%
-        top = 62%
+        top = 65%
-        height = 38%-80
+        height = 35%-80
        item_color = "#c88000"
        item_font = "Unifont Regular 16"
        selected_item_color= "#f5a800"
        selected_item_font = "Unifont Regular 16"
-        item_height = 16
+        item_height = 20
-        item_padding = 0
+        item_padding = 2
        item_spacing = 4
        icon_width = 0
-        icon_heigh = 0
+        icon_height = 0
        item_icon_space = 0
 }
--- a/iso/builder/config/bootloaders/grub-efi/theme.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/theme.cfg
@@ -0,0 +1,9 @@
 # GRUB colour and theme selection for the EASY-BEE menu.
 set color_normal=light-gray/black
 set color_highlight=yellow/black
 # Use the graphical live-theme when its theme.txt is present; otherwise only
 # set plain menu colours.
 if [ -e /boot/grub/live-theme/theme.txt ]; then
    set theme=/boot/grub/live-theme/theme.txt
 else
    set menu_color_normal=yellow/black
    set menu_color_highlight=white/brown
 fi
--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -1,49 +0,0 @@
 source /boot/grub/config.cfg
 echo ""
 echo "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗"
 echo "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝"
 echo "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗"
 echo "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝"
 echo "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗"
 echo "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝"
 echo "  Hardware Audit LiveCD"
 echo ""
 menuentry "EASY-BEE" {
    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }
 submenu "EASY-BEE (advanced options) -->" {
    menuentry "EASY-BEE — GSP=off" {
        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }
    menuentry "EASY-BEE — KMS (no nomodeset)" {
        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
        initrd  @INITRD_LIVE@
    }
    menuentry "EASY-BEE — fail-safe" {
        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
        initrd  @INITRD_LIVE@
    }
 }
 if [ "${grub_platform}" = "efi" ]; then
    menuentry "Memory Test (memtest86+)" {
        chainloader /boot/memtest86+x64.efi
    }
 else
    menuentry "Memory Test (memtest86+)" {
        linux16 /boot/memtest86+x64.bin
    }
 fi
 if [ "${grub_platform}" = "efi" ]; then
    menuentry "UEFI Firmware Settings" {
        fwsetup
    }
 fi
--- a/iso/builder/config/bootloaders/grub-pc/theme.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/theme.cfg
@@ -1,9 +0,0 @@
 set color_normal=light-gray/black
 set color_highlight=white/dark-gray
 if [ -e /boot/grub/splash.png ]; then
    set theme=/boot/grub/live-theme/theme.txt
 else
    set menu_color_normal=cyan/black
    set menu_color_highlight=white/dark-gray
 fi
--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -3,37 +3,37 @@ label live-@FLAVOUR@-normal
    menu default
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=normal
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms
    menu label EASY-BEE (^graphics/KMS)
    linux @LINUX@
    initrd @INITRD@
    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal
 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ toram bee.nvidia.mode=normal
+    append @APPEND_LIVE@ toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
-label live-@FLAVOUR@-kms-gsp-off
+label live-@FLAVOUR@-kms
-    menu label EASY-BEE (g^raphics/KMS, GSP=off)
+    menu label EASY-BEE (^KMS, no nomodeset)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms-gsp-off
    menu label EASY-BEE (KMS, ^GSP=off)
    linux @LINUX@
    initrd @INITRD@
    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
 label memtest
    menu label ^Memory Test (memtest86+)
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -25,6 +25,7 @@ ensure_bee_console_user() {
 ensure_bee_console_user
 # Enable common bee services
 systemctl enable bee-hpc-tuning.service
 systemctl enable bee-network.service
 systemctl enable bee-preflight.service
 systemctl enable bee-audit.service
@@ -42,6 +43,7 @@ systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
 # Enable GPU-vendor specific services
 if [ "$GPU_VENDOR" = "nvidia" ]; then
    systemctl enable nvidia-dcgm.service 2>/dev/null || true
    systemctl enable nvidia-fabricmanager.service 2>/dev/null || true
    systemctl enable bee-nvidia.service
 elif [ "$GPU_VENDOR" = "amd" ]; then
    # ROCm symlinks (packages install to /opt/rocm-*/bin/)
@@ -55,13 +57,16 @@ fi
 # nogpu: no GPU services needed
 # Ensure scripts are executable
 chmod +x /usr/local/bin/bee-hpc-tuning  2>/dev/null || true
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
 chmod +x /usr/local/bin/bee            2>/dev/null || true
 chmod +x /usr/local/bin/bee-log-run    2>/dev/null || true
-chmod +x /usr/local/bin/bee-selfheal      2>/dev/null || true
+chmod +x /usr/local/bin/bee-selfheal        2>/dev/null || true
-chmod +x /usr/local/bin/bee-boot-status  2>/dev/null || true
+chmod +x /usr/local/bin/bee-boot-status    2>/dev/null || true
 chmod +x /usr/local/bin/bee-install        2>/dev/null || true
 chmod +x /usr/local/bin/bee-remount-medium 2>/dev/null || true
 if [ "$GPU_VENDOR" = "nvidia" ]; then
    chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
    chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true
--- a/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
+++ b/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
@@ -1,76 +0,0 @@
 #!/bin/sh
 # 9001-wallpaper.hook.chroot — generate /usr/share/bee/wallpaper.png inside chroot
 set -e
 echo "=== generating bee wallpaper ==="
 mkdir -p /usr/share/bee
 python3 - <<'PYEOF'
 from PIL import Image, ImageDraw, ImageFont
 import os
 W, H = 1920, 1080
 LOGO = """\
  \u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2557   \u2588\u2588\u2557      \u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557
  \u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u255a\u2588\u2588\u2557 \u2588\u2588\u2554\u255d      \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255d
  \u2588\u2588\u2588\u2588\u2588\u2557  \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u255a\u2588\u2588\u2588\u2588\u2554\u255d \u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255d\u2588\u2588\u2588\u2588\u2588\u2557  \u2588\u2588\u2588\u2588\u2588\u2557
  \u2588\u2588\u2554\u2550\u2550\u255d  \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2551\u255a\u2550\u2550\u2550\u2550\u2588\u2588\u2551  \u255a\u2588\u2588\u2554\u255d  \u255a\u2550\u2550\u2550\u2550\u255d\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u255d  \u2588\u2588\u2554\u2550\u2550\u255d
  \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2551  \u2588\u2588\u2551\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551   \u2588\u2588\u2551         \u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255d\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557
  \u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d\u255a\u2550\u255d  \u255a\u2550\u255d\u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d   \u255a\u2550\u255d         \u255a\u2550\u2550\u2550\u2550\u2550\u255d \u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d\u255a\u2550\u2550\u2550\u2550\u2550\u2550\u255d
  Hardware Audit LiveCD"""
 # Find a monospace font that supports box-drawing characters
 FONT_CANDIDATES = [
    '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
    '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
    '/usr/share/fonts/truetype/freefont/FreeMono.ttf',
    '/usr/share/fonts/truetype/noto/NotoMono-Regular.ttf',
 ]
 font_path = None
 for p in FONT_CANDIDATES:
    if os.path.exists(p):
        font_path = p
        break
 SIZE = 22
 if font_path:
    font_logo = ImageFont.truetype(font_path, SIZE)
    font_sub  = ImageFont.truetype(font_path, SIZE)
 else:
    font_logo = ImageFont.load_default()
    font_sub  = font_logo
 img  = Image.new('RGB', (W, H), (0, 0, 0))
 draw = ImageDraw.Draw(img)
 # Measure logo block line by line to avoid font ascender offset
 lines = LOGO.split('\n')
 logo_lines = lines[:6]
 sub_line   = lines[6] if len(lines) > 6 else ''
 line_h = SIZE + 2
 block_h = len(logo_lines) * line_h + 8 + (SIZE if sub_line else 0)
 # Width: measure the widest logo line
 max_w = 0
 for line in logo_lines:
    bb = draw.textbbox((0, 0), line, font=font_logo)
    max_w = max(max_w, bb[2] - bb[0])
 x = (W - max_w) // 2
 y = (H - block_h) // 2
 cy = y
 for line in logo_lines:
    draw.text((x, cy), line, font=font_logo, fill=(0xf6, 0xc9, 0x0e))
    cy += line_h
 cy += 8
 if sub_line:
    draw.text((x, cy), sub_line, font=font_sub, fill=(0x80, 0x68, 0x18))
 img.save('/usr/share/bee/wallpaper.png', optimize=True)
 print('wallpaper written: /usr/share/bee/wallpaper.png')
 PYEOF
 echo "=== wallpaper done ==="
--- a/iso/builder/config/hooks/normal/9010-fix-toram.hook.chroot
+++ b/iso/builder/config/hooks/normal/9010-fix-toram.hook.chroot
@@ -0,0 +1,41 @@
 #!/bin/sh
 # 9010-fix-toram.hook.chroot — patch live-boot toram to work with tmpfs (no O_DIRECT)
 #
 # live-boot tries "losetup --replace --direct-io=on" when re-associating the
 # loop device to the RAM copy in /dev/shm.  tmpfs does not support O_DIRECT,
 # so the ioctl returns EINVAL and the verification step fails.
 #
 # This hook edits the live-boot script in place:
 #   1. every "losetup --replace --direct-io[=VALUE]" becomes a plain
 #      "losetup --replace".  Direct I/O is simply never requested; there is
 #      no try-then-fall-back logic.
 #   2. the "Task finished with error." message is reworded as a warning.
 #      Only the message text changes; whatever live-boot does after printing
 #      it is left untouched.
 set -e
 TORAM_SCRIPT="/usr/lib/live/boot/9990-toram-todisk.sh"
 if [ ! -f "${TORAM_SCRIPT}" ]; then
     echo "9010-fix-toram: ${TORAM_SCRIPT} not found, skipping"
     exit 0
 fi
 echo "9010-fix-toram: patching ${TORAM_SCRIPT}"
 # Make it visible in the build log when live-boot no longer contains the call
 # this hook was written for, instead of silently reporting success.
 if ! grep -q -- 'losetup --replace --direct-io' "${TORAM_SCRIPT}"; then
     echo "9010-fix-toram: WARNING: no 'losetup --replace --direct-io' call found, nothing to strip"
 fi
 # 1. Strip --direct-io together with its optional =VALUE in a single pass.
 #    Removing the bare option name in a second pass would turn a leftover
 #    "--direct-io=off" into the invalid "losetup --replace=off".
 sed -i -E 's/(losetup --replace) --direct-io(=[[:alnum:]]+)?/\1/g' "${TORAM_SCRIPT}"
 # 2. Reword the failure message as a warning.
 #    live-boot prints this exact string when verification fails.
 sed -i 's/echo "Task finished with error\."/echo "Warning: toram re-association failed, continuing boot (squashfs still in RAM)"/' "${TORAM_SCRIPT}"
 echo "9010-fix-toram: patch applied"
 # Show the resulting losetup calls in the build log for manual inspection.
 grep -n "losetup" "${TORAM_SCRIPT}" | head -20 || true
--- a/iso/builder/config/hooks/normal/9011-toram-rsync.hook.chroot
+++ b/iso/builder/config/hooks/normal/9011-toram-rsync.hook.chroot
@@ -0,0 +1,46 @@
 #!/bin/sh
 # 9011-toram-rsync.hook.chroot
 #
 # Adds rsync to the initramfs so that live-boot's toram code takes the
 # rsync --progress path instead of the silent "cp -a" fallback.
 #
 # live-boot's 9990-toram-todisk.sh already contains:
 #   if [ -x /bin/rsync ]; then
 #       rsync -a --progress ... 1>/dev/console
 #   else
 #       cp -a ...   # no output
 #   fi
 #
 # We install an initramfs-tools hook that calls copy_exec /usr/bin/rsync,
 # which copies the binary + all shared-library dependencies into the initrd,
 # and then rebuild the initramfs of the newest kernel found in /lib/modules.
 set -e
 HOOK_DIR="/etc/initramfs-tools/hooks"
 HOOK="${HOOK_DIR}/bee-rsync"
 mkdir -p "${HOOK_DIR}"
 cat > "${HOOK}" << 'EOF'
 #!/bin/sh
 # initramfs hook: include rsync for live-boot toram progress output
 PREREQ=""
 prereqs() { echo "$PREREQ"; }
 case "$1" in prereqs) prereqs; exit 0 ;; esac
 . /usr/share/initramfs-tools/hook-functions
 if [ -x /usr/bin/rsync ]; then
     copy_exec /usr/bin/rsync /bin
 else
     # Not fatal: live-boot then uses its cp -a path, but say so in the build log.
     echo "bee-rsync: /usr/bin/rsync not found, initramfs will not contain rsync" >&2
 fi
 EOF
 chmod +x "${HOOK}"
 echo "9011-toram-rsync: installed initramfs hook at ${HOOK}"
 # Rebuild initramfs so the hook takes effect in the ISO's initrd.img
 KVER=$(ls /lib/modules 2>/dev/null | sort -V | tail -n 1)
 # Fail with a clear message instead of handing an empty version to
 # update-initramfs when no kernel is installed in the chroot.
 if [ -z "${KVER}" ]; then
     echo "9011-toram-rsync: no kernel found under /lib/modules, cannot rebuild initramfs" >&2
     exit 1
 fi
 echo "9011-toram-rsync: rebuilding initramfs for kernel ${KVER}"
 update-initramfs -u -k "${KVER}"
 echo "9011-toram-rsync: done"
--- a/iso/builder/config/hooks/normal/9100-memtest.hook.binary
+++ b/iso/builder/config/hooks/normal/9100-memtest.hook.binary
@@ -5,6 +5,8 @@ set -e
 : "${BEE_REQUIRE_MEMTEST:=0}"
 # memtest86+ 6.x uses memtest86+.bin (no x64 suffix) for the BIOS binary,
 # while 5.x used memtest86+x64.bin. We normalise both to x64 names in the ISO.
 MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
 BINARY_BOOT_DIR="binary/boot"
 GRUB_CFG="binary/boot/grub/grub.cfg"
@@ -24,15 +26,23 @@ fail_or_warn() {
    return 0
 }
 # grub.cfg and live.cfg may not exist yet when binary hooks run — live-build
 # creates them after this hook (lb binary_grub-efi / lb binary_syslinux).
 # The template already has memtest entries hardcoded, so a missing config file
 # here is not an error; validate_iso_memtest() checks the final ISO instead.
 # warn_only MESSAGE — report MESSAGE through log with a "WARNING: " prefix.
 warn_only() { log "WARNING: $1"; }
 # copy_memtest_file SRC [DST_NAME]
 # Copy SRC into ${BINARY_BOOT_DIR}, creating that directory if needed.
 # DST_NAME, when given, is the file name used at the destination; otherwise
 # the base name of SRC is kept.
 # Returns 1 without copying anything when SRC is not a regular file.
 copy_memtest_file() {
     src="$1"
-    base="$(basename "$src")"
+    dst_name="${2:-$(basename "$src")}"
-    dst="${BINARY_BOOT_DIR}/${base}"
+    dst="${BINARY_BOOT_DIR}/${dst_name}"
     [ -f "$src" ] || return 1
     mkdir -p "${BINARY_BOOT_DIR}"
     cp "$src" "$dst"
-    log "copied ${base} from ${src}"
+    log "copied ${dst_name} from ${src}"
 }
 extract_memtest_from_deb() {
@@ -41,14 +51,44 @@ extract_memtest_from_deb() {
    log "extracting memtest payload from ${deb}"
    dpkg-deb -x "$deb" "$tmpdir"
-    for f in ${MEMTEST_FILES}; do
+
-        if [ -f "${tmpdir}/boot/${f}" ]; then
+    # EFI binary: both 5.x and 6.x use memtest86+x64.efi
-            copy_memtest_file "${tmpdir}/boot/${f}"
+    if [ -f "${tmpdir}/boot/memtest86+x64.efi" ]; then
-        fi
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.efi"
-    done
+    fi
    # BIOS binary: 5.x = memtest86+x64.bin, 6.x = memtest86+.bin
    if [ -f "${tmpdir}/boot/memtest86+x64.bin" ]; then
        copy_memtest_file "${tmpdir}/boot/memtest86+x64.bin"
    elif [ -f "${tmpdir}/boot/memtest86+.bin" ]; then
        copy_memtest_file "${tmpdir}/boot/memtest86+.bin" "memtest86+x64.bin"
    fi
    rm -rf "$tmpdir"
 }
 # download_and_extract_memtest
 # Fetch the memtest86+ package with "apt-get download" into a scratch
 # directory (pinned to MEMTEST_VERSION when that variable is set) and hand the
 # resulting .deb to extract_memtest_from_deb.  A failed first download is
 # retried once after "apt-get update".  When no .deb ends up in the scratch
 # directory the failure is only logged.
 download_and_extract_memtest() {
     tmpdl="$(mktemp -d)"
     pkg_spec="memtest86+"
     if [ -n "${MEMTEST_VERSION:-}" ]; then
         pkg_spec="memtest86+=${MEMTEST_VERSION}"
     fi
     log "downloading ${pkg_spec} from apt"
     ( cd "$tmpdl" && apt-get download "$pkg_spec" 2>/dev/null ) || {
         log "apt download failed, retrying after apt-get update"
         apt-get update -qq >/dev/null 2>&1 || true
         ( cd "$tmpdl" && apt-get download "$pkg_spec" 2>/dev/null ) || true
     }
     # Whatever happened above, the presence of a .deb decides the outcome.
     deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
     if [ -z "$deb" ]; then
         log "apt download of memtest86+ failed"
     else
         extract_memtest_from_deb "$deb"
     fi
     rm -rf "$tmpdl"
 }
 ensure_memtest_binaries() {
    missing=0
    for f in ${MEMTEST_FILES}; do
@@ -56,10 +96,15 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0
    # 1. Try files already placed by lb binary_memtest or chroot
    for root in chroot/boot /boot; do
        for f in ${MEMTEST_FILES}; do
            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
        done
        # 6.x BIOS binary may lack x64 in name — copy with normalised name
        if [ ! -f "${BINARY_BOOT_DIR}/memtest86+x64.bin" ]; then
            copy_memtest_file "${root}/memtest86+.bin" "memtest86+x64.bin" || true
        fi
    done
    missing=0
@@ -68,6 +113,7 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0
    # 2. Try apt package cache (may be empty if lb binary_memtest already purged)
    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
        [ -d "$root" ] || continue
        deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
@@ -76,6 +122,15 @@ ensure_memtest_binaries() {
        break
    done
    missing=0
    for f in ${MEMTEST_FILES}; do
        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
    done
    [ "$missing" -eq 1 ] || return 0
    # 3. Fallback: download fresh from apt (lb binary_memtest purges the cache)
    download_and_extract_memtest
    missing=0
    for f in ${MEMTEST_FILES}; do
        if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
@@ -88,7 +143,7 @@ ensure_memtest_binaries() {
 ensure_grub_entry() {
    [ -f "$GRUB_CFG" ] || {
-        fail_or_warn "missing ${GRUB_CFG}"
+        warn_only "missing ${GRUB_CFG} (will be created by lb binary_grub-efi from template)"
        return 0
    }
@@ -114,7 +169,7 @@ EOF
 ensure_isolinux_entry() {
    [ -f "$ISOLINUX_CFG" ] || {
-        fail_or_warn "missing ${ISOLINUX_CFG}"
+        warn_only "missing ${ISOLINUX_CFG} (will be created by lb binary_syslinux from template)"
        return 0
    }
--- a/iso/builder/config/package-lists/bee-nvidia.list.chroot
+++ b/iso/builder/config/package-lists/bee-nvidia.list.chroot
@@ -5,6 +5,7 @@
 # DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with
 # CUDA 13 userspace, so install the CUDA 13 build plus proprietary components
 # explicitly.
 nvidia-fabricmanager=%%NVIDIA_FABRICMANAGER_VERSION%%
 datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
--- a/iso/builder/config/package-lists/bee.list.chroot
+++ b/iso/builder/config/package-lists/bee.list.chroot
@@ -3,6 +3,7 @@ dmidecode
 smartmontools
 nvme-cli
 pciutils
 rsync
 ipmitool
 util-linux
 e2fsprogs
--- a/iso/builder/smoketest.sh
+++ b/iso/builder/smoketest.sh
@@ -27,6 +27,7 @@ echo ""
 KVER=$(uname -r)
 info "kernel: $KVER"
 NVIDIA_BOOT_MODE="normal"
 NVIDIA_MODULES_FLAVOR="proprietary"
 for arg in $(cat /proc/cmdline 2>/dev/null); do
    case "$arg" in
        bee.nvidia.mode=*)
@@ -34,7 +35,11 @@ for arg in $(cat /proc/cmdline 2>/dev/null); do
            ;;
    esac
 done
 if [ -f /etc/bee-nvidia-modules-flavor ]; then
    NVIDIA_MODULES_FLAVOR="$(tr -d '[:space:]' </etc/bee-nvidia-modules-flavor 2>/dev/null || echo proprietary)"
 fi
 info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
 info "nvidia modules flavor: ${NVIDIA_MODULES_FLAVOR}"
 # --- PATH & binaries ---
 echo "-- PATH & binaries --"
@@ -110,10 +115,12 @@ fi
 # Check that the auxiliary NVIDIA kernel modules show up in lsmod.
 # A missing module is reported as a failure, except for the proprietary
 # flavor booted in a mode other than "normal"/"full", where it is only a
 # warning.
 for mod in nvidia_modeset nvidia_uvm; do
     if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
         ok "module loaded: $mod"
-    elif [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
+    elif [ "${NVIDIA_MODULES_FLAVOR}" = "proprietary" ] && { [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; }; then
         fail "module NOT loaded in normal mode: $mod"
-    else
+    elif [ "${NVIDIA_MODULES_FLAVOR}" = "proprietary" ]; then
         warn "module not loaded in GSP-off mode: $mod"
     else
         fail "module NOT loaded: $mod"
     fi
 done
@@ -129,10 +136,12 @@ done
 # Check for the /dev/nvidia-uvm device node, using the same severity rules as
 # the module check: a warning only for the proprietary flavor outside
 # "normal"/"full" boot mode, a failure in every other case.
 if [ -e /dev/nvidia-uvm ]; then
     ok "/dev/nvidia-uvm exists"
-elif [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
+elif [ "${NVIDIA_MODULES_FLAVOR}" = "proprietary" ] && { [ "${NVIDIA_BOOT_MODE}" = "normal" ] || [ "${NVIDIA_BOOT_MODE}" = "full" ]; }; then
     fail "/dev/nvidia-uvm missing in normal mode"
-else
+elif [ "${NVIDIA_MODULES_FLAVOR}" = "proprietary" ]; then
     warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand"
 else
     fail "/dev/nvidia-uvm missing"
 fi
 echo ""
--- a/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
+++ b/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
@@ -0,0 +1,14 @@
 [Unit]
 Description=Bee: HPC tuning (CPU governor, C-states)
 After=local-fs.target
 Before=bee-nvidia.service bee-audit.service
 [Service]
 Type=oneshot
 ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-hpc-tuning.log /usr/local/bin/bee-hpc-tuning
 StandardOutput=journal
 StandardError=journal
 RemainAfterExit=yes
 [Install]
 WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/bee-web.service
+++ b/iso/overlay/etc/systemd/system/bee-web.service
@@ -10,6 +10,8 @@ RestartSec=3
 StandardOutput=journal
 StandardError=journal
 LimitMEMLOCK=infinity
 # No MemoryMax: bee-web spawns GPU test subprocesses (dcgmproftester etc.)
 # that legitimately use several GB; a cgroup limit kills them via OOM.
 # Keep the web server responsive during GPU/CPU stress (children inherit nice+10
 # via Setpriority in runCmdJob, but the bee-web parent stays at 0).
 Nice=0
--- a/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
+++ b/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
@@ -0,0 +1,110 @@
 #!/bin/sh
 # bee-dcgmproftester-staggered — run dcgmproftester (-t 1004) on each selected
 # NVIDIA GPU, launching the GPUs one at a time with a pause between launches.
 #
 # A GPU launched earlier runs longer: its duration is the base duration plus
 # one stagger interval for every GPU that is launched after it.
 #
 # Options:
 #   --seconds N, -t N     base duration per GPU in seconds (default 300)
 #   --stagger-seconds N   pause between launches in seconds (default 180)
 #   --devices LIST        comma-separated GPU indices to use (default: all)
 #   --exclude LIST        comma-separated GPU indices to skip
 #
 # Exit status: 0 when every worker succeeded; 1 when a worker failed or no
 # tool/GPU was found; 2 on usage errors; 130/143 when stopped by INT/TERM.
 set -eu
 # Deliberately not called SECONDS: bash and ksh treat SECONDS as a timer that
 # keeps counting up after assignment, which would inflate the durations
 # computed after each stagger sleep if /bin/sh is one of those shells.
 DURATION_SECONDS=300
 STAGGER_SECONDS=180
 DEVICES=""
 EXCLUDE=""
 WORKERS=""
 TMP_DIR=""
 usage() {
     echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3]" >&2
     exit 2
 }
 # is_uint VALUE — succeed only when VALUE is a non-empty string of digits.
 is_uint() {
     case "${1:-}" in
         ''|*[!0-9]*) return 1 ;;
     esac
     return 0
 }
 # normalize_list CSV — strip whitespace, drop empty items, sort numerically,
 # de-duplicate and print the result as a comma-separated list.
 normalize_list() {
     echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
 }
 # contains_csv NEEDLE CSV — succeed when NEEDLE is an item of CSV.
 # Uses a literal match so NEEDLE is never interpreted as a regular expression.
 contains_csv() {
     case ",${2:-}," in
         *",$1,"*) return 0 ;;
     esac
     return 1
 }
 # resolve_dcgmproftester — print the path of the first dcgmproftester variant
 # found in PATH; return 1 when none is installed.
 resolve_dcgmproftester() {
     for candidate in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
         if command -v "${candidate}" >/dev/null 2>&1; then
             command -v "${candidate}"
             return 0
         fi
     done
     return 1
 }
 # cleanup — stop workers that are still registered and remove the log
 # directory.  Workers are killed explicitly because background jobs of a
 # non-interactive shell ignore SIGINT and would otherwise keep loading the GPUs.
 cleanup() {
     for worker in ${WORKERS}; do
         kill "${worker%%:*}" 2>/dev/null || true
     done
     [ -z "${TMP_DIR}" ] || rm -rf "${TMP_DIR}"
 }
 while [ "$#" -gt 0 ]; do
     case "$1" in
         --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SECONDS="$2"; shift 2 ;;
         --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
         --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
         --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
         *) usage ;;
     esac
 done
 # Reject non-numeric durations up front instead of failing later inside $(( )).
 is_uint "${DURATION_SECONDS}" || usage
 is_uint "${STAGGER_SECONDS}" || usage
 PROF=$(resolve_dcgmproftester) || { echo "dcgmproftester not found in PATH" >&2; exit 1; }
 ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
 [ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
 DEVICES=$(normalize_list "${DEVICES}")
 EXCLUDE=$(normalize_list "${EXCLUDE}")
 # No explicit --devices means "every GPU nvidia-smi reported".
 SELECTED="${DEVICES}"
 if [ -z "${SELECTED}" ]; then
     SELECTED="${ALL_DEVICES}"
 fi
 FINAL=""
 for id in $(echo "${SELECTED}" | tr ',' ' '); do
     [ -n "${id}" ] || continue
     if contains_csv "${id}" "${EXCLUDE}"; then
         continue
     fi
     if [ -z "${FINAL}" ]; then
         FINAL="${id}"
     else
         FINAL="${FINAL},${id}"
     fi
 done
 [ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
 echo "loader=dcgmproftester-staggered"
 echo "selected_gpus=${FINAL}"
 echo "stagger_seconds=${STAGGER_SECONDS}"
 # A handler that does not exit would let the script carry on after INT/TERM,
 # so the signal traps exit and leave the actual work to the EXIT trap.
 trap cleanup EXIT
 trap 'exit 130' INT
 trap 'exit 143' TERM
 TMP_DIR=$(mktemp -d)
 GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
 gpu_pos=0
 for id in $(echo "${FINAL}" | tr ',' ' '); do
     gpu_pos=$((gpu_pos + 1))
     log="${TMP_DIR}/gpu-${id}.log"
     # One extra stagger interval for every GPU that starts after this one.
     extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
     gpu_seconds=$(( DURATION_SECONDS + extra_sec ))
     echo "starting gpu ${id} seconds=${gpu_seconds}"
     CUDA_VISIBLE_DEVICES="${id}" "${PROF}" --no-dcgm-validation -t 1004 -d "${gpu_seconds}" >"${log}" 2>&1 &
     pid=$!
     WORKERS="${WORKERS} ${pid}:${id}:${log}"
     # No pause after the last GPU.
     if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
         sleep "${STAGGER_SECONDS}"
     fi
 done
 status=0
 for spec in ${WORKERS}; do
     # spec is "pid:gpu-id:log-path".
     pid=${spec%%:*}
     rest=${spec#*:}
     id=${rest%%:*}
     log=${rest#*:}
     if wait "${pid}"; then
         echo "gpu ${id} finished: OK"
     else
         rc=$?
         echo "gpu ${id} finished: FAILED rc=${rc}"
         status=1
     fi
     # Replay the worker output with a per-GPU prefix.
     sed "s/^/[gpu ${id}] /" "${log}" || true
 done
 # Every worker has been reaped; nothing is left for cleanup to kill.
 WORKERS=""
 exit "${status}"
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -2,13 +2,17 @@
 set -eu
 SECONDS=5
 STAGGER_SECONDS=0
 SIZE_MB=0
 DEVICES=""
 EXCLUDE=""
 PRECISION=""
 PRECISION_PLAN=""
 PRECISION_PLAN_SECONDS=""
 WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"
 usage() {
-    echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]" >&2
    exit 2
 }
@@ -25,9 +29,13 @@ contains_csv() {
 # Parse command-line options.  Every option takes exactly one value; a
 # missing value or an unknown option prints the usage text via usage.
 # NOTE(review): SECONDS is a self-incrementing special variable in bash and
 # ksh; this is only safe if the script's interpreter is a shell without that
 # feature — confirm against the shebang.
 while [ "$#" -gt 0 ]; do
     case "$1" in
         --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
         --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
         --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
         --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
         --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
         --precision) [ "$#" -ge 2 ] || usage; PRECISION="$2"; shift 2 ;;
         --precision-plan) [ "$#" -ge 2 ] || usage; PRECISION_PLAN="$2"; shift 2 ;;
         --precision-plan-seconds) [ "$#" -ge 2 ] || usage; PRECISION_PLAN_SECONDS="$2"; shift 2 ;;
         *) usage ;;
     esac
 done
@@ -61,14 +69,18 @@ done
 echo "loader=bee-gpu-burn"
 echo "selected_gpus=${FINAL}"
 echo "stagger_seconds=${STAGGER_SECONDS}"
 export CUDA_DEVICE_ORDER="PCI_BUS_ID"
 TMP_DIR=$(mktemp -d)
 trap 'rm -rf "${TMP_DIR}"' EXIT INT TERM
 GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
 gpu_pos=0
 WORKERS=""
 for id in $(echo "${FINAL}" | tr ',' ' '); do
    gpu_pos=$((gpu_pos + 1))
    log="${TMP_DIR}/gpu-${id}.log"
    gpu_size_mb="${SIZE_MB}"
    if [ "${gpu_size_mb}" -le 0 ] 2>/dev/null; then
@@ -79,11 +91,22 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
            gpu_size_mb=512
        fi
    fi
-    echo "starting gpu ${id} size=${gpu_size_mb}MB"
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
    gpu_seconds=$(( SECONDS + extra_sec ))
    echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
    precision_arg=""
    [ -n "${PRECISION}" ] && precision_arg="--precision ${PRECISION}"
    precision_plan_arg=""
    [ -n "${PRECISION_PLAN}" ] && precision_plan_arg="--precision-plan ${PRECISION_PLAN}"
    precision_plan_seconds_arg=""
    [ -n "${PRECISION_PLAN_SECONDS}" ] && precision_plan_seconds_arg="--precision-plan-seconds ${PRECISION_PLAN_SECONDS}"
    CUDA_VISIBLE_DEVICES="${id}" \
-        "${WORKER}" --device 0 --seconds "${SECONDS}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
+        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} ${precision_plan_arg} ${precision_plan_seconds_arg} >"${log}" 2>&1 &
    pid=$!
    WORKERS="${WORKERS} ${pid}:${id}:${log}"
    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
        sleep "${STAGGER_SECONDS}"
    fi
 done
 status=0
--- a/iso/overlay/usr/local/bin/bee-hpc-tuning
+++ b/iso/overlay/usr/local/bin/bee-hpc-tuning
@@ -0,0 +1,41 @@
 #!/bin/sh
 # bee-hpc-tuning — boot-time HPC tuning for reproducible benchmark runs.
 # Started by bee-hpc-tuning.service.
 log() {
     echo "[bee-hpc-tuning] $*"
 }
 # ── CPU governor ────────────────────────────────────────────────────────────
 # cpupower is not available, so "performance" is written straight into every
 # core's scaling_governor file in sysfs; successes and failures are counted.
 applied=0
 rejected=0
 for node in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
     # An unmatched glob stays literal and is not a file, so it is skipped here.
     if [ -f "$node" ]; then
         if echo performance > "$node" 2>/dev/null; then
             applied=$((applied + 1))
         else
             rejected=$((rejected + 1))
         fi
     fi
 done
 # Summarise the outcome in a single log line.
 if [ "$applied" -eq 0 ] && [ "$rejected" -eq 0 ]; then
     log "WARN: no cpufreq scaling_governor paths found (C-state governor or HW-controlled)"
 elif [ "$applied" -eq 0 ]; then
     log "WARN: failed to set CPU governor on ${rejected} core(s)"
 elif [ "$rejected" -eq 0 ]; then
     log "CPU governor set to performance on ${applied} core(s)"
 else
     log "WARN: CPU governor: ${applied} OK, ${rejected} failed"
 fi
 # ── Transparent Huge Pages ───────────────────────────────────────────────────
 # The kernel command line is expected to select transparent_hugepage=always;
 # nothing is changed here, the current setting is only recorded in the log.
 thp_path=/sys/kernel/mm/transparent_hugepage/enabled
 if [ ! -f "$thp_path" ]; then
     log "WARN: transparent_hugepage sysfs path not found"
 else
     log "transparent_hugepage: $(cat "$thp_path" 2>/dev/null)"
 fi
 log "done"
--- a/iso/overlay/usr/local/bin/bee-install
+++ b/iso/overlay/usr/local/bin/bee-install
@@ -65,6 +65,9 @@ done
 # Location of the root filesystem image on the live medium.
 # Abort before doing anything else when it is not there, and tell the
 # operator how to get the medium back.
 SQUASHFS="/run/live/medium/live/filesystem.squashfs"
 if [ ! -f "$SQUASHFS" ]; then
     echo "ERROR: squashfs not found at $SQUASHFS" >&2
     echo "  The live medium may have been disconnected." >&2
     echo "  Reconnect the disc and run:  bee-remount-medium --wait" >&2
     echo "  Then re-run bee-install." >&2
     exit 1
 fi
@@ -162,10 +165,59 @@ log "  Mounted."
 log "--- Step 5/7: Unpacking filesystem (this takes 10-20 minutes) ---"
 log "  Source: $SQUASHFS"
 log "  Target: $MOUNT_ROOT"
-unsquashfs -f -d "$MOUNT_ROOT" "$SQUASHFS" 2>&1 | \
+
-    grep -E '^\[|^inod|^created|^extract' | \
+# unsquashfs does not support resume, so retry the entire unpack step if the
-    while read -r line; do log "  $line"; done || true
+# source medium disappears mid-copy (e.g. CD physically disconnected).
-log "  Unpack complete."
+UNPACK_ATTEMPTS=0
 UNPACK_MAX=5
 # Unpack retry loop.  Each pass: give up after UNPACK_MAX attempts, wait for
 # the operator when the source image is missing, wipe a previous partial
 # unpack, run unsquashfs, then accept the result only when unsquashfs did not
 # fail fatally and the target contains /etc.
 while true; do
     UNPACK_ATTEMPTS=$(( UNPACK_ATTEMPTS + 1 ))
     if [ "$UNPACK_ATTEMPTS" -gt "$UNPACK_MAX" ]; then
         die "Unpack failed $UNPACK_MAX times — giving up. Check the disc and logs."
     fi
     if [ "$UNPACK_ATTEMPTS" -gt 1 ]; then
         log "  Retry attempt $UNPACK_ATTEMPTS / $UNPACK_MAX ..."
     fi
     # Re-check squashfs is reachable before each attempt
     if [ ! -f "$SQUASHFS" ]; then
         log "  SOURCE LOST: $SQUASHFS not found."
         log "  Reconnect the disc and run 'bee-remount-medium --wait' in another terminal,"
         log "  then press Enter here to retry."
         read -r _
         continue
     fi
     # wipe partial unpack so unsquashfs starts clean
     if [ "$UNPACK_ATTEMPTS" -gt 1 ]; then
         log "  Cleaning partial unpack from $MOUNT_ROOT ..."
         # keep the mount point itself but remove its contents
         find "$MOUNT_ROOT" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
     fi
     # The status of a pipeline is that of its last command (the logging loop),
     # so the exit status of unsquashfs is passed out through a scratch file.
     UNPACK_RC_FILE=$(mktemp) || die "Cannot create temporary file for unsquashfs status."
     {
         rc=0
         unsquashfs -f -d "$MOUNT_ROOT" "$SQUASHFS" 2>&1 || rc=$?
         echo "$rc" > "$UNPACK_RC_FILE"
     } | \
         grep -E '^\[|^inod|^created|^extract|^ERROR|failed' | \
         while IFS= read -r line; do log "  $line"; done || true
     UNPACK_RC=$(cat "$UNPACK_RC_FILE" 2>/dev/null) || UNPACK_RC=""
     rm -f "$UNPACK_RC_FILE"
     # No recorded status means unsquashfs never got to report one: treat as fatal.
     [ -n "$UNPACK_RC" ] || UNPACK_RC=1
     # Check squashfs is still reachable (gone = disc pulled during copy)
     if [ ! -f "$SQUASHFS" ]; then
         log "  WARNING: source medium lost during unpack — will retry after remount."
         log "  Run 'bee-remount-medium --wait' in another terminal, then press Enter."
         read -r _
         continue
     fi
     # unsquashfs documents exit status 2 as "non-fatal errors, extraction
     # continued"; that is logged but accepted.  Any other non-zero status
     # means the unpack was aborted and must be retried.
     UNPACK_FAILED=0
     case "$UNPACK_RC" in
         0) ;;
         2) log "  WARNING: unsquashfs reported non-fatal errors (exit status 2)." ;;
         *)
             log "  WARNING: unsquashfs failed (exit status $UNPACK_RC)."
             UNPACK_FAILED=1
             ;;
     esac
     # Verify the unpack produced a usable root (presence of /etc is a basic check)
     if [ "$UNPACK_FAILED" -eq 0 ] && [ -d "${MOUNT_ROOT}/etc" ]; then
         log "  Unpack complete."
         break
     fi
     if [ "$UNPACK_FAILED" -eq 0 ]; then
         log "  WARNING: unpack produced no /etc — squashfs may be corrupt or incomplete."
     fi
     if [ "$UNPACK_ATTEMPTS" -lt "$UNPACK_MAX" ]; then
         log "  Retrying in 5 s ..."
         sleep 5
     fi
 done
 # ------------------------------------------------------------------
 log "--- Step 6/7: Configuring installed system ---"
--- a/Show More
+++ b/Show More