fix(grub): fix bitmap error and menu rendering

- Convert bee-logo.png to RGBA (color type 6) and strip all metadata chunks (cHRM, bKGD, tIME, tEXt) that confuse GRUB's minimal PNG parser
- Move terminal_output gfxterm before insmod png / theme load so the theme initialises in an active gfxterm context
- Remove echo ASCII art banner from grub.cfg — with gfxterm active and no terminal_box in the theme, echo output renders over the menu area
- Fix icon_heigh typo → icon_height; increase item_height 16→20 with item_padding 0→2 for reliable text rendering in boot_menu

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
update submodule
2026-04-22 22:05:16 +03:00 · 2026-04-22 20:39:27 +03:00 · 2026-04-22 19:01:50 +03:00 · 2026-04-22 19:00:04 +03:00 · 2026-04-22 18:52:10 +03:00 · 2026-04-22 18:52:04 +03:00
98 changed files with 13954 additions and 4385 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 .DS_Store
 dist/
 iso/out/
+build-cache/
+audit/bee
--- a/audit/bee
+++ b/audit/bee
--- a/audit/go.mod
+++ b/audit/go.mod
@@ -5,22 +5,18 @@ go 1.25.0
 replace reanimator/chart => ../internal/chart

 require (
-	github.com/go-analyze/charts v0.5.26
+	modernc.org/sqlite v1.48.0
 	reanimator/chart v0.0.0-00010101000000-000000000000
 )

 require (
 	github.com/dustin/go-humanize v1.0.1 // indirect
-	github.com/go-analyze/bulk v0.1.3 // indirect
-	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/ncruces/go-strftime v1.0.0 // indirect
 	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
-	golang.org/x/image v0.24.0 // indirect
 	golang.org/x/sys v0.42.0 // indirect
-	modernc.org/libc v1.70.0 // indirect
+	modernc.org/libc v1.72.0 // indirect
 	modernc.org/mathutil v1.7.1 // indirect
 	modernc.org/memory v1.11.0 // indirect
-	modernc.org/sqlite v1.48.0 // indirect
 )
--- a/audit/go.sum
+++ b/audit/go.sum
@@ -1,37 +1,51 @@
-github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
 github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
-github.com/go-analyze/bulk v0.1.3 h1:pzRdBqzHDAT9PyROt0SlWE0YqPtdmTcEpIJY0C3vF0c=
-github.com/go-analyze/bulk v0.1.3/go.mod h1:afon/KtFJYnekIyN20H/+XUvcLFjE8sKR1CfpqfClgM=
-github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00ZxY=
-github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
-github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
-github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
 github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
-github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
-golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
+golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
+golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
 golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
-modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
+golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
+golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
+modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
+modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
+modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
+modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
+modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
+modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
+modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
+modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
 modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
 modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
 modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
 modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
+modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
 modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
 modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -19,18 +19,22 @@ import (
 )

 var (
-	DefaultExportDir        = "/appdata/bee/export"
-	DefaultAuditJSONPath    = DefaultExportDir + "/bee-audit.json"
-	DefaultAuditLogPath     = DefaultExportDir + "/bee-audit.log"
-	DefaultWebLogPath       = DefaultExportDir + "/bee-web.log"
-	DefaultNetworkLogPath   = DefaultExportDir + "/bee-network.log"
-	DefaultNvidiaLogPath    = DefaultExportDir + "/bee-nvidia.log"
-	DefaultSSHLogPath       = DefaultExportDir + "/bee-sshsetup.log"
-	DefaultRuntimeJSONPath  = DefaultExportDir + "/runtime-health.json"
-	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
-	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
-	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
-	DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
+	DefaultExportDir                     = "/appdata/bee/export"
+	DefaultAuditJSONPath                 = DefaultExportDir + "/bee-audit.json"
+	DefaultAuditLogPath                  = DefaultExportDir + "/bee-audit.log"
+	DefaultWebLogPath                    = DefaultExportDir + "/bee-web.log"
+	DefaultNetworkLogPath                = DefaultExportDir + "/bee-network.log"
+	DefaultNvidiaLogPath                 = DefaultExportDir + "/bee-nvidia.log"
+	DefaultSSHLogPath                    = DefaultExportDir + "/bee-sshsetup.log"
+	DefaultRuntimeJSONPath               = DefaultExportDir + "/runtime-health.json"
+	DefaultRuntimeLogPath                = DefaultExportDir + "/runtime-health.log"
+	DefaultTechDumpDir                   = DefaultExportDir + "/techdump"
+	DefaultSATBaseDir                    = DefaultExportDir + "/bee-sat"
+	DefaultBeeBenchBaseDir               = DefaultExportDir + "/bee-bench"
+	DefaultBeeBenchAutotuneDir           = DefaultBeeBenchBaseDir + "/autotune"
+	DefaultBeeBenchPerfDir               = DefaultBeeBenchBaseDir + "/perf"
+	DefaultBeeBenchPowerDir              = DefaultBeeBenchBaseDir + "/power"
+	DefaultBeeBenchPowerSourceConfigPath = DefaultBeeBenchBaseDir + "/power-source-autotune.json"
 )

 type App struct {
@@ -84,6 +88,7 @@ type installer interface {
 	InstallToDisk(ctx context.Context, device string, logFile string) error
 	IsLiveMediaInRAM() bool
 	LiveBootSource() platform.LiveBootSource
+	LiveMediaRAMState() platform.LiveMediaRAMState
 	RunInstallToRAM(ctx context.Context, logFunc func(string)) error
 }

@@ -108,6 +113,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource {
 	return a.installer.LiveBootSource()
 }

+func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState {
+	return a.installer.LiveMediaRAMState()
+}
+
 func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 	return a.installer.RunInstallToRAM(ctx, logFunc)
 }
@@ -117,7 +126,9 @@ type satRunner interface {
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
-	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
+	RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error)
+	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
@@ -138,7 +149,7 @@ type satRunner interface {
 	RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
 	RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
-	RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
 }

 type runtimeChecker interface {
@@ -190,6 +201,7 @@ func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, erro
 	}
 	result := collector.Run(runtimeMode)
 	applyLatestSATStatuses(&result.Hardware, DefaultSATBaseDir, a.StatusDB)
+	writePSUStatusesToDB(a.StatusDB, result.Hardware.PowerSupplies)
 	if health, err := ReadRuntimeHealth(DefaultRuntimeJSONPath); err == nil {
 		result.Runtime = &health
 	}
@@ -295,7 +307,7 @@ func (a *App) ExportLatestAudit(target platform.RemovableTarget) (string, error)
 	}
 	filename := fmt.Sprintf("audit-%s-%s.json", sanitizeFilename(hostnameOr("unknown")), time.Now().UTC().Format("20060102-150405"))
 	tmpPath := filepath.Join(os.TempDir(), filename)
-	data, err := os.ReadFile(DefaultAuditJSONPath)
+	data, err := readFileLimited(DefaultAuditJSONPath, 100<<20)
 	if err != nil {
 		return "", err
 	}
@@ -561,16 +573,66 @@ func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOp

 func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
-		baseDir = DefaultBenchmarkBaseDir
+		baseDir = DefaultBeeBenchPerfDir
 	}
+	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "performance", logFunc)
+	if err != nil {
+		return "", err
+	}
+	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }

-func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultBeeBenchPowerDir
+	}
+	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "power-fit", logFunc)
+	if err != nil {
+		return "", err
+	}
+	opts.ServerPowerSource = resolved.SelectedSource
+	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
+}
+
+func (a *App) RunNvidiaPowerSourceAutotuneCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultBeeBenchAutotuneDir
+	}
+	return a.sat.RunNvidiaPowerSourceAutotune(ctx, baseDir, opts, benchmarkKind, logFunc)
+}
+
+func (a *App) LoadBenchmarkPowerAutotune() (*platform.BenchmarkPowerAutotuneConfig, error) {
+	return platform.LoadBenchmarkPowerAutotuneConfig(DefaultBeeBenchPowerSourceConfigPath)
+}
+
+func (a *App) ensureBenchmarkPowerAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (platform.BenchmarkPowerAutotuneConfig, error) {
+	cfgPath := platform.BenchmarkPowerSourceConfigPath(baseDir)
+	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath); err == nil {
+		if logFunc != nil {
+			logFunc(fmt.Sprintf("benchmark autotune: using saved server power source %s", cfg.SelectedSource))
+		}
+		return *cfg, nil
+	}
+	if logFunc != nil {
+		logFunc("benchmark autotune: no saved power source config, running autotune first")
+	}
+	autotuneDir := filepath.Join(filepath.Dir(baseDir), "autotune")
+	if _, err := a.RunNvidiaPowerSourceAutotuneCtx(ctx, autotuneDir, opts, benchmarkKind, logFunc); err != nil {
+		return platform.BenchmarkPowerAutotuneConfig{}, err
+	}
+	cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
+	if err != nil {
+		return platform.BenchmarkPowerAutotuneConfig{}, err
+	}
+	return *cfg, nil
+}
+
+func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
+	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, staggerSec, logFunc)
 }

 func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
@@ -728,8 +790,15 @@ func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platfo
 	return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
 }

+func (a *App) RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNCCLTests(ctx, baseDir, gpuIndices, logFunc)
+}
+
 func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
-	path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
+	path, err := a.RunNCCLTests(ctx, DefaultSATBaseDir, nil, nil)
 	body := "Results: " + path
 	if err != nil && err != context.Canceled {
 		body += "\nERROR: " + err.Error()
@@ -926,6 +995,41 @@ func bodyOr(body, fallback string) string {
 	return body
 }

+// writePSUStatusesToDB records PSU statuses collected during audit into the
+// component-status DB so they are visible in the Hardware Summary card.
+// PSU status is sourced from IPMI (ipmitool fru + sdr) during audit.
+func writePSUStatusesToDB(db *ComponentStatusDB, psus []schema.HardwarePowerSupply) {
+	if db == nil || len(psus) == 0 {
+		return
+	}
+	const source = "audit:ipmi"
+	worstStatus := "OK"
+	for _, psu := range psus {
+		if psu.Status == nil {
+			continue
+		}
+		slot := "?"
+		if psu.Slot != nil {
+			slot = *psu.Slot
+		}
+		st := *psu.Status
+		detail := ""
+		if psu.ErrorDescription != nil {
+			detail = *psu.ErrorDescription
+		}
+		db.Record("psu:"+slot, source, st, detail)
+		switch st {
+		case "Critical":
+			worstStatus = "Critical"
+		case "Warning":
+			if worstStatus != "Critical" {
+				worstStatus = "Warning"
+			}
+		}
+	}
+	db.Record("psu:all", source, worstStatus, "")
+}
+
 func ReadRuntimeHealth(path string) (schema.RuntimeHealth, error) {
 	raw, err := os.ReadFile(path)
 	if err != nil {
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
+	"strings"
 	"testing"

 	"bee/audit/internal/platform"
@@ -122,11 +123,14 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaAutotuneFn       func(string, platform.NvidiaBenchmarkOptions, string) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
 	runNvidiaPulseFn          func(string, int, []int) (string, error)
 	runNvidiaBandwidthFn      func(string, []int) (string, error)
+	runNCCLFn                 func(string, []int) (string, error)
 	runNvidiaTargetedStressFn func(string, int, []int) (string, error)
 	runMemoryFn               func(string) (string, error)
 	runStorageFn              func(string) (string, error)
@@ -154,6 +158,20 @@ func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts plat
 	return f.runNvidiaFn(baseDir)
 }

+func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
+	if f.runNvidiaPowerBenchFn != nil {
+		return f.runNvidiaPowerBenchFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaPowerSourceAutotune(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, _ func(string)) (string, error) {
+	if f.runNvidiaAutotuneFn != nil {
+		return f.runNvidiaAutotuneFn(baseDir, opts, benchmarkKind)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
@@ -161,7 +179,7 @@ func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir
 	return f.runNvidiaFn(baseDir)
 }

-func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ int, _ func(string)) (string, error) {
 	if f.runNvidiaComputeFn != nil {
 		return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
 	}
@@ -279,10 +297,43 @@ func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.Platf
 	return "", nil
 }

-func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
+func (f fakeSAT) RunNCCLTests(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
+	if f.runNCCLFn != nil {
+		return f.runNCCLFn(baseDir, gpuIndices)
+	}
 	return "", nil
 }

+func TestRunNCCLTestsPassesSelectedGPUs(t *testing.T) {
+	t.Parallel()
+
+	var gotBaseDir string
+	var gotGPUIndices []int
+	a := &App{
+		sat: fakeSAT{
+			runNCCLFn: func(baseDir string, gpuIndices []int) (string, error) {
+				gotBaseDir = baseDir
+				gotGPUIndices = append([]int(nil), gpuIndices...)
+				return "/tmp/nccl-tests.tar.gz", nil
+			},
+		},
+	}
+
+	path, err := a.RunNCCLTests(context.Background(), "/tmp/sat", []int{3, 1}, nil)
+	if err != nil {
+		t.Fatalf("RunNCCLTests error: %v", err)
+	}
+	if path != "/tmp/nccl-tests.tar.gz" {
+		t.Fatalf("path=%q want %q", path, "/tmp/nccl-tests.tar.gz")
+	}
+	if gotBaseDir != "/tmp/sat" {
+		t.Fatalf("baseDir=%q want %q", gotBaseDir, "/tmp/sat")
+	}
+	if len(gotGPUIndices) != 2 || gotGPUIndices[0] != 3 || gotGPUIndices[1] != 1 {
+		t.Fatalf("gpuIndices=%v want [3 1]", gotGPUIndices)
+	}
+}
+
 func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 	t.Parallel()

@@ -542,8 +593,6 @@ func TestActionResultsUseFallbackBody(t *testing.T) {
 }

 func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
-	t.Parallel()
-
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -580,8 +629,6 @@ func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
 }

 func TestExportSupportBundleResultDoesNotPretendSuccessOnError(t *testing.T) {
-	t.Parallel()
-
 	tmp := t.TempDir()
 	oldExportDir := DefaultExportDir
 	DefaultExportDir = tmp
@@ -643,8 +690,6 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
 }

 func TestRunSATDefaultsToExportDir(t *testing.T) {
-	t.Parallel()
-
 	oldSATBaseDir := DefaultSATBaseDir
 	DefaultSATBaseDir = "/tmp/export/bee-sat"
 	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
@@ -773,6 +818,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
 		t.Fatal(err)
 	}
+	if err := os.MkdirAll(filepath.Join(exportDir, "bee-bench"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json"), []byte(`{"version":1,"updated_at":"2026-04-20T01:02:03Z","selected_source":"sdr_psu_input","reason":"selected lowest relative error"}`), 0644); err != nil {
+		t.Fatal(err)
+	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -800,6 +851,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	tr := tar.NewReader(gzr)
 	var names []string
 	var auditJSON string
+	var manifest string
 	for {
 		hdr, err := tr.Next()
 		if errors.Is(err, io.EOF) {
@@ -816,6 +868,13 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 			}
 			auditJSON = string(body)
 		}
+		if strings.HasSuffix(hdr.Name, "/manifest.txt") {
+			body, err := io.ReadAll(tr)
+			if err != nil {
+				t.Fatalf("read manifest entry: %v", err)
+			}
+			manifest = string(body)
+		}
 	}

 	for _, want := range []string{
@@ -859,6 +918,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
 		t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
 	}
+	if !contains(manifest, "files:") {
+		t.Fatalf("support bundle manifest missing files section:\n%s", manifest)
+	}
+	if !strings.Contains(manifest, "power_autotune_selected_source=sdr_psu_input") {
+		t.Fatalf("support bundle manifest missing autotune source:\n%s", manifest)
+	}
 }

 func TestMainBanner(t *testing.T) {
--- a/audit/internal/app/atomic_write.go
+++ b/audit/internal/app/atomic_write.go
@@ -2,10 +2,29 @@ package app

 import (
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
 )

+// readFileLimited reads path into memory, refusing files larger than maxBytes.
+// Prevents OOM on corrupted or unexpectedly large data files.
+func readFileLimited(path string, maxBytes int64) ([]byte, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	data, err := io.ReadAll(io.LimitReader(f, maxBytes+1))
+	if err != nil {
+		return nil, err
+	}
+	if int64(len(data)) > maxBytes {
+		return nil, fmt.Errorf("file %s too large (exceeds %d bytes)", path, maxBytes)
+	}
+	return data, nil
+}
+
 func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return fmt.Errorf("mkdir %s: %w", filepath.Dir(path), err)
--- a/audit/internal/app/component_status_db.go
+++ b/audit/internal/app/component_status_db.go
@@ -46,7 +46,7 @@ func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) {
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return nil, err
 	}
-	data, err := os.ReadFile(path)
+	data, err := readFileLimited(path, 10<<20)
 	if err != nil && !os.IsNotExist(err) {
 		return nil, err
 	}
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -2,6 +2,7 @@ package app

 import (
 	"archive/tar"
+	"bee/audit/internal/platform"
 	"compress/gzip"
 	"fmt"
 	"io"
@@ -22,6 +23,8 @@ var supportBundleServices = []string{
 	"bee-selfheal.service",
 	"bee-selfheal.timer",
 	"bee-sshsetup.service",
+	"nvidia-dcgm.service",
+	"nvidia-fabricmanager.service",
 }

 var supportBundleCommands = []struct {
@@ -48,13 +51,50 @@ else
 fi
 `}},
 	{name: "system/nvidia-smi-q.txt", cmd: []string{"nvidia-smi", "-q"}},
+	{name: "system/nvidia-smi-topo.txt", cmd: []string{"sh", "-c", `
+if command -v nvidia-smi >/dev/null 2>&1; then
+  nvidia-smi topo -m 2>&1 || true
+else
+  echo "nvidia-smi not found"
+fi
+`}},
+	{name: "system/systemctl-nvidia-units.txt", cmd: []string{"sh", "-c", `
+if ! command -v systemctl >/dev/null 2>&1; then
+  echo "systemctl not found"
+  exit 0
+fi
+echo "=== unit files ==="
+systemctl list-unit-files --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
+echo
+echo "=== active units ==="
+systemctl list-units --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
+echo
+echo "=== failed units ==="
+systemctl --failed --no-pager 2>&1 | grep -iE 'nvidia|fabric' || echo "no failed nvidia/fabric units"
+`}},
+	{name: "system/fabric-manager-paths.txt", cmd: []string{"sh", "-c", `
+found=0 # set when any candidate exists; ls over the full list fails if even one is absent
+for candidate in \
+  /usr/bin/nvidia-fabricmanager /usr/bin/nv-fabricmanager \
+  /usr/bin/nvidia-fabricmanagerd /usr/bin/nvlsm; do
+  if [ -e "$candidate" ]; then
+    found=1
+    echo "=== $candidate ==="
+    ls -l "$candidate" 2>&1 || true
+    echo
+  fi
+done
+if [ "$found" -eq 0 ]; then
+  echo "no fabric manager binaries found"
+fi
+`}},
 	{name: "system/lspci-nvidia-bridges-vv.txt", cmd: []string{"sh", "-c", `
 if ! command -v lspci >/dev/null 2>&1; then
  echo "lspci not found"
  exit 0
 fi
 found=0
-for gpu in $(lspci -Dn | awk '$3 ~ /^10de:/ {print $1}'); do
+	for gpu in $(lspci -Dn | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ {print $1}'); do
  found=1
  echo "=== GPU $gpu ==="
  lspci -s "$gpu" -vv 2>&1 || true
@@ -73,8 +113,13 @@ fi
 	{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
 for d in /sys/bus/pci/devices/*/; do
  vendor=$(cat "$d/vendor" 2>/dev/null)
-  [ "$vendor" = "0x10de" ] || continue
-  dev=$(basename "$d")
+	  [ "$vendor" = "0x10de" ] || continue
+	  class=$(cat "$d/class" 2>/dev/null)
+	  case "$class" in
+	    0x030000|0x030200) ;;
+	    *) continue ;;
+	  esac
+	  dev=$(basename "$d")
  echo "=== $dev ==="
  for f in current_link_speed current_link_width max_link_speed max_link_width; do
    printf "  %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
@@ -190,6 +235,10 @@ var supportBundleOptionalFiles = []struct {
 }{
 	{name: "system/kern.log", src: "/var/log/kern.log"},
 	{name: "system/syslog.txt", src: "/var/log/syslog"},
+	{name: "system/fabricmanager.log", src: "/var/log/fabricmanager.log"},
+	{name: "system/nvlsm.log", src: "/var/log/nvlsm.log"},
+	{name: "system/fabricmanager/fabricmanager.log", src: "/var/log/fabricmanager/fabricmanager.log"},
+	{name: "system/fabricmanager/nvlsm.log", src: "/var/log/fabricmanager/nvlsm.log"},
 }

 const supportBundleGlob = "????-??-?? (BEE-SP*)*.tar.gz"
@@ -208,7 +257,7 @@ func BuildSupportBundle(exportDir string) (string, error) {

 	now := time.Now().UTC()
 	date := now.Format("2006-01-02")
-	tod := now.Format("15:04:05")
+	tod := now.Format("150405")
 	ver := bundleVersion()
 	model := serverModelForBundle()
 	sn := serverSerialForBundle()
@@ -376,6 +425,13 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	fmt.Fprintf(&body, "host=%s\n", hostnameOr("unknown"))
 	fmt.Fprintf(&body, "generated_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
 	fmt.Fprintf(&body, "export_dir=%s\n", exportDir)
+	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json")); err == nil && cfg != nil {
+		fmt.Fprintf(&body, "power_autotune_selected_source=%s\n", cfg.SelectedSource)
+		fmt.Fprintf(&body, "power_autotune_updated_at=%s\n", cfg.UpdatedAt.UTC().Format(time.RFC3339))
+		if strings.TrimSpace(cfg.Reason) != "" {
+			fmt.Fprintf(&body, "power_autotune_reason=%s\n", cfg.Reason)
+		}
+	}
 	fmt.Fprintf(&body, "\nfiles:\n")

 	var files []string
--- a/audit/internal/collector/nic_mellanox.go
+++ b/audit/internal/collector/nic_mellanox.go
@@ -179,11 +179,3 @@ func commandOutputWithTimeout(timeout time.Duration, name string, args ...string
 	defer cancel()
 	return exec.CommandContext(ctx, name, args...).Output()
 }
-
-func interfaceHasCarrier(iface string) bool {
-	raw, err := readNetCarrierFile(iface)
-	if err != nil {
-		return false
-	}
-	return strings.TrimSpace(raw) == "1"
-}
--- a/audit/internal/collector/nic_telemetry.go
+++ b/audit/internal/collector/nic_telemetry.go
@@ -58,12 +58,10 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
 			}
 		}

-		if interfaceHasCarrier(iface) {
-			if out, err := ethtoolModuleQuery(iface); err == nil {
-				if injectSFPDOMTelemetry(&devs[i], out) {
-					enriched++
-					continue
-				}
+		if out, err := ethtoolModuleQuery(iface); err == nil {
+			if injectSFPDOMTelemetry(&devs[i], out) {
+				enriched++
+				continue
 			}
 		}
 		if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
@@ -115,8 +113,38 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 		}
 		key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
 		val := strings.TrimSpace(trimmed[idx+1:])
+		if val == "" || strings.EqualFold(val, "not supported") || strings.EqualFold(val, "unknown") {
+			continue
+		}

 		switch {
+		case key == "identifier":
+			s := parseSFPIdentifier(val)
+			dev.SFPIdentifier = &s
+			t := true
+			dev.SFPPresent = &t
+			changed = true
+		case key == "connector":
+			s := parseSFPConnector(val)
+			dev.SFPConnector = &s
+			changed = true
+		case key == "vendor name":
+			s := strings.TrimSpace(val)
+			dev.SFPVendor = &s
+			changed = true
+		case key == "vendor pn":
+			s := strings.TrimSpace(val)
+			dev.SFPPartNumber = &s
+			changed = true
+		case key == "vendor sn":
+			s := strings.TrimSpace(val)
+			dev.SFPSerialNumber = &s
+			changed = true
+		case strings.Contains(key, "laser wavelength"):
+			if f, ok := firstFloat(val); ok {
+				dev.SFPWavelengthNM = &f
+				changed = true
+			}
 		case strings.Contains(key, "module temperature"):
 			if f, ok := firstFloat(val); ok {
 				dev.SFPTemperatureC = &f
@@ -147,12 +175,61 @@ func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
 	return changed
 }

+// parseSFPIdentifier returns the transceiver type named inside the parentheses
+// of a raw ethtool identifier value, e.g. "0x03 (SFP)" → "SFP". A value with
+// no parenthesised label is returned unchanged.
+func parseSFPIdentifier(val string) string {
+	label := extractParens(val)
+	if label == "" {
+		return val
+	}
+	return label
+}
+
+// parseSFPConnector returns the connector type named inside the parentheses
+// of a raw ethtool connector value, e.g. "0x07 (LC)" → "LC". A value with no
+// parenthesised label is returned unchanged.
+func parseSFPConnector(val string) string {
+	label := extractParens(val)
+	if label == "" {
+		return val
+	}
+	return label
+}
+
+// parenRe captures the text between the first "(" and the next ")".
+var parenRe = regexp.MustCompile(`\(([^)]+)\)`)
+
+// extractParens returns the whitespace-trimmed contents of the first
+// non-empty parenthesised group in s, or "" when s contains none.
+func extractParens(s string) string {
+	groups := parenRe.FindStringSubmatch(s)
+	if groups == nil {
+		// No match: the pattern has exactly one capture group, so a match
+		// always yields two elements and a miss yields nil.
+		return ""
+	}
+	return strings.TrimSpace(groups[1])
+}
+
 func parseSFPDOM(raw string) map[string]any {
 	dev := schema.HardwarePCIeDevice{}
 	if !injectSFPDOMTelemetry(&dev, raw) {
 		return map[string]any{}
 	}
 	out := map[string]any{}
+	if dev.SFPPresent != nil {
+		out["sfp_present"] = *dev.SFPPresent
+	}
+	if dev.SFPIdentifier != nil {
+		out["sfp_identifier"] = *dev.SFPIdentifier
+	}
+	if dev.SFPConnector != nil {
+		out["sfp_connector"] = *dev.SFPConnector
+	}
+	if dev.SFPVendor != nil {
+		out["sfp_vendor"] = *dev.SFPVendor
+	}
+	if dev.SFPPartNumber != nil {
+		out["sfp_part_number"] = *dev.SFPPartNumber
+	}
+	if dev.SFPSerialNumber != nil {
+		out["sfp_serial_number"] = *dev.SFPSerialNumber
+	}
+	if dev.SFPWavelengthNM != nil {
+		out["sfp_wavelength_nm"] = *dev.SFPWavelengthNM
+	}
 	if dev.SFPTemperatureC != nil {
 		out["sfp_temperature_c"] = *dev.SFPTemperatureC
 	}
--- a/audit/internal/collector/nic_telemetry_test.go
+++ b/audit/internal/collector/nic_telemetry_test.go
@@ -122,10 +122,7 @@ func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T)
 	readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
 	readNetCarrierFile = func(string) (string, error) { return "0", nil }
 	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
-	ethtoolModuleQuery = func(string) (string, error) {
-		t.Fatal("ethtool -m should not be called without carrier")
-		return "", nil
-	}
+	ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("no module") }

 	class := "EthernetController"
 	bdf := "0000:18:00.0"
--- a/audit/internal/collector/nvidia.go
+++ b/audit/internal/collector/nvidia.go
@@ -15,6 +15,7 @@ const nvidiaVendorID = 0x10de
 type nvidiaGPUInfo struct {
 	Index              int
 	BDF                string
+	Name               string
 	Serial             string
 	VBIOS              string
 	TemperatureC       *float64
@@ -73,6 +74,9 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 			continue
 		}

+		if v := strings.TrimSpace(info.Name); v != "" {
+			devs[i].Model = &v
+		}
 		if v := strings.TrimSpace(info.Serial); v != "" {
 			devs[i].SerialNumber = &v
 		}
@@ -99,7 +103,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
 	out, err := exec.Command(
 		"nvidia-smi",
-		"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
+		"--query-gpu=index,pci.bus_id,name,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
@@ -123,8 +127,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		if len(rec) == 0 {
 			continue
 		}
-		if len(rec) < 13 {
-			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 13", len(rec))
+		if len(rec) < 14 {
+			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 14", len(rec))
 		}

 		bdf := normalizePCIeBDF(rec[1])
@@ -135,17 +139,18 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		info := nvidiaGPUInfo{
 			Index:              parseRequiredInt(rec[0]),
 			BDF:                bdf,
-			Serial:             strings.TrimSpace(rec[2]),
-			VBIOS:              strings.TrimSpace(rec[3]),
-			TemperatureC:       parseMaybeFloat(rec[4]),
-			PowerW:             parseMaybeFloat(rec[5]),
-			ECCUncorrected:     parseMaybeInt64(rec[6]),
-			ECCCorrected:       parseMaybeInt64(rec[7]),
-			HWSlowdown:         parseMaybeBool(rec[8]),
-			PCIeLinkGenCurrent: parseMaybeInt(rec[9]),
-			PCIeLinkGenMax:     parseMaybeInt(rec[10]),
-			PCIeLinkWidthCur:   parseMaybeInt(rec[11]),
-			PCIeLinkWidthMax:   parseMaybeInt(rec[12]),
+			Name:               strings.TrimSpace(rec[2]),
+			Serial:             strings.TrimSpace(rec[3]),
+			VBIOS:              strings.TrimSpace(rec[4]),
+			TemperatureC:       parseMaybeFloat(rec[5]),
+			PowerW:             parseMaybeFloat(rec[6]),
+			ECCUncorrected:     parseMaybeInt64(rec[7]),
+			ECCCorrected:       parseMaybeInt64(rec[8]),
+			HWSlowdown:         parseMaybeBool(rec[9]),
+			PCIeLinkGenCurrent: parseMaybeInt(rec[10]),
+			PCIeLinkGenMax:     parseMaybeInt(rec[11]),
+			PCIeLinkWidthCur:   parseMaybeInt(rec[12]),
+			PCIeLinkWidthMax:   parseMaybeInt(rec[13]),
 		}
 		result[bdf] = info
 	}
--- a/audit/internal/collector/nvidia_test.go
+++ b/audit/internal/collector/nvidia_test.go
@@ -6,7 +6,7 @@ import (
 )

 func TestParseNVIDIASMIQuery(t *testing.T) {
-	raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
+	raw := "0, 00000000:65:00.0, NVIDIA H100 80GB HBM3, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
 	byBDF, err := parseNVIDIASMIQuery(raw)
 	if err != nil {
 		t.Fatalf("parse failed: %v", err)
@@ -16,6 +16,9 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
 	if !ok {
 		t.Fatalf("gpu by normalized bdf not found")
 	}
+	if gpu.Name != "NVIDIA H100 80GB HBM3" {
+		t.Fatalf("name: got %q", gpu.Name)
+	}
 	if gpu.Serial != "GPU-SERIAL-1" {
 		t.Fatalf("serial: got %q", gpu.Serial)
 	}
--- a/audit/internal/collector/pcie.go
+++ b/audit/internal/collector/pcie.go
@@ -2,6 +2,7 @@ package collector

 import (
 	"bee/audit/internal/schema"
+	"fmt"
 	"log/slog"
 	"os/exec"
 	"strconv"
@@ -79,6 +80,25 @@ func shouldIncludePCIeDevice(class, vendor, device string) bool {
 		}
 	}

+	// Exclude BMC/management virtual VGA adapters — these are firmware video chips,
+	// not real GPUs, and pollute the GPU inventory (e.g. iBMC, iDRAC, iLO VGA).
+	if strings.Contains(c, "vga") || strings.Contains(c, "display") || strings.Contains(c, "3d") {
+		bmcPatterns := []string{
+			"management system chip",
+			"management controller",
+			"ibmc",
+			"idrac",
+			"ilo vga",
+			"aspeed",
+			"matrox",
+		}
+		for _, bad := range bmcPatterns {
+			if strings.Contains(d, bad) {
+				return false
+			}
+		}
+	}
+
 	if strings.Contains(v, "advanced micro devices") || strings.Contains(v, "[amd]") {
 		internalAMDPatterns := []string{
 			"dummy function",
@@ -153,6 +173,9 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {

 	// SVendor/SDevice available but not in schema — skip

+	// Warn if PCIe link is running below its maximum negotiated speed.
+	applyPCIeLinkSpeedWarning(&dev)
+
 	return dev
 }

@@ -222,6 +245,41 @@ func readPCIStringAttribute(bdf, attribute string) (string, bool) {
 	return value, true
 }

+// applyPCIeLinkSpeedWarning sets the device status to Warning, and fills
+// ErrorDescription with the current and maximum speeds, when the current PCIe
+// link speed is below the maximum link speed.
+//
+// The device is left untouched when either speed is missing or is not a
+// recognised "GenN" string. pcieLinkSpeedRank maps unrecognised values to 0,
+// so comparing the ranks directly would flag a link whose current speed merely
+// could not be parsed as degraded.
+func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
+	if dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
+		return
+	}
+	current := pcieLinkSpeedRank(*dev.LinkSpeed)
+	maximum := pcieLinkSpeedRank(*dev.MaxLinkSpeed)
+	if current == 0 || maximum == 0 || current >= maximum {
+		return
+	}
+	warn := statusWarning
+	dev.Status = &warn
+	desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
+	dev.ErrorDescription = &desc
+}
+
+// pcieLinkSpeedRank maps a normalized generation string to its number
+// ("Gen1" → 1 … "Gen6" → 6). The match is exact and case-sensitive; any other
+// input yields 0, which callers should read as "unknown".
+func pcieLinkSpeedRank(gen string) int {
+	known := [...]string{"Gen1", "Gen2", "Gen3", "Gen4", "Gen5", "Gen6"}
+	for i, name := range known {
+		if name == gen {
+			return i + 1
+		}
+	}
+	return 0
+}
+
 func normalizePCILinkSpeed(raw string) string {
 	raw = strings.TrimSpace(strings.ToLower(raw))
 	switch {
--- a/audit/internal/collector/pcie_filter_test.go
+++ b/audit/internal/collector/pcie_filter_test.go
@@ -1,6 +1,7 @@
 package collector

 import (
+	"bee/audit/internal/schema"
 	"encoding/json"
 	"strings"
 	"testing"
@@ -29,6 +30,8 @@ func TestShouldIncludePCIeDevice(t *testing.T) {
 		{name: "raid", class: "RAID bus controller", want: true},
 		{name: "nvme", class: "Non-Volatile memory controller", want: true},
 		{name: "vga", class: "VGA compatible controller", want: true},
+		{name: "ibmc vga", class: "VGA compatible controller", vendor: "Huawei Technologies Co., Ltd.", device: "Hi171x Series [iBMC Intelligent Management system chip w/VGA support]", want: false},
+		{name: "aspeed vga", class: "VGA compatible controller", vendor: "ASPEED Technology, Inc.", device: "ASPEED Graphics Family", want: false},
 		{name: "other encryption controller", class: "Encryption controller", vendor: "Intel Corporation", device: "QuickAssist", want: true},
 	}

@@ -139,3 +142,77 @@ func TestNormalizePCILinkSpeed(t *testing.T) {
 		}
 	}
 }
+
+// TestApplyPCIeLinkSpeedWarning checks that applyPCIeLinkSpeedWarning raises a
+// warning (with a description naming the current speed) only when the current
+// link speed is below the maximum, and leaves the device alone when the speeds
+// match or either one is missing.
+func TestApplyPCIeLinkSpeedWarning(t *testing.T) {
+	ptr := func(s string) *string { return &s }
+
+	tests := []struct {
+		name        string
+		linkSpeed   *string
+		maxSpeed    *string
+		wantWarning bool
+		wantGenIn   string // substring expected in ErrorDescription when warning
+	}{
+		{
+			name:        "degraded Gen1 vs Gen5",
+			linkSpeed:   ptr("Gen1"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: true,
+			wantGenIn:   "Gen1",
+		},
+		{
+			name:        "at max Gen5",
+			linkSpeed:   ptr("Gen5"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: false,
+		},
+		{
+			name:        "degraded Gen4 vs Gen5",
+			linkSpeed:   ptr("Gen4"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: true,
+			wantGenIn:   "Gen4",
+		},
+		{
+			name:        "missing current speed — no warning",
+			linkSpeed:   nil,
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: false,
+		},
+		{
+			name:        "missing max speed — no warning",
+			linkSpeed:   ptr("Gen1"),
+			maxSpeed:    nil,
+			wantWarning: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Every case starts from an OK device so a warning is observable.
+			dev := schema.HardwarePCIeDevice{}
+			ok := statusOK
+			dev.Status = &ok
+			dev.LinkSpeed = tt.linkSpeed
+			dev.MaxLinkSpeed = tt.maxSpeed
+
+			applyPCIeLinkSpeedWarning(&dev)
+
+			gotWarn := dev.Status != nil && *dev.Status == statusWarning
+			if gotWarn != tt.wantWarning {
+				t.Fatalf("wantWarning=%v gotWarning=%v (status=%v)", tt.wantWarning, gotWarn, dev.Status)
+			}
+			if tt.wantWarning {
+				if dev.ErrorDescription == nil {
+					t.Fatal("expected ErrorDescription to be set")
+				}
+				if !strings.Contains(*dev.ErrorDescription, tt.wantGenIn) {
+					t.Fatalf("ErrorDescription %q does not contain %q", *dev.ErrorDescription, tt.wantGenIn)
+				}
+			} else {
+				if dev.ErrorDescription != nil {
+					t.Fatalf("unexpected ErrorDescription: %s", *dev.ErrorDescription)
+				}
+			}
+		})
+	}
+}
--- a/audit/internal/collector/psu.go
+++ b/audit/internal/collector/psu.go
@@ -160,11 +160,57 @@ type psuSDR struct {
 }

 var psuSlotPatterns = []*regexp.Regexp{
-	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),
-	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),
-	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),
-	regexp.MustCompile(`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`),
-	regexp.MustCompile(`(?i)\bbay\s*([0-9]+)\b`),
+	// MSI/underscore style: PSU1_POWER_IN, PSU2_POWER_OUT — underscore is \w so \b
+	// does not fire after the digit; match explicitly with underscore terminator.
+	regexp.MustCompile(`(?i)\bpsu([0-9]+)_`),
+	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),                    // PSU1, PS1, ps 2
+	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),                      // PS 6, PS6
+	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),                     // PWS1
+	regexp.MustCompile(`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`), // Power Supply 1, Power Supply Bay 3
+	regexp.MustCompile(`(?i)\bbay\s*([0-9]+)\b`),                     // Bay 1
+	// Fallback for xFusion-style generic numbered PSU sensors (Power1, Power2, …).
+	// Must be last: "power supply N" is already caught by the pattern above.
+	regexp.MustCompile(`(?i)\bpower([0-9]+)\b`),
+}
+
+// isPSUInputPower reports whether a sensor name contains one of the keywords
+// used for AC-input power readings. The keywords are lower case, so name is
+// expected to be lower-cased by the caller. Vendor examples:
+//
+//	MSI:     PSU1_POWER_IN, PSU1_PIN
+//	MLT:     PSU1_PIN
+//	xFusion: (matched via default fallback — no explicit keyword)
+//	HPE:     PS1 Input Power, PS1 Input Watts
+func isPSUInputPower(name string) bool {
+	markers := [...]string{
+		"input power",
+		"input watts",
+		"_pin",
+		" pin",
+		"_power_in",
+		"power_in",
+	}
+	for _, marker := range markers {
+		if strings.Contains(name, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// isPSUOutputPower reports whether a sensor name contains one of the keywords
+// used for DC-output power readings; "power supply bay" / "psu bay" sensors
+// are counted as output power too. The keywords are lower case, so name is
+// expected to be lower-cased by the caller. Vendor examples:
+//
+//	MSI:     PSU1_POWER_OUT
+//	MLT:     PSU1_POUT
+//	xFusion: PS1 POut
+func isPSUOutputPower(name string) bool {
+	markers := [...]string{
+		"output power",
+		"output watts",
+		"_pout",
+		" pout",
+		"_power_out",
+		"power_out",
+		"power supply bay",
+		"psu bay",
+	}
+	for _, marker := range markers {
+		if strings.Contains(name, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// parseBoundedFloat parses the numeric part of an SDR value field via
+// parseFloatPtr and keeps it only when it lies in (0, max]. Unparseable,
+// zero, negative and over-limit readings all yield nil; they are treated as
+// a missing, powered-off or faulty sensor.
+func parseBoundedFloat(raw string, max float64) *float64 {
+	parsed := parseFloatPtr(raw)
+	switch {
+	case parsed == nil:
+		return nil
+	case *parsed <= 0, *parsed > max:
+		return nil
+	}
+	return parsed
 }

 func parsePSUSDR(raw string) map[int]psuSDR {
@@ -194,24 +240,59 @@ func parsePSUSDR(raw string) map[int]psuSDR {

 		lowerName := strings.ToLower(name)
 		switch {
-		case strings.Contains(lowerName, "input power"):
-			entry.inputPowerW = parseFloatPtr(value)
-		case strings.Contains(lowerName, "output power"):
-			entry.outputPowerW = parseFloatPtr(value)
-		case strings.Contains(lowerName, "power supply bay"), strings.Contains(lowerName, "psu bay"):
-			entry.outputPowerW = parseFloatPtr(value)
+		case isPSUInputPower(lowerName):
+			entry.inputPowerW = parseBoundedFloat(value, 6000)
+		case isPSUOutputPower(lowerName):
+			entry.outputPowerW = parseBoundedFloat(value, 6000)
 		case strings.Contains(lowerName, "input voltage"), strings.Contains(lowerName, "ac input"):
 			entry.inputVoltage = parseFloatPtr(value)
 		case strings.Contains(lowerName, "temp"):
 			entry.temperatureC = parseFloatPtr(value)
 		case strings.Contains(lowerName, "health"), strings.Contains(lowerName, "remaining life"), strings.Contains(lowerName, "life remaining"):
 			entry.healthPct = parsePercentPtr(value)
+		default:
+			// Generic PSU power reading: sensor matched a slot pattern but carries
+			// no input/output keyword (e.g. xFusion "Power1", "Power2"). Treat as
+			// AC input if the value looks like wattage and no better data is set yet.
+			if entry.inputPowerW == nil {
+				entry.inputPowerW = parseBoundedFloat(value, 6000)
+			}
 		}
 		out[slot] = entry
 	}
 	return out
 }

+// PSUSlotPower holds SDR power readings for one PSU slot.
+// Slot key used by PSUSlotsFromSDR is the 0-based index string,
+// matching HardwarePowerSupply.Slot in the audit schema.
+// Unset readings are nil and are omitted from the JSON encoding.
+type PSUSlotPower struct {
+	// InputW is the input power reading copied from the parsed SDR entry.
+	InputW  *float64 `json:"input_w,omitempty"`
+	// OutputW is the output power reading copied from the parsed SDR entry.
+	OutputW *float64 `json:"output_w,omitempty"`
+	// Status is the status string of the parsed SDR entry, passed through as is.
+	Status  string   `json:"status,omitempty"`
+}
+
+// PSUSlotsFromSDR converts `ipmitool sdr` output into per-slot PSU power data.
+// It reuses parsePSUSDR, so it recognises the same sensor naming schemes as the
+// hardware audit collector (e.g. MSI PSU1_POWER_IN, xFusion Power1 / PS1 POut,
+// MLT PSU1_PIN).
+//
+// The result is keyed by the 0-based slot index rendered as a string, matching
+// HardwarePowerSupply.Slot. It is nil when no PSU sensor was recognised.
+func PSUSlotsFromSDR(sdrOutput string) map[string]PSUSlotPower {
+	parsed := parsePSUSDR(sdrOutput)
+	if len(parsed) == 0 {
+		return nil
+	}
+	slots := make(map[string]PSUSlotPower, len(parsed))
+	for number, reading := range parsed {
+		// parsePSUSDR keys by the number found in the sensor name; the audit
+		// schema uses a 0-based slot, hence the shift by one.
+		slots[strconv.Itoa(number-1)] = PSUSlotPower{
+			InputW:  reading.inputPowerW,
+			OutputW: reading.outputPowerW,
+			Status:  reading.status,
+		}
+	}
+	return slots
+}
+
 func synthesizePSUsFromSDR(sdr map[int]psuSDR) []schema.HardwarePowerSupply {
 	if len(sdr) == 0 {
 		return nil
--- a/audit/internal/collector/psu_sdr_test.go
+++ b/audit/internal/collector/psu_sdr_test.go
@@ -49,6 +49,10 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 		{name: "PWS1 Status", want: 1},
 		{name: "Power Supply Bay 8", want: 8},
 		{name: "PS 6 Input Power", want: 6},
+		// MSI underscore format — \b does not fire between digit and '_'
+		{name: "PSU1_POWER_IN", want: 1},
+		{name: "PSU2_POWER_OUT", want: 2},
+		{name: "PSU4_STATUS", want: 4},
 	}

 	for _, tt := range tests {
@@ -59,6 +63,31 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 	}
 }

+// TestParsePSUSDRMSIFormat feeds parsePSUSDR an MSI-style listing whose sensor
+// names use underscores (PSUn_POWER_IN / PSUn_POWER_OUT / PSUn_STATUS) and
+// checks that two slots are found with the expected input/output wattage.
+func TestParsePSUSDRMSIFormat(t *testing.T) {
+	t.Parallel()
+	raw := `
+PSU1_STATUS      | F1h | ok
+PSU1_POWER_OUT   | 928 Watts | ok
+PSU1_POWER_IN    | 976 Watts | ok
+PSU2_STATUS      | F2h | ok
+PSU2_POWER_OUT   | 944 Watts | ok
+PSU2_POWER_IN    | 992 Watts | ok
+`
+	got := parsePSUSDR(raw)
+	if len(got) != 2 {
+		t.Fatalf("len(got)=%d want 2", len(got))
+	}
+	if got[1].inputPowerW == nil || *got[1].inputPowerW != 976 {
+		t.Fatalf("psu1 input power=%v want 976", got[1].inputPowerW)
+	}
+	if got[1].outputPowerW == nil || *got[1].outputPowerW != 928 {
+		t.Fatalf("psu1 output power=%v want 928", got[1].outputPowerW)
+	}
+	if got[2].inputPowerW == nil || *got[2].inputPowerW != 992 {
+		t.Fatalf("psu2 input power=%v want 992", got[2].inputPowerW)
+	}
+}
+
 func TestSynthesizePSUsFromSDR(t *testing.T) {
 	t.Parallel()

--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_power_autotune.go
+++ b/audit/internal/platform/benchmark_power_autotune.go
@@ -0,0 +1,735 @@
+package platform
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"math"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+)
+
+// Settings for the benchmark power-source autotune.
+const (
+	// Version written by SaveBenchmarkPowerAutotuneConfig when the config has none set.
+	benchmarkPowerAutotuneVersion         = 1
+	// Presumably the idle-phase duration in seconds — the use site is outside this view.
+	benchmarkPowerAutotuneIdleSec         = 60
+	// Presumably the load-phase duration in seconds — the use site is outside this view.
+	benchmarkPowerAutotuneLoadSec         = 90
+	// Seconds between telemetry samples; also the default sampler interval.
+	benchmarkPowerAutotuneSampleInterval  = 3
+	// Config location used when BenchmarkPowerSourceConfigPath gets an empty base directory.
+	defaultBenchmarkPowerSourceConfigPath = "/appdata/bee/export/bee-bench/power-source-autotune.json"
+)
+
+// BenchmarkPowerSourceConfigPath returns the location of the power-source
+// autotune config for baseDir. The file lives in the parent directory of
+// baseDir; an empty or whitespace-only baseDir selects the built-in default
+// path.
+//
+// NOTE(review): the config is placed next to baseDir, not inside it — confirm
+// that callers pass a directory located under the bee-bench folder.
+func BenchmarkPowerSourceConfigPath(baseDir string) string {
+	trimmed := strings.TrimSpace(baseDir)
+	if trimmed != "" {
+		return filepath.Join(filepath.Dir(trimmed), "power-source-autotune.json")
+	}
+	return defaultBenchmarkPowerSourceConfigPath
+}
+
+func LoadBenchmarkPowerAutotuneConfig(path string) (*BenchmarkPowerAutotuneConfig, error) {
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+	var cfg BenchmarkPowerAutotuneConfig
+	if err := json.Unmarshal(raw, &cfg); err != nil {
+		return nil, err
+	}
+	if strings.TrimSpace(cfg.SelectedSource) == "" {
+		return nil, fmt.Errorf("autotune config missing selected_source")
+	}
+	return &cfg, nil
+}
+
+// SaveBenchmarkPowerAutotuneConfig writes cfg as indented JSON to path,
+// creating the parent directory when needed. A non-positive cfg.Version is
+// replaced by the current config version before encoding.
+//
+// The data is written to "<path>.tmp" first and then renamed over path, so
+// readers never observe a half-written config. If the write or the rename
+// fails, the temporary file is removed (best effort) instead of being left
+// behind next to the real config.
+func SaveBenchmarkPowerAutotuneConfig(path string, cfg BenchmarkPowerAutotuneConfig) error {
+	if strings.TrimSpace(path) == "" {
+		return fmt.Errorf("empty autotune config path")
+	}
+	if cfg.Version <= 0 {
+		cfg.Version = benchmarkPowerAutotuneVersion
+	}
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return err
+	}
+	data, err := json.MarshalIndent(cfg, "", "  ")
+	if err != nil {
+		return err
+	}
+	tmp := path + ".tmp"
+	if err := os.WriteFile(tmp, data, 0644); err != nil {
+		// Cleanup failure is ignored on purpose: the write error is what matters.
+		_ = os.Remove(tmp)
+		return err
+	}
+	if err := os.Rename(tmp, path); err != nil {
+		// Cleanup failure is ignored on purpose: the rename error is what matters.
+		_ = os.Remove(tmp)
+		return err
+	}
+	return nil
+}
+
+// LoadSystemPowerSourceConfig loads the autotune config from the path that
+// BenchmarkPowerSourceConfigPath derives from exportDir.
+func LoadSystemPowerSourceConfig(exportDir string) (*BenchmarkPowerAutotuneConfig, error) {
+	configPath := BenchmarkPowerSourceConfigPath(exportDir)
+	return LoadBenchmarkPowerAutotuneConfig(configPath)
+}
+
+// ResetBenchmarkPowerAutotuneConfig deletes the autotune config at path.
+// A file that does not exist counts as success; a blank path is an error.
+func ResetBenchmarkPowerAutotuneConfig(path string) error {
+	if strings.TrimSpace(path) == "" {
+		return fmt.Errorf("empty autotune config path")
+	}
+	err := os.Remove(path)
+	if err == nil || os.IsNotExist(err) {
+		return nil
+	}
+	return err
+}
+
+// normalizeBenchmarkPowerSource maps a source name, ignoring case and
+// surrounding whitespace, to one of the two supported sources. Anything other
+// than the SDR PSU-input source resolves to DCMI.
+func normalizeBenchmarkPowerSource(source string) string {
+	if strings.TrimSpace(strings.ToLower(source)) == BenchmarkPowerSourceSDRPSUInput {
+		return BenchmarkPowerSourceSDRPSUInput
+	}
+	return BenchmarkPowerSourceDCMI
+}
+
+// ResolveSystemPowerDecision picks the system power source to use.
+//
+// A loadable autotune config wins: its normalized source is returned as both
+// the selected and the effective source, in mode "autotuned". Otherwise both
+// sources are sampled once and a "fallback" decision is returned, using the
+// SDR PSU-input source when it reports a positive reading and DCMI if not.
+func ResolveSystemPowerDecision(exportDir string) SystemPowerSourceDecision {
+	if cfg, err := LoadSystemPowerSourceConfig(exportDir); err == nil && cfg != nil && strings.TrimSpace(cfg.SelectedSource) != "" {
+		source := normalizeBenchmarkPowerSource(cfg.SelectedSource)
+		return SystemPowerSourceDecision{
+			Configured:      true,
+			SelectedSource:  source,
+			EffectiveSource: source,
+			Mode:            "autotuned",
+			Reason:          strings.TrimSpace(cfg.Reason),
+			ConfiguredAt:    cfg.UpdatedAt,
+		}
+	}
+
+	// No usable config: probe the sources and prefer the PSU input sensors.
+	fallback := BenchmarkPowerSourceDCMI
+	reason := "autotune config not found; using temporary fallback source dcmi"
+	if sampleBenchmarkPowerSources()[BenchmarkPowerSourceSDRPSUInput] > 0 {
+		fallback = BenchmarkPowerSourceSDRPSUInput
+		reason = "autotune config not found; using temporary fallback source sdr_psu_input"
+	}
+	return SystemPowerSourceDecision{
+		Configured:      false,
+		EffectiveSource: fallback,
+		Mode:            "fallback",
+		Reason:          reason,
+	}
+}
+
+// SampleSystemPowerResolved reads the current system power in watts from the
+// source chosen by ResolveSystemPowerDecision and returns the reading together
+// with the decision that produced it.
+//
+// When the source comes from a saved autotune config but yields no positive
+// reading, the other source is tried; on success the decision is marked
+// "degraded" and EffectiveSource names the source that was actually read. A
+// fallback decision (no saved config) is not retried against another source.
+//
+// The returned error is non-nil whenever the returned value is 0. This
+// includes the case where the configured source answered without an error but
+// with a non-positive value and no fallback source responded.
+func SampleSystemPowerResolved(exportDir string) (float64, SystemPowerSourceDecision, error) {
+	decision := ResolveSystemPowerDecision(exportDir)
+	if decision.EffectiveSource == "" {
+		return 0, decision, fmt.Errorf("system power source unavailable")
+	}
+	value, err := queryBenchmarkPowerSourceW(decision.EffectiveSource)
+	if err == nil && value > 0 {
+		return value, decision, nil
+	}
+	if !decision.Configured {
+		return 0, decision, fmt.Errorf("system power source unavailable")
+	}
+
+	// The configured source failed: try the other one.
+	fallback := BenchmarkPowerSourceDCMI
+	if decision.EffectiveSource == BenchmarkPowerSourceDCMI {
+		fallback = BenchmarkPowerSourceSDRPSUInput
+	}
+	decision.Mode = "degraded"
+	if fallbackValue, fallbackErr := queryBenchmarkPowerSourceW(fallback); fallbackErr == nil && fallbackValue > 0 {
+		decision.Reason = fmt.Sprintf("configured source %s unavailable; using degraded fallback %s", decision.SelectedSource, fallback)
+		decision.EffectiveSource = fallback
+		return fallbackValue, decision, nil
+	}
+	decision.Reason = fmt.Sprintf("configured source %s unavailable and no fallback source responded", decision.SelectedSource)
+	if err == nil {
+		// The query succeeded but reported a non-positive value; never hand
+		// back a zero reading with a nil error.
+		err = fmt.Errorf("system power source %s returned no usable reading", decision.SelectedSource)
+	}
+	return 0, decision, err
+}
+
+// queryBenchmarkPowerSourceW takes one power reading, in watts, from the given
+// source after normalizing its name. The SDR PSU-input source fails with an
+// error unless it reports a positive value; every other name is served by the
+// IPMI server power query.
+func queryBenchmarkPowerSourceW(source string) (float64, error) {
+	if normalizeBenchmarkPowerSource(source) != BenchmarkPowerSourceSDRPSUInput {
+		return queryIPMIServerPowerW()
+	}
+	sensors := sampleIPMISDRPowerSensors()
+	if sensors.PSUInW > 0 {
+		return sensors.PSUInW, nil
+	}
+	return 0, fmt.Errorf("sdr psu input unavailable")
+}
+
+// sampleBenchmarkPowerSources queries DCMI first and the SDR PSU-input source
+// second, and returns the readings keyed by source name. A source that fails
+// or reports a non-positive value is left out; the map itself is never nil.
+func sampleBenchmarkPowerSources() map[string]float64 {
+	readings := map[string]float64{}
+	dcmiW, dcmiErr := queryIPMIServerPowerW()
+	if dcmiErr == nil && dcmiW > 0 {
+		readings[BenchmarkPowerSourceDCMI] = dcmiW
+	}
+	sdrW, sdrErr := queryBenchmarkPowerSourceW(BenchmarkPowerSourceSDRPSUInput)
+	if sdrErr == nil && sdrW > 0 {
+		readings[BenchmarkPowerSourceSDRPSUInput] = sdrW
+	}
+	return readings
+}
+
+// sampleBenchmarkPowerSourceSeries samples one source for durationSec seconds
+// and returns the mean of the collected readings. The boolean is false when
+// the duration is not positive or no reading was collected.
+func sampleBenchmarkPowerSourceSeries(ctx context.Context, source string, durationSec, intervalSec int) (float64, bool) {
+	if durationSec <= 0 {
+		return 0, false
+	}
+	readings := collectSelectedPowerSourceSamples(ctx, source, durationSec, intervalSec)
+	if len(readings) > 0 {
+		return benchmarkMean(readings), true
+	}
+	return 0, false
+}
+
+// collectSelectedPowerSourceSamples runs a background sampler on source and
+// blocks until ctx is done or durationSec seconds have elapsed, whichever
+// comes first. It then stops the sampler and returns the readings gathered so
+// far. A non-positive duration returns nil without sampling.
+func collectSelectedPowerSourceSamples(ctx context.Context, source string, durationSec, intervalSec int) []float64 {
+	if durationSec <= 0 {
+		return nil
+	}
+	stopCh := make(chan struct{})
+	doneCh := startSelectedPowerSourceSampler(stopCh, source, intervalSec)
+	select {
+	case <-ctx.Done():
+	case <-time.After(time.Duration(durationSec) * time.Second):
+	}
+	// Closing stopCh makes the sampler publish its samples on doneCh and exit.
+	close(stopCh)
+	return <-doneCh
+}
+
+// startSelectedPowerSourceSampler starts a goroutine that reads source once
+// immediately and then every intervalSec seconds until stopCh is closed. A
+// non-positive intervalSec selects the default autotune sample interval.
+//
+// Failed and non-positive readings are dropped. When stopped, the goroutine
+// sends the collected readings on the returned channel exactly once and then
+// closes it.
+func startSelectedPowerSourceSampler(stopCh <-chan struct{}, source string, intervalSec int) <-chan []float64 {
+	if intervalSec <= 0 {
+		intervalSec = benchmarkPowerAutotuneSampleInterval
+	}
+	// Buffer of one: the final send cannot block even if nobody receives.
+	ch := make(chan []float64, 1)
+	go func() {
+		defer close(ch)
+		var samples []float64
+		record := func() {
+			if w, err := queryBenchmarkPowerSourceW(source); err == nil && w > 0 {
+				samples = append(samples, w)
+			}
+		}
+		record()
+		ticker := time.NewTicker(time.Duration(intervalSec) * time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-stopCh:
+				ch <- samples
+				return
+			case <-ticker.C:
+				record()
+			}
+		}
+	}()
+	return ch
+}
+
+// benchmarkPowerAutotuneSample is one telemetry row taken during an autotune
+// phase.
+type benchmarkPowerAutotuneSample struct {
+	// ElapsedSec is the time since the phase's sampling loop started, in seconds.
+	ElapsedSec     float64
+	// GPUAvgUsagePct is the mean usage over the sampled GPUs; 0 when no GPU rows were read.
+	GPUAvgUsagePct float64
+	// CPUUsagePct is the value returned by sampleCPULoadPct for this row.
+	CPUUsagePct    float64
+	// GPUSumPowerW is the sum of the sampled GPUs' power; 0 when no GPU rows were read.
+	GPUSumPowerW   float64
+	// Sources holds the positive system power readings keyed by source name.
+	Sources        map[string]float64
+}
+
+// collectBenchmarkPowerAutotuneSamples gathers telemetry rows for one autotune
+// phase and logs each row through logFunc.
+//
+// A row is taken immediately and then after every sample interval. The loop
+// ends once a row has been taken after durationSec seconds have passed, or as
+// soon as ctx is cancelled; the rows collected up to that point are returned.
+// A non-positive durationSec returns nil without sampling.
+func collectBenchmarkPowerAutotuneSamples(ctx context.Context, phase string, gpuIndices []int, durationSec int, logFunc func(string)) []benchmarkPowerAutotuneSample {
+	if durationSec <= 0 {
+		return nil
+	}
+	var out []benchmarkPowerAutotuneSample
+	deadline := time.Now().Add(time.Duration(durationSec) * time.Second)
+	start := time.Now()
+	for {
+		if ctx.Err() != nil {
+			return out
+		}
+		row := benchmarkPowerAutotuneSample{
+			ElapsedSec:  time.Since(start).Seconds(),
+			CPUUsagePct: sampleCPULoadPct(),
+			Sources:     sampleBenchmarkPowerSources(),
+		}
+		// GPU fields stay zero when the GPU query fails or returns no rows.
+		if gpuRows, err := sampleGPUMetrics(gpuIndices); err == nil && len(gpuRows) > 0 {
+			var usageSum float64
+			for _, gpu := range gpuRows {
+				row.GPUSumPowerW += gpu.PowerW
+				usageSum += gpu.UsagePct
+			}
+			row.GPUAvgUsagePct = usageSum / float64(len(gpuRows))
+		}
+		out = append(out, row)
+		logBenchmarkPowerAutotuneSample(phase, row, logFunc)
+		// The deadline is checked after sampling, so at least one row is always taken.
+		if time.Now().After(deadline) {
+			return out
+		}
+		select {
+		case <-ctx.Done():
+			return out
+		case <-time.After(benchmarkPowerAutotuneSampleInterval * time.Second):
+		}
+	}
+}
+
+// logBenchmarkPowerAutotuneSample writes a one-line summary of a telemetry row
+// to logFunc: elapsed time, GPU and CPU load, summed GPU power, and the DCMI
+// and SDR PSU-input readings (shown as "n/a" when missing or non-positive).
+// A nil logFunc disables logging.
+func logBenchmarkPowerAutotuneSample(phase string, sample benchmarkPowerAutotuneSample, logFunc func(string)) {
+	if logFunc == nil {
+		return
+	}
+	var readings []string
+	for _, name := range []string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput} {
+		// A missing key reads as 0 and is therefore reported as n/a as well.
+		label := fmt.Sprintf("%s=n/a", name)
+		if watts := sample.Sources[name]; watts > 0 {
+			label = fmt.Sprintf("%s=%.0fW", name, watts)
+		}
+		readings = append(readings, label)
+	}
+	line := fmt.Sprintf(
+		"autotune %s sample t=%.0fs gpu_avg_util=%.1f%% gpu_sum_power=%.0fW cpu_load=%.1f%% %s",
+		phase,
+		sample.ElapsedSec,
+		sample.GPUAvgUsagePct,
+		sample.GPUSumPowerW,
+		sample.CPUUsagePct,
+		strings.Join(readings, " "),
+	)
+	logFunc(line)
+}
+
+// logBenchmarkPowerAutotunePhaseSummary logs aggregate statistics for one
+// phase: sample count, mean and p95 GPU and CPU load, mean summed GPU power,
+// and the mean of each power source's positive readings ("n/a" when a source
+// produced none). Nothing is logged for a nil logFunc or an empty sample set.
+func logBenchmarkPowerAutotunePhaseSummary(phase string, samples []benchmarkPowerAutotuneSample, logFunc func(string)) {
+	if logFunc == nil || len(samples) == 0 {
+		return
+	}
+	var (
+		gpuUtil  []float64
+		cpuUtil  []float64
+		gpuWatts []float64
+	)
+	perSource := map[string][]float64{}
+	for _, row := range samples {
+		gpuUtil = append(gpuUtil, row.GPUAvgUsagePct)
+		cpuUtil = append(cpuUtil, row.CPUUsagePct)
+		gpuWatts = append(gpuWatts, row.GPUSumPowerW)
+		for name, watts := range row.Sources {
+			if watts <= 0 {
+				continue
+			}
+			perSource[name] = append(perSource[name], watts)
+		}
+	}
+	var averages []string
+	for _, name := range []string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput} {
+		if readings := perSource[name]; len(readings) > 0 {
+			averages = append(averages, fmt.Sprintf("%s_avg=%.0fW", name, benchmarkMean(readings)))
+		} else {
+			averages = append(averages, fmt.Sprintf("%s_avg=n/a", name))
+		}
+	}
+	logFunc(fmt.Sprintf(
+		"autotune %s summary samples=%d gpu_avg_util=%.1f%% gpu_p95_util=%.1f%% gpu_avg_power=%.0fW cpu_avg=%.1f%% cpu_p95=%.1f%% %s",
+		phase,
+		len(samples),
+		benchmarkMean(gpuUtil),
+		benchmarkPercentile(gpuUtil, 95),
+		benchmarkMean(gpuWatts),
+		benchmarkMean(cpuUtil),
+		benchmarkPercentile(cpuUtil, 95),
+		strings.Join(averages, " "),
+	))
+}
+
+// logBenchmarkPowerAutotuneSelection logs one line per candidate source:
+// either that it was unavailable, or its idle/load averages, delta, relative
+// error and confidence next to the GPU delta. The chosen candidate's line ends
+// in " SELECTED", and non-blank selection notes follow on a line of their own.
+// A nil logFunc disables logging.
+func logBenchmarkPowerAutotuneSelection(candidates []BenchmarkPowerAutotuneCandidate, selectedSource string, gpuDelta float64, logFunc func(string)) {
+	if logFunc == nil {
+		return
+	}
+	for _, c := range candidates {
+		if !c.Available {
+			logFunc(fmt.Sprintf("autotune candidate %s unavailable", c.Source))
+			continue
+		}
+		marker := ""
+		if c.Source == selectedSource {
+			marker = " SELECTED"
+		}
+		logFunc(fmt.Sprintf(
+			"autotune candidate %s idle_avg=%.0fW load_avg=%.0fW delta=%.0fW gpu_delta=%.0fW relative_error=%.3f confidence=%.0f%%%s",
+			c.Source,
+			c.IdleAvgW,
+			c.LoadAvgW,
+			c.DeltaW,
+			gpuDelta,
+			c.RelativeError,
+			c.Confidence*100,
+			marker,
+		))
+		if strings.TrimSpace(c.SelectionNotes) == "" {
+			continue
+		}
+		logFunc(fmt.Sprintf("autotune candidate %s reason: %s", c.Source, c.SelectionNotes))
+	}
+}
+
+// validateBenchmarkPowerAutotuneIdle decides whether the idle-phase samples
+// really describe an idle machine. It reports mean and p95 GPU and CPU load
+// rounded to one decimal, and marks the result valid only when the mean GPU
+// load is at most 5%, p95 GPU at most 10%, mean CPU at most 20% and p95 CPU at
+// most 35%. The first limit that is exceeded is named in Reason.
+//
+// Every sample contributes a GPU value, while CPU values of 0 or below are
+// left out of the CPU statistics.
+func validateBenchmarkPowerAutotuneIdle(samples []benchmarkPowerAutotuneSample) *BenchmarkPowerAutotuneValidation {
+	verdict := &BenchmarkPowerAutotuneValidation{}
+	if len(samples) == 0 {
+		verdict.Reason = "no idle telemetry samples collected"
+		return verdict
+	}
+	var gpuLoad, cpuLoad []float64
+	for _, row := range samples {
+		gpuLoad = append(gpuLoad, row.GPUAvgUsagePct)
+		if row.CPUUsagePct > 0 {
+			cpuLoad = append(cpuLoad, row.CPUUsagePct)
+		}
+	}
+	// oneDecimal rounds a percentage to a single decimal place.
+	oneDecimal := func(v float64) float64 { return math.Round(v*10) / 10 }
+
+	verdict.GPUSamples = len(gpuLoad)
+	verdict.CPUSamples = len(cpuLoad)
+	verdict.GPUAvgUsagePct = oneDecimal(benchmarkMean(gpuLoad))
+	verdict.GPUP95UsagePct = oneDecimal(benchmarkPercentile(gpuLoad, 95))
+	verdict.CPUAvgUsagePct = oneDecimal(benchmarkMean(cpuLoad))
+	verdict.CPUP95UsagePct = oneDecimal(benchmarkPercentile(cpuLoad, 95))
+
+	switch {
+	case verdict.GPUAvgUsagePct > 5:
+		verdict.Reason = fmt.Sprintf("idle validation failed: average GPU load %.1f%% exceeds 5%%", verdict.GPUAvgUsagePct)
+	case verdict.GPUP95UsagePct > 10:
+		verdict.Reason = fmt.Sprintf("idle validation failed: p95 GPU load %.1f%% exceeds 10%%", verdict.GPUP95UsagePct)
+	case verdict.CPUAvgUsagePct > 20:
+		verdict.Reason = fmt.Sprintf("idle validation failed: average CPU load %.1f%% exceeds 20%%", verdict.CPUAvgUsagePct)
+	case verdict.CPUP95UsagePct > 35:
+		verdict.Reason = fmt.Sprintf("idle validation failed: p95 CPU load %.1f%% exceeds 35%%", verdict.CPUP95UsagePct)
+	default:
+		verdict.Valid = true
+	}
+	return verdict
+}
+
+// chooseBenchmarkPowerAutotuneSource picks the server power source whose
+// idle-to-load delta best tracks the GPU-reported power delta.
+//
+// For both phases it averages GPUSumPowerW and gathers the positive readings
+// of each source. The GPU delta is load average minus idle average; when that
+// is not positive the load average itself is used. A candidate is built for
+// the DCMI and the SDR PSU input sources, and only candidates that are
+// available and have a positive delta compete.
+//
+// Ranking: when two different sources have relative errors within 0.10 of each
+// other the SDR PSU input source wins; otherwise the smaller relative error
+// wins, and an equal error is broken by the larger sample count.
+//
+// It returns the selected source name, all candidates (including unavailable
+// ones, with Selected and SelectionNotes set on the winner), the idle and load
+// GPU power averages, and an error when no candidate is usable.
+func chooseBenchmarkPowerAutotuneSource(idle, load []benchmarkPowerAutotuneSample) (string, []BenchmarkPowerAutotuneCandidate, float64, float64, error) {
+	// Relative errors closer than this are treated as a tie between sources.
+	const relativeErrorTolerance = 0.10
+
+	// collect splits one phase into its GPU power series and per-source
+	// series, dropping non-positive source readings.
+	collect := func(samples []benchmarkPowerAutotuneSample) ([]float64, map[string][]float64) {
+		var gpu []float64
+		bySource := map[string][]float64{}
+		for _, sample := range samples {
+			gpu = append(gpu, sample.GPUSumPowerW)
+			for source, value := range sample.Sources {
+				if value > 0 {
+					bySource[source] = append(bySource[source], value)
+				}
+			}
+		}
+		return gpu, bySource
+	}
+	idleGPU, idleBySource := collect(idle)
+	loadGPU, loadBySource := collect(load)
+
+	idleGPUAvg := benchmarkMean(idleGPU)
+	loadGPUAvg := benchmarkMean(loadGPU)
+	gpuDelta := loadGPUAvg - idleGPUAvg
+	if gpuDelta <= 0 {
+		gpuDelta = loadGPUAvg
+	}
+
+	candidates := []BenchmarkPowerAutotuneCandidate{
+		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceDCMI, idleBySource[BenchmarkPowerSourceDCMI], loadBySource[BenchmarkPowerSourceDCMI], gpuDelta),
+		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceSDRPSUInput, idleBySource[BenchmarkPowerSourceSDRPSUInput], loadBySource[BenchmarkPowerSourceSDRPSUInput], gpuDelta),
+	}
+	available := make([]BenchmarkPowerAutotuneCandidate, 0, len(candidates))
+	for _, candidate := range candidates {
+		if candidate.Available && candidate.DeltaW > 0 {
+			available = append(available, candidate)
+		}
+	}
+	if len(available) == 0 {
+		return "", candidates, idleGPUAvg, loadGPUAvg, fmt.Errorf("no usable server power source samples collected")
+	}
+	sort.Slice(available, func(i, j int) bool {
+		if math.Abs(available[i].RelativeError-available[j].RelativeError) <= relativeErrorTolerance {
+			if available[i].Source != available[j].Source {
+				return available[i].Source == BenchmarkPowerSourceSDRPSUInput
+			}
+		}
+		if available[i].RelativeError != available[j].RelativeError {
+			return available[i].RelativeError < available[j].RelativeError
+		}
+		return available[i].Samples > available[j].Samples
+	})
+	selected := available[0]
+
+	// The tie-break above can pick a source that is NOT the closest match, so
+	// only claim "closest" when no other usable candidate has a smaller error.
+	note := fmt.Sprintf("selected because delta %.0f W is closest to GPU delta %.0f W (relative error %.3f)", selected.DeltaW, gpuDelta, selected.RelativeError)
+	for _, other := range available[1:] {
+		if other.RelativeError < selected.RelativeError {
+			note = fmt.Sprintf(
+				"selected because %s is preferred when relative errors are within %.2f of each other; delta %.0f W vs GPU delta %.0f W (relative error %.3f; %s was closer at %.3f)",
+				selected.Source, relativeErrorTolerance, selected.DeltaW, gpuDelta, selected.RelativeError, other.Source, other.RelativeError,
+			)
+			break
+		}
+	}
+	for idx := range candidates {
+		if candidates[idx].Source == selected.Source {
+			candidates[idx].Selected = true
+			candidates[idx].SelectionNotes = note
+		}
+	}
+	return selected.Source, candidates, idleGPUAvg, loadGPUAvg, nil
+}
+
+// buildBenchmarkPowerAutotuneCandidate summarises one power source.
+//
+// The candidate is available only when both idle and load have at least one
+// reading; Samples is the smaller of the two lengths. For an available
+// candidate the idle and load means and their difference (DeltaW) are filled
+// in. RelativeError (|DeltaW - gpuDelta| / gpuDelta) and Confidence
+// (1 - RelativeError, floored at 0) are only computed when gpuDelta is
+// positive; otherwise both stay at zero.
+func buildBenchmarkPowerAutotuneCandidate(source string, idle, load []float64, gpuDelta float64) BenchmarkPowerAutotuneCandidate {
+	hasBoth := len(idle) > 0 && len(load) > 0
+	out := BenchmarkPowerAutotuneCandidate{
+		Source:    source,
+		Available: hasBoth,
+		Samples:   minInt(len(idle), len(load)),
+	}
+	if hasBoth {
+		out.IdleAvgW = benchmarkMean(idle)
+		out.LoadAvgW = benchmarkMean(load)
+		out.DeltaW = out.LoadAvgW - out.IdleAvgW
+		if gpuDelta > 0 {
+			relErr := math.Abs(out.DeltaW-gpuDelta) / gpuDelta
+			out.RelativeError = relErr
+			out.Confidence = math.Max(0, 1-relErr)
+		}
+	}
+	return out
+}
+
+// renderBenchmarkPowerAutotuneSummary renders result as newline-terminated
+// key=value lines: run metadata first (generated_at in UTC RFC 3339), then
+// selected_source when one is set, the idle validation figures when present,
+// and finally one group of candidate_<source>_* lines per candidate. The
+// numeric lines of a candidate are only written when it is available.
+func renderBenchmarkPowerAutotuneSummary(result BenchmarkPowerAutotuneResult) string {
+	var out strings.Builder
+
+	generated := result.GeneratedAt.UTC().Format(time.RFC3339)
+	fmt.Fprintf(&out, "generated_at=%s\n", generated)
+	fmt.Fprintf(&out, "status=%s\n", result.Status)
+	fmt.Fprintf(&out, "benchmark_kind=%s\n", result.BenchmarkKind)
+	fmt.Fprintf(&out, "profile=%s\n", result.Profile)
+	fmt.Fprintf(&out, "idle_duration_sec=%d\n", result.IdleDurationSec)
+	fmt.Fprintf(&out, "load_duration_sec=%d\n", result.LoadDurationSec)
+	fmt.Fprintf(&out, "sample_interval_sec=%d\n", result.SampleIntervalSec)
+	if src := result.SelectedSource; src != "" {
+		fmt.Fprintf(&out, "selected_source=%s\n", src)
+	}
+
+	if iv := result.IdleValidation; iv != nil {
+		fmt.Fprintf(&out, "idle_valid=%t\n", iv.Valid)
+		fmt.Fprintf(&out, "idle_gpu_avg_usage_pct=%.1f\n", iv.GPUAvgUsagePct)
+		fmt.Fprintf(&out, "idle_gpu_p95_usage_pct=%.1f\n", iv.GPUP95UsagePct)
+		fmt.Fprintf(&out, "idle_cpu_avg_usage_pct=%.1f\n", iv.CPUAvgUsagePct)
+		fmt.Fprintf(&out, "idle_cpu_p95_usage_pct=%.1f\n", iv.CPUP95UsagePct)
+		if iv.Reason != "" {
+			fmt.Fprintf(&out, "idle_validation_error=%s\n", iv.Reason)
+		}
+	}
+
+	for i := range result.Candidates {
+		c := &result.Candidates[i]
+		fmt.Fprintf(&out, "candidate_%s_available=%t\n", c.Source, c.Available)
+		if !c.Available {
+			continue
+		}
+		fmt.Fprintf(&out, "candidate_%s_idle_avg_w=%.0f\n", c.Source, c.IdleAvgW)
+		fmt.Fprintf(&out, "candidate_%s_load_avg_w=%.0f\n", c.Source, c.LoadAvgW)
+		fmt.Fprintf(&out, "candidate_%s_delta_w=%.0f\n", c.Source, c.DeltaW)
+		fmt.Fprintf(&out, "candidate_%s_relative_error=%.3f\n", c.Source, c.RelativeError)
+	}
+	return out.String()
+}
+
+// renderBenchmarkPowerAutotuneReport renders result as a Markdown document:
+// a title, the run metadata (each line ending in two spaces for a hard line
+// break), an "Idle Validation" section when validation data is present, a
+// "Candidates" table when there is at least one candidate, and finally every
+// note as a bullet. Unavailable candidates are shown with em-dash placeholders
+// and "no" in the Selected column.
+func renderBenchmarkPowerAutotuneReport(result BenchmarkPowerAutotuneResult) string {
+	var md strings.Builder
+
+	md.WriteString("# Bee Bench Power Source Autotune\n\n")
+	fmt.Fprintf(&md, "**Status:** %s  \n", result.Status)
+	fmt.Fprintf(&md, "**Benchmark kind:** %s  \n", result.BenchmarkKind)
+	fmt.Fprintf(&md, "**Profile:** %s  \n", result.Profile)
+	fmt.Fprintf(&md, "**Idle window:** %ds  \n", result.IdleDurationSec)
+	fmt.Fprintf(&md, "**Load window:** %ds  \n", result.LoadDurationSec)
+	fmt.Fprintf(&md, "**Sample interval:** %ds  \n", result.SampleIntervalSec)
+	if src := result.SelectedSource; src != "" {
+		fmt.Fprintf(&md, "**Selected source:** `%s`  \n", src)
+	}
+	md.WriteString("\n")
+
+	if iv := result.IdleValidation; iv != nil {
+		md.WriteString("## Idle Validation\n\n")
+		fmt.Fprintf(&md, "- valid: %t\n", iv.Valid)
+		fmt.Fprintf(&md, "- GPU avg usage: %.1f%%\n", iv.GPUAvgUsagePct)
+		fmt.Fprintf(&md, "- GPU p95 usage: %.1f%%\n", iv.GPUP95UsagePct)
+		fmt.Fprintf(&md, "- CPU avg usage: %.1f%%\n", iv.CPUAvgUsagePct)
+		fmt.Fprintf(&md, "- CPU p95 usage: %.1f%%\n", iv.CPUP95UsagePct)
+		if iv.Reason != "" {
+			fmt.Fprintf(&md, "- reason: %s\n", iv.Reason)
+		}
+		md.WriteString("\n")
+	}
+
+	if len(result.Candidates) > 0 {
+		md.WriteString("## Candidates\n\n")
+		md.WriteString("| Source | Idle avg W | Load avg W | Delta W | Relative error | Selected |\n")
+		md.WriteString("|--------|------------|------------|---------|----------------|----------|\n")
+		for i := range result.Candidates {
+			c := &result.Candidates[i]
+			if !c.Available {
+				fmt.Fprintf(&md, "| %s | — | — | — | — | no |\n", c.Source)
+				continue
+			}
+			var picked string
+			if c.Selected {
+				picked = "yes"
+			} else {
+				picked = "no"
+			}
+			fmt.Fprintf(&md, "| %s | %.0f | %.0f | %.0f | %.2f | %s |\n",
+				c.Source, c.IdleAvgW, c.LoadAvgW, c.DeltaW, c.RelativeError, picked)
+		}
+		md.WriteString("\n")
+	}
+
+	for i := range result.Notes {
+		fmt.Fprintf(&md, "- %s\n", result.Notes[i])
+	}
+	return md.String()
+}
+
+// benchmarkAutotuneLoadCommand builds the command line used to load the GPUs
+// during autotune and returns it together with the normalized benchmark kind.
+//
+// kind is matched case-insensitively, ignoring surrounding whitespace. The
+// values "power-fit", "power" and "nvidia-bench-power" yield kind "power-fit":
+// the command from resolveBenchmarkPowerLoadCommand is used when that succeeds,
+// otherwise the DCGM "targeted_power" diagnostic command. Any other value
+// yields kind "performance" and a bee-gpu-burn invocation for durationSec
+// seconds on all gpuIndices, with --size-mb appended only when sizeMB > 0.
+func benchmarkAutotuneLoadCommand(kind string, durationSec int, gpuIndices []int, sizeMB int) ([]string, string) {
+	deviceList := joinIndexList(gpuIndices)
+	normalized := strings.TrimSpace(strings.ToLower(kind))
+
+	wantsPower := normalized == "power-fit" || normalized == "power" || normalized == "nvidia-bench-power"
+	if !wantsPower {
+		args := []string{
+			"bee-gpu-burn",
+			"--seconds", fmt.Sprintf("%d", durationSec),
+			"--devices", deviceList,
+		}
+		if sizeMB > 0 {
+			args = append(args, "--size-mb", fmt.Sprintf("%d", sizeMB))
+		}
+		return args, "performance"
+	}
+
+	if resolved, _, err := resolveBenchmarkPowerLoadCommand(durationSec, gpuIndices); err == nil {
+		return resolved, "power-fit"
+	}
+	return nvidiaDCGMNamedDiagCommand("targeted_power", durationSec, gpuIndices), "power-fit"
+}
+
+// RunNvidiaPowerSourceAutotune samples an idle window and a full-load window
+// and uses them to decide which server power source to record in the autotune
+// config.
+//
+// Steps, in order:
+//  1. Create baseDir (defaulting to /var/log/bee-bench/autotune when blank) and
+//     a timestamped "autotune-<UTC time>" run directory inside it.
+//  2. Collect idle samples and validate them; stop when validation fails.
+//  3. Run the load command while sampling in a background goroutine; stop when
+//     the command fails.
+//  4. Choose a source with chooseBenchmarkPowerAutotuneSource and save the
+//     config at BenchmarkPowerSourceConfigPath(baseDir).
+//
+// On success it returns the run directory and a nil error. Failures before the
+// run directory exists return "" and the error. Later failures write the
+// artifacts with Status "FAILED" and return the run directory together with
+// the error; if writing the artifacts itself fails, "" and that write error are
+// returned instead. A nil ctx is replaced by context.Background() and a nil
+// logFunc by a no-op.
+func (s *System) RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if logFunc == nil {
+		logFunc = func(string) {}
+	}
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = "/var/log/bee-bench/autotune"
+	}
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
+	}
+	// NOTE(review): the selection is resolved with nil arguments, so any GPU
+	// choice carried in opts is not consulted here — confirm this is intended.
+	selected, err := resolveNvidiaGPUSelection(nil, nil)
+	if err != nil {
+		return "", err
+	}
+	if len(selected) == 0 {
+		return "", fmt.Errorf("no NVIDIA GPUs detected for autotune")
+	}
+	ts := time.Now().UTC().Format("20060102-150405")
+	runDir := filepath.Join(baseDir, "autotune-"+ts)
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir %s: %w", runDir, err)
+	}
+	verboseLog := filepath.Join(runDir, "verbose.log")
+	hostname, _ := os.Hostname()
+	loadCmd, normalizedKind := benchmarkAutotuneLoadCommand(benchmarkKind, benchmarkPowerAutotuneLoadSec, selected, opts.SizeMB)
+	// Status starts as FAILED so every early-exit artifact is marked as such;
+	// it is flipped to OK only once a source has been chosen.
+	result := BenchmarkPowerAutotuneResult{
+		GeneratedAt:       time.Now().UTC(),
+		Hostname:          hostname,
+		ServerModel:       readServerModel(),
+		BenchmarkKind:     normalizedKind,
+		Profile:           opts.Profile,
+		Status:            "FAILED",
+		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
+		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
+		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
+	}
+
+	// Stage 1: idle window. The load stage is skipped entirely when the idle
+	// samples do not pass validation.
+	logFunc(fmt.Sprintf("autotune: idle validation window %ds on GPUs %s", benchmarkPowerAutotuneIdleSec, joinIndexList(selected)))
+	idleSamples := collectBenchmarkPowerAutotuneSamples(ctx, "idle", selected, benchmarkPowerAutotuneIdleSec, logFunc)
+	logBenchmarkPowerAutotunePhaseSummary("idle", idleSamples, logFunc)
+	result.IdleValidation = validateBenchmarkPowerAutotuneIdle(idleSamples)
+	if result.IdleValidation == nil || !result.IdleValidation.Valid {
+		if result.IdleValidation != nil {
+			result.IdleValidationError = result.IdleValidation.Reason
+			logFunc(result.IdleValidation.Reason)
+		}
+		result.Notes = append(result.Notes, "autotune stopped before load stage because idle validation failed")
+		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
+			return "", err
+		}
+		return runDir, fmt.Errorf("%s", result.IdleValidationError)
+	}
+
+	// Stage 2: full load. Sampling runs in the background while the load
+	// command runs in the foreground; the channel has capacity 1 so the
+	// sampler's single send never blocks.
+	logFunc(fmt.Sprintf("autotune: full-load stage using %s for %ds", normalizedKind, benchmarkPowerAutotuneLoadSec))
+	loadSamplesCh := make(chan []benchmarkPowerAutotuneSample, 1)
+	go func() {
+		loadSamplesCh <- collectBenchmarkPowerAutotuneSamples(ctx, "load", selected, benchmarkPowerAutotuneLoadSec, logFunc)
+	}()
+	out, runErr := runSATCommandCtx(ctx, verboseLog, "autotune-load.log", loadCmd, nil, logFunc)
+	// Best effort: a failure to persist the raw load output is ignored.
+	_ = os.WriteFile(filepath.Join(runDir, "autotune-load.log"), out, 0644)
+	// Always wait for the sampler, even when the load command failed.
+	// NOTE(review): the sampler shares ctx with the command, so whether an early
+	// command failure shortens this wait depends on
+	// collectBenchmarkPowerAutotuneSamples — verify.
+	loadSamples := <-loadSamplesCh
+	logBenchmarkPowerAutotunePhaseSummary("load", loadSamples, logFunc)
+	if runErr != nil {
+		result.Notes = append(result.Notes, "full-load stage failed: "+runErr.Error())
+		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
+			return "", err
+		}
+		return runDir, fmt.Errorf("autotune load stage: %w", runErr)
+	}
+
+	// Stage 3: pick the source. Candidates and GPU averages are recorded on the
+	// result even when no source could be chosen.
+	selectedSource, candidates, idleGPUAvg, loadGPUAvg, chooseErr := chooseBenchmarkPowerAutotuneSource(idleSamples, loadSamples)
+	result.Candidates = candidates
+	result.GPUPowerIdleW = idleGPUAvg
+	result.GPUPowerLoadW = loadGPUAvg
+	if chooseErr != nil {
+		result.Notes = append(result.Notes, chooseErr.Error())
+		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
+			return "", err
+		}
+		return runDir, chooseErr
+	}
+	// Same fallback as inside chooseBenchmarkPowerAutotuneSource, recomputed
+	// here because only the two averages are returned.
+	gpuDelta := loadGPUAvg - idleGPUAvg
+	if gpuDelta <= 0 {
+		gpuDelta = loadGPUAvg
+	}
+	logBenchmarkPowerAutotuneSelection(candidates, selectedSource, gpuDelta, logFunc)
+	result.SelectedSource = selectedSource
+	result.Status = "OK"
+	var confidence float64
+	// Default reason; replaced by the selected candidate's own notes when set.
+	selectionReason := fmt.Sprintf("selected %s after comparing full-load average against GPU-reported delta", selectedSource)
+	for _, candidate := range candidates {
+		if candidate.Selected {
+			confidence = candidate.Confidence
+			if strings.TrimSpace(candidate.SelectionNotes) != "" {
+				selectionReason = candidate.SelectionNotes
+			}
+			break
+		}
+	}
+	cfg := BenchmarkPowerAutotuneConfig{
+		Version:           benchmarkPowerAutotuneVersion,
+		UpdatedAt:         time.Now().UTC(),
+		SelectedSource:    selectedSource,
+		BenchmarkKind:     normalizedKind,
+		Profile:           opts.Profile,
+		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
+		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
+		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
+		Confidence:        confidence,
+		Reason:            selectionReason,
+	}
+	result.Config = &cfg
+	// Stage 4: persist the config. A save failure downgrades the run back to
+	// FAILED before the artifacts are written.
+	configPath := BenchmarkPowerSourceConfigPath(baseDir)
+	if err := SaveBenchmarkPowerAutotuneConfig(configPath, cfg); err != nil {
+		result.Status = "FAILED"
+		result.Notes = append(result.Notes, "failed to save autotune config: "+err.Error())
+		if writeErr := writeBenchmarkPowerAutotuneArtifacts(runDir, result); writeErr != nil {
+			return "", writeErr
+		}
+		return runDir, err
+	}
+	logFunc(fmt.Sprintf("autotune conclusion: selected source %s; reason: %s", selectedSource, cfg.Reason))
+	result.Notes = append(result.Notes, "saved autotune config to "+configPath)
+	if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
+		return "", err
+	}
+	return runDir, nil
+}
+
+// writeBenchmarkPowerAutotuneArtifacts stores the three autotune artifacts in
+// runDir with mode 0644: result.json (indented JSON of result), summary.txt
+// (key=value summary) and report.md (Markdown report). They are written in
+// that order and the first marshal or write failure is returned, wrapped with
+// the name of the step that failed.
+func writeBenchmarkPowerAutotuneArtifacts(runDir string, result BenchmarkPowerAutotuneResult) error {
+	// save writes one artifact into the run directory.
+	save := func(name string, data []byte) error {
+		return os.WriteFile(filepath.Join(runDir, name), data, 0644)
+	}
+
+	payload, err := json.MarshalIndent(result, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshal autotune result: %w", err)
+	}
+	if err = save("result.json", payload); err != nil {
+		return fmt.Errorf("write autotune result.json: %w", err)
+	}
+
+	summary := renderBenchmarkPowerAutotuneSummary(result)
+	if err = save("summary.txt", []byte(summary)); err != nil {
+		return fmt.Errorf("write autotune summary.txt: %w", err)
+	}
+
+	report := renderBenchmarkPowerAutotuneReport(result)
+	if err = save("report.md", []byte(report)); err != nil {
+		return fmt.Errorf("write autotune report.md: %w", err)
+	}
+	return nil
+}
+
+// minInt returns the smaller of a and b; when they are equal that value is
+// returned.
+func minInt(a, b int) int {
+	if b <= a {
+		return b
+	}
+	return a
+}
+
+// Blank-identifier reference to the exec package. Presumably this keeps the
+// import in use when nothing else in the file calls into it — TODO confirm and
+// drop together with the import if it is no longer needed.
+var _ = exec.ErrNotFound
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -2,25 +2,15 @@ package platform

 import (
 	"fmt"
-	"os"
-	"path/filepath"
-	"regexp"
 	"strings"
 	"time"
 )

 func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
-	return renderBenchmarkReportWithCharts(result, nil)
+	return renderBenchmarkReportWithCharts(result)
 }

-type benchmarkReportChart struct {
-	Title   string
-	Content string
-}
-
-var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
-
-func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
+func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 	var b strings.Builder

 	// ── Header ────────────────────────────────────────────────────────────────
@@ -58,11 +48,22 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		fmt.Fprintf(&b, "**GPU(s):** %s  \n", strings.Join(parts, ", "))
 	}
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
-	fmt.Fprintf(&b, "**App version:** %s  \n", result.BenchmarkVersion)
+	fmt.Fprintf(&b, "**Benchmark version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
-	if result.ParallelGPUs {
+	if result.RampStep > 0 && result.RampTotal > 0 {
+		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
+		if result.RampRunID != "" {
+			fmt.Fprintf(&b, "**Ramp-up run ID:** %s  \n", result.RampRunID)
+		}
+	} else if result.ParallelGPUs {
 		fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously)  \n")
 	}
+	if result.ScalabilityScore > 0 {
+		fmt.Fprintf(&b, "**Scalability score:** %.1f%%  \n", result.ScalabilityScore)
+	}
+	if result.PlatformPowerScore > 0 {
+		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n", result.PlatformPowerScore)
+	}
 	fmt.Fprintf(&b, "**Overall status:** %s  \n", result.OverallStatus)
 	b.WriteString("\n")

@@ -83,36 +84,164 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		b.WriteString("\n")
 	}

-	// ── Scorecard table ───────────────────────────────────────────────────────
-	b.WriteString("## Scorecard\n\n")
-	b.WriteString("| GPU | Status | Composite | Compute | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
-	b.WriteString("|-----|--------|-----------|---------|-------------|---------------|-----------------|-----------|-------------|\n")
-	for _, gpu := range result.GPUs {
-		name := strings.TrimSpace(gpu.Name)
-		if name == "" {
-			name = "Unknown"
+	// ── Balanced Scorecard ────────────────────────────────────────────────────
+	b.WriteString("## Balanced Scorecard\n\n")
+
+	// Perspective 1: Compatibility — hard stops
+	b.WriteString("### 1. Compatibility\n\n")
+	{
+		var rows [][]string
+		for _, gpu := range result.GPUs {
+			thermalThrottle := "-"
+			if gpu.Scores.ThermalThrottlePct > 0 {
+				thermalThrottle = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
+			}
+			fanAtThrottle := "-"
+			if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && gpu.Scores.ThermalThrottlePct > 0 {
+				fanAtThrottle = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
+			}
+			ecc := "-"
+			if gpu.ECC.Uncorrected > 0 {
+				ecc = fmt.Sprintf("⛔ %d", gpu.ECC.Uncorrected)
+			}
+			compatStatus := "✓ OK"
+			if gpu.ECC.Uncorrected > 0 || (gpu.Scores.ThermalThrottlePct > 0 && result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && result.Cooling.P95FanDutyCyclePct < 95) {
+				compatStatus = "⛔ HARD STOP"
+			}
+			rows = append(rows, []string{fmt.Sprintf("GPU %d", gpu.Index), thermalThrottle, fanAtThrottle, ecc, compatStatus})
 		}
-		interconnect := "-"
-		if gpu.Scores.InterconnectScore > 0 {
-			interconnect = fmt.Sprintf("%.1f", gpu.Scores.InterconnectScore)
-		}
-		topsPerSM := "-"
-		if gpu.Scores.TOPSPerSMPerGHz > 0 {
-			topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
-		}
-		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %.1f | %.1f | %.1f | %s |\n",
-			gpu.Index, name,
-			gpu.Status,
-			gpu.Scores.CompositeScore,
-			gpu.Scores.ComputeScore,
-			topsPerSM,
-			gpu.Scores.PowerSustainScore,
-			gpu.Scores.ThermalSustainScore,
-			gpu.Scores.StabilityScore,
-			interconnect,
-		)
+		b.WriteString(fmtMDTable([]string{"GPU", "Thermal throttle", "Fan duty at throttle", "ECC uncorr", "Status"}, rows))
+		b.WriteString("\n")
+	}
+
+	// Perspective 2: Thermal headroom
+	b.WriteString("### 2. Thermal Headroom\n\n")
+	{
+		var rows [][]string
+		for _, gpu := range result.GPUs {
+			shutdownTemp := gpu.ShutdownTempC
+			if shutdownTemp <= 0 {
+				shutdownTemp = 90
+			}
+			slowdownTemp := gpu.SlowdownTempC
+			if slowdownTemp <= 0 {
+				slowdownTemp = 80
+			}
+			headroom := gpu.Scores.TempHeadroomC
+			thermalStatus := "✓ OK"
+			switch {
+			case headroom < 10:
+				thermalStatus = "⛔ CRITICAL"
+			case gpu.Steady.P95TempC >= slowdownTemp:
+				thermalStatus = "⚠ WARNING"
+			}
+			throttlePct := "-"
+			if gpu.Scores.ThermalThrottlePct > 0 {
+				throttlePct = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
+			}
+			rows = append(rows, []string{
+				fmt.Sprintf("GPU %d", gpu.Index),
+				fmt.Sprintf("%.1f°C", gpu.Steady.P95TempC),
+				fmt.Sprintf("%.0f°C", slowdownTemp),
+				fmt.Sprintf("%.0f°C", shutdownTemp),
+				fmt.Sprintf("%.1f°C", headroom),
+				throttlePct,
+				thermalStatus,
+			})
+		}
+		b.WriteString(fmtMDTable([]string{"GPU", "p95 temp", "Slowdown limit", "Shutdown limit", "Headroom", "Thermal throttle", "Status"}, rows))
+		b.WriteString("\n")
+	}
+
+	// Perspective 3: Power delivery
+	b.WriteString("### 3. Power Delivery\n\n")
+	{
+		var rows [][]string
+		for _, gpu := range result.GPUs {
+			powerCap := "-"
+			if gpu.Scores.PowerCapThrottlePct > 0 {
+				powerCap = fmt.Sprintf("%.1f%%", gpu.Scores.PowerCapThrottlePct)
+			}
+			fanDuty := "-"
+			if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable {
+				fanDuty = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
+			}
+			powerStatus := "✓ OK"
+			if gpu.Scores.PowerCapThrottlePct > 5 {
+				powerStatus = "⚠ POWER LIMITED"
+			}
+			rows = append(rows, []string{
+				fmt.Sprintf("GPU %d", gpu.Index),
+				powerCap,
+				fmt.Sprintf("%.1f", gpu.Scores.PowerSustainScore),
+				fanDuty,
+				powerStatus,
+			})
+		}
+		b.WriteString(fmtMDTable([]string{"GPU", "Power cap throttle", "Power stability", "Fan duty (p95)", "Status"}, rows))
+		b.WriteString("\n")
+	}
+
+	// Perspective 4: Performance
+	b.WriteString("### 4. Performance\n\n")
+	{
+		var rows [][]string
+		for _, gpu := range result.GPUs {
+			synthetic := "-"
+			if gpu.Scores.SyntheticScore > 0 {
+				synthetic = fmt.Sprintf("%.2f", gpu.Scores.SyntheticScore)
+			}
+			mixed := "-"
+			if gpu.Scores.MixedScore > 0 {
+				mixed = fmt.Sprintf("%.2f", gpu.Scores.MixedScore)
+			}
+			mixedEff := "-"
+			if gpu.Scores.MixedEfficiency > 0 {
+				mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
+			}
+			topsPerSM := "-"
+			if gpu.Scores.TOPSPerSMPerGHz > 0 {
+				topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
+			}
+			rows = append(rows, []string{
+				fmt.Sprintf("GPU %d", gpu.Index),
+				fmt.Sprintf("**%.2f**", gpu.Scores.CompositeScore),
+				synthetic, mixed, mixedEff, topsPerSM,
+			})
+		}
+		b.WriteString(fmtMDTable([]string{"GPU", "Compute TOPS", "Synthetic", "Mixed", "Mixed Eff.", "TOPS/SM/GHz"}, rows))
+		if len(result.PerformanceRampSteps) > 0 {
+			fmt.Fprintf(&b, "\n**Platform power score (scalability):** %.1f%%\n", result.PlatformPowerScore)
+		}
+		b.WriteString("\n")
+	}
+
+	// Perspective 5: Anomaly flags
+	b.WriteString("### 5. Anomalies\n\n")
+	{
+		var rows [][]string
+		for _, gpu := range result.GPUs {
+			eccCorr := "-"
+			if gpu.ECC.Corrected > 0 {
+				eccCorr = fmt.Sprintf("⚠ %d", gpu.ECC.Corrected)
+			}
+			syncBoost := "-"
+			if gpu.Scores.SyncBoostThrottlePct > 0 {
+				syncBoost = fmt.Sprintf("%.1f%%", gpu.Scores.SyncBoostThrottlePct)
+			}
+			powerVar := "OK"
+			if gpu.Scores.PowerSustainScore < 70 {
+				powerVar = "⚠ unstable"
+			}
+			thermalVar := "OK"
+			if gpu.Scores.ThermalSustainScore < 70 {
+				thermalVar = "⚠ unstable"
+			}
+			rows = append(rows, []string{fmt.Sprintf("GPU %d", gpu.Index), eccCorr, syncBoost, powerVar, thermalVar})
+		}
+		b.WriteString(fmtMDTable([]string{"GPU", "ECC corrected", "Sync boost throttle", "Power instability", "Thermal instability"}, rows))
+		b.WriteString("\n")
 	}
-	b.WriteString("\n")

 	// ── Per GPU detail ────────────────────────────────────────────────────────
 	b.WriteString("## Per-GPU Details\n\n")
@@ -139,20 +268,75 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		if gpu.PowerLimitW > 0 {
 			fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
 		}
+		if gpu.PowerLimitDerated {
+			fmt.Fprintf(&b, "- **Power limit derating:** active (reduced limit %.0f W)\n", gpu.PowerLimitW)
+		}
+		if gpu.CalibratedPeakPowerW > 0 {
+			if gpu.CalibratedPeakTempC > 0 {
+				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95 at %.1f °C p95\n", gpu.CalibratedPeakPowerW, gpu.CalibratedPeakTempC)
+			} else {
+				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95\n", gpu.CalibratedPeakPowerW)
+			}
+		}
 		if gpu.LockedGraphicsClockMHz > 0 {
 			fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
 		}
 		b.WriteString("\n")

 		// Steady-state telemetry
-		fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
-		b.WriteString("| | Avg | P95 |\n|---|---|---|\n")
-		fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW)
-		fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC)
-		fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz)
-		fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz)
-		fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
-		b.WriteString("\n")
+		if benchmarkTelemetryAvailable(gpu.Steady) {
+			fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
+			b.WriteString(fmtMDTable(
+				[]string{"", "Avg", "P95"},
+				[][]string{
+					{"Power", fmt.Sprintf("%.1f W", gpu.Steady.AvgPowerW), fmt.Sprintf("%.1f W", gpu.Steady.P95PowerW)},
+					{"Temperature", fmt.Sprintf("%.1f °C", gpu.Steady.AvgTempC), fmt.Sprintf("%.1f °C", gpu.Steady.P95TempC)},
+					{"GPU clock", fmt.Sprintf("%.0f MHz", gpu.Steady.AvgGraphicsClockMHz), fmt.Sprintf("%.0f MHz", gpu.Steady.P95GraphicsClockMHz)},
+					{"Memory clock", fmt.Sprintf("%.0f MHz", gpu.Steady.AvgMemoryClockMHz), fmt.Sprintf("%.0f MHz", gpu.Steady.P95MemoryClockMHz)},
+					{"GPU utilisation", fmt.Sprintf("%.1f %%", gpu.Steady.AvgUsagePct), "—"},
+				},
+			))
+			b.WriteString("\n")
+		} else {
+			b.WriteString("**Steady-state telemetry:** unavailable\n\n")
+		}
+
+		// Per-precision stability phases.
+		if len(gpu.PrecisionSteady) > 0 {
+			b.WriteString("**Per-precision stability:**\n\n")
+			var precRows [][]string
+			for _, p := range gpu.PrecisionSteady {
+				eccCorr := "—"
+				eccUncorr := "—"
+				if !p.ECC.IsZero() {
+					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
+					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
+				}
+				status := p.Status
+				if strings.TrimSpace(status) == "" {
+					status = "OK"
+				}
+				precRows = append(precRows, []string{
+					p.Precision, status,
+					fmt.Sprintf("%.1f%%", p.Steady.ClockCVPct),
+					fmt.Sprintf("%.1f%%", p.Steady.PowerCVPct),
+					fmt.Sprintf("%.1f%%", p.Steady.ClockDriftPct),
+					eccCorr, eccUncorr,
+				})
+			}
+			b.WriteString(fmtMDTable([]string{"Precision", "Status", "Clock CV", "Power CV", "Clock Drift", "ECC corr", "ECC uncorr"}, precRows))
+			b.WriteString("\n")
+		} else {
+			// Legacy: show combined-window variance.
+			fmt.Fprintf(&b, "**Clock/power variance (combined window):** clock CV %.1f%% · power CV %.1f%% · clock drift %.1f%%\n\n",
+				gpu.Steady.ClockCVPct, gpu.Steady.PowerCVPct, gpu.Steady.ClockDriftPct)
+		}
+
+		// ECC summary
+		if !gpu.ECC.IsZero() {
+			fmt.Fprintf(&b, "**ECC errors (total):** corrected=%d uncorrected=%d\n\n",
+				gpu.ECC.Corrected, gpu.ECC.Uncorrected)
+		}

 		// Throttle
 		throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
@@ -163,14 +347,22 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		// Precision results
 		if len(gpu.PrecisionResults) > 0 {
 			b.WriteString("**Precision results:**\n\n")
-			b.WriteString("| Precision | TOPS | Lanes | Iterations |\n|-----------|------|-------|------------|\n")
+			var presRows [][]string
 			for _, p := range gpu.PrecisionResults {
 				if p.Supported {
-					fmt.Fprintf(&b, "| %s | %.2f | %d | %d |\n", p.Name, p.TeraOpsPerSec, p.Lanes, p.Iterations)
+					presRows = append(presRows, []string{
+						p.Name,
+						fmt.Sprintf("%.2f", p.TeraOpsPerSec),
+						fmt.Sprintf("×%.3g", p.Weight),
+						fmt.Sprintf("%.2f", p.WeightedTeraOpsPerSec),
+						fmt.Sprintf("%d", p.Lanes),
+						fmt.Sprintf("%d", p.Iterations),
+					})
 				} else {
-					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — |\n", p.Name)
+					presRows = append(presRows, []string{p.Name, "— (unsupported)", "—", "—", "—", "—"})
 				}
 			}
+			b.WriteString(fmtMDTable([]string{"Precision", "TOPS (raw)", "Weight", "TOPS (fp32-eq)", "Lanes", "Iterations"}, presRows))
 			b.WriteString("\n")
 		}

@@ -192,9 +384,13 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		b.WriteString("## Interconnect (NCCL)\n\n")
 		fmt.Fprintf(&b, "**Status:** %s\n\n", result.Interconnect.Status)
 		if result.Interconnect.Supported {
-			b.WriteString("| Metric | Avg | Max |\n|--------|-----|-----|\n")
-			fmt.Fprintf(&b, "| Alg BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.MaxAlgBWGBps)
-			fmt.Fprintf(&b, "| Bus BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgBusBWGBps, result.Interconnect.MaxBusBWGBps)
+			b.WriteString(fmtMDTable(
+				[]string{"Metric", "Avg", "Max"},
+				[][]string{
+					{"Alg BW", fmt.Sprintf("%.1f GB/s", result.Interconnect.AvgAlgBWGBps), fmt.Sprintf("%.1f GB/s", result.Interconnect.MaxAlgBWGBps)},
+					{"Bus BW", fmt.Sprintf("%.1f GB/s", result.Interconnect.AvgBusBWGBps), fmt.Sprintf("%.1f GB/s", result.Interconnect.MaxBusBWGBps)},
+				},
+			))
 			b.WriteString("\n")
 		}
 		for _, note := range result.Interconnect.Notes {
@@ -205,20 +401,26 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		}
 	}

-	// ── Server Power (IPMI) ───────────────────────────────────────────────────
+	// ── Server Power ───────────────────────────────────────────────────────────
 	if sp := result.ServerPower; sp != nil {
-		b.WriteString("## Server Power (IPMI)\n\n")
+		title := "## Server Power\n\n"
+		if sp.Source != "" {
+			title = fmt.Sprintf("## Server Power (`%s`)\n\n", sp.Source)
+		}
+		b.WriteString(title)
 		if !sp.Available {
-			b.WriteString("IPMI power measurement unavailable.\n\n")
+			b.WriteString("Server power measurement unavailable.\n\n")
 		} else {
-			b.WriteString("| | Value |\n|---|---|\n")
-			fmt.Fprintf(&b, "| Server idle | %.0f W |\n", sp.IdleW)
-			fmt.Fprintf(&b, "| Server under load | %.0f W |\n", sp.LoadedW)
-			fmt.Fprintf(&b, "| Server delta (load − idle) | %.0f W |\n", sp.DeltaW)
-			fmt.Fprintf(&b, "| GPU-reported sum | %.0f W |\n", sp.GPUReportedSumW)
-			if sp.ReportingRatio > 0 {
-				fmt.Fprintf(&b, "| Reporting ratio | %.2f (1.0 = accurate, <0.75 = GPU over-reports) |\n", sp.ReportingRatio)
+			spRows := [][]string{
+				{"Server idle", fmt.Sprintf("%.0f W", sp.IdleW)},
+				{"Server under load", fmt.Sprintf("%.0f W", sp.LoadedW)},
+				{"Server delta (load − idle)", fmt.Sprintf("%.0f W", sp.DeltaW)},
+				{"GPU-reported sum", fmt.Sprintf("%.0f W", sp.GPUReportedSumW)},
 			}
+			if sp.ReportingRatio > 0 {
+				spRows = append(spRows, []string{"Reporting ratio", fmt.Sprintf("%.2f (1.0 = accurate, <0.75 = GPU over-reports)", sp.ReportingRatio)})
+			}
+			b.WriteString(fmtMDTable([]string{"", "Value"}, spRows))
 			b.WriteString("\n")
 		}
 		for _, note := range sp.Notes {
@@ -229,61 +431,72 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		}
 	}

-	// ── Terminal charts (steady-state only) ───────────────────────────────────
-	if len(charts) > 0 {
-		b.WriteString("## Steady-State Charts\n\n")
-		for _, chart := range charts {
-			content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
-			if content == "" {
-				continue
+	// ── PSU Issues ────────────────────────────────────────────────────────────
+	if len(result.PSUIssues) > 0 {
+		b.WriteString("## PSU Issues\n\n")
+		b.WriteString("The following power supply anomalies were detected during the benchmark:\n\n")
+		for _, issue := range result.PSUIssues {
+			fmt.Fprintf(&b, "- ⛔ %s\n", issue)
+		}
+		b.WriteString("\n")
+	}
+
+	// ── Cooling ───────────────────────────────────────────────────────────────
+	if cooling := result.Cooling; cooling != nil {
+		b.WriteString("## Cooling\n\n")
+		if cooling.Available {
+			dutyAvg, dutyP95 := "N/A", "N/A"
+			if cooling.FanDutyCycleAvailable {
+				dutyAvg = fmt.Sprintf("%.1f%%", cooling.AvgFanDutyCyclePct)
+				dutyP95 = fmt.Sprintf("%.1f%%", cooling.P95FanDutyCyclePct)
 			}
-			fmt.Fprintf(&b, "### %s\n\n```\n%s\n```\n\n", chart.Title, content)
+			b.WriteString(fmtMDTable(
+				[]string{"Metric", "Value"},
+				[][]string{
+					{"Average fan speed", fmt.Sprintf("%.0f RPM", cooling.AvgFanRPM)},
+					{"Average fan duty cycle", dutyAvg},
+					{"P95 fan duty cycle", dutyP95},
+				},
+			))
+			b.WriteString("\n")
+		} else {
+			b.WriteString("Cooling telemetry unavailable.\n\n")
+		}
+		for _, note := range cooling.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(cooling.Notes) > 0 {
+			b.WriteString("\n")
 		}
 	}

-	// ── Methodology ───────────────────────────────────────────────────────────
-	b.WriteString("## Methodology\n\n")
-	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile)
-	b.WriteString("- Single-GPU compute score from bee-gpu-burn cuBLASLt when available.\n")
-	b.WriteString("- Thermal and power limitations inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
-	b.WriteString("- `result.json` is the canonical machine-readable source for this benchmark run.\n\n")
+	// ── Platform Scalability ──────────────────────────────────────────────────
+	if len(result.PerformanceRampSteps) > 0 {
+		b.WriteString("## Platform Scalability (Performance Ramp)\n\n")
+		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n\n", result.PlatformPowerScore)
+		var scalRows [][]string
+		for _, step := range result.PerformanceRampSteps {
+			scalRows = append(scalRows, []string{
+				fmt.Sprintf("%d", step.StepIndex),
+				joinIndexList(step.GPUIndices),
+				fmt.Sprintf("%.2f", step.TotalSyntheticTOPS),
+				fmt.Sprintf("%.1f%%", step.ScalabilityPct),
+			})
+		}
+		b.WriteString(fmtMDTable([]string{"k GPUs", "GPU Indices", "Total Synthetic TOPS", "Scalability"}, scalRows))
+		b.WriteString("\n")
+	}

 	// ── Raw files ─────────────────────────────────────────────────────────────
 	b.WriteString("## Raw Files\n\n")
 	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
-	b.WriteString("- `gpu-*-baseline-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-warmup.log`\n")
-	b.WriteString("- `gpu-*-steady.log`\n")
-	b.WriteString("- `gpu-*-steady-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-cooldown-metrics.csv/html/term.txt`\n")
+	b.WriteString("- `gpu-metrics.csv`\n- `gpu-metrics.html`\n- `gpu-burn.log`\n")
 	if result.Interconnect != nil {
 		b.WriteString("- `nccl-all-reduce.log`\n")
 	}
 	return b.String()
 }

-// loadBenchmarkReportCharts loads only steady-state terminal charts (baseline and
-// cooldown charts are not useful for human review).
-func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
-	var charts []benchmarkReportChart
-	for _, idx := range gpuIndices {
-		path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-steady-metrics-term.txt", idx))
-		raw, err := os.ReadFile(path)
-		if err != nil || len(raw) == 0 {
-			continue
-		}
-		charts = append(charts, benchmarkReportChart{
-			Title:   fmt.Sprintf("GPU %d — Steady State", idx),
-			Content: string(raw),
-		})
-	}
-	return charts
-}
-
-func stripANSIEscapeSequences(raw string) string {
-	return ansiEscapePattern.ReplaceAllString(raw, "")
-}
-
 // formatThrottleLine renders throttle counters as human-readable percentages of
 // the steady-state window.  Only non-zero counters are shown.  When the steady
 // duration is unknown (0), raw seconds are shown instead.
@@ -323,6 +536,7 @@ func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64)
 func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
 	var b strings.Builder
 	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
+	fmt.Fprintf(&b, "benchmark_version=%s\n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
 	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
--- a/audit/internal/platform/benchmark_table.go
+++ b/audit/internal/platform/benchmark_table.go
@@ -0,0 +1,75 @@
+package platform
+
+import (
+	"strings"
+)
+
+// fmtMDTable renders a markdown table with column widths padded so the table
+// is readable as plain text without a markdown renderer.
+//
+// headers fixes the column count; an empty headers slice yields "".
+// rows contains the data rows. A row with fewer cells than headers is padded
+// with empty cells, and cells beyond the last header are dropped.
+// Widths are counted in runes rather than bytes, so multi-byte cells such as
+// "Server delta (load − idle)" stay aligned.
+func fmtMDTable(headers []string, rows [][]string) string {
+	ncols := len(headers)
+	if ncols == 0 {
+		return ""
+	}
+
+	// cellAt returns row[i], or "" when the row has no cell for column i.
+	cellAt := func(row []string, i int) string {
+		if i < len(row) {
+			return row[i]
+		}
+		return ""
+	}
+	// runeLen approximates display width as one column per rune.
+	runeLen := func(s string) int { return len([]rune(s)) }
+
+	// Compute max width per column.
+	widths := make([]int, ncols)
+	for i, h := range headers {
+		widths[i] = runeLen(h)
+	}
+	for _, row := range rows {
+		for i := range widths {
+			if w := runeLen(cellAt(row, i)); w > widths[i] {
+				widths[i] = w
+			}
+		}
+	}
+
+	var b strings.Builder
+
+	// writeRow emits one "| a | b |" line, padding every cell to its column width.
+	writeRow := func(row []string) {
+		b.WriteByte('|')
+		for i := range widths {
+			cell := cellAt(row, i)
+			b.WriteByte(' ')
+			b.WriteString(cell)
+			b.WriteString(strings.Repeat(" ", widths[i]-runeLen(cell)))
+			b.WriteString(" |")
+		}
+		b.WriteByte('\n')
+	}
+
+	// Header row.
+	writeRow(headers)
+
+	// Separator row.
+	b.WriteByte('|')
+	for _, w := range widths {
+		b.WriteString(strings.Repeat("-", w+2))
+		b.WriteByte('|')
+	}
+	b.WriteByte('\n')
+
+	// Data rows.
+	for _, row := range rows {
+		writeRow(row)
+	}
+	return b.String()
+}
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -1,8 +1,13 @@
 package platform

 import (
+	"context"
+	"os"
+	"os/exec"
+	"path/filepath"
 	"strings"
 	"testing"
+	"time"
 )

 func TestResolveBenchmarkProfile(t *testing.T) {
@@ -16,17 +21,17 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 		{
 			name:    "default",
 			profile: "",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 45, SteadySec: 480, NCCLSec: 180, CooldownSec: 0},
 		},
 		{
 			name:    "stability",
 			profile: "stability",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 120, SteadySec: 3600, NCCLSec: 300, CooldownSec: 0},
 		},
 		{
 			name:    "overnight",
 			profile: "overnight",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 180, SteadySec: 27000, NCCLSec: 600, CooldownSec: 0},
 		},
 	}

@@ -41,6 +46,216 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 	}
 }

+func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
+	t.Parallel()
+
+	labels, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, SteadySec: 480},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if len(labels) != 5 || len(phases) != 5 {
+		t.Fatalf("labels=%d phases=%d want 5", len(labels), len(phases))
+	}
+	if basePhaseSec != 60 {
+		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
+	}
+	if mixedPhaseSec != 300 {
+		t.Fatalf("mixedPhaseSec=%d want 300", mixedPhaseSec)
+	}
+	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 {
+		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
+	}
+	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,300" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanStability(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, SteadySec: 3600},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 300 {
+		t.Fatalf("basePhaseSec=%d want 300", basePhaseSec)
+	}
+	if mixedPhaseSec != 3600 {
+		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,3600" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, SteadySec: 27000},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 3600 {
+		t.Fatalf("basePhaseSec=%d want 3600", basePhaseSec)
+	}
+	if mixedPhaseSec != 14400 {
+		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,14400" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) {
+	t.Parallel()
+
+	phases := []benchmarkPlannedPhase{
+		{PlanLabel: "fp8", MetricStage: "fp8", DurationSec: 10},
+		{PlanLabel: "fp16", MetricStage: "fp16", DurationSec: 10},
+		{PlanLabel: "mixed", MetricStage: "mixed", DurationSec: 50},
+	}
+	rows := []GPUMetricRow{
+		{ElapsedSec: 5},
+		{ElapsedSec: 15},
+		{ElapsedSec: 25},
+		{ElapsedSec: 65},
+	}
+	got := splitBenchmarkRowsByPlannedPhase(rows, phases)
+	if len(got["fp8"]) != 1 {
+		t.Fatalf("fp8 rows=%d want 1", len(got["fp8"]))
+	}
+	if len(got["fp16"]) != 1 {
+		t.Fatalf("fp16 rows=%d want 1", len(got["fp16"]))
+	}
+	if len(got["mixed"]) != 2 {
+		t.Fatalf("mixed rows=%d want 2", len(got["mixed"]))
+	}
+}
+
+func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) {
+	t.Parallel()
+	// NOTE(review): "10.0" also expects no fp4 in the list; confirm this matches the "BeforeBlackwell" name.
+	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" {
+		t.Fatalf("supported=%v", got)
+	}
+	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" {
+		t.Fatalf("supported=%v", got)
+	}
+}
+
+func TestBenchmarkPlannedPhaseStatus(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name       string
+		raw        string
+		wantStatus string
+	}{
+		{name: "ok", raw: "status=OK\n", wantStatus: "OK"},
+		{name: "failed", raw: "phase_error=fp16\n", wantStatus: "FAILED"},
+		{name: "unsupported", raw: "cublasLt_profiles=unsupported\nphase_error=fp4\n", wantStatus: "UNSUPPORTED"},
+	}
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := benchmarkPlannedPhaseStatus([]byte(tc.raw))
+			if got != tc.wantStatus {
+				t.Fatalf("status=%q want %q", got, tc.wantStatus)
+			}
+		})
+	}
+}
+
+func TestBenchmarkCalibrationThrottleReasonIgnoresPowerReasons(t *testing.T) {
+	t.Parallel()
+
+	before := BenchmarkThrottleCounters{}
+	if got := benchmarkCalibrationThrottleReason(before, BenchmarkThrottleCounters{SWPowerCapUS: 1_000_000}); got != "" {
+		t.Fatalf("sw_power_cap should be ignored, got %q", got)
+	}
+	if got := benchmarkCalibrationThrottleReason(before, BenchmarkThrottleCounters{HWPowerBrakeSlowdownUS: 1_000_000}); got != "" {
+		t.Fatalf("hw_power_brake should be ignored, got %q", got)
+	}
+	if got := benchmarkCalibrationThrottleReason(before, BenchmarkThrottleCounters{HWThermalSlowdownUS: 1_000_000}); got != "hw_thermal" {
+		t.Fatalf("hw_thermal mismatch: got %q", got)
+	}
+	if got := benchmarkCalibrationThrottleReason(before, BenchmarkThrottleCounters{SWThermalSlowdownUS: 1_000_000}); got != "sw_thermal" {
+		t.Fatalf("sw_thermal mismatch: got %q", got)
+	}
+}
+
+func TestResetBenchmarkGPUsSkipsWithoutRoot(t *testing.T) {
+	// Not parallel: swaps the package-level hooks benchmarkGeteuid and satExecCommand.
+	// Expects a "reset skipped" log line and every requested GPU reported as failed.
+	oldGeteuid := benchmarkGeteuid
+	oldExec := satExecCommand
+	benchmarkGeteuid = func() int { return 1000 }
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		t.Fatalf("unexpected command: %s %v", name, args)
+		return nil
+	}
+	t.Cleanup(func() {
+		benchmarkGeteuid = oldGeteuid
+		satExecCommand = oldExec
+	})
+
+	var logs []string
+	failed := resetBenchmarkGPUs(context.Background(), filepath.Join(t.TempDir(), "verbose.log"), []int{0, 2}, func(line string) {
+		logs = append(logs, line)
+	})
+	if got, want := strings.Join(logs, "\n"), "power benchmark pre-flight: root privileges unavailable, GPU reset skipped"; !strings.Contains(got, want) {
+		t.Fatalf("logs=%q want substring %q", got, want)
+	}
+	if len(failed) != 2 || failed[0] != 0 || failed[1] != 2 {
+		t.Fatalf("failed=%v want [0 2]", failed)
+	}
+}
+
+func TestResetBenchmarkGPUsResetsEachGPU(t *testing.T) {
+	// Not parallel: swaps package-level hooks (benchmarkGeteuid, benchmarkSleep,
+	// satLookPath). Expects one "-i <idx> -r" nvidia-smi call per GPU index.
+	dir := t.TempDir()
+	script := filepath.Join(dir, "nvidia-smi")
+	argsLog := filepath.Join(dir, "args.log")
+	if err := os.WriteFile(script, []byte("#!/bin/sh\nprintf '%s\\n' \"$*\" >> "+argsLog+"\nprintf 'ok\\n'\n"), 0755); err != nil {
+		t.Fatalf("write script: %v", err)
+	}
+
+	oldGeteuid := benchmarkGeteuid
+	oldSleep := benchmarkSleep
+	oldLookPath := satLookPath
+	benchmarkGeteuid = func() int { return 0 }
+	benchmarkSleep = func(time.Duration) {}
+	satLookPath = func(file string) (string, error) {
+		if file == "nvidia-smi" {
+			return script, nil
+		}
+		return exec.LookPath(file)
+	}
+	t.Cleanup(func() {
+		benchmarkGeteuid = oldGeteuid
+		benchmarkSleep = oldSleep
+		satLookPath = oldLookPath
+	})
+
+	failed := resetBenchmarkGPUs(context.Background(), filepath.Join(dir, "verbose.log"), []int{2, 5}, nil)
+	if len(failed) != 0 {
+		t.Fatalf("failed=%v want no failures", failed)
+	}
+	raw, err := os.ReadFile(argsLog)
+	if err != nil {
+		t.Fatalf("read args log: %v", err)
+	}
+	got := strings.Fields(string(raw))
+	want := []string{"-i", "2", "-r", "-i", "5", "-r"}
+	if strings.Join(got, " ") != strings.Join(want, " ") {
+		t.Fatalf("args=%v want %v", got, want)
+	}
+}
+
 func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	t.Parallel()

@@ -56,6 +271,59 @@ func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	}
 }

+func TestInitialBenchmarkCalibrationLimitW(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name string
+		info benchmarkGPUInfo
+		want int
+	}{
+		{
+			name: "prefers default tdp over current derated limit",
+			info: benchmarkGPUInfo{
+				PowerLimitW:        500,
+				DefaultPowerLimitW: 600,
+				MaxPowerLimitW:     600,
+			},
+			want: 600,
+		},
+		{
+			name: "caps default tdp to reported max limit",
+			info: benchmarkGPUInfo{
+				PowerLimitW:        500,
+				DefaultPowerLimitW: 700,
+				MaxPowerLimitW:     650,
+			},
+			want: 650,
+		},
+		{
+			name: "falls back to current limit when default missing",
+			info: benchmarkGPUInfo{
+				PowerLimitW:    525,
+				MaxPowerLimitW: 600,
+			},
+			want: 525,
+		},
+		{
+			name: "falls back to max limit when only that is known",
+			info: benchmarkGPUInfo{
+				MaxPowerLimitW: 575,
+			},
+			want: 575,
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			if got := initialBenchmarkCalibrationLimitW(tc.info); got != tc.want {
+				t.Fatalf("initialBenchmarkCalibrationLimitW(%+v)=%d want %d", tc.info, got, tc.want)
+			}
+		})
+	}
+}
+
 func TestParseBenchmarkBurnLog(t *testing.T) {
 	t.Parallel()

@@ -65,8 +333,10 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 		"[gpu 0] compute_capability=9.0",
 		"[gpu 0] backend=cublasLt",
 		"[gpu 0] duration_s=10",
+		"[gpu 0] int8_tensor[0]=READY dim=16384x16384x8192 block=128 stream=0",
 		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
 		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
+		"[gpu 0] int8_tensor_iterations=80",
 		"[gpu 0] fp16_tensor_iterations=200",
 		"[gpu 0] fp8_e4m3_iterations=50",
 		"[gpu 0] status=OK",
@@ -79,15 +349,24 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 	if got.ComputeCapability != "9.0" {
 		t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
 	}
-	if len(got.Profiles) != 2 {
-		t.Fatalf("profiles=%d want 2", len(got.Profiles))
+	if len(got.Profiles) != 3 {
+		t.Fatalf("profiles=%d want 3", len(got.Profiles))
 	}
 	if got.Profiles[0].TeraOpsPerSec <= 0 {
 		t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
 	}
+	if got.Profiles[0].Category != "fp16_bf16" {
+		t.Fatalf("profile[0] category=%q want fp16_bf16", got.Profiles[0].Category)
+	}
 	if got.Profiles[1].Category != "fp8" {
 		t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
 	}
+	if got.Profiles[2].Category != "int8" {
+		t.Fatalf("profile[2] category=%q want int8", got.Profiles[2].Category)
+	}
+	if got.Profiles[2].Weight != 0.25 {
+		t.Fatalf("profile[2] weight=%f want 0.25", got.Profiles[2].Weight)
+	}
 }

 func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
@@ -131,6 +410,13 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 				DegradationReasons: []string{"power_capped"},
 			},
 		},
+		Cooling: &BenchmarkCoolingSummary{
+			Available:             true,
+			AvgFanRPM:             9200,
+			FanDutyCycleAvailable: true,
+			AvgFanDutyCyclePct:    47.5,
+			P95FanDutyCyclePct:    62.0,
+		},
 	}

 	report := renderBenchmarkReport(result)
@@ -140,6 +426,9 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 		"1176.00",
 		"fp16_tensor",
 		"700.00",
+		"Cooling",
+		"Average fan duty cycle",
+		"47.5%",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
@@ -147,34 +436,141 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 	}
 }

-func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
+func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) {
 	t.Parallel()

-	report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
+	report := renderBenchmarkReport(NvidiaBenchmarkResult{
 		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
 		OverallStatus:      "OK",
 		SelectedGPUIndices: []int{0},
 		Normalization: BenchmarkNormalization{
 			Status: "full",
 		},
-	}, []benchmarkReportChart{
-		{
-			Title:   "GPU 0 Steady State",
-			Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
-		},
 	})

 	for _, needle := range []string{
-		"Steady-State Charts",
-		"GPU 0 Steady State",
-		"GPU 0 chart",
-		"42┤───",
+		"gpu-metrics.csv",
+		"gpu-metrics.html",
+		"gpu-burn.log",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
 		}
 	}
-	if strings.Contains(report, "\x1b[31m") {
-		t.Fatalf("report should not contain ANSI escapes\n%s", report)
+}
+
+func TestScoreBenchmarkGPUIgnoresDisabledPrecisions(t *testing.T) {
+	t.Parallel()
+
+	score := scoreBenchmarkGPUResult(BenchmarkGPUResult{
+		PrecisionSteady: []BenchmarkPrecisionSteadyPhase{
+			{Precision: "fp16", WeightedTeraOpsPerSec: 100},
+			{Precision: "fp64", WeightedTeraOpsPerSec: 999},
+			{Precision: "fp4", WeightedTeraOpsPerSec: 999},
+		},
+		PrecisionResults: []BenchmarkPrecisionResult{
+			{Category: "fp32_tf32", Supported: true, WeightedTeraOpsPerSec: 50},
+			{Category: "fp64", Supported: true, WeightedTeraOpsPerSec: 999},
+			{Category: "fp4", Supported: true, WeightedTeraOpsPerSec: 999},
+		},
+	})
+
+	if score.SyntheticScore != 100 {
+		t.Fatalf("SyntheticScore=%f want 100", score.SyntheticScore)
+	}
+	if score.MixedScore != 50 {
+		t.Fatalf("MixedScore=%f want 50", score.MixedScore)
+	}
+}
+
+func TestEnrichGPUInfoWithNvidiaSMIQ(t *testing.T) {
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Min Power Limit                       : 200.00 W
+    Max Power Limit                       : 600.00 W
+    Default Power Limit                   : 575.00 W
+    Current Power Limit                   : 560.00 W
+    Clocks
+        Graphics                          : 2422 MHz
+        Memory                            : 12481 MHz
+    Max Clocks
+        Graphics                          : 2430 MHz
+        SM                                : 2430 MHz
+        Memory                            : 12481 MHz
+        Video                             : 2107 MHz
+
+GPU 00000000:4F:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Max Clocks
+        Graphics                          : 2430 MHz
+        Memory                            : 12481 MHz
+`)
+
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0"},
+		1: {Index: 1, BusID: "00000000:4F:00.0"},
+	}
+
+	enrichGPUInfoWithNvidiaSMIQ(infoByIndex, nvsmiQ)
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 0 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[0].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 0 MaxMemoryClockMHz = %v, want 12481", infoByIndex[0].MaxMemoryClockMHz)
+	}
+	if infoByIndex[1].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 1 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[1].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[1].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 1 MaxMemoryClockMHz = %v, want 12481", infoByIndex[1].MaxMemoryClockMHz)
+	}
+	if infoByIndex[0].MinPowerLimitW != 200 {
+		t.Errorf("GPU 0 MinPowerLimitW = %v, want 200", infoByIndex[0].MinPowerLimitW)
+	}
+	if infoByIndex[0].MaxPowerLimitW != 600 {
+		t.Errorf("GPU 0 MaxPowerLimitW = %v, want 600", infoByIndex[0].MaxPowerLimitW)
+	}
+	if infoByIndex[0].DefaultPowerLimitW != 575 {
+		t.Errorf("GPU 0 DefaultPowerLimitW = %v, want 575", infoByIndex[0].DefaultPowerLimitW)
+	}
+	if infoByIndex[0].PowerLimitW != 560 {
+		t.Errorf("GPU 0 PowerLimitW = %v, want 560", infoByIndex[0].PowerLimitW)
+	}
+}
+
+func TestEnrichGPUInfoWithNvidiaSMIQSkipsPopulated(t *testing.T) {
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Min Power Limit                       : 100.00 W
+    Max Power Limit                       : 900.00 W
+    Max Clocks
+        Graphics                          : 9999 MHz
+        Memory                            : 9999 MHz
+`)
+	// Already populated — must not be overwritten.
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {
+			Index:               0,
+			BusID:               "00000000:4E:00.0",
+			MaxGraphicsClockMHz: 2430,
+			MaxMemoryClockMHz:   12481,
+			MinPowerLimitW:      200,
+			MaxPowerLimitW:      600,
+		},
+	}
+
+	enrichGPUInfoWithNvidiaSMIQ(infoByIndex, nvsmiQ)
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("expected existing value to be preserved, got %v", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[0].MinPowerLimitW != 200 {
+		t.Errorf("expected existing min power limit to be preserved, got %v", infoByIndex[0].MinPowerLimitW)
 	}
 }
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -2,37 +2,192 @@ package platform

 import "time"

+// BenchmarkHostConfig holds static CPU and memory configuration captured at
+// benchmark start. Useful for correlating results across runs on different hardware.
+type BenchmarkHostConfig struct {
+	CPUModel    string  `json:"cpu_model,omitempty"`
+	CPUSockets  int     `json:"cpu_sockets,omitempty"`
+	CPUCores    int     `json:"cpu_cores,omitempty"`
+	CPUThreads  int     `json:"cpu_threads,omitempty"`
+	MemTotalGiB float64 `json:"mem_total_gib,omitempty"`
+}
+
+// BenchmarkCPULoad summarises host CPU utilisation sampled during the GPU
+// steady-state phase. High or unstable CPU load during a GPU benchmark may
+// indicate a competing workload or a CPU-bound driver bottleneck.
+type BenchmarkCPULoad struct {
+	AvgPct  float64 `json:"avg_pct"`
+	MaxPct  float64 `json:"max_pct"`
+	P95Pct  float64 `json:"p95_pct"`
+	Samples int     `json:"samples"`
+	// Status is "ok", "high", or "unstable".
+	Status string `json:"status"`
+	Note   string `json:"note,omitempty"`
+}
+
+// BenchmarkCoolingSummary captures fan telemetry (average RPM plus average and
+// P95 duty cycle) aggregated across the full benchmark run.
+type BenchmarkCoolingSummary struct {
+	Available             bool     `json:"available"`
+	AvgFanRPM             float64  `json:"avg_fan_rpm,omitempty"`
+	FanDutyCycleAvailable bool     `json:"fan_duty_cycle_available,omitempty"`
+	FanDutyCycleEstimated bool     `json:"fan_duty_cycle_estimated,omitempty"`
+	AvgFanDutyCyclePct    float64  `json:"avg_fan_duty_cycle_pct,omitempty"`
+	P95FanDutyCyclePct    float64  `json:"p95_fan_duty_cycle_pct,omitempty"`
+	Notes                 []string `json:"notes,omitempty"`
+}
+
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
 	NvidiaBenchmarkProfileOvernight = "overnight"
 )

+const (
+	BenchmarkPowerEngineDCGMProfTester = "dcgmproftester"
+	BenchmarkPowerEngineTargetedPower  = "targeted_power"
+)
+
+// Estimated wall-clock durations for benchmark runs, derived from real _v8 logs.
+// Rule: when changing profile phase durations in resolveBenchmarkProfile(),
+// re-measure from actual task logs and update the constants here.
+//
+// Sources:
+//   - BenchmarkEstimatedPerfStandardSec:   MLT v8.22 ramp 1-4: 927 s; xFusion v8.22 parallel 8GPU: 1080 s
+//   - BenchmarkEstimatedPerfStabilitySec:  xFusion v8.22 ramp 1-8: 5532 s
+//   - BenchmarkEstimatedPerfOvernightSec:  derived from profile phases (SteadySec=27000)
+//   - BenchmarkEstimatedPowerStandardSec:  MLT v8.22 ramp 1-4: 2663 s; MSI v8.22 ramp 1-8: 2375 s
+//   - BenchmarkEstimatedPowerStabilitySec: target ~90 min with calibDurationSec=300 (8 GPU × ~2-3 attempts)
+const (
+	// Performance Benchmark (bee-gpu-burn).
+	// Duration is per full ramp-up run (ramp 1→N) or per single parallel run.
+	// Sequential per-GPU mode scales approximately linearly.
+	BenchmarkEstimatedPerfStandardSec  = 960  // ~16 min; ramp-up 1-4: 927 s, parallel 8GPU: 1080 s
+	BenchmarkEstimatedPerfStabilitySec = 5532 // ~92 min; ramp-up 1-8 measured
+	BenchmarkEstimatedPerfOvernightSec = 8 * 3600
+
+	// Power / Thermal Fit (dcgmproftester load + nvidia-smi power-limit binary search).
+	// Duration is for the full ramp-up run; individual steps vary with convergence speed.
+	BenchmarkEstimatedPowerStandardSec  = 2600 // ~43 min; ramp 1-4: 2663 s, ramp 1-8: 2375 s
+	BenchmarkEstimatedPowerStabilitySec = 5400 // ~90 min; calibDurationSec=300 × 8 GPU × ~2-3 attempts
+	BenchmarkEstimatedPowerOvernightSec = 3 * 3600
+)
+
 type NvidiaBenchmarkOptions struct {
 	Profile           string
 	SizeMB            int
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	RunNCCL           bool
-	ParallelGPUs      bool // run all selected GPUs simultaneously instead of sequentially
+	ServerPowerSource string
+	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
+	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
+	RampTotal         int    // total number of ramp-up steps in this run
+	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }

+const (
+	BenchmarkPowerSourceDCMI        = "dcmi"
+	BenchmarkPowerSourceSDRPSUInput = "sdr_psu_input"
+)
+
+type BenchmarkPowerAutotuneConfig struct {
+	Version           int       `json:"version"`
+	UpdatedAt         time.Time `json:"updated_at"`
+	SelectedSource    string    `json:"selected_source"`
+	BenchmarkKind     string    `json:"benchmark_kind,omitempty"`
+	Profile           string    `json:"profile,omitempty"`
+	IdleDurationSec   int       `json:"idle_duration_sec,omitempty"`
+	LoadDurationSec   int       `json:"load_duration_sec,omitempty"`
+	SampleIntervalSec int       `json:"sample_interval_sec,omitempty"`
+	Confidence        float64   `json:"confidence,omitempty"`
+	Reason            string    `json:"reason,omitempty"`
+}
+
+type SystemPowerSourceDecision struct {
+	Configured      bool      `json:"configured"`
+	SelectedSource  string    `json:"selected_source,omitempty"`
+	EffectiveSource string    `json:"effective_source,omitempty"`
+	Mode            string    `json:"mode,omitempty"` // autotuned, fallback, degraded
+	Reason          string    `json:"reason,omitempty"`
+	ConfiguredAt    time.Time `json:"configured_at,omitempty"`
+}
+
+type BenchmarkPowerAutotuneResult struct {
+	GeneratedAt         time.Time                         `json:"generated_at"`
+	Hostname            string                            `json:"hostname,omitempty"`
+	ServerModel         string                            `json:"server_model,omitempty"`
+	BenchmarkKind       string                            `json:"benchmark_kind,omitempty"`
+	Profile             string                            `json:"profile,omitempty"`
+	Status              string                            `json:"status"`
+	IdleDurationSec     int                               `json:"idle_duration_sec"`
+	LoadDurationSec     int                               `json:"load_duration_sec"`
+	SampleIntervalSec   int                               `json:"sample_interval_sec"`
+	SelectedSource      string                            `json:"selected_source,omitempty"`
+	IdleValidationError string                            `json:"idle_validation_error,omitempty"`
+	IdleValidation      *BenchmarkPowerAutotuneValidation `json:"idle_validation,omitempty"`
+	GPUPowerIdleW       float64                           `json:"gpu_power_idle_w,omitempty"`
+	GPUPowerLoadW       float64                           `json:"gpu_power_load_w,omitempty"`
+	Candidates          []BenchmarkPowerAutotuneCandidate `json:"candidates,omitempty"`
+	Notes               []string                          `json:"notes,omitempty"`
+	Config              *BenchmarkPowerAutotuneConfig     `json:"config,omitempty"`
+}
+
+type BenchmarkPowerAutotuneValidation struct {
+	Valid          bool    `json:"valid"`
+	GPUAvgUsagePct float64 `json:"gpu_avg_usage_pct,omitempty"`
+	GPUP95UsagePct float64 `json:"gpu_p95_usage_pct,omitempty"`
+	CPUAvgUsagePct float64 `json:"cpu_avg_usage_pct,omitempty"`
+	CPUP95UsagePct float64 `json:"cpu_p95_usage_pct,omitempty"`
+	GPUSamples     int     `json:"gpu_samples,omitempty"`
+	CPUSamples     int     `json:"cpu_samples,omitempty"`
+	Reason         string  `json:"reason,omitempty"`
+}
+
+type BenchmarkPowerAutotuneCandidate struct {
+	Source         string  `json:"source"`
+	IdleAvgW       float64 `json:"idle_avg_w,omitempty"`
+	LoadAvgW       float64 `json:"load_avg_w,omitempty"`
+	DeltaW         float64 `json:"delta_w,omitempty"`
+	Samples        int     `json:"samples,omitempty"`
+	RelativeError  float64 `json:"relative_error,omitempty"`
+	Confidence     float64 `json:"confidence,omitempty"`
+	Selected       bool    `json:"selected,omitempty"`
+	Available      bool    `json:"available"`
+	SelectionNotes string  `json:"selection_notes,omitempty"`
+}

 type NvidiaBenchmarkResult struct {
-	BenchmarkVersion   string                       `json:"benchmark_version"`
-	GeneratedAt        time.Time                    `json:"generated_at"`
-	Hostname           string                       `json:"hostname,omitempty"`
-	ServerModel        string                       `json:"server_model,omitempty"`
-	BenchmarkProfile   string                       `json:"benchmark_profile"`
-	ParallelGPUs       bool                         `json:"parallel_gpus,omitempty"`
-	OverallStatus      string                       `json:"overall_status"`
-	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
-	Findings           []string                     `json:"findings,omitempty"`
-	Warnings           []string                     `json:"warnings,omitempty"`
-	Normalization      BenchmarkNormalization       `json:"normalization"`
-	GPUs               []BenchmarkGPUResult         `json:"gpus"`
-	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
-	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
+	BenchmarkVersion string    `json:"benchmark_version"`
+	GeneratedAt      time.Time `json:"generated_at"`
+	Hostname         string    `json:"hostname,omitempty"`
+	ServerModel      string    `json:"server_model,omitempty"`
+	BenchmarkProfile string    `json:"benchmark_profile"`
+	ParallelGPUs     bool      `json:"parallel_gpus,omitempty"`
+	RampStep         int       `json:"ramp_step,omitempty"`
+	RampTotal        int       `json:"ramp_total,omitempty"`
+	RampRunID        string    `json:"ramp_run_id,omitempty"`
+	ScalabilityScore float64   `json:"scalability_score,omitempty"`
+	// PlatformPowerScore is the mean compute scalability across ramp steps 2..N.
+	// 100% = each added GPU contributes exactly its single-card throughput.
+	// < 100% = throughput loss due to thermal throttle, power limits, or contention.
+	PlatformPowerScore   float64                      `json:"platform_power_score,omitempty"`
+	PerformanceRampSteps []NvidiaPerformanceRampStep  `json:"performance_ramp_steps,omitempty"`
+	OverallStatus        string                       `json:"overall_status"`
+	SelectedGPUIndices   []int                        `json:"selected_gpu_indices"`
+	Findings             []string                     `json:"findings,omitempty"`
+	Warnings             []string                     `json:"warnings,omitempty"`
+	Normalization        BenchmarkNormalization       `json:"normalization"`
+	HostConfig           *BenchmarkHostConfig         `json:"host_config,omitempty"`
+	CPULoad              *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
+	Cooling              *BenchmarkCoolingSummary     `json:"cooling,omitempty"`
+	GPUs                 []BenchmarkGPUResult         `json:"gpus"`
+	Interconnect         *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
+	ServerPower          *BenchmarkServerPower        `json:"server_power,omitempty"`
+	// PSUIssues holds power supply fault events detected by comparing IPMI PSU
+	// sensor states before and after the benchmark run. Empty when IPMI is
+	// unavailable or no PSU faults occurred during the test.
+	PSUIssues []string `json:"psu_issues,omitempty"`
 }

 type BenchmarkNormalization struct {
@@ -52,30 +207,51 @@ type BenchmarkNormalizationGPU struct {
 }

 type BenchmarkGPUResult struct {
-	Index                  int                        `json:"index"`
-	UUID                   string                     `json:"uuid,omitempty"`
-	Name                   string                     `json:"name,omitempty"`
-	BusID                  string                     `json:"bus_id,omitempty"`
-	VBIOS                  string                     `json:"vbios,omitempty"`
-	ComputeCapability      string                     `json:"compute_capability,omitempty"`
-	Backend                string                     `json:"backend,omitempty"`
-	Status                 string                     `json:"status"`
-	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
-	MultiprocessorCount    int                        `json:"multiprocessor_count,omitempty"`
-	DefaultPowerLimitW     float64                    `json:"default_power_limit_w,omitempty"`
-	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
-	BaseGraphicsClockMHz   float64                    `json:"base_graphics_clock_mhz,omitempty"`
-	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
-	LockedGraphicsClockMHz float64                    `json:"locked_graphics_clock_mhz,omitempty"`
-	LockedMemoryClockMHz   float64                    `json:"locked_memory_clock_mhz,omitempty"`
-	Baseline               BenchmarkTelemetrySummary  `json:"baseline"`
-	Steady                 BenchmarkTelemetrySummary  `json:"steady"`
-	Cooldown               BenchmarkTelemetrySummary  `json:"cooldown"`
-	Throttle               BenchmarkThrottleCounters  `json:"throttle_counters"`
-	PrecisionResults       []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
-	Scores                 BenchmarkScorecard         `json:"scores"`
-	DegradationReasons     []string                   `json:"degradation_reasons,omitempty"`
-	Notes                  []string                   `json:"notes,omitempty"`
+	Index               int     `json:"index"`
+	UUID                string  `json:"uuid,omitempty"`
+	Name                string  `json:"name,omitempty"`
+	BusID               string  `json:"bus_id,omitempty"`
+	VBIOS               string  `json:"vbios,omitempty"`
+	ComputeCapability   string  `json:"compute_capability,omitempty"`
+	Backend             string  `json:"backend,omitempty"`
+	Status              string  `json:"status"`
+	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
+	PowerLimitDerated   bool    `json:"power_limit_derated,omitempty"`
+	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
+	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
+	// ShutdownTempC is the hardware thermal shutdown threshold for this GPU,
+	// sourced from nvidia-smi -q ("GPU Shutdown Temp"). Fallback: 90°C.
+	ShutdownTempC float64 `json:"shutdown_temp_c,omitempty"`
+	// SlowdownTempC is the software throttle onset threshold ("GPU Slowdown Temp").
+	// Fallback: 80°C.
+	SlowdownTempC float64 `json:"slowdown_temp_c,omitempty"`
+	// CalibratedPeakPowerW is the p95 power measured during a short
+	// dcgmi targeted_power calibration run before the main benchmark.
+	// Used as the reference denominator for PowerSustainScore instead of
+	// the hardware default limit, which bee-gpu-burn cannot reach.
+	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
+	CalibratedPeakTempC    float64                         `json:"calibrated_peak_temp_c,omitempty"`
+	PowerCalibrationTries  int                             `json:"power_calibration_tries,omitempty"`
+	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
+	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
+	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
+	LockedGraphicsClockMHz float64                         `json:"locked_graphics_clock_mhz,omitempty"`
+	LockedMemoryClockMHz   float64                         `json:"locked_memory_clock_mhz,omitempty"`
+	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
+	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
+	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
+	PrecisionFailures      []string                        `json:"precision_failures,omitempty"`
+	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
+	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
+	// ECC error delta over the full benchmark (all phases combined). omitzero (Go 1.24+) drops it when IsZero(); omitempty never omits a struct.
+	ECC                BenchmarkECCCounters       `json:"ecc,omitzero"`
+	PrecisionResults   []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
+	Scores             BenchmarkScorecard         `json:"scores"`
+	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
+	Notes              []string                   `json:"notes,omitempty"`
+	// CoolingWarning is non-empty when a thermal throttle event occurred with
+	// a clock drop ≥20% while server fans were not at 100% duty cycle.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
 }

 type BenchmarkTelemetrySummary struct {
@@ -105,6 +281,18 @@ type BenchmarkThrottleCounters struct {
 	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
 }

+// BenchmarkECCCounters holds ECC error counts: either a point-in-time sample
+// or a delta accumulated over a benchmark phase. Corrected = single-bit errors
+// fixed by ECC (DRAM degradation); Uncorrected = double-bit errors that could not
+// be corrected (serious fault). Both are volatile (since last driver reset), not persistent.
+type BenchmarkECCCounters struct {
+	Corrected   uint64 `json:"corrected"`
+	Uncorrected uint64 `json:"uncorrected"`
+}
+
+func (e BenchmarkECCCounters) Total() uint64 { return e.Corrected + e.Uncorrected }
+func (e BenchmarkECCCounters) IsZero() bool  { return e.Corrected == 0 && e.Uncorrected == 0 }
+
 type BenchmarkPrecisionResult struct {
 	Name          string  `json:"name"`
 	Category      string  `json:"category"`
@@ -115,34 +303,124 @@ type BenchmarkPrecisionResult struct {
 	K             uint64  `json:"k,omitempty"`
 	Iterations    uint64  `json:"iterations,omitempty"`
 	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
-	Notes         string  `json:"notes,omitempty"`
+	// Weight is the fp32-equivalence factor for this precision category.
+	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, int8/fp8 = 0.25, fp4 = 0.125.
+	// WeightedTeraOpsPerSec = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
+	Weight                float64 `json:"weight,omitempty"`
+	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
+	Notes                 string  `json:"notes,omitempty"`
 }

 type BenchmarkScorecard struct {
-	ComputeScore        float64 `json:"compute_score"`
+	ComputeScore float64 `json:"compute_score"`
+	// SyntheticScore is the sum of fp32-equivalent TOPS from per-precision
+	// steady phases (each precision ran alone, full GPU dedicated).
+	SyntheticScore float64 `json:"synthetic_score,omitempty"`
+	// MixedScore is the sum of fp32-equivalent TOPS from the combined phase
+	// (all precisions competing simultaneously — closer to real workloads).
+	MixedScore float64 `json:"mixed_score,omitempty"`
+	// MixedEfficiency = MixedScore / SyntheticScore. Measures how well the GPU
+	// sustains throughput under concurrent mixed-precision load.
+	MixedEfficiency     float64 `json:"mixed_efficiency,omitempty"`
 	PowerSustainScore   float64 `json:"power_sustain_score"`
 	ThermalSustainScore float64 `json:"thermal_sustain_score"`
-	StabilityScore      float64 `json:"stability_score"`
-	InterconnectScore   float64 `json:"interconnect_score"`
-	CompositeScore      float64 `json:"composite_score"`
+	// StabilityScore falls as the fraction of steady-state time the GPU spent throttling
+	// (thermal + power cap combined) rises. 0% throttle = 100; 100% throttle = 0.
+	StabilityScore float64 `json:"stability_score"`
+
+	// Throttle breakdown — percentage of steady-state time in each throttle type.
+	// Used for diagnosis: tells WHY the GPU throttled, not just whether it did.
+	ThermalThrottlePct   float64 `json:"thermal_throttle_pct"`   // HW+SW thermal slowdown
+	PowerCapThrottlePct  float64 `json:"power_cap_throttle_pct"` // SW power cap
+	SyncBoostThrottlePct float64 `json:"sync_boost_throttle_pct,omitempty"`
+
+	// Temperature headroom: distance to the 100°C destruction threshold.
+	// TempHeadroomC = 100 - P95TempC. < 20°C = warning; < 10°C = critical.
+	// Independent of throttle — a GPU at 86°C without throttle still has only 14°C headroom (warning).
+	TempHeadroomC float64 `json:"temp_headroom_c"`
+
+	InterconnectScore float64 `json:"interconnect_score"`
+	// ServerQualityScore (0–100) reflects server infrastructure quality independent
+	// of GPU model. Combines throttle time, power variance, and temp variance.
+	// Use this to compare servers with the same GPU, or to flag a bad server
+	// that throttles an otherwise fast GPU.
+	ServerQualityScore float64 `json:"server_quality_score"`
+	// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
+	// A throttling GPU will score lower here automatically — no quality multiplier.
+	CompositeScore float64 `json:"composite_score"`
 	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
-	// Comparable across throttle levels and GPU generations. Low value at normal
-	// clocks indicates silicon degradation.
 	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
 }

-// BenchmarkServerPower captures server-side power via IPMI alongside GPU-reported
-// power. The reporting_ratio (delta / gpu_reported_sum) near 1.0 means GPU power
-// telemetry is accurate; a ratio well below 1.0 (e.g. 0.5) means the GPU is
-// over-reporting its power consumption.
+// BenchmarkPSUSlotPower holds SDR power readings for one PSU slot sampled
+// during the benchmark. Map keys for these match audit HardwarePowerSupply.Slot
+// (0-based) so benchmark and audit data can be correlated by slot; nil readings are omitted from JSON.
+type BenchmarkPSUSlotPower struct {
+	InputW  *float64 `json:"input_w,omitempty"`  // AC wall input (PSUx_POWER_IN)
+	OutputW *float64 `json:"output_w,omitempty"` // DC output (PSUx_POWER_OUT)
+	Status  string   `json:"status,omitempty"`
+}
+
+// BenchmarkServerPower captures server-side power from multiple independent
+// sources: IPMI DCMI (high-level), IPMI SDR per-PSU sensors (granular), and
+// GPU-reported power (nvidia-smi). Cross-comparing sources detects when DCMI
+// covers only a subset of installed PSUs (partial coverage).
+//
+// Source legend:
+//   - DCMI      — `ipmitool dcmi power reading`; fast but may miss PSUs
+//   - SDR       — `ipmitool sdr` PSUx_POWER_IN/OUT; per-PSU, reliable
+//   - nvidia-smi — GPU self-reported via internal shunt; accurate for GPU load
 type BenchmarkServerPower struct {
-	Available       bool     `json:"available"`
-	IdleW           float64  `json:"idle_w,omitempty"`
-	LoadedW         float64  `json:"loaded_w,omitempty"`
-	DeltaW          float64  `json:"delta_w,omitempty"`
-	GPUReportedSumW float64  `json:"gpu_reported_sum_w,omitempty"`
-	ReportingRatio  float64  `json:"reporting_ratio,omitempty"`
-	Notes           []string `json:"notes,omitempty"`
+	Available         bool    `json:"available"`
+	Source            string  `json:"source,omitempty"`
+	Mode              string  `json:"mode,omitempty"`
+	Reason            string  `json:"reason,omitempty"`
+	SampleIntervalSec int     `json:"sample_interval_sec,omitempty"`
+	IdleW             float64 `json:"idle_w,omitempty"`   // DCMI at idle
+	LoadedW           float64 `json:"loaded_w,omitempty"` // DCMI at peak load
+	DeltaW            float64 `json:"delta_w,omitempty"`  // DCMI loaded − idle
+	GPUReportedSumW   float64 `json:"gpu_reported_sum_w,omitempty"`
+	ReportingRatio    float64 `json:"reporting_ratio,omitempty"` // NOTE(review): presumably DeltaW / GPUReportedSumW — confirm
+
+	// PSU AC input sum — sampled at idle and at peak load using collector's
+	// slot patterns (PSU1_POWER_IN, PSU1_PIN, PS1 POut, Power1…).
+	PSUInputIdleW   float64 `json:"psu_input_idle_w,omitempty"`
+	PSUInputLoadedW float64 `json:"psu_input_loaded_w,omitempty"`
+
+	// PSU DC output sum — power delivered to server internals after conversion.
+	PSUOutputIdleW   float64 `json:"psu_output_idle_w,omitempty"`
+	PSUOutputLoadedW float64 `json:"psu_output_loaded_w,omitempty"`
+
+	// Per-slot PSU readings at idle and at peak load.
+	// Keys are 0-based slot strings matching audit HardwarePowerSupply.Slot.
+	PSUSlotReadingsIdle   map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings_idle,omitempty"`
+	PSUSlotReadingsLoaded map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings_loaded,omitempty"`
+
+	// GPUSlotTotalW is the sum of GPU_POWER_SLOTx SDR sensors at peak load.
+	// PCIe slot delivery only (excludes 16-pin connector power).
+	GPUSlotTotalW float64 `json:"gpu_slot_total_w,omitempty"`
+
+	// DCMICoverageRatio = DCMI_idle / SDR_PSU_IN_idle.
+	// Near 1.0 → DCMI tracks all PSUs. Near 0.5 → DCMI tracks half the PSUs.
+	DCMICoverageRatio float64 `json:"dcmi_coverage_ratio,omitempty"`
+
+	Notes []string `json:"notes,omitempty"`
+}
+
+// BenchmarkPrecisionSteadyPhase holds per-precision-category telemetry collected
+// during a dedicated steady window in which only that precision runs. Because only
+// one kernel type runs at a time the PowerCVPct here is a genuine stability signal.
+type BenchmarkPrecisionSteadyPhase struct {
+	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
+	Status                string                    `json:"status,omitempty"`
+	Steady                BenchmarkTelemetrySummary `json:"steady"`
+	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
+	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
+	// ECC errors accumulated during this precision phase only. Non-zero corrected =
+	// stress-induced DRAM errors for this kernel type; any uncorrected = serious fault.
+	// omitzero (Go 1.24+) drops the key when IsZero(); omitempty is a no-op on struct values.
+	ECC   BenchmarkECCCounters `json:"ecc,omitzero"`
+	Notes string               `json:"notes,omitempty"`
 }

 type BenchmarkInterconnectResult struct {
@@ -156,3 +434,103 @@ type BenchmarkInterconnectResult struct {
 	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
 	Notes              []string `json:"notes,omitempty"`
 }
+
+type NvidiaPowerBenchResult struct { // JSON report produced by one NVIDIA power benchmark run
+	BenchmarkVersion     string                 `json:"benchmark_version"`
+	GeneratedAt          time.Time              `json:"generated_at"`
+	Hostname             string                 `json:"hostname,omitempty"`
+	ServerModel          string                 `json:"server_model,omitempty"`
+	BenchmarkProfile     string                 `json:"benchmark_profile"`
+	SelectedGPUIndices   []int                  `json:"selected_gpu_indices"`
+	RecommendedSlotOrder []int                  `json:"recommended_slot_order,omitempty"`
+	RampSteps            []NvidiaPowerBenchStep `json:"ramp_steps,omitempty"`
+	OverallStatus        string                 `json:"overall_status"`
+	// PlatformMaxTDPW is the sum of per-GPU stable power limits found during the
+	// cumulative thermal ramp. Represents the actual sustained power budget of
+	// this server under full GPU load. Use for rack power planning.
+	PlatformMaxTDPW float64 `json:"platform_max_tdp_w"`
+	// ServerPower captures IPMI server power delta (idle→loaded) measured in
+	// parallel with the thermal ramp. Use to compare GPU-reported TDP against
+	// actual wall-power draw as seen by the server's power supply. Nil is omitted from JSON.
+	ServerPower *BenchmarkServerPower `json:"server_power,omitempty"`
+	Findings    []string              `json:"findings,omitempty"`
+	GPUs        []NvidiaPowerBenchGPU `json:"gpus"`
+	// PSUIssues holds power supply fault events detected by comparing IPMI PSU
+	// sensor states before and after the power benchmark run. Empty when IPMI is
+	// unavailable or no PSU faults occurred during the test.
+	PSUIssues []string `json:"psu_issues,omitempty"`
+}
+
+type NvidiaPowerBenchGPU struct { // per-GPU entry of NvidiaPowerBenchResult.GPUs
+	Index              int     `json:"index"`
+	Name               string  `json:"name,omitempty"`
+	BusID              string  `json:"bus_id,omitempty"`
+	DefaultPowerLimitW float64 `json:"default_power_limit_w,omitempty"`
+	// AppliedPowerLimitW is the stable limit found during single-card calibration.
+	AppliedPowerLimitW float64 `json:"applied_power_limit_w,omitempty"`
+	// StablePowerLimitW is the final fixed limit for this GPU after the
+	// cumulative thermal ramp. This is the limit at which the GPU operated
+	// stably with all other GPUs running simultaneously at their own limits.
+	// May be lower than AppliedPowerLimitW if multi-GPU thermal load required
+	// additional derating.
+	StablePowerLimitW   float64  `json:"stable_power_limit_w,omitempty"`
+	MaxObservedPowerW   float64  `json:"max_observed_power_w,omitempty"`
+	MaxObservedTempC    float64  `json:"max_observed_temp_c,omitempty"`
+	CalibrationAttempts int      `json:"calibration_attempts,omitempty"`
+	Derated             bool     `json:"derated,omitempty"`
+	Status              string   `json:"status"`
+	Notes               []string `json:"notes,omitempty"`
+	// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
+	// ServerLoadedW is the IPMI server power reading captured during this
+	// GPU's single-card calibration run. ServerDeltaW = ServerLoadedW − idle server power.
+	ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
+	ServerDeltaW  float64 `json:"server_delta_w,omitempty"`
+	// Telemetry holds the aggregated stats from the final converged calibration
+	// attempt for this GPU (temperature, power, fan, clock percentiles). Nil is omitted from JSON.
+	Telemetry *BenchmarkTelemetrySummary `json:"telemetry,omitempty"`
+	// Fan state sampled at the end of single-card calibration.
+	AvgFanRPM          float64 `json:"avg_fan_rpm,omitempty"`
+	AvgFanDutyCyclePct float64 `json:"avg_fan_duty_cycle_pct,omitempty"`
+}
+
+type NvidiaPowerBenchStep struct { // one entry of NvidiaPowerBenchResult.RampSteps
+	StepIndex  int   `json:"step_index"`
+	GPUIndices []int `json:"gpu_indices"`
+	// NewGPUIndex is the GPU whose stable limit was searched in this step.
+	NewGPUIndex int `json:"new_gpu_index"`
+	// NewGPUStableLimitW is the stable power limit found for the new GPU.
+	NewGPUStableLimitW  float64  `json:"new_gpu_stable_limit_w,omitempty"`
+	TotalObservedPowerW float64  `json:"total_observed_power_w,omitempty"`
+	AvgObservedPowerW   float64  `json:"avg_observed_power_w,omitempty"`
+	Derated             bool     `json:"derated,omitempty"`
+	Status              string   `json:"status"`
+	Notes               []string `json:"notes,omitempty"`
+	// ServerLoadedW is the IPMI server power reading captured during this
+	// ramp step's calibration run. ServerDeltaW = ServerLoadedW − idle server power.
+	ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
+	ServerDeltaW  float64 `json:"server_delta_w,omitempty"`
+	// PSU slot readings sampled at end of this ramp step.
+	PSUSlotReadings map[string]BenchmarkPSUSlotPower `json:"psu_slot_readings,omitempty"`
+	// Fan state at end of this ramp step.
+	AvgFanRPM          float64 `json:"avg_fan_rpm,omitempty"`
+	AvgFanDutyCyclePct float64 `json:"avg_fan_duty_cycle_pct,omitempty"`
+	// Per-GPU telemetry from this step's calibration, keyed by GPU index (encoding/json writes int keys as decimal strings).
+	PerGPUTelemetry map[int]*BenchmarkTelemetrySummary `json:"per_gpu_telemetry,omitempty"`
+}
+
+// NvidiaPerformanceRampStep holds per-step performance data for the
+// scalability ramp-up phase of the performance benchmark (see PerformanceRampSteps).
+type NvidiaPerformanceRampStep struct {
+	StepIndex  int   `json:"step_index"`
+	GPUIndices []int `json:"gpu_indices"`
+	// TotalSyntheticTOPS is the sum of per-GPU SyntheticScore (fp32-equivalent
+	// TOPS from dedicated per-precision phases) across all GPUs in this step.
+	TotalSyntheticTOPS float64 `json:"total_synthetic_tops"`
+	TotalMixedTOPS     float64 `json:"total_mixed_tops,omitempty"`
+	// ScalabilityPct = TotalSyntheticTOPS / (k × best_single_gpu_tops) × 100; k is presumably len(GPUIndices) — confirm.
+	// 100% = perfect linear scaling. < 100% = thermal/power/interconnect loss.
+	ScalabilityPct float64  `json:"scalability_pct"`
+	Status         string   `json:"status"`
+	Notes          []string `json:"notes,omitempty"`
+}
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,14 +13,21 @@ import (

 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
-	ElapsedSec  float64 `json:"elapsed_sec"`
-	GPUIndex    int     `json:"index"`
-	TempC       float64 `json:"temp_c"`
-	UsagePct    float64 `json:"usage_pct"`
-	MemUsagePct float64 `json:"mem_usage_pct"`
-	PowerW      float64 `json:"power_w"`
-	ClockMHz    float64 `json:"clock_mhz"`
-	MemClockMHz float64 `json:"mem_clock_mhz"`
+	Stage                 string  `json:"stage,omitempty"` // stage label; blank is charted as "run" by buildGPUMetricStageSpans
+	StageStartSec         float64 `json:"stage_start_sec,omitempty"`
+	StageEndSec           float64 `json:"stage_end_sec,omitempty"`
+	ElapsedSec            float64 `json:"elapsed_sec"`
+	GPUIndex              int     `json:"index"`
+	TempC                 float64 `json:"temp_c"`
+	UsagePct              float64 `json:"usage_pct"`
+	MemUsagePct           float64 `json:"mem_usage_pct"`
+	PowerW                float64 `json:"power_w"`
+	ClockMHz              float64 `json:"clock_mhz"`
+	MemClockMHz           float64 `json:"mem_clock_mhz"`
+	FanAvgRPM             float64 `json:"fan_avg_rpm,omitempty"`
+	FanDutyCyclePct       float64 `json:"fan_duty_cycle_pct,omitempty"`
+	FanDutyCycleAvailable bool    `json:"fan_duty_cycle_available,omitempty"`
+	FanDutyCycleEstimated bool    `json:"fan_duty_cycle_estimated,omitempty"`
 }

 // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
@@ -141,14 +148,28 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
-	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available,fan_duty_cycle_estimated\n")
 	for _, r := range rows {
-		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
-			r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
+		dutyAvail := 0
+		if r.FanDutyCycleAvailable {
+			dutyAvail = 1
+		}
+		dutyEstimated := 0
+		if r.FanDutyCycleEstimated {
+			dutyEstimated = 1
+		}
+		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f,%.0f,%.1f,%d,%d\n", // stage is CSV-quoted per RFC 4180: embedded quotes are doubled, not backslash-escaped
+			`"`+strings.ReplaceAll(strings.TrimSpace(r.Stage), `"`, `""`)+`"`, r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz, r.FanAvgRPM, r.FanDutyCyclePct, dutyAvail, dutyEstimated)
 	}
 	return os.WriteFile(path, b.Bytes(), 0644)
 }

+type gpuMetricStageSpan struct {
+	Name  string  // trimmed stage label; "run" when the row had none
+	Start float64 // span start, taken from StageStartSec or, as fallback, ElapsedSec
+	End   float64 // span end; buildGPUMetricStageSpans guarantees End > Start
+}
+
 // WriteGPUMetricsHTML writes a standalone HTML file with one SVG chart per GPU.
 func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 	// Group by GPU index preserving order.
@@ -163,9 +184,25 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
 	}

+	stageSpans := buildGPUMetricStageSpans(rows)
+	stageColorByName := make(map[string]string, len(stageSpans))
+	for i, span := range stageSpans {
+		stageColorByName[span.Name] = gpuMetricStagePalette[i%len(gpuMetricStagePalette)]
+	}
+
+	var legend strings.Builder
+	if len(stageSpans) > 0 {
+		legend.WriteString(`<div class="stage-legend">`)
+		for _, span := range stageSpans {
+			fmt.Fprintf(&legend, `<span class="stage-chip"><span class="stage-swatch" style="background:%s"></span>%s</span>`,
+				stageColorByName[span.Name], gpuHTMLEscape(span.Name))
+		}
+		legend.WriteString(`</div>`)
+	}
+
 	var svgs strings.Builder
 	for _, gpuIdx := range order {
-		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx))
+		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx, stageSpans, stageColorByName))
 		svgs.WriteString("\n")
 	}

@@ -175,21 +212,39 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 <meta charset="utf-8">
 <title>GPU Stress Test Metrics</title>
 <style>
-body { font-family: sans-serif; background: #f0f0f0; margin: 0; padding: 20px; }
-h1 { text-align: center; color: #333; margin: 0 0 8px; }
-p  { text-align: center; color: #888; font-size: 13px; margin: 0 0 24px; }
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6)}
+*{box-sizing:border-box}
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);margin:0}
+.page{padding:24px}
+.card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);overflow:hidden}
+.card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px}
+.card-body{padding:16px}
+h1{font-size:22px;margin:0 0 6px}
+p{color:var(--muted);font-size:13px;margin:0 0 16px}
+.stage-legend{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 16px}
+.stage-chip{display:inline-flex;align-items:center;gap:8px;padding:4px 10px;border-radius:999px;background:var(--surface-2);border:1px solid var(--border-lite);font-size:12px}
+.stage-swatch{display:inline-block;width:12px;height:12px;border-radius:999px}
+.chart-block{margin-top:16px}
 </style>
 </head><body>
+<div class="page">
+<div class="card">
+<div class="card-head">GPU Stress Test Metrics</div>
+<div class="card-body">
 <h1>GPU Stress Test Metrics</h1>
 <p>Generated %s</p>
 %s
-</body></html>`, ts, svgs.String())
+<div class="chart-block">%s</div>
+</div>
+</div>
+</div>
+</body></html>`, ts, legend.String(), svgs.String())

 	return os.WriteFile(path, []byte(html), 0644)
 }

 // drawGPUChartSVG generates a self-contained SVG chart for one GPU.
-func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
+func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int, stageSpans []gpuMetricStageSpan, stageColorByName map[string]string) string {
 	// Layout
 	const W, H = 960, 520
 	const plotX1 = 120 // usage axis / chart left border
@@ -284,6 +339,23 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	}
 	b.WriteString("</g>\n")

+	// Stage backgrounds
+	for _, span := range stageSpans {
+		x1 := xv(span.Start)
+		x2 := xv(span.End)
+		if x2 < x1 {
+			x1, x2 = x2, x1
+		}
+		if x2-x1 < 1 {
+			x2 = x1 + 1
+		}
+		color := stageColorByName[span.Name]
+		fmt.Fprintf(&b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="%s" fill-opacity="0.18"/>`+"\n",
+			x1, plotY1, x2-x1, PH, color)
+		fmt.Fprintf(&b, `<text x="%.1f" y="%d" font-family="sans-serif" font-size="10" fill="#444" text-anchor="middle">%s</text>`+"\n",
+			x1+(x2-x1)/2, plotY1+12, gpuHTMLEscape(span.Name))
+	}
+
 	// Chart border
 	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
 		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
@@ -382,221 +454,6 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	return b.String()
 }

-const (
-	ansiAmber  = "\033[38;5;214m"
-	ansiReset  = "\033[0m"
-)
-
-const (
-	termChartWidth  = 70
-	termChartHeight = 12
-)
-
-// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
-// Used in SAT stress-test logs.
-func RenderGPUTerminalChart(rows []GPUMetricRow) string {
-	seen := make(map[int]bool)
-	var order []int
-	gpuMap := make(map[int][]GPUMetricRow)
-	for _, r := range rows {
-		if !seen[r.GPUIndex] {
-			seen[r.GPUIndex] = true
-			order = append(order, r.GPUIndex)
-		}
-		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
-	}
-
-	type seriesDef struct {
-		caption string
-		color   string
-		fn      func(GPUMetricRow) float64
-	}
-	defs := []seriesDef{
-		{"Temperature (°C)", ansiAmber, func(r GPUMetricRow) float64 { return r.TempC }},
-		{"GPU Usage (%)", ansiAmber, func(r GPUMetricRow) float64 { return r.UsagePct }},
-		{"Power (W)", ansiAmber, func(r GPUMetricRow) float64 { return r.PowerW }},
-		{"Clock (MHz)", ansiAmber, func(r GPUMetricRow) float64 { return r.ClockMHz }},
-	}
-
-	var b strings.Builder
-	for _, gpuIdx := range order {
-		gr := gpuMap[gpuIdx]
-		if len(gr) == 0 {
-			continue
-		}
-		tMax := gr[len(gr)-1].ElapsedSec - gr[0].ElapsedSec
-		fmt.Fprintf(&b, "GPU %d — Stress Test Metrics  (%.0f seconds)\n\n", gpuIdx, tMax)
-		for _, d := range defs {
-			b.WriteString(renderLineChart(extractGPUField(gr, d.fn), d.color, d.caption,
-				termChartHeight, termChartWidth))
-			b.WriteRune('\n')
-		}
-	}
-
-	return strings.TrimRight(b.String(), "\n")
-}
-
-// renderLineChart draws a single time-series line chart using box-drawing characters.
-// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
-func renderLineChart(vals []float64, color, caption string, height, width int) string {
-	if len(vals) == 0 {
-		return caption + "\n"
-	}
-
-	mn, mx := gpuMinMax(vals)
-	if mn == mx {
-		mx = mn + 1
-	}
-
-	// Use the smaller of width or len(vals) to avoid stretching sparse data.
-	w := width
-	if len(vals) < w {
-		w = len(vals)
-	}
-	data := gpuDownsample(vals, w)
-
-	// row[i] = display row index: 0 = top = max value, height = bottom = min value.
-	row := make([]int, w)
-	for i, v := range data {
-		r := int(math.Round((mx - v) / (mx - mn) * float64(height)))
-		if r < 0 {
-			r = 0
-		}
-		if r > height {
-			r = height
-		}
-		row[i] = r
-	}
-
-	// Fill the character grid.
-	grid := make([][]rune, height+1)
-	for i := range grid {
-		grid[i] = make([]rune, w)
-		for j := range grid[i] {
-			grid[i][j] = ' '
-		}
-	}
-	for x := 0; x < w; x++ {
-		r := row[x]
-		if x == 0 {
-			grid[r][0] = '─'
-			continue
-		}
-		p := row[x-1]
-		switch {
-		case r == p:
-			grid[r][x] = '─'
-		case r < p: // value went up (row index decreased toward top)
-			grid[r][x] = '╭'
-			grid[p][x] = '╯'
-			for y := r + 1; y < p; y++ {
-				grid[y][x] = '│'
-			}
-		default: // r > p, value went down
-			grid[p][x] = '╮'
-			grid[r][x] = '╰'
-			for y := p + 1; y < r; y++ {
-				grid[y][x] = '│'
-			}
-		}
-	}
-
-	// Y axis tick labels.
-	ticks := gpuNiceTicks(mn, mx, height/2)
-	tickAtRow := make(map[int]string)
-	labelWidth := 4
-	for _, t := range ticks {
-		r := int(math.Round((mx - t) / (mx - mn) * float64(height)))
-		if r < 0 || r > height {
-			continue
-		}
-		s := gpuFormatTick(t)
-		tickAtRow[r] = s
-		if len(s) > labelWidth {
-			labelWidth = len(s)
-		}
-	}
-
-	var b strings.Builder
-	for r := 0; r <= height; r++ {
-		label := tickAtRow[r]
-		fmt.Fprintf(&b, "%*s", labelWidth, label)
-		switch {
-		case label != "":
-			b.WriteRune('┤')
-		case r == height:
-			b.WriteRune('┼')
-		default:
-			b.WriteRune('│')
-		}
-		b.WriteString(color)
-		b.WriteString(string(grid[r]))
-		b.WriteString(ansiReset)
-		b.WriteRune('\n')
-	}
-
-	// Bottom axis.
-	b.WriteString(strings.Repeat(" ", labelWidth))
-	b.WriteRune('└')
-	b.WriteString(strings.Repeat("─", w))
-	b.WriteRune('\n')
-
-	// Caption centered under the chart.
-	if caption != "" {
-		total := labelWidth + 1 + w
-		if pad := (total - len(caption)) / 2; pad > 0 {
-			b.WriteString(strings.Repeat(" ", pad))
-		}
-		b.WriteString(caption)
-		b.WriteRune('\n')
-	}
-
-	return b.String()
-}
-
-func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 {
-	v := make([]float64, len(rows))
-	for i, r := range rows {
-		v[i] = fn(r)
-	}
-	return v
-}
-
-// gpuDownsample averages vals into w buckets (or nearest-neighbor upsamples if len(vals) < w).
-func gpuDownsample(vals []float64, w int) []float64 {
-	n := len(vals)
-	if n == 0 {
-		return make([]float64, w)
-	}
-	result := make([]float64, w)
-	if n >= w {
-		counts := make([]int, w)
-		for i, v := range vals {
-			bucket := i * w / n
-			if bucket >= w {
-				bucket = w - 1
-			}
-			result[bucket] += v
-			counts[bucket]++
-		}
-		for i := range result {
-			if counts[i] > 0 {
-				result[i] /= float64(counts[i])
-			}
-		}
-	} else {
-		// Nearest-neighbour upsample.
-		for i := range result {
-			src := i * (n - 1) / (w - 1)
-			if src >= n {
-				src = n - 1
-			}
-			result[i] = vals[src]
-		}
-	}
-	return result
-}
-
 func gpuMinMax(vals []float64) (float64, float64) {
 	if len(vals) == 0 {
 		return 0, 1
@@ -641,3 +498,57 @@ func gpuFormatTick(v float64) string {
 	}
 	return strconv.FormatFloat(v, 'f', 1, 64)
 }
+
+// gpuMetricStagePalette is a fixed list of seven CSS hex colour strings.
+// NOTE(review): presumably used to colour the stage spans produced by
+// buildGPUMetricStageSpans; the code that indexes it is not in this hunk — confirm.
+var gpuMetricStagePalette = []string{
+	"#d95c5c",
+	"#2185d0",
+	"#21ba45",
+	"#f2c037",
+	"#6435c9",
+	"#00b5ad",
+	"#a5673f",
+}
+
+// buildGPUMetricStageSpans collapses consecutive rows that share a stage name
+// into one span per run of that name.
+//
+// A blank (whitespace-only) stage is reported as "run". A row whose stage end
+// is not after its stage start contributes its ElapsedSec as both bounds.
+// Each span covers the minimum start and maximum end seen in its run, and a
+// span that would otherwise be empty is widened to one unit so it stays visible.
+func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
+	var spans []gpuMetricStageSpan
+	for i := range rows {
+		stage := strings.TrimSpace(rows[i].Stage)
+		if stage == "" {
+			stage = "run"
+		}
+
+		lo, hi := rows[i].StageStartSec, rows[i].StageEndSec
+		if hi <= lo {
+			// No usable stage window on this row: fall back to the sample time.
+			lo, hi = rows[i].ElapsedSec, rows[i].ElapsedSec
+		}
+
+		if n := len(spans); n > 0 && spans[n-1].Name == stage {
+			// Same stage as the previous row: grow the open span.
+			last := &spans[n-1]
+			if lo < last.Start {
+				last.Start = lo
+			}
+			if hi > last.End {
+				last.End = hi
+			}
+			continue
+		}
+		spans = append(spans, gpuMetricStageSpan{Name: stage, Start: lo, End: hi})
+	}
+
+	for i := range spans {
+		if span := &spans[i]; span.End <= span.Start {
+			span.End = span.Start + 1
+		}
+	}
+	return spans
+}
+
+// gpuHTMLReplacer maps the five HTML-significant characters to entities.
+// strings.Replacer scans the input once without re-examining its own output,
+// so the "&" introduced by an entity is never escaped a second time.
+var gpuHTMLReplacer = strings.NewReplacer(
+	`&`, `&amp;`,
+	`<`, `&lt;`,
+	`>`, `&gt;`,
+	`"`, `&quot;`,
+	`'`, `&#39;`,
+)
+
+// gpuHTMLEscape returns s with &, <, >, " and ' replaced by HTML entities.
+func gpuHTMLEscape(s string) string {
+	escaped := gpuHTMLReplacer.Replace(s)
+	return escaped
+}
--- a/audit/internal/platform/gpu_metrics_test.go
+++ b/audit/internal/platform/gpu_metrics_test.go
@@ -0,0 +1,65 @@
+package platform
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// TestWriteGPUMetricsCSVIncludesStageColumn writes a single row and checks
+// that both the header and the data line carry the stage column first.
+func TestWriteGPUMetricsCSVIncludesStageColumn(t *testing.T) {
+	t.Parallel()
+
+	csvPath := filepath.Join(t.TempDir(), "gpu-metrics.csv")
+	sample := GPUMetricRow{Stage: "warmup", ElapsedSec: 1, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 80, PowerW: 420, ClockMHz: 1800, MemClockMHz: 1200}
+	if err := WriteGPUMetricsCSV(csvPath, []GPUMetricRow{sample}); err != nil {
+		t.Fatalf("WriteGPUMetricsCSV: %v", err)
+	}
+
+	data, err := os.ReadFile(csvPath)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	got := string(data)
+
+	wantFragments := []string{
+		"stage,elapsed_sec,gpu_index",
+		`"warmup",1.0,0,71.0,99.0,80.0,420.0,1800,1200`,
+	}
+	for _, want := range wantFragments {
+		if strings.Contains(got, want) {
+			continue
+		}
+		t.Fatalf("csv missing %q\n%s", want, got)
+	}
+}
+
+// TestWriteGPUMetricsHTMLShowsStageLegendAndLabels renders two stages of two
+// samples each and checks that the page mentions the legend, both stage
+// names and the report title.
+func TestWriteGPUMetricsHTMLShowsStageLegendAndLabels(t *testing.T) {
+	t.Parallel()
+
+	htmlPath := filepath.Join(t.TempDir(), "gpu-metrics.html")
+	samples := []GPUMetricRow{
+		{Stage: "baseline", ElapsedSec: 1, GPUIndex: 0, TempC: 50, UsagePct: 10, MemUsagePct: 5, PowerW: 100, ClockMHz: 500, MemClockMHz: 400},
+		{Stage: "baseline", ElapsedSec: 2, GPUIndex: 0, TempC: 51, UsagePct: 11, MemUsagePct: 5, PowerW: 101, ClockMHz: 510, MemClockMHz: 400},
+		{Stage: "steady-fp16", ElapsedSec: 3, GPUIndex: 0, TempC: 70, UsagePct: 98, MemUsagePct: 75, PowerW: 390, ClockMHz: 1700, MemClockMHz: 1100},
+		{Stage: "steady-fp16", ElapsedSec: 4, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 76, PowerW: 395, ClockMHz: 1710, MemClockMHz: 1110},
+	}
+	if err := WriteGPUMetricsHTML(htmlPath, samples); err != nil {
+		t.Fatalf("WriteGPUMetricsHTML: %v", err)
+	}
+
+	data, err := os.ReadFile(htmlPath)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	page := string(data)
+
+	wantFragments := []string{
+		"stage-legend",
+		"baseline",
+		"steady-fp16",
+		"GPU Stress Test Metrics",
+	}
+	for _, want := range wantFragments {
+		if strings.Contains(page, want) {
+			continue
+		}
+		t.Fatalf("html missing %q\n%s", want, page)
+	}
+}
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -11,12 +11,11 @@ import (
 	"strings"
 )

+// installToRAMDir is the directory RunInstallToRAM copies the live medium
+// into, and where LiveMediaRAMState looks for already-copied *.squashfs files.
+const installToRAMDir = "/dev/shm/bee-live"
+// copyProgressLogStep is the byte interval (100 MiB) that shouldLogCopyProgress
+// requires between two consecutive copy-progress log lines.
+const copyProgressLogStep int64 = 100 * 1024 * 1024
+
 func (s *System) IsLiveMediaInRAM() bool {
-	fsType := mountFSType("/run/live/medium")
-	if fsType == "" {
-		return toramActive()
-	}
-	return strings.EqualFold(fsType, "tmpfs")
+	return s.LiveMediaRAMState().InRAM
 }

 func (s *System) LiveBootSource() LiveBootSource {
@@ -48,42 +47,164 @@ func (s *System) LiveBootSource() LiveBootSource {
 	return status
 }

-func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+// LiveMediaRAMState gathers the live boot source, the toram flag and the
+// squashfs files present on the boot medium and in the RAM copy directory,
+// then lets evaluateLiveMediaRAMState classify them.
+func (s *System) LiveMediaRAMState() LiveMediaRAMState {
+	bootSource := s.LiveBootSource()
+	toram := toramActive()
+	onMedium := globPaths("/run/live/medium/live/*.squashfs")
+	inRAM := globPaths(filepath.Join(installToRAMDir, "*.squashfs"))
+	return evaluateLiveMediaRAMState(bootSource, toram, onMedium, inRAM)
+}
+
+// evaluateLiveMediaRAMState classifies the copy-to-RAM situation from
+// already-collected facts, performing no I/O itself.
+//
+// Outcomes, in priority order:
+//   - boot source already in RAM       -> State "in_ram",       Status "ok"
+//   - every source squashfs was copied -> State "partial",      Status "partial"
+//   - some squashfs copy exists        -> State "partial",      Status "partial"
+//   - toram requested, not in RAM      -> State "toram_failed", Status "failed"
+//   - otherwise                        -> State "not_in_ram",   Status "warning"
+//
+// CanStartCopy is set for every outcome except "in_ram". Files are compared
+// by base name only.
+func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
+	result := LiveMediaRAMState{
+		LiveBootSource: status,
+		ToramActive:    toram,
+		CopyPresent:    len(copiedSquashfs) != 0,
+	}
+	describe := func(stateName, statusName, message string) {
+		result.State = stateName
+		result.Status = statusName
+		result.Message = message
+	}
+
+	if status.InRAM {
+		result.CopyComplete = true
+		describe("in_ram", "ok", "Running from RAM — installation media can be safely disconnected.")
+		return result
+	}
+
+	// Every remaining outcome leaves the copy action available.
+	result.CanStartCopy = true
+	wanted := pathBaseSet(sourceSquashfs)
+	result.CopyComplete = len(wanted) != 0 && setContainsAll(pathBaseSet(copiedSquashfs), wanted)
+
+	if result.CopyComplete {
+		describe("partial", "partial", "Live media files were copied to RAM, but the system is still mounted from the original boot source.")
+	} else if result.CopyPresent {
+		describe("partial", "partial", "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled.")
+	} else if toram {
+		describe("toram_failed", "failed", "toram boot parameter is set but the live medium is not mounted from RAM.")
+	} else {
+		describe("not_in_ram", "warning", "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance.")
+	}
+	return result
+}
+
+// globPaths returns the paths matching pattern. The error from filepath.Glob
+// is discarded on purpose: callers only care whether anything matched.
+func globPaths(pattern string) (matches []string) {
+	matches, _ = filepath.Glob(pattern)
+	return matches
+}
+
+// pathBaseSet returns the set of whitespace-trimmed base names of paths.
+// Base names that are empty after trimming are left out.
+func pathBaseSet(paths []string) map[string]struct{} {
+	names := make(map[string]struct{}, len(paths))
+	for i := range paths {
+		name := strings.TrimSpace(filepath.Base(paths[i]))
+		if name == "" {
+			continue
+		}
+		names[name] = struct{}{}
+	}
+	return names
+}
+
+// setContainsAll reports whether every name in want is also in have.
+// An empty want is never considered satisfied.
+func setContainsAll(have, want map[string]struct{}) bool {
+	if len(want) == 0 {
+		return false
+	}
+	matched := 0
+	for name := range want {
+		if _, present := have[name]; present {
+			matched++
+		}
+	}
+	return matched == len(want)
+}
+
+func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
 	log := func(msg string) {
 		if logFunc != nil {
 			logFunc(msg)
 		}
 	}

-	if s.IsLiveMediaInRAM() {
+	state := s.LiveMediaRAMState()
+	if state.InRAM {
 		log("Already running from RAM — installation media can be safely disconnected.")
 		return nil
 	}

 	squashfsFiles, err := filepath.Glob("/run/live/medium/live/*.squashfs")
-	if err != nil || len(squashfsFiles) == 0 {
-		return fmt.Errorf("no squashfs files found in /run/live/medium/live/")
-	}
+	sourceAvailable := err == nil && len(squashfsFiles) > 0

-	free := freeMemBytes()
-	var needed int64
-	for _, sf := range squashfsFiles {
-		fi, err2 := os.Stat(sf)
-		if err2 != nil {
-			return fmt.Errorf("stat %s: %v", sf, err2)
+	dstDir := installToRAMDir
+
+	// If the source medium is unavailable, check whether a previous run already
+	// produced a complete copy in RAM. If so, skip the copy phase and proceed
+	// directly to the loop-rebind / bind-mount steps.
+	if !sourceAvailable {
+		copiedFiles, _ := filepath.Glob(filepath.Join(dstDir, "*.squashfs"))
+		if len(copiedFiles) > 0 {
+			log("Source medium not available, but a previous RAM copy was found — resuming from existing copy.")
+			// Proceed to rebind with the already-copied files.
+			for _, dst := range copiedFiles {
+				base := filepath.Base(dst)
+				// Re-associate the loop device that was originally backed by the
+				// source file (now gone); find it by the old source path pattern.
+				srcGuess := "/run/live/medium/live/" + base
+				loopDev, lerr := findLoopForFile(srcGuess)
+				if lerr != nil {
+					log(fmt.Sprintf("Loop device for %s not found (%v) — skipping re-association.", base, lerr))
+					continue
+				}
+				if rerr := reassociateLoopDevice(loopDev, dst); rerr != nil {
+					log(fmt.Sprintf("Warning: could not re-associate %s → %s: %v", loopDev, dst, rerr))
+				} else {
+					log(fmt.Sprintf("Loop device %s now backed by RAM copy.", loopDev))
+				}
+			}
+			goto bindMedium
 		}
-		needed += fi.Size()
-	}
-	const headroom = 256 * 1024 * 1024
-	if free > 0 && needed+headroom > free {
-		return fmt.Errorf("insufficient RAM: need %s, available %s",
-			humanBytes(needed+headroom), humanBytes(free))
+		return fmt.Errorf("no squashfs files found in /run/live/medium/live/ and no prior RAM copy in %s — reconnect the installation medium and retry", dstDir)
 	}

-	dstDir := "/dev/shm/bee-live"
+	{
+		free := freeMemBytes()
+		var needed int64
+		for _, sf := range squashfsFiles {
+			fi, err2 := os.Stat(sf)
+			if err2 != nil {
+				return fmt.Errorf("stat %s: %v", sf, err2)
+			}
+			needed += fi.Size()
+		}
+		const headroom = 256 * 1024 * 1024
+		if free > 0 && needed+headroom > free {
+			return fmt.Errorf("insufficient RAM: need %s, available %s",
+				humanBytes(needed+headroom), humanBytes(free))
+		}
+	}
+
+	if state.CopyPresent {
+		log("Removing stale partial RAM copy before retry...")
+	}
+	_ = os.RemoveAll(dstDir)
 	if err := os.MkdirAll(dstDir, 0755); err != nil {
 		return fmt.Errorf("create tmpfs dir: %v", err)
 	}
+	defer func() {
+		if retErr == nil {
+			return
+		}
+		_ = os.RemoveAll(dstDir)
+		log("Removed incomplete RAM copy.")
+	}()

 	for _, sf := range squashfsFiles {
 		if err := ctx.Err(); err != nil {
@@ -109,6 +230,7 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 		}
 	}

+bindMedium:
 	log("Copying remaining medium files...")
 	if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
 		log(fmt.Sprintf("Warning: partial copy: %v", err))
@@ -198,6 +320,7 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	defer out.Close()
 	total := fi.Size()
 	var copied int64
+	var lastLogged int64
 	buf := make([]byte, 4*1024*1024)
 	for {
 		if err := ctx.Err(); err != nil {
@@ -209,7 +332,8 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 				return werr
 			}
 			copied += int64(n)
-			if logFunc != nil && total > 0 {
+			if shouldLogCopyProgress(copied, total, lastLogged) {
+				lastLogged = copied
 				pct := int(float64(copied) / float64(total) * 100)
 				logFunc(fmt.Sprintf("  %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
 			}
@@ -224,6 +348,19 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	return out.Sync()
 }

+// shouldLogCopyProgress decides whether a progress line is due after copied
+// of total bytes, given the byte count at which the last line was emitted.
+//
+// Nothing is logged for an unknown total or before any bytes are copied.
+// Completion is logged once; before that, a line is due each time another
+// copyProgressLogStep bytes have accumulated since the last one.
+func shouldLogCopyProgress(copied, total, lastLogged int64) bool {
+	switch {
+	case total <= 0 || copied <= 0:
+		return false
+	case copied >= total:
+		return copied > lastLogged
+	case copied < copyProgressLogStep:
+		return false
+	default:
+		return copied-lastLogged >= copyProgressLogStep
+	}
+}
+
 func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
 	return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
 		if ctx.Err() != nil {
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -58,3 +58,69 @@ func TestDescribeLiveBootSource(t *testing.T) {
 		t.Fatalf("got %q want /run/live/medium", got)
 	}
 }
+
+// TestEvaluateLiveMediaRAMState checks the State/Status/CanStartCopy triple
+// for an in-RAM boot, an interrupted copy and a failed toram boot.
+func TestEvaluateLiveMediaRAMState(t *testing.T) {
+	t.Parallel()
+
+	usb := LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}
+	cases := []struct {
+		name             string
+		source           LiveBootSource
+		toram            bool
+		onMedium         []string
+		inRAM            []string
+		wantState        string
+		wantStatus       string
+		wantCanStartCopy bool
+		wantIncomplete   bool // additionally require CopyComplete == false
+	}{
+		{
+			name:       "in_ram",
+			source:     LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"},
+			wantState:  "in_ram",
+			wantStatus: "ok",
+		},
+		{
+			name:             "partial_copy_after_cancel",
+			source:           usb,
+			onMedium:         []string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"},
+			inRAM:            []string{"/dev/shm/bee-live/filesystem.squashfs"},
+			wantState:        "partial",
+			wantStatus:       "partial",
+			wantCanStartCopy: true,
+			wantIncomplete:   true,
+		},
+		{
+			name:             "toram_failed",
+			source:           usb,
+			toram:            true,
+			wantState:        "toram_failed",
+			wantStatus:       "failed",
+			wantCanStartCopy: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			state := evaluateLiveMediaRAMState(tc.source, tc.toram, tc.onMedium, tc.inRAM)
+			if state.State != tc.wantState || state.Status != tc.wantStatus || state.CanStartCopy != tc.wantCanStartCopy {
+				t.Fatalf("state=%+v", state)
+			}
+			if tc.wantIncomplete && state.CopyComplete {
+				t.Fatalf("CopyComplete=%v want false", state.CopyComplete)
+			}
+		})
+	}
+}
+
+// TestShouldLogCopyProgress walks a 250 MiB copy through the 100 MiB logging
+// boundaries and the final completion line.
+func TestShouldLogCopyProgress(t *testing.T) {
+	t.Parallel()
+
+	const (
+		mib   int64 = 1024 * 1024
+		step        = 100 * mib
+		total       = 250 * mib
+	)
+
+	cases := []struct {
+		copied     int64
+		lastLogged int64
+		want       bool
+		failure    string
+	}{
+		{step - 1, 0, false, "progress logged too early"},
+		{step, 0, true, "expected log at first 100MB boundary"},
+		{step + 16*mib, step, false, "progress logged again before next 100MB"},
+		{2 * step, step, true, "expected log at second 100MB boundary"},
+		{total, 2 * step, true, "expected final completion log"},
+	}
+	for _, tc := range cases {
+		if got := shouldLogCopyProgress(tc.copied, total, tc.lastLogged); got != tc.want {
+			t.Fatal(tc.failure)
+		}
+	}
+}
--- a/audit/internal/platform/kill_workers.go
+++ b/audit/internal/platform/kill_workers.go
@@ -1,11 +1,14 @@
 package platform

 import (
+	"context"
 	"fmt"
+	"log/slog"
 	"os"
 	"strconv"
 	"strings"
 	"syscall"
+	"time"
 )

 // workerPatterns are substrings matched against /proc/<pid>/cmdline to identify
@@ -30,7 +33,12 @@ type KilledProcess struct {
 // KillTestWorkers scans /proc for running test worker processes and sends
 // SIGKILL to each one found. It returns a list of killed processes.
 // Errors for individual processes (e.g. already exited) are silently ignored.
+// The scan runs under a 5-second deadline to avoid blocking if the process
+// table is very large (e.g. after a stress test with thousands of children).
 func KillTestWorkers() []KilledProcess {
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
 	entries, err := os.ReadDir("/proc")
 	if err != nil {
 		return nil
@@ -38,6 +46,13 @@ func KillTestWorkers() []KilledProcess {

 	var killed []KilledProcess
 	for _, e := range entries {
+		select {
+		case <-ctx.Done():
+			slog.Warn("KillTestWorkers scan timed out", "killed_so_far", len(killed))
+			return killed
+		default:
+		}
+
 		if !e.IsDir() {
 			continue
 		}
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -1,8 +1,10 @@
 package platform

 import (
+	"bee/audit/internal/collector"
 	"bufio"
 	"encoding/json"
+	"fmt"
 	"os"
 	"os/exec"
 	"sort"
@@ -14,13 +16,24 @@ import (
 // LiveMetricSample is a single point-in-time snapshot of server metrics
 // collected for the web UI metrics page.
 type LiveMetricSample struct {
-	Timestamp  time.Time      `json:"ts"`
-	Fans       []FanReading   `json:"fans"`
-	Temps      []TempReading  `json:"temps"`
-	PowerW     float64        `json:"power_w"`
-	CPULoadPct float64        `json:"cpu_load_pct"`
-	MemLoadPct float64        `json:"mem_load_pct"`
-	GPUs       []GPUMetricRow `json:"gpus"`
+	Timestamp   time.Time      `json:"ts"`
+	Fans        []FanReading   `json:"fans"`
+	Temps       []TempReading  `json:"temps"`
+	PowerW      float64        `json:"power_w"`
+	PowerSource string         `json:"power_source,omitempty"`
+	PowerMode   string         `json:"power_mode,omitempty"`
+	PowerReason string         `json:"power_reason,omitempty"`
+	PSUs        []PSUReading   `json:"psus,omitempty"`
+	CPULoadPct  float64        `json:"cpu_load_pct"`
+	MemLoadPct  float64        `json:"mem_load_pct"`
+	GPUs        []GPUMetricRow `json:"gpus"`
+}
+
+// PSUReading is a per-slot power supply power reading.
+// As filled in by samplePSUPower, PowerW carries the input power when a
+// positive value is available and otherwise the output power.
+type PSUReading struct {
+	// Slot is the numeric SDR slot key plus one, as assigned by samplePSUPower.
+	Slot   int     `json:"slot"`
+	// Name is the display label; samplePSUPower formats it as "PSU<Slot>".
+	Name   string  `json:"name"`
+	// PowerW is the reading in watts; samplePSUPower only emits positive values.
+	PowerW float64 `json:"power_w"`
 }

 // TempReading is a named temperature sensor value.
@@ -54,8 +67,17 @@ func SampleLiveMetrics() LiveMetricSample {
 		}
 	}

-	// System power — returns 0 if unavailable
-	s.PowerW = sampleSystemPower()
+	// Per-PSU power — populated when IPMI SDR has Power Supply entities with Watt readings
+	s.PSUs = samplePSUPower()
+
+	// System power: use the global autotune-selected source when configured,
+	// otherwise fall back to the historical heuristic and mark the mode.
+	if powerW, decision, err := SampleSystemPowerResolved(""); err == nil {
+		s.PowerW = powerW
+		s.PowerSource = decision.EffectiveSource
+		s.PowerMode = decision.Mode
+		s.PowerReason = decision.Reason
+	}

 	// CPU load — from /proc/stat
 	s.CPULoadPct = sampleCPULoadPct()
@@ -326,3 +348,46 @@ func compactAmbientTempName(chip, name string) string {
 	}
 	return chip + " / " + name
 }
+
+// samplePSUPower returns one reading per power supply slot reported by
+// `ipmitool sdr`, ordered by slot number.
+//
+// Slots are parsed by collector.PSUSlotsFromSDR; keys that are not integers
+// are skipped. For each slot a positive input wattage is preferred, with a
+// positive output wattage as fallback; slots with neither are omitted.
+// The result is nil when ipmitool fails, prints nothing, or no slot has a
+// usable wattage.
+func samplePSUPower() []PSUReading {
+	sdr, err := exec.Command("ipmitool", "sdr").Output()
+	if err != nil || len(sdr) == 0 {
+		return nil
+	}
+	bySlot := collector.PSUSlotsFromSDR(string(sdr))
+	if len(bySlot) == 0 {
+		return nil
+	}
+
+	// Map iteration order is random; sort the numeric keys for stable output.
+	slotNums := make([]int, 0, len(bySlot))
+	for key := range bySlot {
+		if num, convErr := strconv.Atoi(key); convErr == nil {
+			slotNums = append(slotNums, num)
+		}
+	}
+	sort.Ints(slotNums)
+
+	var readings []PSUReading
+	for _, num := range slotNums {
+		psu := bySlot[strconv.Itoa(num)]
+		watts := 0.0
+		switch {
+		case psu.InputW != nil && *psu.InputW > 0:
+			watts = *psu.InputW
+		case psu.OutputW != nil && *psu.OutputW > 0:
+			watts = *psu.OutputW
+		}
+		if watts <= 0 {
+			continue
+		}
+		readings = append(readings, PSUReading{
+			Slot:   num + 1,
+			Name:   fmt.Sprintf("PSU%d", num+1),
+			PowerW: watts,
+		})
+	}
+	return readings
+}
--- a/audit/internal/platform/nvidia_recover.go
+++ b/audit/internal/platform/nvidia_recover.go
@@ -0,0 +1,30 @@
+package platform
+
+import (
+	"fmt"
+	"os/exec"
+	"time"
+)
+
+// nvidiaRecoverHelper is the absolute path of the helper executable that
+// runNvidiaRecover invokes through sudo.
+const nvidiaRecoverHelper = "/usr/local/bin/bee-nvidia-recover"
+
+// runNvidiaRecover runs the NVIDIA recovery helper with args under sudo and
+// returns its combined stdout/stderr together with the command error.
+//
+// When systemd-run is on PATH the helper is wrapped in a transient oneshot
+// unit with a unique, timestamp-based name; otherwise it is executed directly.
+func runNvidiaRecover(args ...string) (string, error) {
+	// sudo argv: optional systemd-run wrapper, then the helper and its args.
+	var sudoArgs []string
+	if _, err := exec.LookPath("systemd-run"); err == nil {
+		sudoArgs = append(sudoArgs,
+			"systemd-run",
+			"--quiet",
+			"--pipe",
+			"--wait",
+			"--collect",
+			"--service-type=oneshot",
+			"--unit", fmt.Sprintf("bee-nvidia-recover-%d", time.Now().UnixNano()),
+		)
+	}
+	sudoArgs = append(sudoArgs, nvidiaRecoverHelper)
+	sudoArgs = append(sudoArgs, args...)
+
+	output, err := exec.Command("sudo", sudoArgs...).CombinedOutput()
+	return string(output), err
+}
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -49,6 +49,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"--seconds", strconv.Itoa(opts.DurationSec),
 			"--size-mb", strconv.Itoa(opts.SizeMB),
 		}
+		if opts.StaggerSeconds > 0 && len(selected) > 1 {
+			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
@@ -63,6 +66,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
 			"bee-john-gpu-stress",
 			"--seconds", strconv.Itoa(opts.DurationSec),
 		}
+		if opts.StaggerSeconds > 0 && len(selected) > 1 {
+			cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+		}
 		if len(selected) > 0 {
 			cmd = append(cmd, "--devices", joinIndexList(selected))
 		}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -161,13 +161,7 @@ func (s *System) RunPlatformStress(
 	}
 	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)

-	// Pack tar.gz
-	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
-	if err := packPlatformDir(runDir, archivePath); err != nil {
-		return "", fmt.Errorf("pack archive: %w", err)
-	}
-	_ = os.RemoveAll(runDir)
-	return archivePath, nil
+	return runDir, nil
 }

 // collectPhase samples live metrics every second until ctx is done.
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -1,6 +1,7 @@
 package platform

 import (
+	"bufio"
 	"os"
 	"os/exec"
 	"strings"
@@ -27,6 +28,8 @@ var runtimeTrackedServices = []string{
 	"bee-audit",
 	"bee-web",
 	"bee-sshsetup",
+	"nvidia-dcgm",
+	"nvidia-fabricmanager",
 }

 func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, error) {
@@ -114,6 +117,8 @@ func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, e
 	}

 	s.collectGPURuntimeHealth(vendor, &health)
+	s.collectToRAMHealth(&health)
+	s.collectUSBExportHealth(&health)

 	if health.Status != "FAILED" && len(health.Issues) > 0 {
 		health.Status = "PARTIAL"
@@ -168,6 +173,99 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }

+// collectToRAMHealth records the copy-to-RAM status in health.ToRAMStatus and
+// appends a warning-severity issue when that status is "failed" (toram was
+// requested but the medium is not in RAM) or "partial" (a RAM copy exists but
+// the runtime still depends on the boot medium). Any other status, including
+// "ok" and "warning", adds no issue.
+func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
+	ram := s.LiveMediaRAMState()
+	health.ToRAMStatus = ram.Status
+
+	var code string
+	switch ram.Status {
+	case "failed":
+		code = "toram_copy_failed"
+	case "partial":
+		code = "toram_copy_partial"
+	default:
+		return
+	}
+	health.Issues = append(health.Issues, schema.RuntimeIssue{
+		Code:        code,
+		Severity:    "warning",
+		Description: ram.Message,
+	})
+}
+
+// collectUSBExportHealth stores the mount point reported by
+// findUSBExportMount in health.USBExportPath; the value is "" when no
+// writable USB-backed filesystem is mounted.
+func (s *System) collectUSBExportHealth(health *schema.RuntimeHealth) {
+	mountPoint := findUSBExportMount()
+	health.USBExportPath = mountPoint
+}
+
+// findUSBExportMount returns the mount point of the first writable USB filesystem
+// found in /proc/mounts (vfat, exfat, ext2/3/4, ntfs, ntfs3, fuseblk) whose backing
+// block device has USB transport. Returns "" if none is found or /proc/mounts
+// cannot be opened.
+//
+// The returned path is decoded from the /proc/mounts representation, in which
+// space, tab, newline and backslash appear as \040, \011, \012 and \134, so it
+// can be passed directly to file APIs even when the volume label contains spaces.
+func findUSBExportMount() string {
+	f, err := os.Open("/proc/mounts")
+	if err != nil {
+		return ""
+	}
+	defer f.Close()
+
+	// fs types that are expected on USB export drives
+	exportFSTypes := map[string]bool{
+		"vfat":    true,
+		"exfat":   true,
+		"ext2":    true,
+		"ext3":    true,
+		"ext4":    true,
+		"ntfs":    true,
+		"ntfs3":   true,
+		"fuseblk": true,
+	}
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		// fields: device mountpoint fstype options dump pass
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 4 {
+			continue
+		}
+		device, mountPoint, fsType, options := fields[0], fields[1], fields[2], fields[3]
+		if !exportFSTypes[strings.ToLower(fsType)] {
+			continue
+		}
+		// Skip read-only mounts
+		readOnly := false
+		for _, o := range strings.Split(options, ",") {
+			if strings.TrimSpace(o) == "ro" {
+				readOnly = true
+				break
+			}
+		}
+		if readOnly {
+			continue
+		}
+		// Only real block devices can have a transport.
+		if !strings.HasPrefix(device, "/dev/") {
+			continue
+		}
+		// lsblk only reports TRAN for the whole disk, not for partitions
+		// (e.g. /dev/sdc1), so query the parent disk instead.
+		if blockDeviceTransport(usbExportParentDisk(device)) == "usb" {
+			return unescapeProcMountsField(mountPoint)
+		}
+	}
+	return ""
+}
+
+// usbExportParentDisk maps a partition device node to its parent disk:
+// /dev/sdc1 -> /dev/sdc, /dev/nvme0n1p1 -> /dev/nvme0n1, /dev/mmcblk0p2 -> /dev/mmcblk0.
+// Names without trailing digits are returned unchanged.
+func usbExportParentDisk(device string) string {
+	trimmed := strings.TrimRight(device, "0123456789")
+	if trimmed == device || len(trimmed) <= len("/dev/") {
+		return device
+	}
+	// Disks whose own name ends in a digit use "p<N>" as the partition suffix;
+	// drop the separator too. A "p" preceded by a letter (e.g. /dev/sdp1) is
+	// part of the disk name and must stay.
+	if n := len(trimmed); trimmed[n-1] == 'p' && trimmed[n-2] >= '0' && trimmed[n-2] <= '9' {
+		if disk := trimmed[:n-1]; len(disk) > len("/dev/") {
+			return disk
+		}
+	}
+	return trimmed
+}
+
+// unescapeProcMountsField decodes the three-digit octal escapes (\NNN) that
+// the kernel uses in /proc/mounts fields. Backslashes that do not start a
+// valid escape are kept as-is.
+func unescapeProcMountsField(field string) string {
+	if !strings.Contains(field, `\`) {
+		return field
+	}
+	var b strings.Builder
+	b.Grow(len(field))
+	for i := 0; i < len(field); i++ {
+		c := field[i]
+		if c == '\\' && i+3 < len(field) {
+			d1, d2, d3 := field[i+1], field[i+2], field[i+3]
+			// First digit is limited to 0-3 so the value fits in one byte.
+			if d1 >= '0' && d1 <= '3' && d2 >= '0' && d2 <= '7' && d3 >= '0' && d3 <= '7' {
+				b.WriteByte((d1-'0')<<6 | (d2-'0')<<3 | (d3 - '0'))
+				i += 3
+				continue
+			}
+		}
+		b.WriteByte(c)
+	}
+	return b.String()
+}
+
 func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
 	lsmodText := commandText("lsmod")

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -20,6 +20,54 @@ import (
 	"time"
 )

+// Estimated wall-clock durations for each SAT/validate test, derived from real
+// production logs in _benchmark/_v8/.
+//
+// Rule: whenever the commands, timeout parameters, or number of sub-jobs inside
+// the corresponding Run*Pack function change, re-measure the wall-clock duration
+// from actual task logs and update the matching constant here.
+//
+// Sources:
+//   - SATEstimatedCPUValidateSec:                 xFusion v8.6 — 62 s
+//   - SATEstimatedMemoryValidateSec:               xFusion v8.6 — 68 s
+//   - SATEstimatedNvidiaGPUValidateSec:            xFusion v8.6/v8.22 — 77–87 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
+//   - SATEstimatedNvidiaGPUStressSec:              xFusion v8.6/v8.22 — 444–448 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
+//   - SATEstimatedNvidiaTargetedStressSec:         xFusion v8.6/v8.22 — 347–348 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
+//   - SATEstimatedNvidiaTargetedPowerSec:          MSI v8.22 / xFusion v8.6 — 346–351 s/GPU (measured per-GPU; re-measure after switch to all-GPU simultaneous)
+//   - SATEstimatedNvidiaPulseTestSec:              xFusion v8.6 — 4 926 s / 8 GPU (all simultaneous)
+//   - SATEstimatedNvidiaInterconnectSec:           xFusion v8.6/v8.22 — 210–384 s / 8 GPU (all simultaneous)
+//   - SATEstimatedNvidiaBandwidthSec:              xFusion v8.6/v8.22 — 2 664–2 688 s / 8 GPU (all simultaneous)
+//
+// All constants below are wall-clock estimates in seconds.
+// NOTE(review): the Sources list above marks the GPU validate/stress/targeted
+// figures as per-GPU measurements, while the comments below describe all-GPU
+// simultaneous runs; the values still need the re-measurement noted there.
+const (
+	// CPU stress: stress-ng 60 s + lscpu/sensors overhead.
+	SATEstimatedCPUValidateSec = 65
+	// CPU stress: stress-ng 1800 s (stress mode default).
+	// No measured source is listed for this value; it equals the stress-ng duration.
+	SATEstimatedCPUStressSec = 1800
+
+	// RAM: memtester 256 MB / 1 pass.
+	SATEstimatedMemoryValidateSec = 70
+	// RAM: memtester 512 MB / 1 pass (extrapolated from validate timing, linear with size).
+	SATEstimatedMemoryStressSec = 140
+
+	// NVIDIA dcgmi diag Level 2 (medium), all GPUs simultaneously.
+	SATEstimatedNvidiaGPUValidateSec = 85
+	// NVIDIA dcgmi diag Level 3 (targeted stress), all GPUs simultaneously.
+	SATEstimatedNvidiaGPUStressSec = 450
+
+	// NVIDIA dcgmi targeted_stress 300 s + overhead, all GPUs simultaneously.
+	SATEstimatedNvidiaTargetedStressSec = 350
+	// NVIDIA dcgmi targeted_power 300 s + overhead, all GPUs simultaneously.
+	SATEstimatedNvidiaTargetedPowerSec = 350
+
+	// NVIDIA dcgmi pulse_test, all GPUs simultaneously (not per-GPU).
+	SATEstimatedNvidiaPulseTestSec = 5000
+
+	// NCCL all_reduce_perf, all GPUs simultaneously.
+	SATEstimatedNvidiaInterconnectSec = 300
+	// nvbandwidth, all GPUs simultaneously. Tool runs all built-in tests
+	// without a user-configurable time limit; duration is determined by nvbandwidth itself.
+	SATEstimatedNvidiaBandwidthSec = 2700
+)
+
 var (
 	satExecCommand  = exec.Command
 	satLookPath     = exec.LookPath
@@ -108,15 +156,15 @@ type nvidiaGPUHealth struct {
 }

 type nvidiaGPUStatusFile struct {
-	Index       int
-	Name        string
-	RunStatus   string
-	Reason      string
-	Health      string
-	HealthRaw   string
-	Observed    bool
-	Selected    bool
-	FailingJob  string
+	Index      int
+	Name       string
+	RunStatus  string
+	Reason     string
+	Health     string
+	HealthRaw  string
+	Observed   bool
+	Selected   bool
+	FailingJob string
 }

 // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
@@ -359,19 +407,21 @@ func (s *System) ResetNvidiaGPU(index int) (string, error) {
 	if index < 0 {
 		return "", fmt.Errorf("gpu index must be >= 0")
 	}
-	raw, err := satExecCommand("nvidia-smi", "-r", "-i", strconv.Itoa(index)).CombinedOutput()
-	if len(raw) == 0 && err == nil {
-		raw = []byte("GPU reset completed.\n")
+	out, err := runNvidiaRecover("reset-gpu", strconv.Itoa(index))
+	if strings.TrimSpace(out) == "" && err == nil {
+		out = "GPU reset completed.\n"
 	}
-	return string(raw), err
+	return out, err
 }

-// RunNCCLTests runs nccl-tests all_reduce_perf across all NVIDIA GPUs.
+// RunNCCLTests runs nccl-tests all_reduce_perf across the selected NVIDIA GPUs.
 // Measures collective communication bandwidth over NVLink/PCIe.
-func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
-	// detect GPU count
-	out, _ := exec.Command("nvidia-smi", "--query-gpu=index", "--format=csv,noheader").Output()
-	gpuCount := len(strings.Split(strings.TrimSpace(string(out)), "\n"))
+func (s *System) RunNCCLTests(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	gpuCount := len(selected)
 	if gpuCount < 1 {
 		gpuCount = 1
 	}
@@ -380,18 +430,40 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
 		satJob{name: "02-all-reduce-perf.log", cmd: []string{
 			"all_reduce_perf", "-b", "512M", "-e", "4G", "-f", "2",
 			"-g", strconv.Itoa(gpuCount), "--iters", "20",
-		}},
+		}, env: nvidiaVisibleDevicesEnv(selected)},
 	), logFunc)
 }

-func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	selected, err := resolveDCGMGPUIndices(gpuIndices)
 	if err != nil {
 		return "", err
 	}
-	profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
-	if err != nil {
-		return "", err
+	var (
+		profCmd []string
+		profEnv []string
+	)
+	if len(selected) > 1 {
+		// For multiple GPUs, always spawn one dcgmproftester process per GPU via
+		// bee-dcgmproftester-staggered (stagger=0 means all start simultaneously).
+		// A single dcgmproftester process without -i only loads GPU 0 regardless
+		// of CUDA_VISIBLE_DEVICES.
+		stagger := staggerSec
+		if stagger < 0 {
+			stagger = 0
+		}
+		profCmd = []string{
+			"bee-dcgmproftester-staggered",
+			"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
+			"--stagger-seconds", strconv.Itoa(stagger),
+			"--devices", joinIndexList(selected),
+		}
+	} else {
+		profCmd, err = resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
+		if err != nil {
+			return "", err
+		}
+		profEnv = nvidiaVisibleDevicesEnv(selected)
 	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
@@ -399,7 +471,7 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 		satJob{
 			name:       "03-dcgmproftester.log",
 			cmd:        profCmd,
-			env:        nvidiaVisibleDevicesEnv(selected),
+			env:        profEnv,
 			collectGPU: true,
 			gpuIndices: selected,
 		},
@@ -412,6 +484,13 @@ func (s *System) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string,
 	if err != nil {
 		return "", err
 	}
+	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
+	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
+	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
+		for _, p := range killed {
+			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
+		}
+	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-power", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -429,6 +508,13 @@ func (s *System) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, dur
 	if err != nil {
 		return "", err
 	}
+	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
+	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
+	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
+		for _, p := range killed {
+			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
+		}
+	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-pulse", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -446,6 +532,13 @@ func (s *System) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpu
 	if err != nil {
 		return "", err
 	}
+	// Kill any lingering nvvs/dcgmi processes from a previous interrupted run
+	// before starting — otherwise dcgmi diag fails with DCGM_ST_IN_USE (-34).
+	if killed := KillTestWorkers(); len(killed) > 0 && logFunc != nil {
+		for _, p := range killed {
+			logFunc(fmt.Sprintf("pre-flight: killed stale worker pid=%d name=%s", p.PID, p.Name))
+		}
+	}
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-bandwidth", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{
@@ -538,9 +631,19 @@ func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, si
 	if passes <= 0 {
 		passes = 1
 	}
+	// Keep Validate Memory bounded to a quick diagnostic window. The timeout is
+	// intentionally conservative enough for healthy systems while avoiding the
+	// prior 30-80 minute hangs caused by memtester spinning on a bad subtest.
+	timeoutSec := sizeMB*passes*20/100 + 60
+	if timeoutSec < 180 {
+		timeoutSec = 180
+	}
+	if timeoutSec > 900 {
+		timeoutSec = 900
+	}
 	return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
 		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
-		{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
+		{name: "02-memtester.log", cmd: []string{"timeout", fmt.Sprintf("%d", timeoutSec), "memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
 		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
 	}, logFunc)
 }
@@ -648,11 +751,7 @@ func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, e
 	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
 		return "", err
 	}
-	archive := filepath.Join(baseDir, "storage-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 type satJob struct {
@@ -838,11 +937,7 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		}
 	}

-	archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func updateNvidiaGPUStatus(perGPU map[int]*nvidiaGPUStatusFile, idx int, status, jobName, detail string) {
@@ -905,7 +1000,7 @@ func writeNvidiaGPUStatusFiles(runDir, overall string, perGPU map[int]*nvidiaGPU
 			entry.Health = "UNKNOWN"
 		}
 		if entry.Name == "" {
-			entry.Name = "unknown"
+			entry.Name = "Unknown GPU"
 		}
 		var body strings.Builder
 		fmt.Fprintf(&body, "gpu_index=%d\n", entry.Index)
@@ -1376,8 +1471,6 @@ func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd
 	if len(metricRows) > 0 {
 		_ = WriteGPUMetricsCSV(filepath.Join(runDir, "gpu-metrics.csv"), metricRows)
 		_ = WriteGPUMetricsHTML(filepath.Join(runDir, "gpu-metrics.html"), metricRows)
-		chart := RenderGPUTerminalChart(metricRows)
-		_ = os.WriteFile(filepath.Join(runDir, "gpu-metrics-term.txt"), []byte(chart), 0644)
 	}

 	return out, err
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"math"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -42,27 +43,56 @@ type GPUStressMetric struct {

 // FanStressRow is one second-interval telemetry sample covering all monitored dimensions.
 type FanStressRow struct {
-	TimestampUTC string
-	ElapsedSec   float64
-	Phase        string // "baseline", "load1", "pause", "load2", "cooldown"
-	GPUs         []GPUStressMetric
-	Fans         []FanReading
-	CPUMaxTempC  float64 // highest CPU temperature from ipmitool / sensors
-	SysPowerW    float64 // DCMI system power reading
+	TimestampUTC   string
+	ElapsedSec     float64
+	Phase          string // "baseline", "load1", "pause", "load2", "cooldown"
+	GPUs           []GPUStressMetric
+	Fans           []FanReading
+	CPUMaxTempC    float64 // highest CPU temperature from ipmitool / sensors
+	SysPowerW      float64
+	SysPowerSource string
+	SysPowerMode   string
 }

 type cachedPowerReading struct {
 	Value     float64
+	Source    string
+	Mode      string
+	Reason    string
 	UpdatedAt time.Time
 }

+type fanObservationState struct { // serialised as JSON to fanObservationStatePath
+	MaxRPM map[string]float64 `json:"max_rpm"` // accepted maximum RPM, keyed by trimmed fan name
+}
+
+type fanPeakCandidate struct { // reading above the stored maximum, awaiting confirmation
+	FirstSeen time.Time // time of the first sample that exceeded the stored maximum
+	RPM       float64   // highest RPM observed while the candidate is pending
+}
+
 var (
 	systemPowerCacheMu sync.Mutex
 	systemPowerCache   cachedPowerReading
+	fanObservationMu   sync.Mutex
+	fanObservation     fanObservationState
+	fanObservationInit bool
+	fanPeakCandidates  = make(map[string]fanPeakCandidate)
 )

 const systemPowerHoldTTL = 15 * time.Second

+var fanObservationStatePath = "/var/log/bee-sat/fan-observation.json"
+
+const fanObservationMinPeakHold = time.Second
+
+func normalizeObservedFanMaxRPM(rpm float64) float64 {
+	if rpm <= 0 {
+		return 0 // non-positive input maps to 0
+	}
+	return math.Ceil(rpm/1000.0) * 1000.0 // round up to a whole multiple of 1000 RPM
+}
+
 // RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
 // temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
 // Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
@@ -223,11 +253,7 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		return "", err
 	}

-	archive := filepath.Join(baseDir, "fan-stress-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
+	return runDir, nil
 }

 func applyFanStressDefaults(opts *FanStressOptions) {
@@ -257,7 +283,7 @@ func sampleFanStressRow(gpuIndices []int, phase string, elapsed float64) FanStre
 	row.GPUs = sampleGPUStressMetrics(gpuIndices)
 	row.Fans, _ = sampleFanSpeeds()
 	row.CPUMaxTempC = sampleCPUMaxTemp()
-	row.SysPowerW = sampleSystemPower()
+	row.SysPowerW, row.SysPowerSource, row.SysPowerMode = sampleSystemPowerResolved()
 	return row
 }

@@ -314,11 +340,13 @@ func sampleFanSpeeds() ([]FanReading, error) {
 	out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
 	if err == nil {
 		if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
+			updateFanObservation(fans, time.Now())
 			return fans, nil
 		}
 	}
 	fans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
 	if len(fans) > 0 {
+		updateFanObservation(fans, time.Now())
 		return fans, nil
 	}
 	if err != nil {
@@ -327,6 +355,119 @@ func sampleFanSpeeds() ([]FanReading, error) {
 	return nil, sensorsErr
 }

+// loadFanObservationLocked initialises fanObservation once, seeding it from the
+// state file when readable. Callers in this file hold fanObservationMu.
+func loadFanObservationLocked() {
+	if fanObservationInit {
+		return
+	}
+	fanObservationInit = true
+	fanObservation.MaxRPM = make(map[string]float64)
+	data, readErr := os.ReadFile(fanObservationStatePath)
+	if readErr != nil || len(data) == 0 {
+		return
+	}
+	var onDisk fanObservationState
+	if decodeErr := json.Unmarshal(data, &onDisk); decodeErr != nil {
+		return
+	}
+	for name, rpm := range onDisk.MaxRPM {
+		if key := strings.TrimSpace(name); key != "" && rpm > 0 {
+			fanObservation.MaxRPM[key] = rpm
+		}
+	}
+}
+
+// saveFanObservationLocked writes fanObservation to fanObservationStatePath as
+// indented JSON. Persistence is best-effort: every failure is silently dropped.
+func saveFanObservationLocked() {
+	if len(fanObservation.MaxRPM) == 0 {
+		return
+	}
+	stateDir := filepath.Dir(fanObservationStatePath)
+	if stateDir == "" || stateDir == "." {
+		stateDir = "/var/log/bee-sat"
+	}
+	if os.MkdirAll(stateDir, 0755) != nil {
+		return
+	}
+	if raw, err := json.MarshalIndent(fanObservation, "", "  "); err == nil {
+		_ = os.WriteFile(fanObservationStatePath, raw, 0644)
+	}
+}
+
+// updateFanObservation raises the recorded per-fan maximum RPM once a higher
+// reading has persisted for at least fanObservationMinPeakHold, so that a
+// single-sample spike never becomes the reference maximum.
+func updateFanObservation(fans []FanReading, now time.Time) {
+	if len(fans) == 0 {
+		return
+	}
+	fanObservationMu.Lock()
+	defer fanObservationMu.Unlock()
+	loadFanObservationLocked()
+	changed := false
+	for _, fan := range fans {
+		name := strings.TrimSpace(fan.Name)
+		if name == "" || fan.RPM <= 0 {
+			continue
+		}
+		currentMax := fanObservation.MaxRPM[name]
+		cand, pending := fanPeakCandidates[name]
+		switch {
+		case fan.RPM <= currentMax:
+			// Not a new peak: discard any pending candidate for this fan.
+			delete(fanPeakCandidates, name)
+		case !pending:
+			fanPeakCandidates[name] = fanPeakCandidate{FirstSeen: now, RPM: fan.RPM}
+		case now.Sub(cand.FirstSeen) >= fanObservationMinPeakHold:
+			// Held long enough: promote the larger of candidate and current reading.
+			if newMax := math.Max(cand.RPM, fan.RPM); newMax > currentMax {
+				fanObservation.MaxRPM[name] = normalizeObservedFanMaxRPM(newMax)
+				changed = true
+			}
+			delete(fanPeakCandidates, name)
+		case fan.RPM > cand.RPM:
+			fanPeakCandidates[name] = fanPeakCandidate{FirstSeen: cand.FirstSeen, RPM: fan.RPM}
+		}
+	}
+	if changed {
+		saveFanObservationLocked()
+	}
+}
+
+// estimateFanDutyCyclePctFromObservation approximates fan duty as the current
+// RPM relative to each fan's recorded maximum RPM and combines the per-fan
+// percentages with benchmarkMean. The bool is false when no fan has a usable
+// reading together with a recorded maximum.
+func estimateFanDutyCyclePctFromObservation(fans []FanReading) (float64, bool) {
+	if len(fans) == 0 {
+		return 0, false
+	}
+	fanObservationMu.Lock()
+	defer fanObservationMu.Unlock()
+	loadFanObservationLocked()
+	var pcts []float64
+	for _, fan := range fans {
+		key := strings.TrimSpace(fan.Name)
+		if key == "" || fan.RPM <= 0 {
+			continue
+		}
+		ceiling := fanObservation.MaxRPM[key]
+		if ceiling <= 0 {
+			continue // no maximum recorded for this fan yet
+		}
+		// A reading can exceed the recorded maximum until a new peak has been
+		// accepted, so the ratio is capped at 100 (and floored at 0).
+		pct := math.Min(fan.RPM/ceiling*100.0, 100)
+		pcts = append(pcts, math.Max(pct, 0))
+	}
+	if len(pcts) == 0 {
+		return 0, false
+	}
+	return benchmarkMean(pcts), true
+}
+
 // parseFanSpeeds parses "ipmitool sdr type Fan" output.
 // Handles two formats:
 //
@@ -430,6 +571,116 @@ func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
 	return fans, nil
 }

+// sampleFanDutyCyclePct returns (dutyPct, ok, estimated). It averages the PWM
+// controls reported by `sensors -j`; only when that command fails or prints
+// nothing does it fall back to the RPM-based estimate from sampleFanSpeeds.
+func sampleFanDutyCyclePct() (float64, bool, bool) {
+	if out, err := exec.Command("sensors", "-j").Output(); err == nil && len(out) > 0 {
+		pct, ok := parseFanDutyCyclePctSensorsJSON(out)
+		return pct, ok, false
+	}
+	fans, fanErr := sampleFanSpeeds()
+	if fanErr != nil {
+		return 0, false, false
+	}
+	return sampleFanDutyCyclePctFromFans(fans)
+}
+
+// sampleFanDutyCyclePctFromFans wraps estimateFanDutyCyclePctFromObservation and
+// sets the third result to true whenever an estimate was produced.
+func sampleFanDutyCyclePctFromFans(fans []FanReading) (float64, bool, bool) {
+	pct, ok := estimateFanDutyCyclePctFromObservation(fans)
+	if !ok {
+		return 0, false, false
+	}
+	return pct, true, true
+}
+
+// parseFanDutyCyclePctSensorsJSON decodes `sensors -j` output and returns the
+// benchmarkMean of every duty value found; the bool is false when the JSON is
+// invalid or no feature yields a duty value.
+func parseFanDutyCyclePctSensorsJSON(raw []byte) (float64, bool) {
+	var chips map[string]map[string]any
+	if json.Unmarshal(raw, &chips) != nil {
+		return 0, false
+	}
+	var duties []float64
+	for _, chip := range chips {
+		for label, entry := range chip {
+			fields, isObject := entry.(map[string]any)
+			if strings.EqualFold(label, "Adapter") || !isObject {
+				continue
+			}
+			if duty, found := firstFanDutyValue(label, fields); found {
+				duties = append(duties, duty)
+			}
+		}
+	}
+	if len(duties) == 0 {
+		return 0, false
+	}
+	return benchmarkMean(duties), true
+}
+
+// firstFanDutyValue picks one duty percentage from a sensors feature, skipping
+// enable/mode/alarm names; fallback sub-keys are visited in sorted order.
+func firstFanDutyValue(featureName string, feature map[string]any) (float64, bool) {
+	label := strings.ToLower(strings.TrimSpace(featureName))
+	ignored := func(s string) bool {
+		return strings.Contains(s, "enable") || strings.Contains(s, "mode") || strings.Contains(s, "alarm")
+	}
+	if ignored(label) {
+		return 0, false
+	}
+	if strings.Contains(label, "pwm") {
+		for _, key := range []string{"input", "value", "current"} {
+			if duty, parsed := parseFanDutyValue(feature[key]); parsed {
+				return duty, true
+			}
+		}
+	}
+	keys := make([]string, 0, len(feature))
+	for key := range feature {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	for _, key := range keys {
+		lower := strings.ToLower(key)
+		if !strings.Contains(lower, "pwm") || ignored(lower) {
+			continue
+		}
+		if duty, parsed := parseFanDutyValue(feature[key]); parsed {
+			return duty, true
+		}
+	}
+	return 0, false
+}
+
+// parseFanDutyValue normalises a float64 or numeric-string reading via normalizePWMAsDutyPct.
+func parseFanDutyValue(value any) (float64, bool) {
+	if num, isNum := value.(float64); isNum {
+		return normalizePWMAsDutyPct(num)
+	}
+	text, isText := value.(string)
+	if f, err := strconv.ParseFloat(strings.TrimSpace(text), 64); isText && err == nil {
+		return normalizePWMAsDutyPct(f)
+	}
+	return 0, false
+}
+
+// normalizePWMAsDutyPct maps a raw PWM reading to a 0-100 duty percentage.
+func normalizePWMAsDutyPct(raw float64) (float64, bool) {
+	switch {
+	case raw < 0:
+		return 0, false
+	case raw <= 100:
+		return raw, true // already within the percent range; passed through unchanged
+	case raw <= 255:
+		return raw / 255.0 * 100.0, true // scaled down from the 0-255 range
+	}
+	return 0, false
+}
+
 func firstFanInputValue(feature map[string]any) (float64, bool) {
 	keys := make([]string, 0, len(feature))
 	for key := range feature {
@@ -517,19 +768,19 @@ func sampleCPUTempViaSensors() float64 {
 	return max
 }

-// sampleSystemPower reads system power draw via DCMI.
-func sampleSystemPower() float64 {
+// sampleSystemPowerResolved reads system power via the global autotune source,
+// falling back to the historical heuristic before autotune or when degraded.
+func sampleSystemPowerResolved() (float64, string, string) {
 	now := time.Now()
-	current := 0.0
-	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
-	if err == nil {
-		current = parseDCMIPowerReading(string(out))
-	}
+	current, decision, err := SampleSystemPowerResolved("")
 	systemPowerCacheMu.Lock()
 	defer systemPowerCacheMu.Unlock()
-	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	if err != nil {
+		current = 0
+	}
+	value, updated := effectiveSystemPowerReading(systemPowerCache, current, decision.EffectiveSource, decision.Mode, decision.Reason, now)
 	systemPowerCache = updated
-	return value
+	return value, updated.Source, updated.Mode
 }

 // parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
@@ -552,9 +803,9 @@ func parseDCMIPowerReading(raw string) float64 {
 	return 0
 }

-func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
+func effectiveSystemPowerReading(cache cachedPowerReading, current float64, source, mode, reason string, now time.Time) (float64, cachedPowerReading) {
 	if current > 0 {
-		cache = cachedPowerReading{Value: current, UpdatedAt: now}
+		cache = cachedPowerReading{Value: current, Source: source, Mode: mode, Reason: reason, UpdatedAt: now}
 		return current, cache
 	}
 	if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -1,6 +1,7 @@
 package platform

 import (
+	"path/filepath"
 	"testing"
 	"time"
 )
@@ -29,6 +30,74 @@ func TestFirstFanInputValue(t *testing.T) {
 	}
 }

+func TestParseFanDutyCyclePctSensorsJSON(t *testing.T) {
+	raw := []byte(`{
+		"chip0": {
+			"fan1": {"input": 9000},
+			"pwm1": {"input": 128},
+			"pwm1_enable": {"input": 1}
+		},
+		"chip1": {
+			"pwm2": {"input": 64}
+		}
+	}`)
+
+	got, ok := parseFanDutyCyclePctSensorsJSON(raw)
+	if !ok {
+		t.Fatalf("expected duty cycle telemetry to be parsed")
+	}
+	if got < 57 || got > 58 { // pwm1: 128/255*100 ~ 50.2; pwm2: 64 is <= 100 so kept as 64; fan1 and pwm1_enable ignored
+		t.Fatalf("got=%v want ~57.1", got)
+	}
+}
+
+func TestEstimateFanDutyCyclePctFromObservation(t *testing.T) {
+	// Deliberately not parallel: this test swaps package-level fan observation state.
+
+	oldPath := fanObservationStatePath
+	oldState := fanObservation
+	oldInit := fanObservationInit
+	oldCandidates := fanPeakCandidates
+	fanObservationStatePath = filepath.Join(t.TempDir(), "fan-observation.json")
+	fanObservation = fanObservationState{}
+	fanObservationInit = false
+	fanPeakCandidates = make(map[string]fanPeakCandidate)
+	t.Cleanup(func() {
+		fanObservationStatePath = oldPath
+		fanObservation = oldState
+		fanObservationInit = oldInit
+		fanPeakCandidates = oldCandidates
+	})
+
+	start := time.Unix(100, 0)
+	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5000}}, start)
+	if _, ok := estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2500}}); ok {
+		t.Fatalf("single-sample spike should not establish observed max")
+	}
+
+	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5200}}, start.Add(500*time.Millisecond))
+	updateFanObservation([]FanReading{{Name: "FAN1", RPM: 5100}}, start.Add(1500*time.Millisecond))
+
+	got, ok := estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2600}})
+	if !ok {
+		t.Fatalf("expected estimated duty cycle from persisted observed max")
+	}
+	if got < 43 || got > 44 { // peak 5200 is rounded up to 6000, so 2600/6000 ~ 43.3
+		t.Fatalf("got=%v want ~43.3", got)
+	}
+
+	fanObservation = fanObservationState{}
+	fanObservationInit = false
+	fanPeakCandidates = make(map[string]fanPeakCandidate)
+	got, ok = estimateFanDutyCyclePctFromObservation([]FanReading{{Name: "FAN1", RPM: 2600}})
+	if !ok {
+		t.Fatalf("expected persisted observed max to be reloaded from disk")
+	}
+	if got < 43 || got > 44 {
+		t.Fatalf("reloaded got=%v want ~43.3", got)
+	}
+}
+
 func TestParseDCMIPowerReading(t *testing.T) {
 	raw := `
 Instantaneous power reading:                   512 Watts
@@ -43,7 +112,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	now := time.Now()
 	cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}

-	got, updated := effectiveSystemPowerReading(cache, 0, now)
+	got, updated := effectiveSystemPowerReading(cache, 0, "", "", "", now)
 	if got != 480 {
 		t.Fatalf("got=%v want cached 480", got)
 	}
@@ -51,7 +120,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 		t.Fatalf("updated=%+v", updated)
 	}

-	got, updated = effectiveSystemPowerReading(cache, 530, now)
+	got, updated = effectiveSystemPowerReading(cache, 530, "dcmi", "fallback", "test", now)
 	if got != 530 {
 		t.Fatalf("got=%v want 530", got)
 	}
@@ -60,7 +129,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	}

 	expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
-	got, _ = effectiveSystemPowerReading(expired, 0, now)
+	got, _ = effectiveSystemPowerReading(expired, 0, "", "", "", now)
 	if got != 0 {
 		t.Fatalf("expired cache returned %v want 0", got)
 	}
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -321,6 +321,19 @@ func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
 	}
 }

+func TestNvidiaDCGMNamedDiagCommandSkipsDurationForNVBandwidth(t *testing.T) {
+	cmd := nvidiaDCGMNamedDiagCommand("nvbandwidth", 0, []int{2, 0})
+	want := []string{"dcgmi", "diag", "-r", "nvbandwidth", "-i", "2,0"} // no duration argument; GPU order kept as given
+	if len(cmd) != len(want) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(cmd), len(want), cmd)
+	}
+	for i := range want {
+		if cmd[i] != want[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, cmd[i], want[i])
+		}
+	}
+}
+
 func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
 	env := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
 	if len(env) != 2 {
--- a/audit/internal/platform/services.go
+++ b/audit/internal/platform/services.go
@@ -61,6 +61,9 @@ func (s *System) ServiceState(name string) string {
 }

 func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
+	if name == "bee-nvidia" && action == ServiceRestart {
+		return runNvidiaRecover("restart-drivers")
+	}
 	// bee-web runs as the bee user; sudo is required to control system services.
 	// /etc/sudoers.d/bee grants bee NOPASSWD:ALL.
 	raw, err := exec.Command("sudo", "systemctl", string(action), name).CombinedOutput()
--- a/audit/internal/platform/techdump.go
+++ b/audit/internal/platform/techdump.go
@@ -20,6 +20,7 @@ var techDumpFixedCommands = []struct {
 	{Name: "dmidecode", Args: []string{"-t", "4"}, File: "dmidecode-type4.txt"},
 	{Name: "dmidecode", Args: []string{"-t", "17"}, File: "dmidecode-type17.txt"},
 	{Name: "lspci", Args: []string{"-vmm", "-D"}, File: "lspci-vmm.txt"},
+	{Name: "lspci", Args: []string{"-vvv"}, File: "lspci-vvv.txt"},
 	{Name: "lsblk", Args: []string{"-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL"}, File: "lsblk.json"},
 	{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
 	{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -9,6 +9,17 @@ type LiveBootSource struct {
 	Device string `json:"device,omitempty"`
 }

+type LiveMediaRAMState struct {
+	LiveBootSource // embedded without a tag, so encoding/json emits its fields at this object's top level
+	State        string `json:"state"`
+	Status       string `json:"status"`
+	ToramActive  bool   `json:"toram_active,omitempty"`
+	CopyPresent  bool   `json:"copy_present,omitempty"`
+	CopyComplete bool   `json:"copy_complete,omitempty"`
+	CanStartCopy bool   `json:"can_start_copy,omitempty"`
+	Message      string `json:"message,omitempty"`
+}
+
 type InterfaceInfo struct {
 	Name  string
 	State string
@@ -70,6 +81,7 @@ type NvidiaStressOptions struct {
 	Loader            string
 	GPUIndices        []int
 	ExcludeGPUIndices []int
+	StaggerSeconds    int
 }

 func New() *System {
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -15,13 +15,17 @@ type HardwareIngestRequest struct {
 }

 type RuntimeHealth struct {
-	Status        string                 `json:"status"`
-	CheckedAt     string                 `json:"checked_at"`
-	ExportDir     string                 `json:"export_dir,omitempty"`
-	DriverReady   bool                   `json:"driver_ready,omitempty"`
-	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
-	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
-	NetworkStatus string                 `json:"network_status,omitempty"`
+	Status        string `json:"status"`
+	CheckedAt     string `json:"checked_at"`
+	ExportDir     string `json:"export_dir,omitempty"`
+	DriverReady   bool   `json:"driver_ready,omitempty"`
+	CUDAReady     bool   `json:"cuda_ready,omitempty"`
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
+	NetworkStatus string `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
+	ToRAMStatus string `json:"toram_status,omitempty"`
+	// USBExportPath: mount point of the first writable USB drive found, empty if none.
+	USBExportPath string                 `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
@@ -183,6 +187,13 @@ type HardwarePCIeDevice struct {
 	BatteryTemperatureC    *float64       `json:"battery_temperature_c,omitempty"`
 	BatteryVoltageV        *float64       `json:"battery_voltage_v,omitempty"`
 	BatteryReplaceRequired *bool          `json:"battery_replace_required,omitempty"`
+	SFPPresent             *bool          `json:"sfp_present,omitempty"`
+	SFPIdentifier          *string        `json:"sfp_identifier,omitempty"`
+	SFPConnector           *string        `json:"sfp_connector,omitempty"`
+	SFPVendor              *string        `json:"sfp_vendor,omitempty"`
+	SFPPartNumber          *string        `json:"sfp_part_number,omitempty"`
+	SFPSerialNumber        *string        `json:"sfp_serial_number,omitempty"`
+	SFPWavelengthNM        *float64       `json:"sfp_wavelength_nm,omitempty"`
 	SFPTemperatureC        *float64       `json:"sfp_temperature_c,omitempty"`
 	SFPTXPowerDBM          *float64       `json:"sfp_tx_power_dbm,omitempty"`
 	SFPRXPowerDBM          *float64       `json:"sfp_rx_power_dbm,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -12,6 +12,7 @@ import (
 	"path/filepath"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 	"sync/atomic"
 	"syscall"
@@ -35,6 +36,16 @@ var apiListNvidiaGPUStatuses = func(a *app.App) ([]platform.NvidiaGPUStatus, err
 	return a.ListNvidiaGPUStatuses()
 }

+const (
+	taskPriorityBenchmark      = 10 // nvidia-bench-perf, nvidia-bench-power, nvidia-bench-autotune
+	taskPriorityBurn           = 20 // nvidia-stress, amd-stress, memory-stress, sat-stress, platform-stress, nvidia-compute
+	taskPriorityValidateStress = 30 // validation targets submitted with StressMode set
+	taskPriorityValidate       = 40 // validation targets without StressMode
+	taskPriorityAudit          = 50 // "audit"
+	taskPriorityInstallToRAM   = 60 // "install-to-ram"
+	taskPriorityInstall        = 70 // "install"
+)
+
 // ── Job ID counter ────────────────────────────────────────────────────────────

 var jobCounter atomic.Uint64
@@ -99,7 +110,7 @@ func writeTaskRunResponse(w http.ResponseWriter, tasks []*Task) {

 func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	switch strings.TrimSpace(target) {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute",
 		"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
 		"nvidia-bandwidth", "nvidia-stress":
 		return true
@@ -108,6 +119,30 @@ func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	}
 }

+// defaultTaskPriority maps a trimmed task target to its taskPriority* constant; unknown targets get 0.
+func defaultTaskPriority(target string, params taskParams) int {
+	switch strings.TrimSpace(target) {
+	case "install":
+		return taskPriorityInstall
+	case "install-to-ram":
+		return taskPriorityInstallToRAM
+	case "audit":
+		return taskPriorityAudit
+	case "nvidia-bench-perf", "nvidia-bench-power", "nvidia-bench-autotune":
+		return taskPriorityBenchmark
+	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
+		return taskPriorityBurn
+	case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-interconnect", "nvidia-bandwidth", "memory", "storage", "cpu",
+		"amd", "amd-mem", "amd-bandwidth":
+		if params.StressMode {
+			return taskPriorityValidateStress
+		}
+		return taskPriorityValidate
+	}
+	return 0
+}
+
 func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) {
 	if len(gpus) == 0 {
 		return nil, fmt.Errorf("no NVIDIA GPUs detected")
@@ -209,6 +244,14 @@ func joinTaskIndices(indices []int) string {
 	return strings.Join(parts, ",")
 }

+func formatGPUIndexList(indices []int) string { // e.g. []int{0, 2} -> "0,2"
+	parts := make([]string, 0, len(indices))
+	for _, idx := range indices {
+		parts = append(parts, strconv.Itoa(idx))
+	}
+	return strings.Join(parts, ",")
+}
+
 func formatSplitTaskName(baseName, selectionLabel string) string {
 	baseName = strings.TrimSpace(baseName)
 	selectionLabel = strings.TrimSpace(selectionLabel)
@@ -449,6 +492,7 @@ func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
 		ID:        newJobID("audit"),
 		Name:      "Audit",
 		Target:    "audit",
+		Priority:  defaultTaskPriority("audit", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -487,6 +531,8 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			StressMode         bool     `json:"stress_mode"`
 			GPUIndices         []int    `json:"gpu_indices"`
 			ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
+			StaggerGPUStart    bool     `json:"stagger_gpu_start"`
+			ParallelGPUs       bool     `json:"parallel_gpus"`
 			Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
@@ -508,12 +554,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			StressMode:         body.StressMode,
 			GPUIndices:         body.GPUIndices,
 			ExcludeGPUIndices:  body.ExcludeGPUIndices,
+			StaggerGPUStart:    body.StaggerGPUStart,
+			ParallelGPUs:       body.ParallelGPUs,
 			Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
 			PlatformComponents: body.PlatformComponents,
 		}
-		tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target)
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "sat-"+target)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
 			return
@@ -525,57 +573,208 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 	}
 }

-func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
+func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if h.opts.App == nil {
+			writeError(w, http.StatusServiceUnavailable, "app not configured")
+			return
+		}
+
+		var body struct {
+			Profile           string `json:"profile"`
+			SizeMB            int    `json:"size_mb"`
+			GPUIndices        []int  `json:"gpu_indices"`
+			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+			RunNCCL           *bool  `json:"run_nccl"`
+			ParallelGPUs      *bool  `json:"parallel_gpus"`
+			RampUp            *bool  `json:"ramp_up"`
+			DisplayName       string `json:"display_name"`
+		}
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
+		}
+
+		runNCCL := true
+		if body.RunNCCL != nil {
+			runNCCL = *body.RunNCCL
+		}
+		parallelGPUs := false
+		if body.ParallelGPUs != nil {
+			parallelGPUs = *body.ParallelGPUs
+		}
+		rampUp := false
+		if body.RampUp != nil {
+			rampUp = *body.RampUp
+		}
+		// Build a descriptive base name that includes profile and mode so the task
+		// list is self-explanatory without opening individual task detail pages.
+		profile := strings.TrimSpace(body.Profile)
+		if profile == "" {
+			profile = "standard"
+		}
+		name := taskDisplayName(target, "", "")
+		if strings.TrimSpace(body.DisplayName) != "" {
+			name = body.DisplayName
+		}
+		// Append profile tag.
+		name = fmt.Sprintf("%s · %s", name, profile)
+
+		if target == "nvidia-bench-power" && parallelGPUs {
+			writeError(w, http.StatusBadRequest, "power / thermal fit benchmark uses sequential or ramp-up modes only")
+			return
+		}
+
+		if rampUp && len(body.GPUIndices) > 1 {
+			// Ramp-up mode: RunNvidiaPowerBench internally ramps from 1 to N GPUs
+			// in Phase 2 (one additional GPU per step). A single task with all
+			// selected GPUs is sufficient — spawning N tasks with growing subsets
+			// would repeat all earlier steps redundantly.
+			gpus, err := apiListNvidiaGPUs(h.opts.App)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			if len(resolved) < 2 {
+				// Fall through to normal single-task path.
+				rampUp = false
+			} else {
+				now := time.Now()
+				rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
+				taskName := fmt.Sprintf("%s · ramp 1–%d · GPU %s", name, len(resolved), formatGPUIndexList(resolved))
+				t := &Task{
+					ID:        newJobID("bee-bench-nvidia"),
+					Name:      taskName,
+					Target:    target,
+					Priority:  defaultTaskPriority(target, taskParams{}),
+					Status:    TaskPending,
+					CreatedAt: now,
+					params: taskParams{
+						GPUIndices:       append([]int(nil), resolved...),
+						SizeMB:           body.SizeMB,
+						BenchmarkProfile: body.Profile,
+						RunNCCL:          runNCCL,
+						ParallelGPUs:     true,
+						RampTotal:        len(resolved),
+						RampRunID:        rampRunID,
+						DisplayName:      taskName,
+					},
+				}
+				globalQueue.enqueue(t)
+				writeTaskRunResponse(w, []*Task{t})
+				return
+			}
+		}
+
+		// For non-ramp tasks append mode tag.
+		if parallelGPUs {
+			name = fmt.Sprintf("%s · parallel", name)
+		} else {
+			name = fmt.Sprintf("%s · sequential", name)
+		}
+
+		params := taskParams{
+			GPUIndices:        body.GPUIndices,
+			ExcludeGPUIndices: body.ExcludeGPUIndices,
+			SizeMB:            body.SizeMB,
+			BenchmarkProfile:  body.Profile,
+			RunNCCL:           runNCCL,
+			ParallelGPUs:      parallelGPUs,
+			DisplayName:       body.DisplayName,
+		}
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "bee-bench-nvidia")
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		for _, t := range tasks {
+			globalQueue.enqueue(t)
+		}
+		writeTaskRunResponse(w, tasks)
+	}
+}
+
+func (h *handler) handleAPIBenchmarkAutotuneRun() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if h.opts.App == nil {
+			writeError(w, http.StatusServiceUnavailable, "app not configured")
+			return
+		}
+		var body struct {
+			Profile       string `json:"profile"`
+			BenchmarkKind string `json:"benchmark_kind"`
+			SizeMB        int    `json:"size_mb"`
+		}
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
+		}
+		profile := strings.TrimSpace(body.Profile)
+		if profile == "" {
+			profile = "standard"
+		}
+		benchmarkKind := strings.TrimSpace(body.BenchmarkKind)
+		if benchmarkKind == "" {
+			benchmarkKind = "power-fit"
+		}
+		now := time.Now()
+		taskName := fmt.Sprintf("NVIDIA Benchmark Autotune · %s · %s", profile, benchmarkKind)
+		t := &Task{
+			ID:        newJobID("bee-bench-autotune"),
+			Name:      taskName,
+			Target:    "nvidia-bench-autotune",
+			Priority:  defaultTaskPriority("nvidia-bench-autotune", taskParams{}),
+			Status:    TaskPending,
+			CreatedAt: now,
+			params: taskParams{
+				BenchmarkProfile: profile,
+				BenchmarkKind:    benchmarkKind,
+				SizeMB:           body.SizeMB,
+				DisplayName:      taskName,
+			},
+		}
+		globalQueue.enqueue(t)
+		writeTaskRunResponse(w, []*Task{t})
+	}
+}
+
+func (h *handler) handleAPIBenchmarkAutotuneStatus(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-
-	var body struct {
-		Profile           string `json:"profile"`
-		SizeMB            int    `json:"size_mb"`
-		GPUIndices        []int  `json:"gpu_indices"`
-		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
-		RunNCCL           *bool  `json:"run_nccl"`
-		ParallelGPUs      *bool  `json:"parallel_gpus"`
-		DisplayName       string `json:"display_name"`
-	}
-	if r.Body != nil {
-		if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
-			writeError(w, http.StatusBadRequest, "invalid request body")
+	cfg, err := h.opts.App.LoadBenchmarkPowerAutotune()
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) { // also matches wrapped errors, unlike os.IsNotExist
+			w.Header().Set("Content-Type", "application/json") // no early WriteHeader: it would freeze the headers
+			writeJSON(w, map[string]any{
+				"configured": false,
+				"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
+			})
 			return
 		}
-	}
-
-	runNCCL := true
-	if body.RunNCCL != nil {
-		runNCCL = *body.RunNCCL
-	}
-	parallelGPUs := false
-	if body.ParallelGPUs != nil {
-		parallelGPUs = *body.ParallelGPUs
-	}
-	name := taskDisplayName("nvidia-benchmark", "", "")
-	if strings.TrimSpace(body.DisplayName) != "" {
-		name = body.DisplayName
-	}
-	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
-		GPUIndices:        body.GPUIndices,
-		ExcludeGPUIndices: body.ExcludeGPUIndices,
-		SizeMB:            body.SizeMB,
-		BenchmarkProfile:  body.Profile,
-		RunNCCL:           runNCCL,
-		ParallelGPUs:      parallelGPUs,
-		DisplayName:       body.DisplayName,
-	}, name, h.opts.App, "benchmark-nvidia")
-	if err != nil {
-		writeError(w, http.StatusBadRequest, err.Error())
+		writeError(w, http.StatusInternalServerError, err.Error())
 		return
 	}
-	for _, t := range tasks {
-		globalQueue.enqueue(t)
-	}
-	writeTaskRunResponse(w, tasks)
+	w.Header().Set("Content-Type", "application/json") // the 200 status is implied by the first body write
+	writeJSON(w, map[string]any{
+		"configured": true,
+		"config":     cfg,
+		"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
+	})
+}
+
+func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf").ServeHTTP(w, r)
 }

 func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
@@ -610,6 +809,9 @@ func (h *handler) handleAPISATAbort(w http.ResponseWriter, r *http.Request) {
 			if t.job != nil {
 				t.job.abort()
 			}
+			if taskMayLeaveOrphanWorkers(t.Target) {
+				platform.KillTestWorkers()
+			}
 			t.Status = TaskCancelled
 			now := time.Now()
 			t.DoneAt = &now
@@ -950,25 +1152,62 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	status := h.opts.App.LiveBootSource()
+	status := h.currentRAMStatus()
 	w.Header().Set("Content-Type", "application/json")
 	_ = json.NewEncoder(w).Encode(status)
 }

+type ramStatusResponse struct {
+	platform.LiveMediaRAMState
+	InstallTaskActive bool   `json:"install_task_active,omitempty"`
+	CopyTaskActive    bool   `json:"copy_task_active,omitempty"`
+	CanStartTask      bool   `json:"can_start_task,omitempty"`
+	BlockedReason     string `json:"blocked_reason,omitempty"`
+}
+
+// currentRAMStatus combines the live-media RAM state with queue activity and
+// reports whether a copy-to-RAM task may start or, if not, why it is blocked.
+func (h *handler) currentRAMStatus() ramStatusResponse {
+	media := h.opts.App.LiveMediaRAMState()
+	out := ramStatusResponse{LiveMediaRAMState: media}
+	// Cases are ordered by precedence; only the first matching one applies.
+	switch {
+	case globalQueue.hasActiveTarget("install"):
+		out.InstallTaskActive = true
+		out.BlockedReason = "install to disk is already running"
+	case globalQueue.hasActiveTarget("install-to-ram"):
+		out.CopyTaskActive = true
+		out.BlockedReason = "install to RAM task is already pending or running"
+	case media.InRAM:
+		out.BlockedReason = "system is already running from RAM"
+	case media.CanStartCopy:
+		out.CanStartTask = true
+	default:
+		// Nothing in the queue blocks the copy, so surface the media-level reason.
+		out.BlockedReason = media.Message
+	}
+	return out
+}
+
 func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	if globalQueue.hasActiveTarget("install") {
-		writeError(w, http.StatusConflict, "install to disk is already running")
+	status := h.currentRAMStatus()
+	if !status.CanStartTask {
+		msg := strings.TrimSpace(status.BlockedReason)
+		if msg == "" {
+			msg = "install to RAM is not available"
+		}
+		writeError(w, http.StatusConflict, msg)
 		return
 	}
 	t := &Task{
 		ID:        newJobID("install-to-ram"),
 		Name:      "Install to RAM",
 		Target:    "install-to-ram",
-		Priority:  10,
+		Priority:  defaultTaskPriority("install-to-ram", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -1083,7 +1322,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
 		ID:        newJobID("install"),
 		Name:      "Install to Disk",
 		Target:    "install",
-		Priority:  20,
+		Priority:  defaultTaskPriority("install", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 		params: taskParams{
@@ -1359,6 +1598,11 @@ func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Reques
 	writeJSON(w, map[string]string{"status": "rolled back"})
 }

+func (h *handler) handleAPIBenchmarkResults(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	fmt.Fprint(w, renderBenchmarkResultsCard(h.opts.ExportDir))
+}
+
 func (h *handler) rollbackPendingNetworkChange() error {
 	h.pendingNetMu.Lock()
 	pnc := h.pendingNet
@@ -1375,4 +1619,3 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	}
 	return nil
 }
-
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -39,6 +39,9 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
 		t.Fatalf("burn profile=%q want smoke", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("priority=%d want %d", got, taskPriorityValidate)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
@@ -61,7 +64,7 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -75,8 +78,8 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
 	}
 	task := globalQueue.tasks[0]
-	if task.Target != "nvidia-benchmark" {
-		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
+	if task.Target != "nvidia-bench-perf" {
+		t.Fatalf("target=%q want nvidia-bench-perf", task.Target)
 	}
 	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
 		t.Fatalf("gpu indices=%v want [1 3]", got)
@@ -84,6 +87,9 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	if task.params.RunNCCL {
 		t.Fatal("RunNCCL should reflect explicit false from request")
 	}
+	if task.Priority != taskPriorityBenchmark {
+		t.Fatalf("priority=%d want %d", task.Priority, taskPriorityBenchmark)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
@@ -107,7 +113,7 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -133,6 +139,94 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+}
+
+func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T) {
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+	prevList := apiListNvidiaGPUs
+	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
+		return []platform.NvidiaGPU{
+			{Index: 0, Name: "NVIDIA H100 PCIe"},
+			{Index: 1, Name: "NVIDIA H100 PCIe"},
+			{Index: 2, Name: "NVIDIA H100 PCIe"},
+		}, nil
+	}
+	t.Cleanup(func() { apiListNvidiaGPUs = prevList })
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/power/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"ramp_up":true}`))
+	rec := httptest.NewRecorder()
+
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power").ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	// Ramp-up mode creates a single task that handles the 1→N GPU ramp internally
+	// (spawning N separate tasks would redundantly repeat all earlier ramp steps).
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1 (ramp-up uses single task)", len(globalQueue.tasks))
+	}
+	task := globalQueue.tasks[0]
+	if task.Target != "nvidia-bench-power" {
+		t.Fatalf("task target=%q want nvidia-bench-power", task.Target)
+	}
+	if task.Priority != taskPriorityBenchmark {
+		t.Fatalf("task priority=%d want %d", task.Priority, taskPriorityBenchmark)
+	}
+	if task.params.RampTotal != 3 {
+		t.Fatalf("task RampTotal=%d want 3", task.params.RampTotal)
+	}
+}
+
+func TestHandleAPIBenchmarkAutotuneRunQueuesTask(t *testing.T) {
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/autotune/run", strings.NewReader(`{"profile":"standard","benchmark_kind":"power-fit"}`))
+	rec := httptest.NewRecorder()
+
+	h.handleAPIBenchmarkAutotuneRun().ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
+	}
+	task := globalQueue.tasks[0]
+	if task.Target != "nvidia-bench-autotune" {
+		t.Fatalf("task target=%q want nvidia-bench-autotune", task.Target)
+	}
+	if task.params.BenchmarkKind != "power-fit" {
+		t.Fatalf("task benchmark kind=%q want power-fit", task.params.BenchmarkKind)
+	}
 }

 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
@@ -175,6 +269,41 @@ func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityValidate)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityValidate)
+	}
+}
+
+func TestDefaultTaskPriorityOrder(t *testing.T) {
+	got := []int{
+		defaultTaskPriority("install-to-ram", taskParams{}),
+		defaultTaskPriority("audit", taskParams{}),
+		defaultTaskPriority("cpu", taskParams{}),
+		defaultTaskPriority("cpu", taskParams{StressMode: true}),
+		defaultTaskPriority("nvidia-stress", taskParams{}),
+		defaultTaskPriority("nvidia-bench-perf", taskParams{}),
+		defaultTaskPriority("nvidia-bench-power", taskParams{}),
+	}
+	want := []int{
+		taskPriorityInstallToRAM,
+		taskPriorityAudit,
+		taskPriorityValidate,
+		taskPriorityValidateStress,
+		taskPriorityBurn,
+		taskPriorityBenchmark,
+		taskPriorityBenchmark,
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("priority[%d]=%d want %d", i, got[i], want[i])
+		}
+	}
+	if !(got[0] > got[1] && got[1] > got[2] && got[2] > got[3] && got[3] > got[4] && got[4] > got[5] && got[5] == got[6]) {
+		t.Fatalf("priority order=%v", got)
+	}
 }

 func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -83,6 +83,10 @@ func renderMetricChartSVG(title string, labels []string, times []time.Time, data
 		}
 	}

+	// Downsample to at most ~1400 points (one per pixel) before building SVG.
+	times, datasets = downsampleTimeSeries(times, datasets, 1400)
+	pointCount = len(times)
+
 	statsLabel := chartStatsLabel(datasets)

 	legendItems := []metricChartSeries{}
@@ -196,6 +200,19 @@ func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, s
 		}
 	}

+	// Downsample to at most ~1400 points before building SVG.
+	{
+		datasets := make([][]float64, len(series))
+		for i := range series {
+			datasets[i] = series[i].Values
+		}
+		times, datasets = downsampleTimeSeries(times, datasets, 1400)
+		pointCount = len(times)
+		for i := range series {
+			series[i].Values = datasets[i]
+		}
+	}
+
 	scales := make([]chartScale, len(series))
 	for i := range series {
 		min, max := chartSeriesBounds(series[i].Values)
@@ -445,6 +462,127 @@ func synthesizeChartTimes(times []time.Time, count int) []time.Time {
 	return out
 }

+// renderStackedMetricChartSVG renders a stacked area chart in which every
+// dataset is drawn on top of the previous one, so each band shows one series'
+// contribution (for example one PSU) and the top edge is the combined total.
+// Ragged datasets are zero-padded or truncated on a copy; inputs stay intact.
+func renderStackedMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
+	pointCount := len(labels)
+	if len(times) > pointCount {
+		pointCount = len(times)
+	}
+	if pointCount == 0 {
+		pointCount = 1
+		labels = []string{""}
+		times = []time.Time{{}}
+	}
+	if len(labels) < pointCount {
+		padded := make([]string, pointCount)
+		copy(padded, labels)
+		labels = padded
+	}
+	if len(times) < pointCount {
+		times = synthesizeChartTimes(times, pointCount)
+	}
+	// One sample per time point for every series; copy stops at the shorter slice.
+	aligned := make([][]float64, len(datasets))
+	for i, ds := range datasets {
+		aligned[i] = make([]float64, len(times))
+		copy(aligned[i], ds)
+	}
+
+	times, aligned = downsampleTimeSeries(times, aligned, 1400)
+	pointCount = len(times)
+
+	// Build cumulative sums per time point.
+	cumulative := make([][]float64, len(aligned)+1)
+	for i := range cumulative {
+		cumulative[i] = make([]float64, pointCount)
+	}
+	for i, ds := range aligned {
+		for j, v := range ds {
+			cumulative[i+1][j] = cumulative[i][j] + v
+		}
+	}
+
+	// Scale and stats label are both based on the total (top cumulative row).
+	total := cumulative[len(cumulative)-1]
+	if yMax == nil {
+		yMax = autoMax120(total)
+	}
+	scale := singleAxisChartScale([][]float64{total}, floatPtr(0), yMax)
+	statsLabel := chartStatsLabel([][]float64{total})
+
+	legendItems := make([]metricChartSeries, len(aligned))
+	for i, ds := range aligned {
+		legendItems[i] = metricChartSeries{Color: metricChartPalette[i%len(metricChartPalette)], Values: ds}
+		if i < len(names) { // surplus names are ignored, missing ones stay blank
+			legendItems[i].Name = names[i]
+		}
+	}
+
+	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
+	start, end := chartTimeBounds(times)
+
+	var b strings.Builder
+	writeSVGOpen(&b, layout.Width, layout.Height)
+	writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
+	writeTimelineIdleSpans(&b, layout, start, end, timeline)
+	writeVerticalGrid(&b, layout, times, pointCount, 8)
+	writeHorizontalGrid(&b, layout, scale)
+	writeTimelineBoundaries(&b, layout, start, end, timeline)
+	writePlotBorder(&b, layout)
+	writeSingleAxisY(&b, layout, scale)
+	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
+
+	// Draw stacked areas from top to bottom so lower layers are visible.
+	for i := len(aligned) - 1; i >= 0; i-- {
+		writeStackedArea(&b, layout, times, start, end, cumulative[i], cumulative[i+1], scale, legendItems[i].Color)
+	}
+	// Draw border polylines on top.
+	for i := len(aligned) - 1; i >= 0; i-- {
+		writeSeriesPolyline(&b, layout, times, start, end, cumulative[i+1], scale, legendItems[i].Color)
+	}
+
+	writeLegend(&b, layout, legendItems)
+	writeSVGClose(&b)
+	return []byte(b.String()), nil
+}
+
+// writeStackedArea fills the band between two cumulative series: it traces
+// top left-to-right, then baseline right-to-left, and emits the closed shape as
+// an SVG polygon in the given color at 55% opacity. Nothing is written when top
+// is empty; a baseline shorter than top is replaced by an all-zero baseline.
+func writeStackedArea(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, baseline, top []float64, scale chartScale, color string) {
+	count := len(top)
+	if count == 0 {
+		return
+	}
+	if len(baseline) < count {
+		baseline = make([]float64, count)
+	}
+
+	var path strings.Builder
+	// vertex appends one "x,y" pair, space-separated from any earlier pair.
+	vertex := func(idx int, value float64) {
+		px := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
+		py := chartYForValue(valueClamp(value, scale), scale, layout.PlotTop, layout.PlotBottom)
+		if path.Len() > 0 {
+			path.WriteByte(' ')
+		}
+		path.WriteString(strconv.FormatFloat(px, 'f', 1, 64))
+		path.WriteByte(',')
+		path.WriteString(strconv.FormatFloat(py, 'f', 1, 64))
+	}
+	for idx, value := range top {
+		vertex(idx, value)
+	}
+	for idx := count - 1; idx >= 0; idx-- {
+		vertex(idx, baseline[idx])
+	}
+	fmt.Fprintf(b, `<polygon points="%s" fill="%s" fill-opacity="0.55" stroke="none"/>`+"\n", path.String(), color)
+}
+
 func writeSVGOpen(b *strings.Builder, width, height int) {
 	fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
 }
@@ -626,6 +764,87 @@ func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end
 	b.WriteString(`</g>` + "\n")
 }

+// downsampleTimeSeries reduces the time series to at most maxPts points using
+// min-max bucketing. Each bucket contributes the index of its min and max value
+// (using the first full-length dataset as the reference series). All parallel
+// datasets are sampled at those same indices so all series stay aligned;
+// datasets whose length differs from len(times) are passed through untouched.
+// If len(times) <= maxPts, or maxPts <= 0, the inputs are returned unchanged.
+func downsampleTimeSeries(times []time.Time, datasets [][]float64, maxPts int) ([]time.Time, [][]float64) {
+	n := len(times)
+	if n <= maxPts || maxPts <= 0 {
+		return times, datasets
+	}
+	buckets := maxPts / 2
+	if buckets < 1 {
+		buckets = 1
+	}
+	// Use the first dataset that has the same length as times as the reference
+	// for deciding which two indices to keep per bucket.
+	var ref []float64
+	for _, ds := range datasets {
+		if len(ds) == n {
+			ref = ds
+			break
+		}
+	}
+	selected := make([]int, 0, maxPts)
+	bucketSize := float64(n) / float64(buckets)
+	for b := 0; b < buckets; b++ {
+		lo := int(math.Round(float64(b) * bucketSize))
+		hi := int(math.Round(float64(b+1) * bucketSize))
+		if hi > n {
+			hi = n
+		}
+		if lo >= hi {
+			continue
+		}
+		// Without a reference series keep the bucket's first and last index.
+		first, second := lo, hi-1
+		if ref != nil {
+			minIdx, maxIdx := lo, lo
+			for i := lo + 1; i < hi; i++ {
+				if ref[i] < ref[minIdx] {
+					minIdx = i
+				}
+				if ref[i] > ref[maxIdx] {
+					maxIdx = i
+				}
+			}
+			// Emit the two extremes in chronological order.
+			first, second = minIdx, maxIdx
+			if first > second {
+				first, second = second, first
+			}
+		}
+		selected = append(selected, first)
+		if second != first {
+			selected = append(selected, second)
+		}
+	}
+	// maxPts == 1 still yields one bucket with two candidates; enforce the cap.
+	if len(selected) > maxPts {
+		selected = selected[:maxPts]
+	}
+	outTimes := make([]time.Time, len(selected))
+	for i, idx := range selected {
+		outTimes[i] = times[idx]
+	}
+	outDatasets := make([][]float64, len(datasets))
+	for d, ds := range datasets {
+		if len(ds) != n {
+			outDatasets[d] = ds
+			continue
+		}
+		out := make([]float64, len(selected))
+		for i, idx := range selected {
+			out[i] = ds[idx]
+		}
+		outDatasets[d] = out
+	}
+	return outTimes, outDatasets
+}
+
 func chartXForTime(ts, start, end time.Time, left, right int) float64 {
 	if !end.After(start) {
 		return float64(left+right) / 2
--- a/audit/internal/webui/jobs.go
+++ b/audit/internal/webui/jobs.go
@@ -1,6 +1,9 @@
 package webui

 import (
+	"bufio"
+	"fmt"
+	"io"
 	"os"
 	"strings"
 	"sync"
@@ -17,6 +20,25 @@ type jobState struct {
 	cancel       func() // optional cancel function; nil if job is not cancellable
 	logPath      string
 	serialPrefix string
+	logFile      *os.File    // kept open for the task lifetime to avoid per-line open/close
+	logBuf       *bufio.Writer
+}
+
+// readTaskLogFile loads the task log at path, rejecting anything larger than 50 MB.
+func readTaskLogFile(path string) ([]byte, error) {
+	src, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer src.Close()
+	content, err := io.ReadAll(io.LimitReader(src, 50<<20+1))
+	switch {
+	case err != nil:
+		return nil, err
+	case int64(len(content)) > 50<<20:
+		return nil, fmt.Errorf("task log %s too large (exceeds 50 MB)", path)
+	}
+	return content, nil
 }

 // abort cancels the job if it has a cancel function and is not yet done.
@@ -35,7 +57,7 @@ func (j *jobState) append(line string) {
 	defer j.mu.Unlock()
 	j.lines = append(j.lines, line)
 	if j.logPath != "" {
-		appendJobLog(j.logPath, line)
+		j.writeLogLineLocked(line)
 	}
 	if j.serialPrefix != "" {
 		taskSerialWriteLine(j.serialPrefix + line)
@@ -48,6 +70,35 @@ func (j *jobState) append(line string) {
 	}
 }

+// writeLogLineLocked appends line plus a newline to the persistent task log,
+// opening the file on first use; if it cannot be opened the line is dropped.
+// The caller must hold j.mu. Output goes through a 64 KiB buffered writer, so it
+// may not reach the file until the buffer fills or the log is flushed.
+func (j *jobState) writeLogLineLocked(line string) {
+	if j.logFile == nil {
+		opened, err := os.OpenFile(j.logPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
+		if err != nil {
+			return
+		}
+		j.logFile, j.logBuf = opened, bufio.NewWriterSize(opened, 64*1024)
+	}
+	_, _ = j.logBuf.WriteString(line + "\n")
+}
+
+// closeLog flushes buffered log output and closes the log file, clearing both
+// fields afterwards. Flush and close errors are deliberately ignored.
+func (j *jobState) closeLog() {
+	j.mu.Lock()
+	defer j.mu.Unlock()
+	if buffered := j.logBuf; buffered != nil {
+		_ = buffered.Flush()
+	}
+	if file := j.logFile; file != nil {
+		_ = file.Close()
+		j.logFile, j.logBuf = nil, nil
+	}
+}
+
 func (j *jobState) finish(errMsg string) {
 	j.mu.Lock()
 	defer j.mu.Unlock()
@@ -119,7 +170,7 @@ func newTaskJobState(logPath string, serialPrefix ...string) *jobState {
 	if logPath == "" {
 		return j
 	}
-	data, err := os.ReadFile(logPath)
+	data, err := readTaskLogFile(logPath)
 	if err != nil || len(data) == 0 {
 		return j
 	}
--- a/audit/internal/webui/kmsg_watcher.go
+++ b/audit/internal/webui/kmsg_watcher.go
@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
 // isSATTarget returns true for task targets that run hardware acceptance tests.
 func isSATTarget(target string) bool {
 	switch target {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
 		"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
 		"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
 		"platform-stress":
--- a/audit/internal/webui/layout.go
+++ b/audit/internal/webui/layout.go
@@ -0,0 +1,137 @@
+package webui
+
+import (
+	"fmt"
+	"html"
+	"os"
+	"strings"
+)
+
+func layoutHead(title string) string {
+	return `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>` + html.EscapeString(title) + `</title>
+<style>
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6);--accent:#2185d0;--accent-dark:#1678c2;--crit-bg:#fff6f6;--crit-fg:#9f3a38;--crit-border:#e0b4b4;--ok-bg:#fcfff5;--ok-fg:#2c662d;--warn-bg:#fffaf3;--warn-fg:#573a08}
+*{box-sizing:border-box;margin:0;padding:0}
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);display:flex;min-height:100vh}
+a{color:var(--accent);text-decoration:none}
+/* Sidebar */
+.sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
+.sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
+.sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
+.sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)}
+.sidebar-badge{margin:0 12px 12px;padding:5px 8px;border-radius:4px;font-size:11px;font-weight:600;text-align:center}
+.sidebar-badge-warn{background:#7a4f00;color:#f6c90e}
+.sidebar-badge-crit{background:#5c1a1a;color:#ff6b6b}
+.nav{flex:1}
+.nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
+.nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
+.nav-item.active{color:#fff;background:rgba(33,133,208,.25);border-left-color:var(--accent)}
+/* Content */
+.main{flex:1;display:flex;flex-direction:column;overflow:auto}
+.topbar{padding:13px 24px;background:#1b1c1d;display:flex;align-items:center;gap:12px}
+.topbar h1{font-size:16px;font-weight:700;color:rgba(255,255,255,.9)}
+.content{padding:24px;flex:1}
+/* Cards */
+.card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);margin-bottom:16px;overflow:hidden}
+.card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px;display:flex;align-items:center;gap:8px}
+.card-head-actions{justify-content:space-between}
+.card-head-buttons{display:flex;align-items:center;gap:8px;margin-left:auto;flex-wrap:wrap}
+.card-body{padding:16px}
+/* Buttons */
+.btn{display:inline-flex;align-items:center;gap:6px;padding:8px 16px;border-radius:4px;font-size:13px;font-weight:700;cursor:pointer;border:none;transition:background .1s;font-family:inherit}
+.btn-primary{background:var(--accent);color:#fff}.btn-primary:hover{background:var(--accent-dark)}
+.btn-danger{background:#db2828;color:#fff}.btn-danger:hover{background:#b91c1c}
+.btn-secondary{background:var(--surface-2);color:var(--ink);border:1px solid var(--border)}.btn-secondary:hover{background:#eee}
+.btn-sm{padding:5px 10px;font-size:12px}
+/* Tables */
+table{width:100%;border-collapse:collapse;font-size:13px;background:var(--surface)}
+th{text-align:left;padding:9px 14px;color:var(--ink);font-weight:700;background:var(--surface-2);border-bottom:1px solid var(--border-lite)}
+td{padding:9px 14px;border-top:1px solid var(--border-lite)}
+tr:first-child td{border-top:0}
+tbody tr:hover td{background:rgba(0,0,0,.03)}
+/* Status badges */
+.badge{display:inline-block;padding:2px 9px;border-radius:4px;font-size:11px;font-weight:700}
+.badge-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
+.badge-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
+.badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
+.badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
+/* Component chips — one small square per device */
+.chips{display:inline-flex;flex-wrap:wrap;gap:3px;align-items:center;vertical-align:middle}
+.chip{display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:3px;font-size:10px;font-weight:800;cursor:default;font-family:monospace;letter-spacing:0;user-select:none}
+.chip-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
+.chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
+.chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
+.chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
+/* Output terminal */
+.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
+.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
+/* Forms */
+.form-row{margin-bottom:14px}
+.form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
+.form-row input,.form-row select{width:100%;padding:8px 10px;background:var(--surface);border:1px solid var(--border);border-radius:4px;color:var(--ink);font-size:13px;outline:none;font-family:inherit}
+.form-row input:focus,.form-row select:focus{border-color:var(--accent);box-shadow:0 0 0 2px rgba(33,133,208,.2)}
+/* Grid */
+.grid2{display:grid;grid-template-columns:1fr 1fr;gap:16px}
+.grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px}
+@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}.card-head-actions{align-items:flex-start;flex-direction:column}.card-head-buttons{margin-left:0}}
+/* iframe viewer */
+.viewer-frame{width:100%;height:calc(100vh - 160px);border:0;border-radius:4px;background:var(--surface-2)}
+/* Alerts */
+.alert{padding:10px 14px;border-radius:4px;font-size:13px;margin-bottom:14px}
+.alert-info{background:#dff0ff;border:1px solid #a9d4f5;color:#1e3a5f}
+.alert-warn{background:var(--warn-bg);border:1px solid #c9ba9b;color:var(--warn-fg)}
+</style>
+</head>
+<body>
+`
+}
+
+// layoutNav renders the left sidebar: logo, version label, an optional NVIDIA
+// GSP warning badge and the navigation links. active is the id of the entry
+// to highlight; a blank buildLabel is displayed as "dev".
+func layoutNav(active string, buildLabel string) string {
+	type navLink struct{ id, label, href, onclick string }
+	links := []navLink{
+		{id: "dashboard", label: "Dashboard", href: "/"},
+		{id: "audit", label: "Audit", href: "/audit"},
+		{id: "validate", label: "Validate", href: "/validate"},
+		{id: "burn", label: "Burn", href: "/burn"},
+		{id: "benchmark", label: "Benchmark", href: "/benchmark"},
+		{id: "tasks", label: "Tasks", href: "/tasks"},
+		{id: "tools", label: "Tools", href: "/tools"},
+	}
+	if strings.TrimSpace(buildLabel) == "" {
+		buildLabel = "dev"
+	}
+
+	var out strings.Builder
+	out.WriteString(`<aside class="sidebar">`)
+	out.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
+	out.WriteString(`<div class="sidebar-version">Version ` + html.EscapeString(buildLabel) + `</div>`)
+
+	// The badge is driven by the content of /run/bee-nvidia-mode; a missing or
+	// unreadable file, or any other content, simply means no badge.
+	if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
+		switch strings.TrimSpace(string(raw)) {
+		case "gsp-off":
+			out.WriteString(`<div class="sidebar-badge sidebar-badge-warn">NVIDIA GSP=off</div>`)
+		case "gsp-stuck":
+			out.WriteString(`<div class="sidebar-badge sidebar-badge-crit">NVIDIA GSP stuck — reboot</div>`)
+		}
+	}
+
+	out.WriteString(`<nav class="nav">`)
+	for _, link := range links {
+		class := "nav-item"
+		if link.id == active {
+			class += " active"
+		}
+		if link.onclick == "" {
+			fmt.Fprintf(&out, `<a class="%s" href="%s">%s</a>`, class, link.href, link.label)
+			continue
+		}
+		fmt.Fprintf(&out, `<a class="%s" href="%s" onclick="%s">%s</a>`,
+			class, link.href, link.onclick, link.label)
+	}
+	out.WriteString(`</nav>`)
+	out.WriteString(`</aside>`)
+	return out.String()
+}
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -53,6 +53,9 @@ CREATE TABLE IF NOT EXISTS sys_metrics (
  cpu_load_pct REAL,
  mem_load_pct REAL,
  power_w      REAL,
+  power_source TEXT,
+  power_mode   TEXT,
+  power_reason TEXT,
  PRIMARY KEY (ts)
 );
 CREATE TABLE IF NOT EXISTS gpu_metrics (
@@ -86,7 +89,16 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
 	if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
 		return err
 	}
-	return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
+	if err := ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL"); err != nil {
+		return err
+	}
+	if err := ensureMetricsColumn(db, "sys_metrics", "power_source", "TEXT"); err != nil {
+		return err
+	}
+	if err := ensureMetricsColumn(db, "sys_metrics", "power_mode", "TEXT"); err != nil {
+		return err
+	}
+	return ensureMetricsColumn(db, "sys_metrics", "power_reason", "TEXT")
 }

 func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
@@ -125,8 +137,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	defer func() { _ = tx.Rollback() }()

 	_, err = tx.Exec(
-		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
-		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason) VALUES(?,?,?,?,?,?,?)`,
+		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW, s.PowerSource, s.PowerMode, s.PowerReason,
 	)
 	if err != nil {
 		return err
@@ -161,14 +173,64 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	return tx.Commit()
 }

+// Downsample thins out older metrics to one sample per minute.
+//
+// Only rows with ts in the half-open window [deleteOlderThan, downsampleBefore)
+// are touched; anything at or after downsampleBefore keeps its full 5-second
+// resolution. Within the window, every 60-second bucket keeps the rows that
+// carry the bucket's smallest ts and drops the rest, which removes roughly
+// 92 % of the rows there while preserving the overall shape of every chart.
+//
+// A nil receiver, a nil database handle, or an empty window is a no-op. The
+// first failing DELETE aborts the run and its error is returned.
+//
+// Called hourly by the metrics collector background goroutine.
+func (m *MetricsDB) Downsample(downsampleBefore, deleteOlderThan time.Time) error {
+	if m == nil || m.db == nil {
+		return nil
+	}
+	lo, hi := deleteOlderThan.Unix(), downsampleBefore.Unix()
+	if hi <= lo {
+		return nil
+	}
+	tables := [...]string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"}
+	for i := range tables {
+		name := tables[i]
+		// ts / 60 is integer division and serves as the bucket ID; a row
+		// survives only if its ts is the minimum of its bucket.
+		query := `
+DELETE FROM ` + name + ` WHERE ts >= ? AND ts < ?
+  AND ts NOT IN (
+    SELECT MIN(ts) FROM ` + name + `
+    WHERE ts >= ? AND ts < ?
+    GROUP BY ts / 60
+  )`
+		if _, err := m.db.Exec(query, lo, hi, lo, hi); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Prune removes every row with ts earlier than before from all metrics
+// tables, then asks SQLite to checkpoint and truncate the WAL. The checkpoint
+// is best-effort: its result is ignored. A nil receiver or nil database
+// handle is a no-op.
+//
+// Called hourly by the metrics collector to keep the DB size bounded.
+func (m *MetricsDB) Prune(before time.Time) error {
+	if m == nil || m.db == nil {
+		return nil
+	}
+	cutoff := before.Unix()
+	tables := [...]string{"sys_metrics", "gpu_metrics", "fan_metrics", "temp_metrics"}
+	for i := range tables {
+		stmt := "DELETE FROM " + tables[i] + " WHERE ts < ?"
+		_, err := m.db.Exec(stmt, cutoff)
+		if err != nil {
+			return err
+		}
+	}
+	_, _ = m.db.Exec("PRAGMA wal_checkpoint(TRUNCATE)")
+	return nil
+}
+
 // LoadRecent returns up to n samples in chronological order (oldest first).
 func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
+	// The inner query picks the newest n rows; the outer ORDER BY puts them back
+	// in ascending ts order. IFNULL maps NULL power_* values to "" so they scan
+	// into plain string fields (presumably rows stored before those columns
+	// were added — confirm).
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
 }

 // LoadAll returns all persisted samples in chronological order (oldest first).
 func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
+	// The query has no placeholders, so no bind arguments are passed. A literal
+	// nil here would become a one-element argument list, which drivers that
+	// validate the argument count reject.
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics ORDER BY ts`)
 }

 // LoadBetween returns samples in chronological order within the given time window.
@@ -183,7 +245,7 @@ func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSamp
 		start, end = end, start
 	}
 	return m.loadSamples(
-		`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
+		`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
 		start.Unix(), end.Unix(),
 	)
 }
@@ -199,11 +261,14 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	type sysRow struct {
 		ts            int64
 		cpu, mem, pwr float64
+		powerSource   string
+		powerMode     string
+		powerReason   string
 	}
 	var sysRows []sysRow
 	for rows.Next() {
 		var r sysRow
-		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr, &r.powerSource, &r.powerMode, &r.powerReason); err != nil {
 			continue
 		}
 		sysRows = append(sysRows, r)
@@ -313,10 +378,13 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	samples := make([]platform.LiveMetricSample, len(sysRows))
 	for i, r := range sysRows {
 		s := platform.LiveMetricSample{
-			Timestamp:  time.Unix(r.ts, 0).UTC(),
-			CPULoadPct: r.cpu,
-			MemLoadPct: r.mem,
-			PowerW:     r.pwr,
+			Timestamp:   time.Unix(r.ts, 0).UTC(),
+			CPULoadPct:  r.cpu,
+			MemLoadPct:  r.mem,
+			PowerW:      r.pwr,
+			PowerSource: r.powerSource,
+			PowerMode:   r.powerMode,
+			PowerReason: r.powerReason,
 		}
 		for _, idx := range gpuIndices {
 			if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
--- a/audit/internal/webui/page_benchmark.go
+++ b/audit/internal/webui/page_benchmark.go
@@ -0,0 +1,613 @@
+package webui
+
+import (
+	"encoding/json"
+	"fmt"
+	"html"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"bee/audit/internal/app"
+	"bee/audit/internal/platform"
+)
+
+// benchmarkHistoryRun is one saved benchmark run as consumed by the results
+// table renderer (columns Run, Time, Status and one column per GPU).
+//
+// NOTE(review): the field notes below are inferred from names and from the
+// table layout — confirm against loadBenchmarkHistory.
+type benchmarkHistoryRun struct {
+	// Presumably when the run's result was generated.
+	generatedAt   time.Time
+	// Presumably generatedAt pre-formatted for display.
+	displayTime   string
+	// Presumably keyed by GPU index.
+	gpuScores     map[int]float64
+	// Presumably keyed by GPU index.
+	gpuStatuses   map[int]string
+	overallStatus string
+}
+
+func renderBenchmark(opts HandlerOptions) string {
+	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
+
+<div class="grid2">
+  <div class="card">
+    <div class="card-head">Benchmark Setup</div>
+    <div class="card-body">
+      <div class="form-row">
+        <label>Profile</label>
+        <select id="benchmark-profile">
+          <option value="standard" selected>Standard — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</option>
+          <option value="stability">Stability — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</option>
+          <option value="overnight">Overnight — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfOvernightSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerOvernightSec) + `</option>
+        </select>
+      </div>
+      <div class="form-row">
+        <label>GPU Selection</label>
+        <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
+          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectAll()">Select All</button>
+          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectNone()">Clear</button>
+        </div>
+        <div id="benchmark-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
+          <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
+        </div>
+      </div>
+      <label class="benchmark-cb-row">
+        <input type="radio" name="benchmark-mode" value="sequential" onchange="benchmarkUpdateSelectionNote()">
+        <span>Sequential — one GPU at a time</span>
+      </label>
+      <label class="benchmark-cb-row" id="benchmark-parallel-label">
+        <input type="radio" name="benchmark-mode" value="parallel" onchange="benchmarkUpdateSelectionNote()">
+        <span>Parallel — all selected GPUs simultaneously</span>
+      </label>
+      <label class="benchmark-cb-row" id="benchmark-ramp-label">
+        <input type="radio" name="benchmark-mode" value="ramp-up" checked onchange="benchmarkUpdateSelectionNote()">
+        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
+      </label>
+      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
+      <div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center">
+        <button id="benchmark-run-performance-btn" class="btn btn-primary" onclick="runNvidiaBenchmark('performance')" disabled>&#9654; Run Performance Benchmark</button>
+        <button id="benchmark-run-power-fit-btn" class="btn btn-secondary" onclick="runNvidiaBenchmark('power-fit')" disabled>&#9654; Run Power / Thermal Fit</button>
+        <button id="benchmark-run-autotune-btn" class="btn btn-secondary" onclick="runBenchmarkAutotune()">Autotune</button>
+      </div>
+      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
+      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
+      <div id="benchmark-autotune-status" style="margin-top:10px;font-size:12px;color:var(--muted)">Autotune status: loading…</div>
+      <div style="margin-top:6px;font-size:12px;color:var(--muted)">Autotune overwrites the saved system-power source and applies it to all new power charts and tests.</div>
+    </div>
+  </div>
+
+  <div class="card">
+    <div class="card-head">Method Split</div>
+    <div class="card-body">
+      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
+      <table>
+        <tr><th>Run Type</th><th>Engine</th><th>Question</th><th>Standard</th><th>Stability</th></tr>
+        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + `</td></tr>
+        <tr><td>Power / Thermal Fit</td><td><code>dcgmproftester</code> + <code>nvidia-smi -pl</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</td></tr>
+      </table>
+      <p style="font-size:12px;color:var(--muted);margin-top:10px">Timings are per full ramp-up run (1 GPU → all selected), measured on 4–8 GPU servers. Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
+    </div>
+  </div>
+</div>
+
+` + `<div id="benchmark-results-section">` + renderBenchmarkResultsCard(opts.ExportDir) + `</div>` + `
+
+<div id="benchmark-output" style="display:none;margin-top:16px" class="card">
+  <div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
+  <div class="card-body"><div id="benchmark-terminal" class="terminal"></div></div>
+</div>
+
+<style>
+.benchmark-cb-row { display:flex; align-items:flex-start; gap:8px; cursor:pointer; font-size:13px; }
+.benchmark-cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
+.benchmark-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
+.benchmark-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
+</style>
+
+<script>
+let benchmarkES = null;
+function benchmarkTaskIDs(payload) {
+  if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
+  if (payload && payload.task_id) return [payload.task_id];
+  return [];
+}
+function benchmarkSelectedGPUIndices() {
+  return Array.from(document.querySelectorAll('.benchmark-gpu-checkbox'))
+    .filter(function(el) { return el.checked && !el.disabled; })
+    .map(function(el) { return parseInt(el.value, 10); })
+    .filter(function(v) { return !Number.isNaN(v); })
+    .sort(function(a, b) { return a - b; });
+}
+function benchmarkMode() {
+  const el = document.querySelector('input[name="benchmark-mode"]:checked');
+  return el ? el.value : 'sequential';
+}
+function benchmarkUpdateSelectionNote() {
+  const selected = benchmarkSelectedGPUIndices();
+  const perfBtn = document.getElementById('benchmark-run-performance-btn');
+  const fitBtn = document.getElementById('benchmark-run-power-fit-btn');
+  const note = document.getElementById('benchmark-selection-note');
+  if (!selected.length) {
+    perfBtn.disabled = true;
+    fitBtn.disabled = true;
+    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
+    return;
+  }
+  perfBtn.disabled = false;
+  fitBtn.disabled = false;
+  const mode = benchmarkMode();
+  if (mode === 'ramp-up') {
+    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). Performance uses compute benchmark; Power / Thermal Fit uses dcgmproftester load with nvidia-smi power-limit search per step.';
+  } else if (mode === 'parallel') {
+    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously. Only the performance benchmark supports this mode.';
+  } else {
+    note.textContent = 'Sequential: each selected GPU benchmarked separately.';
+  }
+}
+function benchmarkRenderGPUList(gpus) {
+  const root = document.getElementById('benchmark-gpu-list');
+  if (!gpus || !gpus.length) {
+    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
+    benchmarkUpdateSelectionNote();
+    return;
+  }
+  root.innerHTML = gpus.map(function(gpu) {
+    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
+    return '<label class="benchmark-gpu-row">'
+      + '<input class="benchmark-gpu-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="benchmarkUpdateSelectionNote()">'
+      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
+      + '</label>';
+  }).join('');
+  benchmarkApplyMultiGPUState(gpus.length);
+  benchmarkUpdateSelectionNote();
+}
+function benchmarkApplyMultiGPUState(gpuCount) {
+  var multiValues = ['parallel', 'ramp-up'];
+  var radios = document.querySelectorAll('input[name="benchmark-mode"]');
+  radios.forEach(function(el) {
+    var isMulti = multiValues.indexOf(el.value) >= 0;
+    if (gpuCount < 2 && isMulti) {
+      el.disabled = true;
+      if (el.checked) {
+        var seq = document.querySelector('input[name="benchmark-mode"][value="sequential"]');
+        if (seq) seq.checked = true;
+      }
+      var label = el.closest('label');
+      if (label) label.style.opacity = '0.4';
+    } else {
+      el.disabled = false;
+      if (gpuCount >= 2 && el.value === 'ramp-up') el.checked = true;
+      var label = el.closest('label');
+      if (label) label.style.opacity = '';
+    }
+  });
+  benchmarkUpdateSelectionNote();
+}
+function benchmarkLoadGPUs() {
+  const status = document.getElementById('benchmark-run-status');
+  status.textContent = '';
+  fetch('/api/gpu/nvidia').then(function(r) {
+    return r.json().then(function(body) {
+      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
+      return body;
+    });
+  }).then(function(gpus) {
+    benchmarkRenderGPUList(gpus);
+  }).catch(function(err) {
+    document.getElementById('benchmark-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
+    benchmarkUpdateSelectionNote();
+  });
+}
+function benchmarkSelectAll() {
+  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = true; });
+  benchmarkUpdateSelectionNote();
+}
+function benchmarkSelectNone() {
+  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = false; });
+  benchmarkUpdateSelectionNote();
+}
+function runNvidiaBenchmark(kind) {
+  const selected = benchmarkSelectedGPUIndices();
+  const status = document.getElementById('benchmark-run-status');
+  if (!selected.length) {
+    status.textContent = 'Select at least one GPU.';
+    return;
+  }
+  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
+  const mode = benchmarkMode();
+  const rampUp = mode === 'ramp-up' && selected.length > 1;
+  const parallelGPUs = mode === 'parallel' && kind === 'performance';
+  if (kind === 'power-fit' && mode === 'parallel') {
+    status.textContent = 'Power / Thermal Fit supports sequential or ramp-up only.';
+    return;
+  }
+  const body = {
+    profile: document.getElementById('benchmark-profile').value || 'standard',
+    gpu_indices: selected,
+    run_nccl: kind === 'performance' && selected.length > 1,
+    parallel_gpus: parallelGPUs,
+    ramp_up: rampUp,
+    display_name: kind === 'power-fit' ? 'NVIDIA Power / Thermal Fit' : 'NVIDIA Performance Benchmark'
+  };
+  document.getElementById('benchmark-output').style.display = 'block';
+  document.getElementById('benchmark-title').textContent = '— ' + body.display_name + ' · ' + body.profile + ' [' + selected.join(', ') + ']';
+  const term = document.getElementById('benchmark-terminal');
+  term.textContent = 'Enqueuing ' + body.display_name + ' for GPUs ' + selected.join(', ') + '...\n';
+  status.textContent = 'Queueing...';
+  const endpoint = kind === 'power-fit' ? '/api/bee-bench/nvidia/power/run' : '/api/bee-bench/nvidia/perf/run';
+  fetch(endpoint, {
+    method: 'POST',
+    headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(body)
+  }).then(function(r) {
+    return r.json().then(function(payload) {
+      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
+      return payload;
+    });
+  }).then(function(d) {
+    const taskIds = benchmarkTaskIDs(d);
+    if (!taskIds.length) throw new Error('No benchmark task was queued.');
+    status.textContent = taskIds.length === 1 ? ('Task ' + taskIds[0] + ' queued.') : ('Queued ' + taskIds.length + ' tasks.');
+    const streamNext = function(idx, failures) {
+      if (idx >= taskIds.length) {
+        status.textContent = failures ? 'Completed with failures.' : 'Completed.';
+        return;
+      }
+      const taskId = taskIds[idx];
+      term.textContent += '\n[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming log...\n';
+      benchmarkES = new EventSource('/api/tasks/' + taskId + '/stream');
+      benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
+      benchmarkES.addEventListener('done', function(e) {
+        benchmarkES.close();
+        benchmarkES = null;
+        if (e.data) failures += 1;
+        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
+        term.scrollTop = term.scrollHeight;
+        const isLast = (idx + 1 >= taskIds.length);
+        streamNext(idx + 1, failures);
+        if (isLast) { benchmarkRefreshResults(); }
+      });
+      benchmarkES.onerror = function() {
+        if (benchmarkES) {
+          benchmarkES.close();
+          benchmarkES = null;
+        }
+        term.textContent += '\nERROR: stream disconnected.\n';
+        term.scrollTop = term.scrollHeight;
+        streamNext(idx + 1, failures + 1);
+      };
+    };
+    streamNext(0, 0);
+  }).catch(function(err) {
+    status.textContent = 'Error.';
+    term.textContent += 'ERROR: ' + err.message + '\n';
+  });
+}
+function benchmarkRenderAutotuneStatus(payload) {
+  const el = document.getElementById('benchmark-autotune-status');
+  if (!el) return;
+  if (!payload || !payload.configured || !payload.config) {
+    el.textContent = 'Autotune status: not configured. Temporary fallback source is used until autotune completes.';
+    return;
+  }
+  const cfg = payload.config || {};
+  const decision = payload.decision || {};
+  const updated = cfg.updated_at ? new Date(cfg.updated_at).toLocaleString() : 'unknown time';
+  const confidence = typeof cfg.confidence === 'number' ? (' · confidence ' + Math.round(cfg.confidence * 100) + '%') : '';
+  const effective = decision.effective_source ? (' · effective ' + decision.effective_source) : '';
+  const mode = decision.mode ? (' · mode ' + decision.mode) : '';
+  el.textContent = 'Autotune status: ' + cfg.selected_source + effective + mode + ' · updated ' + updated + confidence;
+}
+function loadBenchmarkAutotuneStatus() {
+  fetch('/api/bee-bench/nvidia/autotune/status')
+    .then(function(r) {
+      return r.json().then(function(body) {
+        if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
+        return body;
+      });
+    })
+    .then(function(body) { benchmarkRenderAutotuneStatus(body); })
+    .catch(function(err) {
+      const el = document.getElementById('benchmark-autotune-status');
+      if (el) el.textContent = 'Autotune status error: ' + err.message;
+    });
+}
+function runBenchmarkAutotune() {
+  // Queue an NVIDIA benchmark autotune task for the selected GPUs and mirror
+  // the first queued task's event stream into the benchmark terminal.
+  const gpuIndices = benchmarkSelectedGPUIndices();
+  const statusEl = document.getElementById('benchmark-run-status');
+  const terminal = document.getElementById('benchmark-terminal');
+  // Drop any stream left over from a previous run before starting a new one.
+  if (benchmarkES) {
+    benchmarkES.close();
+    benchmarkES = null;
+  }
+  document.getElementById('benchmark-output').style.display = 'block';
+  document.getElementById('benchmark-title').textContent = '— NVIDIA Benchmark Autotune';
+  terminal.textContent = 'Enqueuing benchmark autotune...\n';
+  statusEl.textContent = 'Queueing autotune...';
+  const request = {
+    profile: document.getElementById('benchmark-profile').value || 'standard',
+    // Mode 'parallel' selects the performance kind; every other mode selects power-fit.
+    benchmark_kind: benchmarkMode() === 'parallel' ? 'performance' : 'power-fit',
+    gpu_indices: gpuIndices
+  };
+  fetch('/api/bee-bench/nvidia/autotune/run', {
+    method: 'POST',
+    headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(request)
+  }).then(function(resp) {
+    return resp.json().then(function(data) {
+      if (resp.ok) return data;
+      throw new Error(data.error || ('HTTP ' + resp.status));
+    });
+  }).then(function(data) {
+    const queued = benchmarkTaskIDs(data);
+    if (!queued.length) throw new Error('No autotune task was queued.');
+    const firstId = queued[0];
+    statusEl.textContent = 'Autotune queued: ' + firstId;
+    benchmarkES = new EventSource('/api/tasks/' + firstId + '/stream');
+    benchmarkES.onmessage = function(ev) {
+      terminal.textContent += ev.data + '\n';
+      terminal.scrollTop = terminal.scrollHeight;
+    };
+    benchmarkES.addEventListener('done', function(ev) {
+      if (benchmarkES) {
+        benchmarkES.close();
+        benchmarkES = null;
+      }
+      // A non-empty payload on the done event is reported as the failure reason.
+      const failure = ev.data;
+      terminal.textContent += (failure ? '\nERROR: ' + failure : '\nCompleted.') + '\n';
+      statusEl.textContent = failure ? 'Autotune failed.' : 'Autotune completed.';
+      loadBenchmarkAutotuneStatus();
+    });
+  }).catch(function(failure) {
+    statusEl.textContent = 'Autotune error.';
+    terminal.textContent += 'ERROR: ' + failure.message + '\n';
+  });
+}
+benchmarkLoadGPUs();
+loadBenchmarkAutotuneStatus();
+function benchmarkRefreshResults() {
+  // Fetch /api/benchmark/results as text and inject it into the results
+  // section as HTML. Any failure is deliberately ignored.
+  const applyMarkup = function(markup) {
+    const section = document.getElementById('benchmark-results-section');
+    if (section) section.innerHTML = markup;
+  };
+  fetch('/api/benchmark/results')
+    .then(function(resp) { return resp.text(); })
+    .then(applyMarkup)
+    .catch(function() {});
+}
+</script>`
+}
+
+// renderBenchmarkResultsCard returns the performance results card followed,
+// after a newline, by the power results card, both built from the saved
+// benchmark results for exportDir.
+func renderBenchmarkResultsCard(exportDir string) string {
+	maxGPUIndex, history := loadBenchmarkHistory(exportDir)
+	perfCard := renderBenchmarkResultsCardFromRuns(
+		"Perf Results",
+		"Composite score by saved benchmark run and GPU.",
+		"No saved performance benchmark runs yet.",
+		maxGPUIndex,
+		history,
+	)
+	return perfCard + "\n" + renderPowerBenchmarkResultsCard(exportDir)
+}
+
+// renderBenchmarkResultsCardFromRuns renders a card titled title containing a
+// table with one row per run and one score column per GPU index in
+// 0..maxGPUIndex.
+//
+// When runs is empty the card holds only emptyMessage. A non-blank description
+// is shown above the table. Title, description, message, time and status text
+// are HTML-escaped. A blank overall status is displayed as "OK"; GPUs without a
+// score in a run are shown as a muted "-".
+func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
+	const (
+		critCell = ` style="color:var(--crit-fg,#9f3a38);font-weight:600"`
+		warnCell = ` style="color:var(--warn);font-weight:600"`
+	)
+	cardOpen := `<div class="card"><div class="card-head">` + html.EscapeString(title) + `</div><div class="card-body">`
+	if len(runs) == 0 {
+		return cardOpen + `<p style="color:var(--muted);font-size:13px">` + html.EscapeString(emptyMessage) + `</p></div></div>`
+	}
+
+	var out strings.Builder
+	out.WriteString(cardOpen)
+	if strings.TrimSpace(description) != "" {
+		out.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">` + html.EscapeString(description) + `</p>`)
+	}
+	out.WriteString(`<div style="overflow-x:auto"><table><thead><tr><th>Run</th><th>Time</th><th>Status</th>`)
+	for gpu := 0; gpu <= maxGPUIndex; gpu++ {
+		out.WriteString(`<th>GPU ` + strconv.Itoa(gpu) + `</th>`)
+	}
+	out.WriteString(`</tr></thead><tbody>`)
+
+	for n, entry := range runs {
+		label := entry.overallStatus
+		if label == "" {
+			label = "OK"
+		}
+		var labelColor string
+		switch label {
+		case "OK":
+			labelColor = "var(--ok)"
+		case "FAILED":
+			labelColor = "var(--crit-fg,#9f3a38)"
+		default:
+			labelColor = "var(--warn)"
+		}
+		out.WriteString(`<tr><td>#` + strconv.Itoa(n+1) + `</td><td>` + html.EscapeString(entry.displayTime) + `</td>`)
+		out.WriteString(`<td style="color:` + labelColor + `;font-weight:600">` + html.EscapeString(label) + `</td>`)
+
+		for gpu := 0; gpu <= maxGPUIndex; gpu++ {
+			score, scored := entry.gpuScores[gpu]
+			if !scored {
+				out.WriteString(`<td style="color:var(--muted)">-</td>`)
+				continue
+			}
+			// Any status other than blank, OK or FAILED is highlighted as a warning.
+			cellStyle := warnCell
+			switch entry.gpuStatuses[gpu] {
+			case "FAILED":
+				cellStyle = critCell
+			case "", "OK":
+				cellStyle = ""
+			}
+			out.WriteString(`<td` + cellStyle + `>` + fmt.Sprintf("%.2f", score) + `</td>`)
+		}
+		out.WriteString(`</tr>`)
+	}
+	out.WriteString(`</tbody></table></div></div></div>`)
+	return out.String()
+}
+
+// loadBenchmarkHistory locates every perf-*/result.json under the performance
+// benchmark directory and loads them via loadBenchmarkHistoryFromPaths.
+//
+// The directory is exportDir/bee-bench/perf, or app.DefaultBeeBenchPerfDir when
+// exportDir is blank. It returns (-1, nil) when the glob fails or matches
+// nothing.
+func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
+	perfDir := app.DefaultBeeBenchPerfDir
+	if strings.TrimSpace(exportDir) != "" {
+		perfDir = filepath.Join(exportDir, "bee-bench", "perf")
+	}
+	pattern := filepath.Join(perfDir, "perf-*", "result.json")
+	matches, globErr := filepath.Glob(pattern)
+	if globErr != nil || len(matches) == 0 {
+		return -1, nil
+	}
+	sort.Strings(matches)
+	return loadBenchmarkHistoryFromPaths(matches)
+}
+
+// loadBenchmarkHistoryFromPaths decodes each result file in paths into a
+// benchmarkHistoryRun and returns the runs ordered newest first by their
+// GeneratedAt time, together with the highest GPU index seen in any run
+// (-1 when no GPU was seen).
+//
+// Files that cannot be read or decoded as JSON are skipped.
+func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
+	highestGPU := -1
+	history := make([]benchmarkHistoryRun, 0, len(paths))
+	for _, p := range paths {
+		data, readErr := os.ReadFile(p)
+		if readErr != nil {
+			continue
+		}
+		var parsed platform.NvidiaBenchmarkResult
+		if json.Unmarshal(data, &parsed) != nil {
+			continue
+		}
+		entry := benchmarkHistoryRun{
+			generatedAt:   parsed.GeneratedAt,
+			displayTime:   parsed.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
+			gpuScores:     make(map[int]float64),
+			gpuStatuses:   make(map[int]string),
+			overallStatus: parsed.OverallStatus,
+		}
+		for _, g := range parsed.GPUs {
+			entry.gpuScores[g.Index] = g.Scores.CompositeScore
+			entry.gpuStatuses[g.Index] = g.Status
+			highestGPU = max(highestGPU, g.Index)
+		}
+		history = append(history, entry)
+	}
+	sort.Slice(history, func(a, b int) bool {
+		return history[a].generatedAt.After(history[b].generatedAt)
+	})
+	return highestGPU, history
+}
+
+// renderPowerBenchmarkResultsCard renders the "Power / Thermal Fit Results"
+// card from every power-*/result.json found under the power benchmark
+// directory (exportDir/bee-bench/power, or app.DefaultBeeBenchPowerDir when
+// exportDir is blank).
+//
+// The card shows a per-GPU table for the most recent run and, when more than
+// one run was loaded, a collapsible list of all runs ordered newest first.
+// Files that cannot be read or decoded are skipped; when nothing usable
+// remains, the same "no saved runs" placeholder card is returned as when no
+// file matches at all.
+func renderPowerBenchmarkResultsCard(exportDir string) string {
+	const emptyCard = `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
+
+	baseDir := app.DefaultBeeBenchPowerDir
+	if strings.TrimSpace(exportDir) != "" {
+		baseDir = filepath.Join(exportDir, "bee-bench", "power")
+	}
+	paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
+	if err != nil || len(paths) == 0 {
+		return emptyCard
+	}
+	sort.Strings(paths)
+
+	type powerRun struct {
+		generatedAt time.Time
+		displayTime string
+		result      platform.NvidiaPowerBenchResult
+	}
+	runs := make([]powerRun, 0, len(paths))
+	for _, path := range paths {
+		raw, err := os.ReadFile(path)
+		if err != nil {
+			continue
+		}
+		var r platform.NvidiaPowerBenchResult
+		if err := json.Unmarshal(raw, &r); err != nil {
+			continue
+		}
+		runs = append(runs, powerRun{
+			generatedAt: r.GeneratedAt,
+			displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
+			result:      r,
+		})
+	}
+	// Every matched file may have been unreadable or malformed; without this
+	// guard runs[0] below would panic.
+	if len(runs) == 0 {
+		return emptyCard
+	}
+	sort.Slice(runs, func(i, j int) bool {
+		return runs[i].generatedAt.After(runs[j].generatedAt)
+	})
+
+	// colorFor maps a status label to its CSS colour. A blank label counts as
+	// OK, FAILED is critical, anything else is a warning. The same mapping is
+	// used for the header, the per-GPU rows and the run history.
+	colorFor := func(label string) string {
+		switch label {
+		case "", "OK":
+			return "var(--ok)"
+		case "FAILED":
+			return "var(--crit-fg,#9f3a38)"
+		default:
+			return "var(--warn)"
+		}
+	}
+
+	var b strings.Builder
+	b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
+
+	latest := runs[0].result
+	b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
+	if latest.Hostname != "" {
+		b.WriteString(` — ` + html.EscapeString(latest.Hostname))
+	}
+	if latest.OverallStatus != "" {
+		b.WriteString(` — <span style="color:` + colorFor(latest.OverallStatus) + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
+	}
+	b.WriteString(`</p>`)
+
+	if len(latest.GPUs) > 0 {
+		b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
+		// NOTE(review): the "P95 Observed W" column is filled from
+		// MaxObservedPowerW below — confirm the header matches the field.
+		b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Single-card W</th><th>Multi-GPU W</th><th>P95 Observed W</th><th>Status</th>`)
+		b.WriteString(`</tr></thead><tbody>`)
+		for _, gpu := range latest.GPUs {
+			// Prefer the stable limit; fall back to the applied limit when
+			// no stable limit was recorded.
+			finalLimitW := gpu.StablePowerLimitW
+			if finalLimitW <= 0 {
+				finalLimitW = gpu.AppliedPowerLimitW
+			}
+			// A GPU is highlighted as derated when flagged so, or when its
+			// final limit is more than 1 W below the default limit.
+			derated := gpu.Derated ||
+				(gpu.DefaultPowerLimitW > 0 && finalLimitW > 0 && finalLimitW < gpu.DefaultPowerLimitW-1)
+			rowStyle := ""
+			finalStyle := ""
+			if derated {
+				rowStyle = ` style="background:rgba(255,180,0,0.08)"`
+				finalStyle = ` style="color:#e6a000;font-weight:600"`
+			}
+			statusLabel := gpu.Status
+			if statusLabel == "" {
+				statusLabel = "OK"
+			}
+			nominalStr := "-"
+			if gpu.DefaultPowerLimitW > 0 {
+				nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
+			}
+			singleStr := "-"
+			if gpu.AppliedPowerLimitW > 0 {
+				singleStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
+			}
+			multiStr := "-"
+			if gpu.StablePowerLimitW > 0 {
+				multiStr = fmt.Sprintf("%.0f", gpu.StablePowerLimitW)
+			}
+			p95Str := "-"
+			if gpu.MaxObservedPowerW > 0 {
+				p95Str = fmt.Sprintf("%.0f", gpu.MaxObservedPowerW)
+			}
+			b.WriteString(`<tr` + rowStyle + `>`)
+			b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
+			b.WriteString(`<td>` + nominalStr + `</td>`)
+			b.WriteString(`<td>` + singleStr + `</td>`)
+			b.WriteString(`<td` + finalStyle + `>` + multiStr + `</td>`)
+			b.WriteString(`<td>` + p95Str + `</td>`)
+			b.WriteString(`<td style="color:` + colorFor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div>`)
+	}
+
+	if len(runs) > 1 {
+		b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
+		b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
+		for i, run := range runs {
+			// A blank overall status is displayed as OK, as in the per-GPU rows.
+			statusLabel := run.result.OverallStatus
+			if statusLabel == "" {
+				statusLabel = "OK"
+			}
+			b.WriteString(`<tr>`)
+			b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
+			b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
+			b.WriteString(`<td style="color:` + colorFor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div></details>`)
+	}
+
+	b.WriteString(`</div></div>`)
+	return b.String()
+}
--- a/audit/internal/webui/page_burn.go
+++ b/audit/internal/webui/page_burn.go
@@ -0,0 +1,383 @@
+package webui
+
+func renderBurn() string {
+	return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
+<div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
+<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">Burn Profile</div>
+  <div class="card-body burn-profile-body">
+    <div class="burn-profile-col">
+      <div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
+    </div>
+    <div class="burn-profile-col burn-profile-action">
+      <button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
+      <p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
+    </div>
+    <div class="burn-profile-col burn-profile-action">
+      <button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>
+      <p>Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.</p>
+    </div>
+  </div>
+  <div class="card-body" style="padding-top:0;display:flex;justify-content:center">
+    <span id="burn-all-status" style="font-size:12px;color:var(--muted)"></span>
+  </div>
+</div>
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">NVIDIA GPU Selection</div>
+  <div class="card-body">
+    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.</p>
+    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
+      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectAll()">Select All</button>
+      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectNone()">Clear</button>
+    </div>
+	    <div id="burn-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
+	      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
+	    </div>
+	    <p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
+	    <div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
+	      <label class="cb-row">
+	        <input type="radio" name="burn-nvidia-mode" value="sequential" checked>
+	        <span>Sequential — selected GPUs one at a time</span>
+	      </label>
+	      <label class="cb-row" id="burn-parallel-label">
+	        <input type="radio" name="burn-nvidia-mode" value="parallel">
+	        <span>Parallel — all selected GPUs simultaneously</span>
+	      </label>
+	      <label class="cb-row" id="burn-ramp-label">
+	        <input type="radio" name="burn-nvidia-mode" value="ramp-up">
+	        <span>Ramp-up — add one GPU at a time</span>
+	      </label>
+	    </div>
+	  </div>
+	</div>
+
+<div class="burn-section">Core Burn Paths</div>
+<div class="grid2 burn-grid" style="margin-bottom:16px">
+<div class="card burn-card">
+  <div class="card-head card-head-actions"><span>GPU Max Load</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'}])">Run</button></div>
+  <div class="card-body burn-card-body">
+    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.</p>
+    <label class="cb-row"><input type="checkbox" id="burn-nvidia-compute" checked disabled><span>NVIDIA Max Compute Load (dcgmproftester) <span class="cb-note" id="note-nvidia-compute"></span></span></label>
+    <label class="cb-row"><input type="checkbox" id="burn-gpu-bee" checked disabled><span>GPU Burn (bee-gpu-burn) <span class="cb-note" id="note-bee"></span></span></label>
+    <label class="cb-row"><input type="checkbox" id="burn-gpu-john" disabled><span>John GPU Stress (john/OpenCL) <span class="cb-note" id="note-john"></span></span></label>
+    <label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" disabled><span>AMD GPU Stress (rvs gst) <span class="cb-note" id="note-rvs"></span></span></label>
+  </div>
+</div>
+
+<div class="card burn-card">
+  <div class="card-head card-head-actions"><span>Compute Stress</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'}])">Run</button></div>
+  <div class="card-body burn-card-body">
+    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Select which subsystems to stress. Each checked item runs as a separate task.</p>
+    <label class="cb-row"><input type="checkbox" id="burn-cpu" checked><span>CPU stress (stress-ng)</span></label>
+    <label class="cb-row"><input type="checkbox" id="burn-mem-stress" checked><span>Memory stress (stress-ng --vm)</span></label>
+    <label class="cb-row"><input type="checkbox" id="burn-sat-stress"><span>stressapptest (CPU + memory bus)</span></label>
+  </div>
+</div>
+</div>
+
+<div id="bi-output" style="display:none;margin-top:16px" class="card">
+  <div class="card-head">Output <span id="bi-title"></span></div>
+  <div class="card-body"><div id="bi-terminal" class="terminal"></div></div>
+</div>
+
+<style>
+.cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
+.cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
+.cb-row input[type=checkbox]:disabled { opacity:0.4; cursor:not-allowed; }
+.cb-row input[type=checkbox]:disabled ~ span { opacity:0.45; cursor:not-allowed; }
+.cb-note { font-size:11px; color:var(--muted); font-style:italic; }
+.burn-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
+.burn-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
+.burn-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
+.burn-profile-col { min-width:0; }
+.burn-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:flex-start; gap:8px; }
+.burn-profile-action p { font-size:12px; color:var(--muted); margin:0; width:100%; text-align:left; }
+.burn-section { font-size:12px; font-weight:700; letter-spacing:.06em; text-transform:uppercase; color:var(--muted); margin:0 0 10px; padding-top:4px; }
+.burn-grid { align-items:stretch; }
+.burn-card { height:100%; display:flex; flex-direction:column; }
+.burn-card-body { flex:1; display:flex; flex-direction:column; }
+.card-head-actions { justify-content:space-between; }
+.card-head-buttons { display:flex; align-items:center; gap:8px; margin-left:auto; }
+@media(max-width:900px){ .card-head-actions { align-items:flex-start; flex-direction:column; } .card-head-buttons { margin-left:0; } .burn-profile-body { grid-template-columns:1fr; } }
+</style>
+
+<script>
+let biES = null;
+function burnTaskIDs(payload) {
+  // Normalise an enqueue response into a list of task ids: a non-empty
+  // task_ids array wins, then a single task_id, otherwise an empty list.
+  if (!payload) return [];
+  const many = payload.task_ids;
+  if (Array.isArray(many) && many.length) return many;
+  return payload.task_id ? [payload.task_id] : [];
+}
+function burnProfile() {
+  // Value of the checked burn-profile radio button, or 'smoke' when none is checked.
+  const checked = document.querySelector('input[name="burn-profile"]:checked');
+  if (!checked) return 'smoke';
+  return checked.value;
+}
+function burnSelectedGPUIndices() {
+  // Collect the numeric values of all checked, enabled GPU checkboxes in ascending order.
+  const indices = [];
+  document.querySelectorAll('.burn-gpu-checkbox').forEach(function(box) {
+    if (!box.checked || box.disabled) return;
+    const idx = parseInt(box.value, 10);
+    if (!Number.isNaN(idx)) indices.push(idx);
+  });
+  indices.sort(function(x, y) { return x - y; });
+  return indices;
+}
+function burnNvidiaMode() {
+  // Value of the checked burn-nvidia-mode radio button, or 'sequential' when none is checked.
+  const checked = document.querySelector('input[name="burn-nvidia-mode"]:checked');
+  if (!checked) return 'sequential';
+  return checked.value;
+}
+function burnApplyMultiGPUState(gpuCount) {
+  // With fewer than two GPUs the 'parallel' and 'ramp-up' modes are disabled
+  // and dimmed; if one of them was checked, the selection falls back to
+  // 'sequential'. Otherwise every mode is enabled and undimmed.
+  const needsSeveral = { 'parallel': true, 'ramp-up': true };
+  document.querySelectorAll('input[name="burn-nvidia-mode"]').forEach(function(radio) {
+    const lock = gpuCount < 2 && needsSeveral[radio.value] === true;
+    radio.disabled = lock;
+    if (lock && radio.checked) {
+      const fallback = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
+      if (fallback) fallback.checked = true;
+    }
+    const wrapper = radio.closest('label');
+    if (wrapper) wrapper.style.opacity = lock ? '0.4' : '';
+  });
+}
+function burnUpdateSelectionNote() {
+  // Refresh the hint under the GPU list so it reflects the current selection.
+  const noteEl = document.getElementById('burn-selection-note');
+  const indices = burnSelectedGPUIndices();
+  noteEl.textContent = indices.length
+    ? 'Selected NVIDIA GPUs: ' + indices.join(', ') + '. Official and custom NVIDIA tasks will use only these GPUs.'
+    : 'Select at least one NVIDIA GPU to enable NVIDIA burn recipes.';
+}
+function burnRenderGPUList(gpus) {
+  // Render one pre-checked checkbox row per GPU into #burn-gpu-list, then
+  // refresh the mode radios and the selection note.
+  // gpus: array of objects with index, name and memory_mb fields, as read below.
+  const root = document.getElementById('burn-gpu-list');
+  if (!gpus || !gpus.length) {
+    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
+    // Keep the multi-GPU modes in step with an empty list as well.
+    burnApplyMultiGPUState(0);
+    burnUpdateSelectionNote();
+    return;
+  }
+  root.textContent = '';
+  gpus.forEach(function(gpu) {
+    const row = document.createElement('label');
+    row.className = 'burn-gpu-row';
+    const box = document.createElement('input');
+    box.className = 'burn-gpu-checkbox';
+    box.type = 'checkbox';
+    box.value = gpu.index;
+    box.checked = true;
+    box.addEventListener('change', burnUpdateSelectionNote);
+    const text = document.createElement('span');
+    const title = document.createElement('strong');
+    title.textContent = 'GPU ' + gpu.index;
+    text.appendChild(title);
+    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
+    // The GPU name comes from the API response, so it is added as a text node
+    // instead of being concatenated into markup.
+    text.appendChild(document.createTextNode(' — ' + gpu.name + mem));
+    row.appendChild(box);
+    row.appendChild(text);
+    root.appendChild(row);
+  });
+  burnApplyMultiGPUState(gpus.length);
+  burnUpdateSelectionNote();
+}
+function burnSelectAll() {
+  // Check every GPU checkbox, then refresh the selection note.
+  const boxes = document.querySelectorAll('.burn-gpu-checkbox');
+  boxes.forEach(function(box) {
+    box.checked = true;
+  });
+  burnUpdateSelectionNote();
+}
+function burnSelectNone() {
+  // Uncheck every GPU checkbox, then refresh the selection note.
+  const boxes = document.querySelectorAll('.burn-gpu-checkbox');
+  boxes.forEach(function(box) {
+    box.checked = false;
+  });
+  burnUpdateSelectionNote();
+}
+function burnLoadGPUs() {
+  // Fetch the NVIDIA GPU list from /api/gpu/nvidia and render it; on failure
+  // show the error in place of the list.
+  fetch('/api/gpu/nvidia').then(function(r) {
+    return r.json().then(function(body) {
+      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
+      return body;
+    });
+  }).then(function(gpus) {
+    burnRenderGPUList(gpus);
+  }).catch(function(err) {
+    // The message can originate from the server response, so it is inserted
+    // as text rather than parsed as HTML.
+    const msg = document.createElement('p');
+    msg.setAttribute('style', 'color:var(--crit-fg);font-size:13px');
+    msg.textContent = 'Error: ' + err.message;
+    const root = document.getElementById('burn-gpu-list');
+    root.textContent = '';
+    root.appendChild(msg);
+    burnUpdateSelectionNote();
+  });
+}
+function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
+  // POST a run request to /api/sat/<target>/run and resolve with the parsed
+  // JSON response; a non-OK response rejects with the server error or the
+  // HTTP status. When useSelectedNvidia is set, the selected GPU indices are
+  // attached and the request is rejected up front if none are selected.
+  const payload = Object.assign({ profile: burnProfile(), display_name: label }, extra || {});
+  if (useSelectedNvidia) {
+    const gpus = burnSelectedGPUIndices();
+    if (!gpus.length) {
+      return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
+    }
+    payload.gpu_indices = gpus;
+    const mode = burnNvidiaMode();
+    // The multi-GPU flags are only sent when more than one GPU is selected.
+    if (gpus.length > 1) {
+      switch (mode) {
+        case 'ramp-up':
+          payload.stagger_gpu_start = true;
+          break;
+        case 'parallel':
+          payload.parallel_gpus = true;
+          break;
+      }
+    }
+  }
+  const request = {
+    method: 'POST',
+    headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(payload)
+  };
+  return fetch('/api/sat/' + target + '/run', request).then(function(resp) {
+    return resp.json().then(function(data) {
+      if (resp.ok) return data;
+      throw new Error(data.error || ('HTTP ' + resp.status));
+    });
+  });
+}
+function streamTask(taskId, label) {
+  // Replace any active stream, reveal the output card and follow a single
+  // task's event stream until its done event arrives.
+  if (biES) {
+    biES.close();
+    biES = null;
+  }
+  document.getElementById('bi-output').style.display = 'block';
+  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
+  const terminal = document.getElementById('bi-terminal');
+  terminal.textContent = 'Task ' + taskId + ' queued. Streaming...\n';
+  const scrollToEnd = function() {
+    terminal.scrollTop = terminal.scrollHeight;
+  };
+  biES = new EventSource('/api/tasks/' + taskId + '/stream');
+  biES.onmessage = function(ev) {
+    terminal.textContent += ev.data + '\n';
+    scrollToEnd();
+  };
+  biES.addEventListener('done', function(ev) {
+    biES.close();
+    biES = null;
+    // A non-empty payload on the done event is printed as an error.
+    const outcome = ev.data ? '\nERROR: ' + ev.data : '\nCompleted.';
+    terminal.textContent += outcome + '\n';
+    scrollToEnd();
+  });
+}
+function streamBurnTask(taskId, label, resetTerminal) {
+  // Single-task convenience wrapper around streamBurnTaskSet.
+  const single = [taskId];
+  return streamBurnTaskSet(single, label, resetTerminal);
+}
+function streamBurnTaskSet(taskIds, label, resetTerminal) {
+  // Stream the given tasks one after another into the output terminal.
+  // Resolves (never rejects) with {ok, error}: ok is true only when every
+  // task finished without an error payload and without a stream error.
+  // Any stream that is still open is closed first.
+  if (biES) { biES.close(); biES = null; }
+  document.getElementById('bi-output').style.display = 'block';
+  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
+  const term = document.getElementById('bi-terminal');
+  if (resetTerminal) {
+    term.textContent = '';
+  }
+  if (!Array.isArray(taskIds) || !taskIds.length) {
+    term.textContent += 'ERROR: no tasks queued.\n';
+    return Promise.resolve({ok:false, error:'no tasks queued'});
+  }
+  // streamNext follows task idx to completion, then recurses to the next
+  // one, carrying the running failure count along.
+  const streamNext = function(idx, failures) {
+    if (idx >= taskIds.length) {
+      return Promise.resolve({ok: failures === 0, error: failures ? (failures + ' task(s) failed') : ''});
+    }
+    const taskId = taskIds[idx];
+    term.textContent += '[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming...\n';
+    return new Promise(function(resolve) {
+      biES = new EventSource('/api/tasks/' + taskId + '/stream');
+      biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
+      // A non-empty payload on the done event counts as one failure.
+      biES.addEventListener('done', function(e) {
+        biES.close();
+        biES = null;
+        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
+        term.scrollTop = term.scrollHeight;
+        resolve(failures + (e.data ? 1 : 0));
+      });
+      // Any stream error closes the source and counts as a failure. biES is
+      // null-checked here because the done handler may already have cleared it.
+      biES.onerror = function() {
+        if (biES) {
+          biES.close();
+          biES = null;
+        }
+        term.textContent += '\nERROR: stream disconnected.\n';
+        term.scrollTop = term.scrollHeight;
+        resolve(failures + 1);
+      };
+    }).then(function(nextFailures) {
+      return streamNext(idx + 1, nextFailures);
+    });
+  };
+  return streamNext(0, 0);
+}
+function runBurnTaskSet(tasks, statusElId) {
+  // Enqueue and stream, one after another, every task whose checkbox
+  // (looked up by task id) is checked and enabled.
+  // tasks: array of {id, target, label, extra, nvidia} descriptors.
+  // statusElId: optional id of an element that receives progress text.
+  // Returns undefined when nothing is selected. Otherwise returns a promise
+  // that settles once the set has finished or the first enqueue error has
+  // been reported; errors are reported here and not re-thrown.
+  const enabled = tasks.filter(function(t) {
+    const el = document.getElementById(t.id);
+    return el && el.checked && !el.disabled;
+  });
+  const status = statusElId ? document.getElementById(statusElId) : null;
+  if (status) status.textContent = '';
+  if (!enabled.length) {
+    if (status) status.textContent = 'No tasks selected.';
+    return;
+  }
+  const term = document.getElementById('bi-terminal');
+  document.getElementById('bi-output').style.display = 'block';
+  document.getElementById('bi-title').textContent = '— Burn one by one [' + burnProfile() + ']';
+  term.textContent = '';
+  // Number of entries whose streamed task set reported a failure.
+  let failedSets = 0;
+  const runNext = function(idx) {
+    if (idx >= enabled.length) {
+      return Promise.resolve();
+    }
+    const t = enabled[idx];
+    term.textContent += '\n[' + (idx + 1) + '/' + enabled.length + '] ' + t.label + '\n';
+    if (status) status.textContent = 'Running ' + (idx + 1) + '/' + enabled.length + '...';
+    return enqueueBurnTask(t.target, t.label, t.extra, !!t.nvidia)
+      .then(function(d) {
+        return streamBurnTaskSet(burnTaskIDs(d), t.label, false);
+      })
+      .then(function(result) {
+        if (result && result.ok === false) failedSets++;
+        return runNext(idx + 1);
+      });
+  };
+  // Errors are handled once, at the end of the whole chain. Handling them at
+  // every recursion level would print the same error once per level.
+  return runNext(0).then(function() {
+    if (!status) return;
+    status.textContent = failedSets
+      ? 'Completed ' + enabled.length + ' task(s), ' + failedSets + ' with failures.'
+      : 'Completed ' + enabled.length + ' task(s).';
+  }).catch(function(err) {
+    if (status) status.textContent = 'Error: ' + err.message;
+    document.getElementById('bi-output').style.display = 'block';
+    term.textContent += 'ERROR: ' + err.message + '\n';
+  });
+}
+function runPlatformStress() {
+  // Queue the platform-stress task for the component groups that have at
+  // least one checked, enabled checkbox, then stream its output.
+  // Any checked compute checkbox contributes the 'cpu' component; any checked
+  // GPU checkbox contributes the 'gpu' component.
+  const status = document.getElementById('burn-all-status');
+  const computeIDs = ['burn-cpu', 'burn-mem-stress', 'burn-sat-stress'];
+  const gpuIDs = ['burn-nvidia-compute', 'burn-gpu-bee', 'burn-gpu-john', 'burn-gpu-rvs'];
+  const hasChecked = function(ids) {
+    return ids.some(function(id) {
+      const el = document.getElementById(id);
+      return el && el.checked && !el.disabled;
+    });
+  };
+  const comps = [];
+  if (hasChecked(computeIDs)) comps.push('cpu');
+  if (hasChecked(gpuIDs)) comps.push('gpu');
+  if (!comps.length) {
+    if (status) status.textContent = 'Select at least one test in GPU Max Load or Compute Stress.';
+    return;
+  }
+  if (status) status.textContent = '';
+  enqueueBurnTask('platform-stress', 'Platform Thermal Cycling', {platform_components: comps}, false).then(function(d) {
+    // Accept both response shapes (task_ids array or single task_id).
+    const ids = burnTaskIDs(d);
+    if (!ids.length) throw new Error('No task was queued.');
+    streamTask(ids[0], 'Platform Thermal Cycling');
+  }).catch(function(err) {
+    // Report enqueue failures instead of leaving an unhandled rejection.
+    if (status) status.textContent = 'Error: ' + err.message;
+    document.getElementById('bi-output').style.display = 'block';
+    document.getElementById('bi-terminal').textContent += 'ERROR: ' + err.message + '\n';
+  });
+}
+function runAllBurnTasks() {
+  // Hand every burn recipe on the page to runBurnTaskSet, which runs only
+  // those whose checkbox is checked and enabled.
+  const gpuRecipes = [
+    {id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},
+    {id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},
+    {id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},
+    {id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'},
+  ];
+  const computeRecipes = [
+    {id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},
+    {id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},
+    {id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'},
+  ];
+  const statusEl = document.getElementById('burn-all-status');
+  statusEl.textContent = 'Enqueuing...';
+  runBurnTaskSet(gpuRecipes.concat(computeRecipes), 'burn-all-status');
+}
+// Enable each GPU stressor checkbox whose tool /api/gpu/tools reports as
+// available; for unavailable tools, show the reason next to the checkbox.
+// Fetch or parse failures are ignored, leaving the checkboxes disabled.
+fetch('/api/gpu/tools')
+  .then(function(resp) { return resp.json(); })
+  .then(function(toolList) {
+    const byTool = {
+      'nvidia-compute': {cb:'burn-nvidia-compute', note:'note-nvidia-compute', reason:'dcgmproftester not available or NVIDIA driver not running'},
+      'bee-gpu-burn': {cb:'burn-gpu-bee', note:'note-bee', reason:'bee-gpu-burn not available or NVIDIA driver not running'},
+      'john': {cb:'burn-gpu-john', note:'note-john', reason:'bee-john-gpu-stress not available or NVIDIA driver not running'},
+      'rvs': {cb:'burn-gpu-rvs', note:'note-rvs', reason:'AMD driver not running'},
+    };
+    toolList.forEach(function(tool) {
+      const entry = byTool[tool.id];
+      if (!entry) return;
+      const box = document.getElementById(entry.cb);
+      if (!box) return;
+      if (tool.available) {
+        box.disabled = false;
+        return;
+      }
+      const noteEl = document.getElementById(entry.note);
+      if (noteEl) noteEl.textContent = '— ' + entry.reason;
+    });
+  })
+  .catch(function() {});
+burnLoadGPUs();
+</script>`
+}
--- a/audit/internal/webui/page_export_tools.go
+++ b/audit/internal/webui/page_export_tools.go
@@ -0,0 +1,434 @@
+package webui
+
+import (
+	"fmt"
+	"html"
+	"net/url"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+)
+
+// renderExport returns the body of the export page for exportDir: a Support
+// Bundle card, an Export Files card with one download link per file found
+// under exportDir, and the USB export card below them.
+func renderExport(exportDir string) string {
+	// A listing error is intentionally dropped: files is then nil and the
+	// table falls back to the "No export files found." placeholder row.
+	files, _ := listExportFiles(exportDir)
+
+	var table strings.Builder
+	table.WriteString(`<table><tr><th>File</th></tr>`)
+	for _, name := range files {
+		// The path is query-escaped for the href and HTML-escaped for display.
+		fmt.Fprintf(&table, `<tr><td><a href="/export/file?path=%s" target="_blank">%s</a></td></tr>`,
+			url.QueryEscape(name), html.EscapeString(name))
+	}
+	if len(files) == 0 {
+		table.WriteString(`<tr><td style="color:var(--muted)">No export files found.</td></tr>`)
+	}
+	table.WriteString(`</table>`)
+
+	bundleCard := `<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
+<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
+` + renderSupportBundleInline() + `
+</div></div>`
+	filesCard := `<div class="card"><div class="card-head">Export Files</div><div class="card-body">
+` + table.String() + `
+</div></div>`
+
+	return "<div class=\"grid2\">\n" + bundleCard + "\n" + filesCard + "\n</div>\n\n" + renderUSBExportCard()
+}
+
+// listExportFiles walks exportDir and returns the path of every non-directory
+// entry, relative to exportDir and sorted lexically.
+//
+// Leading and trailing whitespace in exportDir is ignored. A not-exist error
+// (for example when the directory itself is missing) is not reported: whatever
+// was collected so far is returned with a nil error. Any other walk error
+// aborts the listing and is returned with a nil slice.
+func listExportFiles(exportDir string) ([]string, error) {
+	// Trim once and use the same root for both Walk and Rel. Mixing the
+	// trimmed and untrimmed forms makes Rel fail for a padded path.
+	root := strings.TrimSpace(exportDir)
+	var entries []string
+	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+		rel, err := filepath.Rel(root, path)
+		if err != nil {
+			return err
+		}
+		entries = append(entries, rel)
+		return nil
+	})
+	if err != nil && !os.IsNotExist(err) {
+		return nil, err
+	}
+	sort.Strings(entries)
+	return entries, nil
+}
+
+// renderSupportBundleInline returns the "Download Support Bundle" button, its
+// status line, and the inline script that defines window.supportBundleDownload.
+//
+// The script fetches /export/support.tar.gz, takes the download name from the
+// Content-Disposition header (falling back to bee-support.tar.gz), and starts
+// the download through a temporary anchor pointing at an object URL. The
+// button is disabled while the request runs and restored afterwards with the
+// same down-arrow glyph (U+2193) it is initially rendered with.
+//
+// The element IDs are fixed, so the fragment should appear once per page.
+func renderSupportBundleInline() string {
+	return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleDownload()">&#8595; Download Support Bundle</button>
+<div id="support-bundle-status" style="margin-top:10px;font-size:13px;color:var(--muted)"></div>
+<script>
+window.supportBundleDownload = function() {
+  var btn = document.getElementById('support-bundle-btn');
+  var status = document.getElementById('support-bundle-status');
+  btn.disabled = true;
+  btn.textContent = 'Building...';
+  status.textContent = 'Collecting logs and export data\u2026';
+  status.style.color = 'var(--muted)';
+  var filename = 'bee-support.tar.gz';
+  fetch('/export/support.tar.gz')
+    .then(function(r) {
+      if (!r.ok) throw new Error('HTTP ' + r.status);
+      var cd = r.headers.get('Content-Disposition') || '';
+      var m = cd.match(/filename="?([^";]+)"?/);
+      if (m) filename = m[1];
+      return r.blob();
+    })
+    .then(function(blob) {
+      var url = URL.createObjectURL(blob);
+      var a = document.createElement('a');
+      a.href = url;
+      a.download = filename;
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+      URL.revokeObjectURL(url);
+      status.textContent = 'Download started.';
+      status.style.color = 'var(--ok-fg)';
+    })
+    .catch(function(e) {
+      status.textContent = 'Error: ' + e.message;
+      status.style.color = 'var(--crit-fg)';
+    })
+    .finally(function() {
+      btn.disabled = false;
+      btn.textContent = '\u2193 Download Support Bundle';
+    });
+};
+</script>`
+}
+
+// renderUSBExportCard wraps the USB export widget in its own card whose header
+// carries a Refresh button wired to the widget's usbRefresh() function.
+func renderUSBExportCard() string {
+	const cardOpen = `<div class="card" style="margin-top:16px">
+  <div class="card-head">Export to USB
+    <button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
+  </div>
+  <div class="card-body">`
+	const cardClose = `</div>
+</div>`
+	return cardOpen + renderUSBExportInline() + cardClose
+}
+
+// renderUSBExportInline returns the USB export widget: a status line, a
+// container for the device table, a message line, and the inline script that
+// drives them.
+//
+// The script loads the device list from /api/export/usb, keeps it in
+// window._usbTargets, and renders one row per device with "Audit JSON" and
+// "Support Bundle" buttons. Clicking a button POSTs the selected target as
+// JSON to /api/export/usb/<type> and reports the outcome both in the shared
+// message line and next to the clicked button. It exposes window.usbRefresh
+// and window.usbExport and triggers an initial scan when loaded.
+//
+// Device, filesystem, size, label and model values come from the attached
+// drive, so they are HTML-escaped before being placed into innerHTML; missing
+// values render as an empty cell.
+//
+// The element IDs are fixed, so the fragment should appear once per page.
+func renderUSBExportInline() string {
+	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
+<div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
+<div id="usb-targets" style="margin-top:12px"></div>
+<div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
+<script>
+(function(){
+// Escape device-supplied strings before they are spliced into innerHTML.
+function esc(v) {
+  return String(v == null ? '' : v).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
+}
+function usbRefresh() {
+  document.getElementById('usb-status').textContent = 'Scanning...';
+  document.getElementById('usb-targets').innerHTML = '';
+  document.getElementById('usb-msg').textContent = '';
+  fetch('/api/export/usb').then(r=>r.json()).then(targets => {
+    window._usbTargets = Array.isArray(targets) ? targets : [];
+    const st = document.getElementById('usb-status');
+    const ct = document.getElementById('usb-targets');
+    if (!targets || targets.length === 0) {
+      st.textContent = 'No removable USB devices found.';
+      return;
+    }
+    st.textContent = targets.length + ' device(s) found:';
+    ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
+      targets.map((t, idx) => {
+        const dev = t.device || '';
+        const label = t.label || '';
+        const model = t.model || '';
+        return '<tr>' +
+          '<td style="font-family:monospace">'+esc(dev)+'</td>' +
+          '<td>'+esc(t.fs_type)+'</td>' +
+          '<td>'+esc(t.size)+'</td>' +
+          '<td>'+esc(label)+'</td>' +
+          '<td style="font-size:12px;color:var(--muted)">'+esc(model)+'</td>' +
+          '<td style="white-space:nowrap">' +
+            '<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+idx+',this)">Audit JSON</button> ' +
+            '<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+idx+',this)">Support Bundle</button>' +
+            '<div class="usb-row-msg" style="margin-top:6px;font-size:12px;color:var(--muted)"></div>' +
+          '</td></tr>';
+      }).join('') + '</table>';
+  }).catch(e => {
+    document.getElementById('usb-status').textContent = 'Error: ' + e;
+  });
+}
+window.usbExport = function(type, targetIndex, btn) {
+  const target = (window._usbTargets || [])[targetIndex];
+  if (!target) {
+    const msg = document.getElementById('usb-msg');
+    msg.style.color = 'var(--err,red)';
+    msg.textContent = 'Error: USB target not found. Refresh and try again.';
+    return;
+  }
+  const msg = document.getElementById('usb-msg');
+  const row = btn ? btn.closest('td') : null;
+  const rowMsg = row ? row.querySelector('.usb-row-msg') : null;
+  const originalText = btn ? btn.textContent : '';
+  if (btn) {
+    btn.disabled = true;
+    btn.textContent = 'Exporting...';
+  }
+  if (rowMsg) {
+    rowMsg.style.color = 'var(--muted)';
+    rowMsg.textContent = 'Working...';
+  }
+  msg.style.color = 'var(--muted)';
+  msg.textContent = 'Exporting ' + (type === 'bundle' ? 'support bundle' : 'audit JSON') + ' to ' + (target.device||'') + '...';
+  fetch('/api/export/usb/'+type, {
+    method: 'POST',
+    headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(target)
+  }).then(async r => {
+    const d = await r.json();
+    if (!r.ok) throw new Error(d.error || ('HTTP ' + r.status));
+    return d;
+  }).then(d => {
+    msg.style.color = 'var(--ok,green)';
+    msg.textContent = d.message || 'Done.';
+    if (rowMsg) {
+      rowMsg.style.color = 'var(--ok,green)';
+      rowMsg.textContent = d.message || 'Done.';
+    }
+  }).catch(e => {
+    msg.style.color = 'var(--err,red)';
+    msg.textContent = 'Error: '+e;
+    if (rowMsg) {
+      rowMsg.style.color = 'var(--err,red)';
+      rowMsg.textContent = 'Error: ' + e;
+    }
+  }).finally(() => {
+    if (btn) {
+      btn.disabled = false;
+      btn.textContent = originalText;
+    }
+  });
+};
+window.usbRefresh = usbRefresh;
+usbRefresh();
+})();
+</script>`
+}
+
+// renderNvidiaSelfHealInline returns the NVIDIA self-heal panel: a "Restart
+// GPU Drivers" button, a Refresh button, a status line, a GPU table container,
+// an initially hidden output terminal, and the inline script behind them.
+//
+// The script loads GPU state from /api/gpu/nvidia-status on load and on
+// refresh, restarts the driver by POSTing {name:'bee-nvidia', action:'restart'}
+// to /api/services/action, and resets a single GPU by POSTing {index} to
+// /api/gpu/nvidia-reset. Results are shown in the output terminal.
+//
+// After a restart the script calls loadServices(), which this fragment does
+// not define, so the hosting page has to provide it. The element IDs and the
+// global function names are fixed, so the fragment should appear once per page.
+//
+// NOTE(review): GPU name, serial, bdf and raw_line are concatenated into
+// innerHTML without escaping — confirm the API only returns trusted values.
+func renderNvidiaSelfHealInline() string {
+	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Inspect NVIDIA GPU health, restart the bee-nvidia driver service, and issue a per-GPU reset when the driver reports reset required.</p>
+<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
+  <button id="nvidia-restart-btn" class="btn btn-secondary" onclick="nvidiaRestartDrivers()">Restart GPU Drivers</button>
+  <button class="btn btn-sm btn-secondary" onclick="loadNvidiaSelfHeal()">&#8635; Refresh</button>
+</div>
+<div id="nvidia-self-heal-status" style="font-size:13px;color:var(--muted);margin-bottom:12px">Loading NVIDIA GPU status...</div>
+<div id="nvidia-self-heal-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
+<div id="nvidia-self-heal-out" style="display:none;margin-top:12px">
+  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
+    <span id="nvidia-self-heal-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
+    <span id="nvidia-self-heal-out-status" style="font-size:12px"></span>
+  </div>
+  <div id="nvidia-self-heal-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
+</div>
+<script>
+function nvidiaSelfHealShowResult(label, status, output) {
+  var out = document.getElementById('nvidia-self-heal-out');
+  var term = document.getElementById('nvidia-self-heal-terminal');
+  var statusEl = document.getElementById('nvidia-self-heal-out-status');
+  var labelEl = document.getElementById('nvidia-self-heal-out-label');
+  out.style.display = 'block';
+  labelEl.textContent = label;
+  term.textContent = output || '(no output)';
+  term.scrollTop = term.scrollHeight;
+  if (status === 'ok') {
+    statusEl.textContent = '✓ done';
+    statusEl.style.color = 'var(--ok-fg, #2c662d)';
+  } else {
+    statusEl.textContent = '✗ failed';
+    statusEl.style.color = 'var(--crit-fg, #9f3a38)';
+  }
+}
+function nvidiaRestartDrivers() {
+  var btn = document.getElementById('nvidia-restart-btn');
+  var original = btn.textContent;
+  btn.disabled = true;
+  btn.textContent = 'Restarting...';
+  nvidiaSelfHealShowResult('restart bee-nvidia', 'ok', 'Running...');
+  fetch('/api/services/action', {
+    method:'POST',
+    headers:{'Content-Type':'application/json'},
+    body:JSON.stringify({name:'bee-nvidia', action:'restart'})
+  }).then(r=>r.json()).then(d => {
+    nvidiaSelfHealShowResult('restart bee-nvidia', d.status || 'error', d.output || d.error || '(no output)');
+    setTimeout(function() {
+      loadServices();
+      loadNvidiaSelfHeal();
+    }, 800);
+  }).catch(e => {
+    nvidiaSelfHealShowResult('restart bee-nvidia', 'error', 'Request failed: ' + e);
+  }).finally(() => {
+    btn.disabled = false;
+    btn.textContent = original;
+  });
+}
+function nvidiaResetGPU(index, btn) {
+  var original = btn.textContent;
+  btn.disabled = true;
+  btn.textContent = 'Resetting...';
+  nvidiaSelfHealShowResult('reset gpu ' + index, 'ok', 'Running...');
+  fetch('/api/gpu/nvidia-reset', {
+    method:'POST',
+    headers:{'Content-Type':'application/json'},
+    body:JSON.stringify({index:index})
+  }).then(r=>r.json()).then(d => {
+    nvidiaSelfHealShowResult('reset gpu ' + index, d.status || 'error', d.output || '(no output)');
+    setTimeout(loadNvidiaSelfHeal, 1000);
+  }).catch(e => {
+    nvidiaSelfHealShowResult('reset gpu ' + index, 'error', 'Request failed: ' + e);
+  }).finally(() => {
+    btn.disabled = false;
+    btn.textContent = original;
+  });
+}
+function loadNvidiaSelfHeal() {
+  var status = document.getElementById('nvidia-self-heal-status');
+  var table = document.getElementById('nvidia-self-heal-table');
+  status.textContent = 'Loading NVIDIA GPU status...';
+  status.style.color = 'var(--muted)';
+  table.innerHTML = '<p style="color:var(--muted);font-size:13px">Loading...</p>';
+  fetch('/api/gpu/nvidia-status').then(r=>r.json()).then(gpus => {
+    if (!Array.isArray(gpus) || gpus.length === 0) {
+      status.textContent = 'No NVIDIA GPUs detected or nvidia-smi is unavailable.';
+      table.innerHTML = '';
+      return;
+    }
+    status.textContent = gpus.length + ' NVIDIA GPU(s) detected.';
+    const rows = gpus.map(g => {
+      const serial = g.serial || '';
+      const bdf = g.bdf || '';
+      const id = serial || bdf || ('gpu-' + g.index);
+      const badge = g.status === 'OK' ? 'badge-ok' : g.status === 'RESET_REQUIRED' ? 'badge-err' : 'badge-warn';
+      const details = [];
+      if (serial) details.push('serial ' + serial);
+      if (bdf) details.push('bdf ' + bdf);
+      if (g.parse_failure && g.raw_line) details.push(g.raw_line);
+      return '<tr>'
+        + '<td style="white-space:nowrap">' + g.index + '</td>'
+        + '<td>' + (g.name || 'unknown') + '</td>'
+        + '<td style="font-family:monospace">' + id + '</td>'
+        + '<td><span class="badge ' + badge + '">' + (g.status || 'UNKNOWN') + '</span>'
+        + (details.length ? '<div style="margin-top:4px;font-size:12px;color:var(--muted)">' + details.join(' | ') + '</div>' : '')
+        + '</td>'
+        + '<td style="white-space:nowrap"><button class="btn btn-sm btn-secondary" onclick="nvidiaResetGPU(' + g.index + ', this)">Reset GPU</button></td>'
+        + '</tr>';
+    }).join('');
+    table.innerHTML = '<table><tr><th>GPU</th><th>Model</th><th>ID</th><th>Status</th><th>Action</th></tr>' + rows + '</table>';
+  }).catch(e => {
+    status.textContent = 'Error loading NVIDIA GPU status: ' + e;
+    status.style.color = 'var(--crit-fg, #9f3a38)';
+    table.innerHTML = '';
+  });
+}
+loadNvidiaSelfHeal();
+</script>`
+}
+
+// renderTools returns the body of the Tools page. It stacks these cards:
+// System Install (Install to RAM plus the disk installer from
+// renderInstallInline), Support Bundle with the USB export widget, Tool Check,
+// NVIDIA Self Heal, Network, and Services.
+//
+// Two inline scripts belong to the page itself. The first reads
+// /api/system/ram-status to describe the boot source and to decide whether the
+// "Copy to RAM" button is shown; installToRAM() POSTs to
+// /api/system/install-to-ram and then navigates to /tasks#<task_id>. The
+// second defines checkTools(), which renders the result of /api/tools/check,
+// and runs it once on load.
+//
+// NOTE(review): the ram-status, install-to-ram and tools/check requests have
+// no catch handler, so a failed request leaves the "Checking..." text (or the
+// disabled button) in place — confirm whether that is intended.
+func renderTools() string {
+	return `<div class="card" style="margin-bottom:16px">
+  <div class="card-head">System Install</div>
+  <div class="card-body">
+    <div style="margin-bottom:20px">
+    <div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
+    <p id="boot-source-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Detecting boot source...</p>
+    <p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
+    <button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">&#9654; Copy to RAM</button>
+    </div>
+    <div style="border-top:1px solid var(--line);padding-top:20px">
+    <div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
+		renderInstallInline() + `
+    </div>
+  </div>
+</div>
+<script>
+fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
+  const boot = document.getElementById('boot-source-text');
+  const txt = document.getElementById('ram-status-text');
+  const btn = document.getElementById('ram-install-btn');
+  let source = d.device || d.source || 'unknown source';
+  let kind = d.kind || 'unknown';
+  let label = source;
+  if (kind === 'ram') label = 'RAM';
+  else if (kind === 'usb') label = 'USB (' + source + ')';
+  else if (kind === 'cdrom') label = 'CD-ROM (' + source + ')';
+  else if (kind === 'disk') label = 'disk (' + source + ')';
+  else label = source;
+  boot.textContent = 'Current boot source: ' + label + '.';
+  txt.textContent = d.message || 'Checking...';
+  if (d.status === 'ok' || d.in_ram) {
+    txt.style.color = 'var(--ok, green)';
+  } else if (d.status === 'failed') {
+    txt.style.color = 'var(--err, #b91c1c)';
+  } else {
+    txt.style.color = 'var(--muted)';
+  }
+  if (d.can_start_task) {
+    btn.style.display = '';
+    btn.disabled = false;
+  } else {
+    btn.style.display = 'none';
+  }
+});
+function installToRAM() {
+  document.getElementById('ram-install-btn').disabled = true;
+  fetch('/api/system/install-to-ram', {method:'POST'}).then(r=>r.json()).then(d=>{
+    window.location.href = '/tasks#' + d.task_id;
+  });
+}
+</script>
+
+<div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
+<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
+` + renderSupportBundleInline() + `
+<div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
+  <div style="font-weight:600;margin-bottom:8px">Export to USB</div>
+  ` + renderUSBExportInline() + `
+</div>
+</div></div>
+
+<div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
+<div class="card-body"><div id="tools-table"><p style="color:var(--muted);font-size:13px">Checking...</p></div></div></div>
+
+<div class="card"><div class="card-head">NVIDIA Self Heal</div><div class="card-body">` +
+		renderNvidiaSelfHealInline() + `</div></div>
+
+<div class="card"><div class="card-head">Network</div><div class="card-body">` +
+		renderNetworkInline() + `</div></div>
+
+<div class="card"><div class="card-head">Services</div><div class="card-body">` +
+		renderServicesInline() + `</div></div>
+
+
+<script>
+function checkTools() {
+  document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
+  fetch('/api/tools/check').then(r=>r.json()).then(tools => {
+    const rows = tools.map(t =>
+      '<tr><td>'+t.Name+'</td><td><span class="badge '+(t.OK ? 'badge-ok' : 'badge-err')+'">'+(t.OK ? '&#10003; '+t.Path : '&#10007; missing')+'</span></td></tr>'
+    ).join('');
+    document.getElementById('tools-table').innerHTML =
+      '<table><tr><th>Tool</th><th>Status</th></tr>'+rows+'</table>';
+  });
+}
+checkTools();
+</script>`
+}
+
+// renderExportIndex returns a minimal standalone HTML document listing every
+// file under exportDir as a link to /export/file?path=<relative path>. When
+// the directory holds no files the list contains a single placeholder item.
+// An error from listing the directory is returned unchanged with an empty page.
+func renderExportIndex(exportDir string) (string, error) {
+	files, err := listExportFiles(exportDir)
+	if err != nil {
+		return "", err
+	}
+
+	var page strings.Builder
+	page.WriteString(`<!DOCTYPE html><html><head><meta charset="utf-8"><title>Bee Export Files</title></head><body><h1>Bee Export Files</h1><ul>`)
+	if len(files) == 0 {
+		page.WriteString(`<li>No export files found.</li>`)
+	}
+	for _, name := range files {
+		// The path is query-escaped for the href and HTML-escaped for display.
+		fmt.Fprintf(&page, `<li><a href="/export/file?path=%s">%s</a></li>`, url.QueryEscape(name), html.EscapeString(name))
+	}
+	page.WriteString(`</ul></body></html>`)
+	return page.String(), nil
+}
--- a/audit/internal/webui/page_install_tasks.go
+++ b/audit/internal/webui/page_install_tasks.go
@@ -0,0 +1,314 @@
+package webui
+
+// renderInstallInline returns the "install to disk" widget: a data-loss
+// warning, a selectable disk table, a confirmation section, a progress
+// section with a terminal, plus the styles and inline script that drive them.
+//
+// The script loads candidate disks from /api/install/disks and shows each
+// disk's warnings as badges. Selecting a row reveals the confirmation section;
+// the "Install to Disk" button is enabled only while the text typed into the
+// confirmation input equals the selected device name. Starting the install
+// POSTs {device} to /api/install/run and then streams /api/tasks/<id>/stream
+// into the terminal. A "done" event with empty data is treated as success and
+// offers a "Reboot now" button that POSTs {name:'', action:'reboot'} to
+// /api/services/action; non-empty data is shown as the failure reason.
+//
+// The element IDs and global function names are fixed, so the fragment should
+// appear once per page.
+//
+// NOTE(review): device, model, size and warning text are concatenated into
+// innerHTML without escaping — confirm the API only returns trusted values.
+func renderInstallInline() string {
+	return `
+    <div class="alert alert-warn" style="margin-bottom:16px">
+      <strong>Warning:</strong> Installing will <strong>completely erase</strong> the selected
+      disk and write the live system onto it. All existing data on the target disk will be lost.
+      This operation cannot be undone.
+    </div>
+    <div id="install-loading" style="color:var(--muted);font-size:13px">Loading disk list…</div>
+    <div id="install-disk-section" style="display:none">
+      <div class="card" style="margin-bottom:0">
+        <table id="install-disk-table">
+          <thead><tr><th></th><th>Device</th><th>Model</th><th>Size</th><th>Status</th></tr></thead>
+          <tbody id="install-disk-tbody"></tbody>
+        </table>
+      </div>
+      <div style="margin-top:12px">
+        <button class="btn btn-secondary btn-sm" onclick="installRefreshDisks()">↻ Refresh</button>
+      </div>
+    </div>
+    <div id="install-confirm-section" style="display:none;margin-top:20px">
+      <div id="install-confirm-warn" class="alert" style="background:#fff6f6;border:1px solid #e0b4b4;color:#9f3a38;font-size:13px"></div>
+      <div class="form-row" style="max-width:360px">
+        <label>Type the device name to confirm (e.g. /dev/sda)</label>
+        <input type="text" id="install-confirm-input" placeholder="/dev/..." oninput="installCheckConfirm()" autocomplete="off" spellcheck="false">
+      </div>
+      <button class="btn btn-danger" id="install-start-btn" disabled onclick="installStart()">Install to Disk</button>
+      <button class="btn btn-secondary" style="margin-left:8px" onclick="installDeselect()">Cancel</button>
+    </div>
+    <div id="install-progress-section" style="display:none;margin-top:20px">
+      <div class="card-head" style="margin-bottom:8px">Installation Progress</div>
+      <div id="install-terminal" class="terminal" style="max-height:500px"></div>
+      <div id="install-status" style="margin-top:12px;font-size:13px"></div>
+    </div>
+
+<style>
+#install-disk-tbody tr{cursor:pointer}
+#install-disk-tbody tr.selected td{background:rgba(33,133,208,.1)}
+#install-disk-tbody tr:hover td{background:rgba(33,133,208,.07)}
+</style>
+
+<script>
+var _installSelected = null;
+
+function installRefreshDisks() {
+  document.getElementById('install-loading').style.display = '';
+  document.getElementById('install-disk-section').style.display = 'none';
+  document.getElementById('install-confirm-section').style.display = 'none';
+  _installSelected = null;
+  fetch('/api/install/disks').then(function(r){ return r.json(); }).then(function(disks){
+    document.getElementById('install-loading').style.display = 'none';
+    var tbody = document.getElementById('install-disk-tbody');
+    tbody.innerHTML = '';
+    if (!disks || disks.length === 0) {
+      tbody.innerHTML = '<tr><td colspan="5" style="color:var(--muted);text-align:center">No installable disks found</td></tr>';
+    } else {
+      disks.forEach(function(d) {
+        var warnings = (d.warnings || []);
+        var statusHtml;
+        if (warnings.length === 0) {
+          statusHtml = '<span class="badge badge-ok">OK</span>';
+        } else {
+          var hasSmall = warnings.some(function(w){ return w.indexOf('too small') >= 0; });
+          statusHtml = warnings.map(function(w){
+            var cls = hasSmall ? 'badge-err' : 'badge-warn';
+            return '<span class="badge ' + cls + '" title="' + w.replace(/"/g,'&quot;') + '">' +
+              (w.length > 40 ? w.substring(0,38)+'…' : w) + '</span>';
+          }).join(' ');
+        }
+        var mountedNote = (d.mounted_parts && d.mounted_parts.length > 0)
+          ? ' <span style="color:var(--warn-fg);font-size:11px">(mounted)</span>' : '';
+        var tr = document.createElement('tr');
+        tr.dataset.device = d.device;
+        tr.dataset.model = d.model || 'Unknown';
+        tr.dataset.size = d.size;
+        tr.dataset.warnings = JSON.stringify(warnings);
+        tr.innerHTML =
+          '<td><input type="radio" name="install-disk" value="' + d.device + '"></td>' +
+          '<td><code>' + d.device + '</code>' + mountedNote + '</td>' +
+          '<td>' + (d.model || '—') + '</td>' +
+          '<td>' + d.size + '</td>' +
+          '<td>' + statusHtml + '</td>';
+        tr.addEventListener('click', function(){ installSelectDisk(this); });
+        tbody.appendChild(tr);
+      });
+    }
+    document.getElementById('install-disk-section').style.display = '';
+  }).catch(function(e){
+    document.getElementById('install-loading').textContent = 'Failed to load disk list: ' + e;
+  });
+}
+
+function installSelectDisk(tr) {
+  document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
+  tr.classList.add('selected');
+  var radio = tr.querySelector('input[type=radio]');
+  if (radio) radio.checked = true;
+  _installSelected = {
+    device: tr.dataset.device,
+    model: tr.dataset.model,
+    size: tr.dataset.size,
+    warnings: JSON.parse(tr.dataset.warnings || '[]')
+  };
+  var warnBox = document.getElementById('install-confirm-warn');
+  var warnLines = '<strong>⚠ DANGER:</strong> ' + _installSelected.device +
+    ' (' + _installSelected.model + ', ' + _installSelected.size + ')' +
+    ' will be <strong>completely erased</strong> and repartitioned. All data will be lost.<br>';
+  if (_installSelected.warnings.length > 0) {
+    warnLines += '<br>' + _installSelected.warnings.map(function(w){ return '• ' + w; }).join('<br>');
+  }
+  warnBox.innerHTML = warnLines;
+  document.getElementById('install-confirm-input').value = '';
+  document.getElementById('install-start-btn').disabled = true;
+  document.getElementById('install-confirm-section').style.display = '';
+  document.getElementById('install-progress-section').style.display = 'none';
+}
+
+function installDeselect() {
+  _installSelected = null;
+  document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
+  document.querySelectorAll('#install-disk-tbody input[type=radio]').forEach(function(r){ r.checked = false; });
+  document.getElementById('install-confirm-section').style.display = 'none';
+}
+
+function installCheckConfirm() {
+  var val = document.getElementById('install-confirm-input').value.trim();
+  var ok = _installSelected && val === _installSelected.device;
+  document.getElementById('install-start-btn').disabled = !ok;
+}
+
+function installStart() {
+  if (!_installSelected) return;
+  document.getElementById('install-confirm-section').style.display = 'none';
+  document.getElementById('install-disk-section').style.display = 'none';
+  document.getElementById('install-loading').style.display = 'none';
+  var prog = document.getElementById('install-progress-section');
+  var term = document.getElementById('install-terminal');
+  var status = document.getElementById('install-status');
+  prog.style.display = '';
+  term.textContent = '';
+  status.textContent = 'Starting installation…';
+  status.style.color = 'var(--muted)';
+
+  fetch('/api/install/run', {
+    method: 'POST',
+    headers: {'Content-Type': 'application/json'},
+    body: JSON.stringify({device: _installSelected.device})
+  }).then(function(r){
+    return r.json().then(function(j){
+      if (!r.ok) throw new Error(j.error || r.statusText);
+      return j;
+    });
+  }).then(function(j){
+    if (!j.task_id) throw new Error('missing task id');
+    installStreamLog(j.task_id);
+  }).catch(function(e){
+    status.textContent = 'Error: ' + e;
+    status.style.color = 'var(--crit-fg)';
+  });
+}
+
+function installStreamLog(taskId) {
+  var term = document.getElementById('install-terminal');
+  var status = document.getElementById('install-status');
+  var es = new EventSource('/api/tasks/' + taskId + '/stream');
+  es.onmessage = function(e) {
+    term.textContent += e.data + '\n';
+    term.scrollTop = term.scrollHeight;
+  };
+  es.addEventListener('done', function(e) {
+    es.close();
+    if (!e.data) {
+      status.innerHTML = '<span style="color:var(--ok-fg);font-weight:700">✓ Installation complete.</span> Remove the ISO and reboot.';
+      var rebootBtn = document.createElement('button');
+      rebootBtn.className = 'btn btn-primary btn-sm';
+      rebootBtn.style.marginLeft = '12px';
+      rebootBtn.textContent = 'Reboot now';
+      rebootBtn.onclick = function(){
+        fetch('/api/services/action', {method:'POST',headers:{'Content-Type':'application/json'},
+          body: JSON.stringify({name:'', action:'reboot'})});
+      };
+      status.appendChild(rebootBtn);
+    } else {
+      status.textContent = '✗ Installation failed: ' + e.data;
+      status.style.color = 'var(--crit-fg)';
+    }
+  });
+  es.onerror = function() {
+    es.close();
+    status.textContent = '✗ Stream disconnected.';
+    status.style.color = 'var(--crit-fg)';
+  };
+}
+
+installRefreshDisks();
+</script>
+`
+}
+
+// renderInstall returns the standalone install page body: the disk installer
+// widget wrapped in a single "Install Live System to Disk" card.
+func renderInstall() string {
+	const cardOpen = `<div class="card"><div class="card-head">Install Live System to Disk</div><div class="card-body">`
+	const cardClose = `</div></div>`
+	return cardOpen + renderInstallInline() + cardClose
+}
+
+// renderTasks returns the body of the task list page: "Cancel All" and "Kill
+// Workers" buttons, a toast line for the kill result, the task table
+// container, and the inline script behind them.
+//
+// The script loads /api/tasks immediately and then every 2000 ms, keeping the
+// interval handle in _taskRefreshTimer. The table is paged in the browser at
+// 50 tasks per page. Running and pending tasks get a Cancel button
+// (POST /api/tasks/<id>/cancel); pending tasks also get priority up/down
+// buttons (POST /api/tasks/<id>/priority with {delta}). "Cancel All" POSTs to
+// /api/tasks/cancel-all. "Kill Workers" asks for confirmation, POSTs to
+// /api/tasks/kill-workers, and shows the cancelled/killed counts for five
+// seconds.
+//
+// NOTE(review): only the task name goes through escHtml; task IDs are placed
+// into onclick attributes as-is — confirm IDs cannot contain quotes.
+func renderTasks() string {
+	return `<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">
+<button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
+<button class="btn btn-sm" style="background:#b45309;color:#fff" onclick="killWorkers()" title="Send SIGKILL to all running test processes (bee-gpu-burn, stress-ng, stressapptest, memtester)">Kill Workers</button>
+<span id="kill-toast" style="font-size:12px;color:var(--muted);display:none"></span>
+<span style="font-size:12px;color:var(--muted)">Open a task to view its saved logs and charts.</span>
+</div>
+<div class="card">
+<div id="tasks-table"><p style="color:var(--muted);font-size:13px;padding:16px">Loading...</p></div>
+</div>
+<script>
+var _taskRefreshTimer = null;
+var _tasksAll = [];
+var _taskPage = 1;
+var _taskPageSize = 50;
+
+function loadTasks() {
+  fetch('/api/tasks').then(r=>r.json()).then(tasks => {
+    _tasksAll = Array.isArray(tasks) ? tasks : [];
+    if (_tasksAll.length === 0) {
+      _taskPage = 1;
+      document.getElementById('tasks-table').innerHTML = '<p style="color:var(--muted);font-size:13px;padding:16px">No tasks.</p>';
+      return;
+    }
+    const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
+    if (_taskPage > totalPages) _taskPage = totalPages;
+    if (_taskPage < 1) _taskPage = 1;
+    const start = (_taskPage - 1) * _taskPageSize;
+    const pageTasks = _tasksAll.slice(start, start + _taskPageSize);
+    const rows = pageTasks.map(t => {
+      const dur = t.elapsed_sec ? formatDurSec(t.elapsed_sec) : '';
+      const statusClass = {running:'badge-ok',pending:'badge-unknown',done:'badge-ok',failed:'badge-err',cancelled:'badge-unknown'}[t.status]||'badge-unknown';
+      const statusLabel = {running:'&#9654; running',pending:'pending',done:'&#10003; done',failed:'&#10007; failed',cancelled:'cancelled'}[t.status]||t.status;
+      let actions = '<a class="btn btn-sm btn-secondary" href="/tasks/'+encodeURIComponent(t.id)+'">Open</a>';
+      if (t.status === 'running' || t.status === 'pending') {
+        actions += ' <button class="btn btn-sm btn-danger" onclick="cancelTask(\''+t.id+'\')">Cancel</button>';
+      }
+      if (t.status === 'pending') {
+        actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority(\''+t.id+'\',1)" title="Increase priority">&#8679;</button>';
+        actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority(\''+t.id+'\',-1)" title="Decrease priority">&#8681;</button>';
+      }
+      return '<tr><td><a href="/tasks/'+encodeURIComponent(t.id)+'">'+escHtml(t.name)+'</a></td>' +
+        '<td><span class="badge '+statusClass+'">'+statusLabel+'</span></td>' +
+        '<td style="font-size:12px;color:var(--muted)">'+fmtTime(t.created_at)+'</td>' +
+        '<td style="font-size:12px;color:var(--muted)">'+dur+'</td>' +
+        '<td>'+t.priority+'</td>' +
+        '<td>'+actions+'</td></tr>';
+    }).join('');
+    const showingFrom = start + 1;
+    const showingTo = Math.min(start + pageTasks.length, _tasksAll.length);
+    const pager =
+      '<div style="display:flex;align-items:center;justify-content:space-between;gap:12px;flex-wrap:wrap;padding:12px 14px;border-top:1px solid var(--border-lite);background:var(--surface-2)">' +
+        '<div style="font-size:12px;color:var(--muted)">Showing '+showingFrom+'-'+showingTo+' of '+_tasksAll.length+' tasks</div>' +
+        '<div style="display:flex;align-items:center;gap:8px">' +
+          '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage-1)+')" '+(_taskPage <= 1 ? 'disabled' : '')+'>Previous</button>' +
+          '<span style="font-size:12px;color:var(--muted)">Page '+_taskPage+' / '+totalPages+'</span>' +
+          '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage+1)+')" '+(_taskPage >= totalPages ? 'disabled' : '')+'>Next</button>' +
+        '</div>' +
+      '</div>';
+    document.getElementById('tasks-table').innerHTML =
+      '<table><tr><th>Name</th><th>Status</th><th>Created</th><th>Duration</th><th>Priority</th><th>Actions</th></tr>'+rows+'</table>' + pager;
+  });
+}
+
+function escHtml(s) { return (s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;'); }
+function fmtTime(s) { if (!s) return ''; try { return new Date(s).toLocaleTimeString(); } catch(e){ return s; } }
+function formatDurSec(sec) {
+  sec = Math.max(0, Math.round(sec||0));
+  if (sec < 60) return sec+'s';
+  const m = Math.floor(sec/60), ss = sec%60;
+  return m+'m '+ss+'s';
+}
+function setTaskPage(page) {
+  const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
+  _taskPage = Math.min(totalPages, Math.max(1, page));
+  loadTasks();
+}
+
+function cancelTask(id) {
+  fetch('/api/tasks/'+id+'/cancel',{method:'POST'}).then(()=>loadTasks());
+}
+function cancelAll() {
+  fetch('/api/tasks/cancel-all',{method:'POST'}).then(()=>loadTasks());
+}
+function killWorkers() {
+  if (!confirm('Send SIGKILL to all running test workers (bee-gpu-burn, stress-ng, stressapptest, memtester)?\n\nThis will also cancel all queued and running tasks.')) return;
+  fetch('/api/tasks/kill-workers',{method:'POST'})
+    .then(r=>r.json())
+    .then(d=>{
+      loadTasks();
+      var toast = document.getElementById('kill-toast');
+      var parts = [];
+      if (d.cancelled > 0) parts.push(d.cancelled+' task'+(d.cancelled===1?'':'s')+' cancelled');
+      if (d.killed > 0) parts.push(d.killed+' process'+(d.killed===1?'':'es')+' killed');
+      toast.textContent = parts.length ? parts.join(', ')+'.' : 'No processes found.';
+      toast.style.display = '';
+      setTimeout(()=>{ toast.style.display='none'; }, 5000);
+    });
+}
+function setPriority(id, delta) {
+  fetch('/api/tasks/'+id+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
+    .then(()=>loadTasks());
+}
+
+loadTasks();
+_taskRefreshTimer = setInterval(loadTasks, 2000);
+</script>`
+}
--- a/audit/internal/webui/page_metrics.go
+++ b/audit/internal/webui/page_metrics.go
@@ -0,0 +1,238 @@
+package webui
+
+func renderMetrics() string {
+	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds.</p>
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">Server — Load</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-server-load" data-chart-refresh="1" src="/api/metrics/chart/server-load.svg" style="width:100%;display:block;border-radius:6px" alt="CPU/Mem load">
+  </div>
+</div>
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">Temperature — CPU</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-server-temp-cpu" data-chart-refresh="1" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
+  </div>
+</div>
+
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">Temperature — Ambient Sensors</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-server-temp-ambient" data-chart-refresh="1" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
+  </div>
+</div>
+
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">Server — Power</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-server-power" data-chart-refresh="1" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
+  </div>
+</div>
+
+<div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
+  <div class="card-head">Server — Fan RPM</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-server-fans" data-chart-refresh="1" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
+  </div>
+</div>
+
+<section id="gpu-metrics-section" style="display:none;margin-top:24px;padding:16px 16px 4px;border:1px solid #d7e0ea;border-radius:10px;background:linear-gradient(180deg,#f7fafc 0%,#eef4f8 100%)">
+  <div style="display:flex;align-items:center;justify-content:space-between;gap:16px;flex-wrap:wrap;margin-bottom:14px">
+    <div>
+      <div style="font-size:12px;font-weight:700;letter-spacing:.08em;text-transform:uppercase;color:#486581">GPU Metrics</div>
+      <div id="gpu-metrics-summary" style="font-size:13px;color:var(--muted);margin-top:4px">Detected GPUs are rendered in a dedicated section.</div>
+    </div>
+    <label style="display:inline-flex;align-items:center;gap:8px;font-size:13px;color:var(--ink);font-weight:700;cursor:pointer">
+      <input id="gpu-chart-toggle" type="checkbox">
+      <span>One chart per GPU</span>
+    </label>
+  </div>
+
+  <div id="gpu-metrics-by-metric">
+    <div class="card" style="margin-bottom:16px">
+      <div class="card-head">GPU — Compute Load</div>
+      <div class="card-body" style="padding:8px">
+        <img id="chart-gpu-all-load" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
+      </div>
+    </div>
+    <div class="card" style="margin-bottom:16px">
+      <div class="card-head">GPU — Memory Load</div>
+      <div class="card-body" style="padding:8px">
+        <img id="chart-gpu-all-memload" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
+      </div>
+    </div>
+    <div class="card" style="margin-bottom:16px">
+      <div class="card-head">GPU — Core Clock</div>
+      <div class="card-body" style="padding:8px">
+        <img id="chart-gpu-all-clock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-clock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU core clock">
+      </div>
+    </div>
+    <div class="card" style="margin-bottom:16px">
+      <div class="card-head">GPU — Power</div>
+      <div class="card-body" style="padding:8px">
+        <img id="chart-gpu-all-power" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
+      </div>
+    </div>
+    <div class="card" style="margin-bottom:16px">
+      <div class="card-head">GPU — Temperature</div>
+      <div class="card-body" style="padding:8px">
+        <img id="chart-gpu-all-temp" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
+      </div>
+    </div>
+  </div>
+
+  <div id="gpu-metrics-by-gpu" style="display:none"></div>
+</section>
+
+<script>
+let gpuChartKey = ''; // signature (GPU indices + names) of the per-GPU cards last rendered by syncMetricsLayout
+const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode'; // sessionStorage key holding 'per-gpu' or 'per-metric'
+let metricsNvidiaGPUsPromise = null; // memoized /api/gpu/nvidia request, see loadMetricsNvidiaGPUs
+
+function loadMetricsNvidiaGPUs() {
+  if (!metricsNvidiaGPUsPromise) {
+    metricsNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
+      .then(function(r) {
+        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
+        return r.json();
+      })
+      .then(function(list) { return Array.isArray(list) ? list : []; })
+      .catch(function() { return []; });
+  }
+  return metricsNvidiaGPUsPromise;
+}
+
+function metricsGPUNameMap(list) {
+  const out = {};
+  (list || []).forEach(function(gpu) {
+    const idx = Number(gpu.index);
+    if (!Number.isFinite(idx) || !gpu.name) return;
+    out[idx] = gpu.name;
+  });
+  return out;
+}
+
+function metricsGPUDisplayLabel(idx, names) {
+  const name = names && names[idx];
+  return name ? ('GPU ' + idx + ' — ' + name) : ('GPU ' + idx);
+}
+
+function loadGPUChartModePreference() {
+  try {
+    return sessionStorage.getItem(gpuChartModeStorageKey) === 'per-gpu';
+  } catch (_) {
+    return false;
+  }
+}
+
+function saveGPUChartModePreference(perGPU) {
+  try {
+    sessionStorage.setItem(gpuChartModeStorageKey, perGPU ? 'per-gpu' : 'per-metric');
+  } catch (_) {}
+}
+
+function refreshChartImage(el) {
+  if (!el || el.dataset.loading === '1') return;
+  if (el.offsetParent === null) return;
+  const baseSrc = el.dataset.baseSrc || el.src.split('?')[0];
+  const nextSrc = baseSrc + '?t=' + Date.now();
+  const probe = new Image();
+  el.dataset.baseSrc = baseSrc;
+  el.dataset.loading = '1';
+  probe.onload = function() {
+    el.src = nextSrc;
+    el.dataset.loading = '0';
+  };
+  probe.onerror = function() {
+    el.dataset.loading = '0';
+  };
+  probe.src = nextSrc;
+}
+
+function refreshCharts() {
+  document.querySelectorAll('img[data-chart-refresh="1"]').forEach(refreshChartImage);
+}
+
+function gpuIndices(rows) {
+  const seen = {};
+  const out = [];
+  (rows || []).forEach(function(row) {
+    const idx = Number(row.index);
+    if (!Number.isFinite(idx) || seen[idx]) return;
+    seen[idx] = true;
+    out.push(idx);
+  });
+  return out.sort(function(a, b) { return a - b; });
+}
+
+function renderGPUOverviewCards(indices, names) {
+  const host = document.getElementById('gpu-metrics-by-gpu');
+  if (!host) return;
+  host.innerHTML = indices.map(function(idx) {
+    const label = metricsGPUDisplayLabel(idx, names);
+    return '<div class="card" style="margin-bottom:16px">' +
+      '<div class="card-head">' + label + ' — Overview</div>' +
+      '<div class="card-body" style="padding:8px">' +
+      '<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="' + label + ' overview">' +
+      '</div></div>';
+  }).join('');
+}
+
+function applyGPUChartMode() {
+  const perMetric = document.getElementById('gpu-metrics-by-metric');
+  const perGPU = document.getElementById('gpu-metrics-by-gpu');
+  const toggle = document.getElementById('gpu-chart-toggle');
+  const gpuModePerGPU = !!(toggle && toggle.checked);
+  if (perMetric) perMetric.style.display = gpuModePerGPU ? 'none' : '';
+  if (perGPU) perGPU.style.display = gpuModePerGPU ? '' : 'none';
+}
+
+function syncMetricsLayout(d) {
+  const fanCard = document.getElementById('card-server-fans');
+  if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
+  const section = document.getElementById('gpu-metrics-section');
+  const summary = document.getElementById('gpu-metrics-summary');
+  const indices = gpuIndices(d.gpus);
+  loadMetricsNvidiaGPUs().then(function(gpus) {
+    const names = metricsGPUNameMap(gpus);
+    if (section) section.style.display = indices.length > 0 ? '' : 'none';
+    if (summary) {
+      summary.textContent = indices.length > 0
+        ? ('Detected GPUs: ' + indices.map(function(idx) { return metricsGPUDisplayLabel(idx, names); }).join(', '))
+        : 'No GPUs detected in live metrics.';
+    }
+    const nextKey = indices.join(',') + '|' + indices.map(function(idx) { return names[idx] || ''; }).join(',');
+    if (nextKey !== gpuChartKey) {
+      renderGPUOverviewCards(indices, names);
+      gpuChartKey = nextKey;
+    }
+    applyGPUChartMode();
+  });
+}
+
+function loadMetricsLayout() {
+  fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
+}
+
+const gpuChartToggle = document.getElementById('gpu-chart-toggle');
+if (gpuChartToggle) {
+  gpuChartToggle.checked = loadGPUChartModePreference(); // restore the mode stored for this session
+}
+applyGPUChartMode(); // show the chart container that matches the toggle
+
+if (gpuChartToggle) {
+  gpuChartToggle.addEventListener('change', function() {
+    saveGPUChartModePreference(!!gpuChartToggle.checked);
+    applyGPUChartMode();
+    refreshCharts(); // charts without an offsetParent are skipped by refreshChartImage, so refresh the ones just revealed
+  });
+}
+
+loadMetricsLayout();
+setInterval(refreshCharts, 3000); // re-fetch chart images every 3 s; NOTE(review): the page intro says "updated every 2 seconds" — confirm which is intended
+setInterval(loadMetricsLayout, 5000); // re-sync the fan/GPU layout every 5 s
+</script>`
+}
--- a/audit/internal/webui/page_network_services.go
+++ b/audit/internal/webui/page_network_services.go
@@ -0,0 +1,213 @@
+package webui
+
+import "html"
+
+// renderNetworkInline returns the network UI without a wrapping card (for embedding in Tools).
+func renderNetworkInline() string {
+	return `<div id="net-pending" style="display:none" class="alert alert-warn">
+<strong>&#9888; Network change applied.</strong> Reverting in <span id="net-countdown">60</span>s unless confirmed.
+<button class="btn btn-primary btn-sm" style="margin-left:8px" onclick="confirmNetChange()">Confirm</button>
+<button class="btn btn-secondary btn-sm" style="margin-left:4px" onclick="rollbackNetChange()">Rollback</button>
+</div>
+<div id="iface-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
+<div class="grid2" style="margin-top:16px">
+<div><div style="font-weight:700;font-size:13px;margin-bottom:8px">DHCP</div>
+<div class="form-row"><label>Interface (leave empty for all)</label><input type="text" id="dhcp-iface" placeholder="eth0"></div>
+<button class="btn btn-primary" onclick="runDHCP()">&#9654; Run DHCP</button>
+<div id="dhcp-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
+</div>
+<div><div style="font-weight:700;font-size:13px;margin-bottom:8px">Static IPv4</div>
+<div class="form-row"><label>Interface</label><input type="text" id="st-iface" placeholder="eth0"></div>
+<div class="form-row"><label>Address</label><input type="text" id="st-addr" placeholder="192.168.1.100"></div>
+<div class="form-row"><label>Prefix length</label><input type="text" id="st-prefix" placeholder="24"></div>
+<div class="form-row"><label>Gateway</label><input type="text" id="st-gw" placeholder="192.168.1.1"></div>
+<div class="form-row"><label>DNS (comma-separated)</label><input type="text" id="st-dns" placeholder="8.8.8.8,8.8.4.4"></div>
+<button class="btn btn-primary" onclick="setStatic()">Apply Static IP</button>
+<div id="static-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
+</div>
+</div>
+<script>
+var _netCountdownTimer = null; // interval handle of the rollback countdown, see showNetPending/hideNetPending
+var _netRefreshTimer = null; // interval handle of the periodic loadNetwork poll
+const NET_ROLLBACK_SECS = 60; // countdown length used when the server reply carries no rollback_in
+function loadNetwork() {
+  fetch('/api/network').then(r=>r.json()).then(d => {
+    const rows = (d.interfaces||[]).map(i =>
+      '<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
+      '<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
+      '<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
+    ).join('');
+    document.getElementById('iface-table').innerHTML =
+      '<table><tr><th>Interface</th><th>State (click to toggle)</th><th>Addresses</th></tr>'+rows+'</table>' +
+      (d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
+    if (d.pending_change) showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
+    else hideNetPending();
+  }).catch(function() {});
+}
+function selectIface(iface) {
+  document.getElementById('dhcp-iface').value = iface;
+  document.getElementById('st-iface').value = iface;
+}
+function toggleIface(iface, currentState) {
+  showNetPending(NET_ROLLBACK_SECS);
+  fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
+    .then(r=>r.json()).then(d => {
+      if (d.error) { hideNetPending(); alert('Error: '+d.error); return; }
+      loadNetwork();
+      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
+    }).catch(function() {
+      setTimeout(loadNetwork, 1500);
+    });
+}
+function hideNetPending() {
+  const el = document.getElementById('net-pending');
+  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
+  _netCountdownTimer = null;
+  el.style.display = 'none';
+}
+function showNetPending(secs) {
+  if (!secs || secs < 1) { hideNetPending(); return; }
+  const el = document.getElementById('net-pending');
+  el.style.display = 'block';
+  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
+  let remaining = secs;
+  document.getElementById('net-countdown').textContent = remaining;
+  _netCountdownTimer = setInterval(function() {
+    remaining--;
+    document.getElementById('net-countdown').textContent = remaining;
+    if (remaining <= 0) { hideNetPending(); loadNetwork(); }
+  }, 1000);
+}
+function confirmNetChange() {
+  hideNetPending();
+  fetch('/api/network/confirm',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
+}
+function rollbackNetChange() {
+  hideNetPending();
+  fetch('/api/network/rollback',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
+}
+function runDHCP() {
+  const iface = document.getElementById('dhcp-iface').value.trim();
+  showNetPending(NET_ROLLBACK_SECS);
+  fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
+    .then(r=>r.json()).then(d => {
+      document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
+      if (d.error) { hideNetPending(); return; }
+      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
+      loadNetwork();
+    }).catch(function() {
+      setTimeout(loadNetwork, 1500);
+    });
+}
+function setStatic() {
+  const dns = document.getElementById('st-dns').value.split(',').map(s=>s.trim()).filter(Boolean);
+  showNetPending(NET_ROLLBACK_SECS);
+  fetch('/api/network/static',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({
+    interface: document.getElementById('st-iface').value,
+    address: document.getElementById('st-addr').value,
+    prefix: document.getElementById('st-prefix').value,
+    gateway: document.getElementById('st-gw').value,
+    dns: dns,
+  })}).then(r=>r.json()).then(d => {
+    document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
+    if (d.error) { hideNetPending(); return; }
+    showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
+    loadNetwork();
+  }).catch(function() {
+    setTimeout(loadNetwork, 1500);
+  });
+}
+loadNetwork(); // initial render of the interface table
+if (_netRefreshTimer) clearInterval(_netRefreshTimer); // NOTE(review): the var declaration at the top of this script sets _netRefreshTimer to null on every run, so this guard looks like a no-op — confirm
+_netRefreshTimer = setInterval(loadNetwork, 5000); // re-poll /api/network every 5 s
+</script>`
+}
+
+func renderNetwork() string {
+	return `<div class="card"><div class="card-head">Network Interfaces</div><div class="card-body">` +
+		renderNetworkInline() +
+		`</div></div>`
+}
+
+func renderServicesInline() string {
+	return `<p style="font-size:13px;color:var(--muted);margin-bottom:10px">` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `</p>
+<div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="loadServices()">&#8635; Refresh</button></div>
+<div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
+<div id="svc-out" style="display:none;margin-top:12px">
+  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
+    <span id="svc-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
+    <span id="svc-out-status" style="font-size:12px"></span>
+  </div>
+  <div id="svc-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
+</div>
+<script>
+function loadServices() {
+  fetch('/api/services').then(r=>r.json()).then(svcs => {
+    const rows = svcs.map(s => {
+      const st = s.state||'unknown';
+      const badge = st==='active' ? 'badge-ok' : st==='failed' ? 'badge-err' : 'badge-warn';
+      const id = 'svc-body-'+s.name.replace(/[^a-z0-9]/g,'-');
+      const body = (s.body||'').replace(/</g,'&lt;').replace(/>/g,'&gt;');
+      return '<tr>' +
+        '<td style="white-space:nowrap">'+s.name+'</td>' +
+        '<td style="white-space:nowrap"><span class="badge '+badge+'" style="cursor:pointer" onclick="toggleBody(\''+id+'\')">'+st+' ▾</span>' +
+        '<div id="'+id+'" style="display:none;margin-top:6px"><pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;max-height:200px;overflow-y:auto;background:#1b1c1d;padding:8px;border-radius:4px;color:#b5cea8">'+body+'</pre></div>' +
+        '</td>' +
+        '<td style="white-space:nowrap">' +
+        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-start"   onclick="svcAction(this,\''+s.name+'\',\'start\')">Start</button> ' +
+        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-stop"    onclick="svcAction(this,\''+s.name+'\',\'stop\')">Stop</button> ' +
+        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-restart" onclick="svcAction(this,\''+s.name+'\',\'restart\')">Restart</button>' +
+        '</td></tr>';
+    }).join('');
+    document.getElementById('svc-table').innerHTML =
+      '<table><tr><th>Unit</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
+  });
+}
+function toggleBody(id) {
+  const el = document.getElementById(id);
+  if (el) el.style.display = el.style.display==='none' ? 'block' : 'none';
+}
+function svcAction(btn, name, action) {
+  var label = btn.textContent;
+  btn.disabled = true;
+  btn.textContent = '...';
+  var out = document.getElementById('svc-out');
+  var term = document.getElementById('svc-terminal');
+  var statusEl = document.getElementById('svc-out-status');
+  var labelEl = document.getElementById('svc-out-label');
+  out.style.display = 'block';
+  labelEl.textContent = action + ' ' + name;
+  term.textContent = 'Running...';
+  statusEl.textContent = '';
+  statusEl.style.color = '';
+  fetch('/api/services/action',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({name,action})})
+    .then(r=>r.json()).then(d => {
+      term.textContent = d.output || d.error || '(no output)';
+      term.scrollTop = term.scrollHeight;
+      if (d.status === 'ok') {
+        statusEl.textContent = '✓ done';
+        statusEl.style.color = 'var(--ok-fg, #2c662d)';
+      } else {
+        statusEl.textContent = '✗ failed';
+        statusEl.style.color = 'var(--crit-fg, #9f3a38)';
+      }
+      btn.textContent = label;
+      btn.disabled = false;
+      setTimeout(loadServices, 800);
+    }).catch(e => {
+      term.textContent = 'Request failed: ' + e;
+      statusEl.textContent = '✗ error';
+      statusEl.style.color = 'var(--crit-fg, #9f3a38)';
+      btn.textContent = label;
+      btn.disabled = false;
+    });
+}
+loadServices(); // initial render; later reloads come from the Refresh button and from svcAction
+</script>`
+}
+
+func renderServices() string {
+	return `<div class="card"><div class="card-head">Bee Services</div><div class="card-body">` +
+		renderServicesInline() +
+		`</div></div>`
+}
--- a/audit/internal/webui/page_validate.go
+++ b/audit/internal/webui/page_validate.go
@@ -0,0 +1,663 @@
+package webui
+
+import (
+	"encoding/json"
+	"fmt"
+	"html"
+	"sort"
+	"strings"
+
+	"bee/audit/internal/platform"
+	"bee/audit/internal/schema"
+)
+
+// validateInventory carries the per-component values that renderValidate
+// receives from loadValidateInventory and places on the Validate page.
+type validateInventory struct {
+	CPU            string // passed to the CPU card body
+	Memory         string // passed to the Memory card body
+	Storage        string // passed to the Storage card body
+	NVIDIA         string // passed to every NVIDIA card body and written unescaped into the GPU selection card
+	AMD            string // passed to the AMD GPU card body
+	NvidiaGPUCount int    // drives the duration estimates and the "(N GPU)" note
+	AMDGPUCount    int    // NOTE(review): presumably the AMD counterpart; no use is visible in renderValidate — confirm
+}
+
+// validateFmtDur formats an estimated duration for display: "~N s" below 120
+// seconds, otherwise "~M min" where M is (secs+29)/60 in integer arithmetic.
+func validateFmtDur(secs int) string {
+	switch {
+	case secs >= 120:
+		return fmt.Sprintf("~%d min", (secs+29)/60)
+	default:
+		return fmt.Sprintf("~%d s", secs)
+	}
+}
+
+// validateTotalValidateSec returns the estimated duration, in seconds, of a
+// full Validate-mode run. n is the NVIDIA GPU count; it only decides whether
+// the NVIDIA GPU validate estimate is included (n > 0) and does not otherwise
+// scale the result.
+// NOTE(review): the interconnect and bandwidth estimates are added even when
+// n is 0 — confirm that is intended.
+func validateTotalValidateSec(n int) int {
+	sum := platform.SATEstimatedCPUValidateSec
+	sum += platform.SATEstimatedMemoryValidateSec
+	sum += platform.SATEstimatedNvidiaInterconnectSec
+	sum += platform.SATEstimatedNvidiaBandwidthSec
+	if n > 0 {
+		sum += platform.SATEstimatedNvidiaGPUValidateSec
+	}
+	return sum
+}
+
+// validateTotalStressSec returns the estimated duration, in seconds, of a
+// full Stress-mode run. n is the NVIDIA GPU count; it only decides whether
+// the NVIDIA GPU stress, targeted-stress and targeted-power estimates are
+// included (n > 0) and does not otherwise scale the result.
+// NOTE(review): the pulse-test, interconnect and bandwidth estimates are
+// added even when n is 0 — confirm that is intended.
+func validateTotalStressSec(n int) int {
+	sum := platform.SATEstimatedCPUStressSec
+	sum += platform.SATEstimatedMemoryStressSec
+	sum += platform.SATEstimatedNvidiaPulseTestSec
+	sum += platform.SATEstimatedNvidiaInterconnectSec
+	sum += platform.SATEstimatedNvidiaBandwidthSec
+	if n > 0 {
+		sum += platform.SATEstimatedNvidiaGPUStressSec
+		sum += platform.SATEstimatedNvidiaTargetedStressSec
+		sum += platform.SATEstimatedNvidiaTargetedPowerSec
+	}
+	return sum
+}
+
+func renderValidate(opts HandlerOptions) string {
+	inv := loadValidateInventory(opts)
+	n := inv.NvidiaGPUCount
+	validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
+	stressTotalStr := validateFmtDur(validateTotalStressSec(n))
+	gpuNote := ""
+	if n > 0 {
+		gpuNote = fmt.Sprintf(" (%d GPU)", n)
+	}
+	return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>
+<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
+
+	<div class="card" style="margin-bottom:16px">
+	  <div class="card-head">Validate Profile</div>
+	  <div class="card-body validate-profile-body">
+	    <div class="validate-profile-col">
+	      <div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
+	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
+	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
+	    </div>
+	    <div class="validate-profile-col validate-profile-action">
+	      <p style="color:var(--muted);font-size:12px;margin:0 0 10px">Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.</p>
+	      <button type="button" class="btn btn-primary" onclick="runAllSAT()">Validate one by one</button>
+	      <div style="margin-top:12px">
+	        <span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
+	      </div>
+	    </div>
+	  </div>
+	</div>
+
+<div class="grid3">
+` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
+		inv.CPU,
+		`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
+		`<code>lscpu</code>, <code>sensors</code>, <code>stress-ng</code>`,
+		validateFmtDur(platform.SATEstimatedCPUValidateSec)+` in Validate (stress-ng 60 s). `+validateFmtDur(platform.SATEstimatedCPUStressSec)+` in Stress (stress-ng 30 min).`,
+	)) +
+		renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
+			inv.Memory,
+			`Runs a RAM validation pass and records memory state around the test.`,
+			`<code>free</code>, <code>memtester</code>`,
+			validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` in Validate (256 MB × 1 pass). `+validateFmtDur(platform.SATEstimatedMemoryStressSec)+` in Stress (512 MB × 1 pass).`,
+		)) +
+		renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
+			inv.Storage,
+			`Scans all storage devices and runs the matching health or self-test path for each device type.`,
+			`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
+			`Seconds in Validate (NVMe: instant device query; SATA/SAS: short self-test). Up to ~1 h per device in Stress (extended self-test, device-dependent).`,
+		)) +
+		`</div>
+<div style="height:1px;background:var(--border);margin:16px 0"></div>
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">NVIDIA GPU Selection</div>
+  <div class="card-body">
+    <p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
+    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.</p>
+    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
+      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
+      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
+    </div>
+    <div id="sat-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
+      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
+    </div>
+    <p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA validate tasks.</p>
+  </div>
+</div>
+
+<div class="grid3">
+` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
+		inv.NVIDIA,
+		`Runs NVIDIA diagnostics and board inventory checks.`,
+		`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
+		fmt.Sprintf("Validate: %s (Level 2, all GPUs simultaneously). Stress: %s (Level 3, all GPUs simultaneously).",
+			validateFmtDur(platform.SATEstimatedNvidiaGPUValidateSec),
+			validateFmtDur(platform.SATEstimatedNvidiaGPUStressSec)),
+	)) +
+		`<div id="sat-card-nvidia-targeted-stress">` +
+		renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
+			inv.NVIDIA,
+			`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
+			`<code>dcgmi diag targeted_stress</code>`,
+		"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedStressSec) + ` (all GPUs simultaneously).<p id="sat-ts-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
+		)) +
+		`</div>` +
+		`<div id="sat-card-nvidia-targeted-power">` +
+		renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
+			inv.NVIDIA,
+			`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
+			`<code>dcgmi diag targeted_power</code>`,
+		"Skipped in Validate. Stress: " + validateFmtDur(platform.SATEstimatedNvidiaTargetedPowerSec) + ` (all GPUs simultaneously).<p id="sat-tp-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
+		)) +
+		`</div>` +
+		`<div id="sat-card-nvidia-pulse">` +
+		renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
+			inv.NVIDIA,
+			`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
+			`<code>dcgmi diag pulse_test</code>`,
+			`Skipped in Validate. Stress: `+validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`+`<p id="sat-pt-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
+		)) +
+		`</div>` +
+		`<div id="sat-card-nvidia-interconnect">` +
+		renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
+			inv.NVIDIA,
+			`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
+			`<code>all_reduce_perf</code> (NCCL tests)`,
+			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
+		)) +
+		`</div>` +
+		`<div id="sat-card-nvidia-bandwidth">` +
+		renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
+			inv.NVIDIA,
+			`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
+			`<code>nvbandwidth</code>`,
+			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
+		)) +
+		`</div>` +
+		`</div>
+<div class="grid3" style="margin-top:16px">
+` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
+		inv.AMD,
+		`Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`,
+		`GPU Validate: <code>rocm-smi</code>, <code>dmidecode</code>; MEM Integrity: <code>rvs mem</code>; MEM Bandwidth: <code>rocm-bandwidth-test</code>, <code>rvs babel</code>`,
+		`<div style="display:flex;flex-direction:column;gap:4px"><label class="cb-row"><input type="checkbox" id="sat-amd-target" checked><span>GPU Validate</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-mem-target" checked><span>MEM Integrity</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-bandwidth-target" checked><span>MEM Bandwidth</span></label></div>`,
+	)) +
+		`</div>
+<div id="sat-output" style="display:none;margin-top:16px" class="card">
+  <div class="card-head">Test Output <span id="sat-title"></span></div>
+  <div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
+</div>
+<style>
+.validate-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
+.validate-profile-col { min-width:0; display:flex; flex-direction:column; }
+.validate-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:center; }
+.validate-card-body { padding:0; }
+.validate-card-section { padding:12px 16px 0; }
+.validate-card-section:last-child { padding-bottom:16px; }
+.sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
+.sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
+@media(max-width:900px){ .validate-profile-body { grid-template-columns:1fr; } }
+</style>
+<script>
+let satES = null;
+function satStressMode() {
+  return document.querySelector('input[name="sat-mode"]:checked')?.value === 'stress';
+}
+function satModeChanged() {
+  const stress = satStressMode();
+  [
+    {card: 'sat-card-nvidia-targeted-stress', hint: 'sat-ts-mode-hint'},
+    {card: 'sat-card-nvidia-targeted-power',  hint: 'sat-tp-mode-hint'},
+    {card: 'sat-card-nvidia-pulse',           hint: 'sat-pt-mode-hint'},
+  ].forEach(function(item) {
+    const card = document.getElementById(item.card);
+    if (card) {
+      card.style.opacity = stress ? '1' : '0.5';
+      const hint = document.getElementById(item.hint);
+      if (hint) hint.style.display = stress ? 'none' : '';
+    }
+  });
+}
+function satLabels() {
+  return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
+}
+let satNvidiaGPUsPromise = null;
+function loadSatNvidiaGPUs() {
+  if (!satNvidiaGPUsPromise) {
+    satNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
+      .then(r => {
+        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
+        return r.json();
+      })
+      .then(list => Array.isArray(list) ? list : []);
+  }
+  return satNvidiaGPUsPromise;
+}
+function satSelectedGPUIndices() {
+  return Array.from(document.querySelectorAll('.sat-nvidia-checkbox'))
+    .filter(function(el) { return el.checked && !el.disabled; })
+    .map(function(el) { return parseInt(el.value, 10); })
+    .filter(function(v) { return !Number.isNaN(v); })
+    .sort(function(a, b) { return a - b; });
+}
+function satUpdateGPUSelectionNote() {
+  const note = document.getElementById('sat-gpu-selection-note');
+  if (!note) return;
+  const selected = satSelectedGPUIndices();
+  if (!selected.length) {
+    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA validate tasks.';
+    return;
+  }
+  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '. Multi-GPU tests will use all selected GPUs.';
+}
+function satRenderGPUList(gpus) {
+  const root = document.getElementById('sat-gpu-list');
+  if (!root) return;
+  if (!gpus || !gpus.length) {
+    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
+    satUpdateGPUSelectionNote();
+    return;
+  }
+  root.innerHTML = gpus.map(function(gpu) {
+    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
+    return '<label class="sat-gpu-row">'
+      + '<input class="sat-nvidia-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="satUpdateGPUSelectionNote()">'
+      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
+      + '</label>';
+  }).join('');
+  satUpdateGPUSelectionNote();
+}
+function satSelectAllGPUs() {
+  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = true; });
+  satUpdateGPUSelectionNote();
+}
+function satSelectNoGPUs() {
+  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = false; });
+  satUpdateGPUSelectionNote();
+}
+function satLoadGPUs() {
+  loadSatNvidiaGPUs().then(function(gpus) {
+    satRenderGPUList(gpus);
+  }).catch(function(err) {
+    const root = document.getElementById('sat-gpu-list');
+    if (root) {
+      root.innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
+    }
+    satUpdateGPUSelectionNote();
+  });
+}
+function satGPUDisplayName(gpu) {
+  const idx = (gpu && Number.isFinite(Number(gpu.index))) ? Number(gpu.index) : 0;
+  const name = gpu && gpu.name ? gpu.name : ('GPU ' + idx);
+  return 'GPU ' + idx + ' — ' + name;
+}
+function satRequestBody(target, overrides) {
+  const body = {};
+  const labels = satLabels();
+  body.display_name = labels[target] || ('Validate ' + target);
+  body.stress_mode = satStressMode();
+  if (target === 'cpu') body.duration = satStressMode() ? 1800 : 60;
+  if (overrides) {
+    Object.keys(overrides).forEach(key => { body[key] = overrides[key]; });
+  }
+  return body;
+}
+function enqueueSATTarget(target, overrides) {
+  return fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(satRequestBody(target, overrides))})
+    .then(r => r.json());
+}
+function streamSATTask(taskId, title, resetTerminal) {
+  if (satES) { satES.close(); satES = null; }
+  document.getElementById('sat-output').style.display='block';
+  document.getElementById('sat-title').textContent = '— ' + title;
+  const term = document.getElementById('sat-terminal');
+  if (resetTerminal) {
+    term.textContent = '';
+  }
+  term.textContent += 'Task ' + taskId + ' queued. Streaming log...\n';
+  return new Promise(function(resolve) {
+    satES = new EventSource('/api/tasks/' + taskId + '/stream');
+    satES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
+    satES.addEventListener('done', function(e) {
+      satES.close();
+      satES = null;
+      term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
+      term.scrollTop = term.scrollHeight;
+      resolve({ok: !e.data, error: e.data || ''});
+    });
+    satES.onerror = function() {
+      if (satES) {
+        satES.close();
+        satES = null;
+      }
+      term.textContent += '\nERROR: stream disconnected.\n';
+      term.scrollTop = term.scrollHeight;
+      resolve({ok: false, error: 'stream disconnected'});
+    };
+  });
+}
+function selectedAMDValidateTargets() {
+  const targets = [];
+  const gpu = document.getElementById('sat-amd-target');
+  const mem = document.getElementById('sat-amd-mem-target');
+  const bw = document.getElementById('sat-amd-bandwidth-target');
+  if (gpu && gpu.checked && !gpu.disabled) targets.push('amd');
+  if (mem && mem.checked && !mem.disabled) targets.push('amd-mem');
+  if (bw && bw.checked && !bw.disabled) targets.push('amd-bandwidth');
+  return targets;
+}
+function runSAT(target) {
+  return runSATWithOverrides(target, null);
+}
+function runSATWithOverrides(target, overrides) {
+  const title = (overrides && overrides.display_name) || target;
+  const term = document.getElementById('sat-terminal');
+  document.getElementById('sat-output').style.display='block';
+  document.getElementById('sat-title').textContent = '— ' + title;
+  term.textContent = 'Enqueuing ' + title + ' test...\n';
+  return enqueueSATTarget(target, overrides)
+    .then(d => streamSATTask(d.task_id, title, false));
+}
+const nvidiaPerGPUTargets = [];
+const nvidiaAllGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'];
+function satAllGPUIndicesForMulti() {
+  return Promise.resolve(satSelectedGPUIndices());
+}
+function expandSATTarget(target) {
+  if (nvidiaAllGPUTargets.indexOf(target) >= 0) {
+    return satAllGPUIndicesForMulti().then(function(indices) {
+      if (!indices.length) return Promise.reject(new Error('No NVIDIA GPUs available.'));
+      return [{target: target, overrides: {gpu_indices: indices, display_name: satLabels()[target] || target}}];
+    });
+  }
+  if (nvidiaPerGPUTargets.indexOf(target) < 0) {
+    return Promise.resolve([{target: target}]);
+  }
+  const selected = satSelectedGPUIndices();
+  if (!selected.length) {
+    return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
+  }
+  return loadSatNvidiaGPUs().then(gpus => gpus.filter(gpu => selected.indexOf(Number(gpu.index)) >= 0).map(gpu => ({
+    target: target,
+    overrides: {
+      gpu_indices: [Number(gpu.index)],
+      display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(gpu) + ')'
+    },
+    label: satGPUDisplayName(gpu),
+  })));
+}
+function runNvidiaFabricValidate(target) {
+  satAllGPUIndicesForMulti().then(function(indices) {
+    if (!indices.length) { alert('No NVIDIA GPUs available.'); return; }
+    runSATWithOverrides(target, {gpu_indices: indices, display_name: satLabels()[target] || target});
+  });
+}
+function runNvidiaValidateSet(target) {
+  const selected = satSelectedGPUIndices();
+  if (!selected.length) { alert('Select at least one NVIDIA GPU.'); return; }
+  return runSATWithOverrides(target, {gpu_indices: selected, display_name: satLabels()[target] || target});
+}
+function runAMDValidateSet() {
+  const targets = selectedAMDValidateTargets();
+  if (!targets.length) return;
+  if (targets.length === 1) return runSAT(targets[0]);
+  document.getElementById('sat-output').style.display='block';
+  document.getElementById('sat-title').textContent = '— amd';
+  const term = document.getElementById('sat-terminal');
+  term.textContent = 'Running AMD validate set one by one...\n';
+  const labels = satLabels();
+  const runNext = (idx) => {
+    if (idx >= targets.length) return Promise.resolve();
+    const target = targets[idx];
+    term.textContent += '\n[' + (idx + 1) + '/' + targets.length + '] ' + labels[target] + '\n';
+    return enqueueSATTarget(target)
+      .then(d => {
+        return streamSATTask(d.task_id, labels[target], false);
+      }).then(function() {
+        return runNext(idx + 1);
+      });
+  };
+  return runNext(0);
+}
+function runAllSAT() {
+  const cycles = 1;
+  const status = document.getElementById('sat-all-status');
+  status.textContent = 'Enqueuing...';
+  const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse'];
+  const baseTargets = ['nvidia','nvidia-targeted-stress','nvidia-targeted-power','nvidia-pulse','nvidia-interconnect','nvidia-bandwidth','memory','storage','cpu'].concat(selectedAMDValidateTargets());
+  const activeTargets = baseTargets.filter(target => {
+    if (stressOnlyTargets.indexOf(target) >= 0 && !satStressMode()) return false;
+    const btn = document.getElementById('sat-btn-' + target);
+    return !(btn && btn.disabled);
+  });
+  Promise.all(activeTargets.map(expandSATTarget)).then(groups => {
+    const expanded = [];
+    for (let cycle = 0; cycle < cycles; cycle++) {
+      groups.forEach(group => group.forEach(item => expanded.push(item)));
+    }
+    const total = expanded.length;
+    let enqueued = 0;
+    if (!total) {
+      status.textContent = 'No tasks selected.';
+      return;
+    }
+    const runNext = (idx) => {
+      if (idx >= expanded.length) { status.textContent = 'Completed ' + total + ' task(s).'; return Promise.resolve(); }
+      const item = expanded[idx];
+      status.textContent = 'Running ' + (idx + 1) + '/' + total + '...';
+      return enqueueSATTarget(item.target, item.overrides)
+        .then(() => {
+          enqueued++;
+          return runNext(idx + 1);
+        });
+    };
+    return runNext(0);
+  }).catch(err => {
+    status.textContent = 'Error: ' + err.message;
+  });
+}
+</script>
+<script>
+fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
+    if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-targeted-stress', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-targeted-power', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-pulse', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-interconnect', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-bandwidth', 'No NVIDIA GPU detected');
+    if (!gp.amd) disableSATCard('amd', 'No AMD GPU detected');
+    if (!gp.amd) disableSATAMDOptions('No AMD GPU detected');
+});
+satLoadGPUs();
+function disableSATAMDOptions(reason) {
+    ['sat-amd-target','sat-amd-mem-target','sat-amd-bandwidth-target'].forEach(function(id) {
+        const cb = document.getElementById(id);
+        if (!cb) return;
+        cb.disabled = true;
+        cb.checked = false;
+        cb.title = reason;
+    });
+}
+function disableSATCard(id, reason) {
+    const btn = document.getElementById('sat-btn-' + id);
+    if (!btn) return;
+    btn.disabled = true;
+    btn.title = reason;
+    btn.style.opacity = '0.4';
+    const card = btn.closest('.card');
+    if (card) {
+        let note = card.querySelector('.sat-unavail');
+        if (!note) {
+            note = document.createElement('p');
+            note.className = 'sat-unavail';
+            note.style.cssText = 'color:var(--muted);font-size:12px;margin:0 0 8px';
+            const body = card.querySelector('.card-body');
+            if (body) body.insertBefore(note, body.firstChild);
+        }
+        note.textContent = reason;
+    }
+}
+</script>`
+}
+
+// loadValidateInventory summarises the hardware recorded in the audit snapshot
+// at opts.AuditPath for display on the validate page.
+//
+// When the snapshot cannot be loaded or decoded, every summary field holds the
+// placeholder "Audit snapshot not loaded." and both GPU counts stay zero.
+// Entries whose Present flag is set and false are left out of every count.
+func loadValidateInventory(opts HandlerOptions) validateInventory {
+	const placeholder = "Audit snapshot not loaded."
+	inv := validateInventory{
+		CPU:     placeholder,
+		Memory:  placeholder,
+		Storage: placeholder,
+		NVIDIA:  placeholder,
+		AMD:     placeholder,
+	}
+
+	raw, err := loadSnapshot(opts.AuditPath)
+	if err != nil {
+		return inv
+	}
+	var snap schema.HardwareIngestRequest
+	if json.Unmarshal(raw, &snap) != nil {
+		return inv
+	}
+
+	var (
+		cpuModels     = map[string]int{}
+		memModels     = map[string]int{}
+		storageModels = map[string]int{}
+		nvidiaModels  = map[string]int{}
+		amdModels     = map[string]int{}
+
+		cpuCount, memCount, storageCount, nvidiaCount, amdCount int
+	)
+
+	// A nil Present flag is treated as "present"; only an explicit false skips.
+	for _, cpu := range snap.Hardware.CPUs {
+		if cpu.Present == nil || *cpu.Present {
+			cpuCount++
+			addValidateModel(cpuModels, validateFirstNonEmpty(validateTrimPtr(cpu.Model), validateTrimPtr(cpu.Manufacturer), "unknown"))
+		}
+	}
+
+	for _, dimm := range snap.Hardware.Memory {
+		if dimm.Present == nil || *dimm.Present {
+			memCount++
+			addValidateModel(memModels, validateFirstNonEmpty(validateTrimPtr(dimm.PartNumber), validateTrimPtr(dimm.Type), validateTrimPtr(dimm.Manufacturer), "unknown"))
+		}
+	}
+
+	for _, disk := range snap.Hardware.Storage {
+		if disk.Present == nil || *disk.Present {
+			storageCount++
+			addValidateModel(storageModels, validateFirstNonEmpty(validateTrimPtr(disk.Model), validateTrimPtr(disk.Manufacturer), "unknown"))
+		}
+	}
+
+	// The NVIDIA and AMD checks are independent: a device is tallied under
+	// every vendor whose check accepts it.
+	for _, dev := range snap.Hardware.PCIeDevices {
+		if dev.Present != nil && !*dev.Present {
+			continue
+		}
+		name := validateFirstNonEmpty(validateTrimPtr(dev.Model), validateTrimPtr(dev.Manufacturer), "unknown")
+		if validateIsVendorGPU(dev, "nvidia") {
+			nvidiaCount++
+			addValidateModel(nvidiaModels, name)
+		}
+		if validateIsVendorGPU(dev, "amd") {
+			amdCount++
+			addValidateModel(amdModels, name)
+		}
+	}
+
+	inv.CPU = formatValidateDeviceSummary(cpuCount, cpuModels, "CPU")
+	inv.Memory = formatValidateDeviceSummary(memCount, memModels, "module")
+	inv.Storage = formatValidateDeviceSummary(storageCount, storageModels, "device")
+	inv.NVIDIA = formatValidateDeviceSummary(nvidiaCount, nvidiaModels, "GPU")
+	inv.AMD = formatValidateDeviceSummary(amdCount, amdModels, "GPU")
+	inv.NvidiaGPUCount = nvidiaCount
+	inv.AMDGPUCount = amdCount
+	return inv
+}
+
+// renderValidateCardBody returns the four stacked sections of a validate card
+// in order: devices, description, commands, settings. The devices and settings
+// sections use the muted colour. Arguments are inserted as-is, without HTML
+// escaping.
+func renderValidateCardBody(devices, description, commands, settings string) string {
+	const (
+		mutedStyle = `font-size:13px;color:var(--muted)`
+		plainStyle = `font-size:13px`
+	)
+	section := func(style, content string) string {
+		return `<div class="validate-card-section"><div style="` + style + `">` + content + `</div></div>`
+	}
+	return section(mutedStyle, devices) +
+		section(plainStyle, description) +
+		section(plainStyle, commands) +
+		section(mutedStyle, settings)
+}
+
+// formatValidateDeviceSummary renders a one-line summary of detected devices.
+//
+// total is the number of devices, models maps a model name to its count, and
+// unit is the singular noun ("GPU", "module", ...), pluralised by appending
+// "s" whenever total is not 1. Model names are HTML-escaped and listed in
+// sorted order. A single model yields "<n> x <model> <unit>"; several models
+// yield "<total> <unit>: <n> x <model>, ...".
+func formatValidateDeviceSummary(total int, models map[string]int, unit string) string {
+	if total == 0 {
+		return fmt.Sprintf("0 %ss detected.", unit)
+	}
+
+	names := make([]string, 0, len(models))
+	for name := range models {
+		names = append(names, name)
+	}
+	// Map iteration order is random; sort for stable output.
+	sort.Strings(names)
+
+	entries := make([]string, len(names))
+	for i, name := range names {
+		entries[i] = fmt.Sprintf("%d x %s", models[name], html.EscapeString(name))
+	}
+
+	noun := unit
+	if total != 1 {
+		noun = unit + "s"
+	}
+
+	if len(entries) == 1 {
+		return entries[0] + " " + noun
+	}
+	return fmt.Sprint(total) + " " + noun + ": " + strings.Join(entries, ", ")
+}
+
+// addValidateModel increments the tally for name in counts. The name is
+// trimmed first; a blank name is tallied under "unknown".
+func addValidateModel(counts map[string]int, name string) {
+	if trimmed := strings.TrimSpace(name); trimmed != "" {
+		counts[trimmed]++
+		return
+	}
+	counts["unknown"]++
+}
+
+// validateTrimPtr dereferences value and trims surrounding whitespace.
+// A nil pointer yields the empty string.
+func validateTrimPtr(value *string) string {
+	var trimmed string
+	if value != nil {
+		trimmed = strings.TrimSpace(*value)
+	}
+	return trimmed
+}
+
+// validateFirstNonEmpty returns the first argument that is non-empty after
+// trimming whitespace, in its trimmed form. It returns "" when every argument
+// is blank or no arguments are given.
+func validateFirstNonEmpty(values ...string) string {
+	for i := range values {
+		if candidate := strings.TrimSpace(values[i]); candidate != "" {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// validateIsVendorGPU reports whether dev looks like a GPU from the given
+// vendor. vendor must be "nvidia" or "amd"; any other value yields false.
+//
+// Matching is case-insensitive on the trimmed Model, Manufacturer and
+// DeviceClass fields. Devices whose model or manufacturer mentions "aspeed"
+// are never treated as GPUs. NVIDIA devices are matched by name only; AMD
+// devices must also carry one of the accepted GPU device classes.
+func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
+	model := strings.ToLower(validateTrimPtr(dev.Model))
+	manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer))
+	class := strings.ToLower(validateTrimPtr(dev.DeviceClass))
+	if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") {
+		return false
+	}
+	switch vendor {
+	case "nvidia":
+		// NOTE(review): no device-class check here, so any PCIe device whose
+		// model or manufacturer mentions "nvidia" is counted as a GPU.
+		return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia")
+	case "amd":
+		isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller"
+		// "ati" must match as a whole word. A plain substring test also hits
+		// the "ati" inside "corporation", which made e.g. "nvidia corporation"
+		// or "intel corporation" display devices count as AMD GPUs.
+		isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") ||
+			strings.Contains(manufacturer, "amd") ||
+			validateContainsWord(manufacturer, "ati")
+		isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd")
+		return isGPUClass && (isAMDVendor || isAMDModel)
+	default:
+		return false
+	}
+}
+
+// validateContainsWord reports whether word occurs in s as a whole token,
+// where tokens are maximal runs of ASCII lowercase letters and digits.
+// Both s and word are expected to be lowercase already.
+func validateContainsWord(s, word string) bool {
+	tokens := strings.FieldsFunc(s, func(r rune) bool {
+		return (r < 'a' || r > 'z') && (r < '0' || r > '9')
+	})
+	for _, token := range tokens {
+		if token == word {
+			return true
+		}
+	}
+	return false
+}
+
+// renderSATCard renders one validate card: a header with label and a "Run"
+// button (id "sat-btn-<id>", onclick runAction), followed by body.
+//
+// headerActions, when not blank, is appended verbatim after the Run button.
+// All arguments are inserted without HTML escaping.
+func renderSATCard(id, label, runAction, headerActions, body string) string {
+	buttons := fmt.Sprintf(`<button id="sat-btn-%s" class="btn btn-primary btn-sm" onclick="%s">Run</button>`, id, runAction)
+	if len(strings.TrimSpace(headerActions)) > 0 {
+		buttons += headerActions
+	}
+	return `<div class="card"><div class="card-head card-head-actions"><span>` + label +
+		`</span><div class="card-head-buttons">` + buttons +
+		`</div></div><div class="card-body validate-card-body">` + body +
+		`</div></div>`
+}
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -135,6 +135,14 @@ type namedMetricsRing struct {
 // At metricsCollectInterval = 5 s this covers 30 minutes of live history.
 const metricsChartWindow = 360

+// metricsDownsampleAge is the age after which old metrics rows are downsampled
+// to 1 sample per minute. Data fresher than this is kept at full resolution.
+const metricsDownsampleAge = 2 * time.Hour
+
+// metricsRetainWindow is the total retention period for metrics rows.
+// Rows older than this are deleted entirely by the background compactor.
+const metricsRetainWindow = 48 * time.Hour
+
 var metricsCollectInterval = 5 * time.Second

 // pendingNetChange tracks a network state change awaiting confirmation.
@@ -261,7 +269,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
-	mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
+	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
+	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
+	mux.HandleFunc("POST /api/bee-bench/nvidia/autotune/run", h.handleAPIBenchmarkAutotuneRun())
+	mux.HandleFunc("GET /api/bee-bench/nvidia/autotune/status", h.handleAPIBenchmarkAutotuneStatus)
+	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)

 	// Tasks
 	mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
@@ -333,13 +345,24 @@ func (h *handler) startMetricsCollector() {
 	goRecoverLoop("metrics collector", 2*time.Second, func() {
 		ticker := time.NewTicker(metricsCollectInterval)
 		defer ticker.Stop()
-		for range ticker.C {
-			sample := platform.SampleLiveMetrics()
-			if h.metricsDB != nil {
-				_ = h.metricsDB.Write(sample)
+		pruneTicker := time.NewTicker(time.Hour)
+		defer pruneTicker.Stop()
+		for {
+			select {
+			case <-ticker.C:
+				sample := platform.SampleLiveMetrics()
+				if h.metricsDB != nil {
+					_ = h.metricsDB.Write(sample)
+				}
+				h.feedRings(sample)
+				h.setLatestMetric(sample)
+			case <-pruneTicker.C:
+				if h.metricsDB != nil {
+					now := time.Now().UTC()
+					_ = h.metricsDB.Downsample(now.Add(-metricsDownsampleAge), now.Add(-metricsRetainWindow))
+					_ = h.metricsDB.Prune(now.Add(-metricsRetainWindow))
+				}
 			}
-			h.feedRings(sample)
-			h.setLatestMetric(sample)
 		}
 	})
 }
@@ -573,12 +596,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	}
 	timeline := metricsTimelineSegments(samples, time.Now())
 	if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
-		buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
+		var overviewOk bool
+		var buf []byte
+		buf, overviewOk, err = renderGPUOverviewChartSVG(idx, samples, timeline)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
-		if !ok {
+		if !overviewOk {
 			http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 			return
 		}
@@ -587,23 +612,37 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		_, _ = w.Write(buf)
 		return
 	}
-	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
+	datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples)
 	if !ok {
 		http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 		return
 	}

-	buf, err := renderMetricChartSVG(
-		title,
-		labels,
-		sampleTimes(samples),
-		datasets,
-		names,
-		yMin,
-		yMax,
-		chartCanvasHeightForPath(path, len(names)),
-		timeline,
-	)
+	var buf []byte
+	if stacked {
+		buf, err = renderStackedMetricChartSVG(
+			title,
+			labels,
+			sampleTimes(samples),
+			datasets,
+			names,
+			yMax,
+			chartCanvasHeightForPath(path, len(names)),
+			timeline,
+		)
+	} else {
+		buf, err = renderMetricChartSVG(
+			title,
+			labels,
+			sampleTimes(samples),
+			datasets,
+			names,
+			yMin,
+			yMax,
+			chartCanvasHeightForPath(path, len(names)),
+			timeline,
+		)
+	}
 	if err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
@@ -613,12 +652,8 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	_, _ = w.Write(buf)
 }

-func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
-	var datasets [][]float64
-	var names []string
-	var title string
-	var yMin, yMax *float64
-	labels := sampleTimeLabels(samples)
+func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (datasets [][]float64, names []string, labels []string, title string, yMin, yMax *float64, stacked bool, ok bool) {
+	labels = sampleTimeLabels(samples)

 	switch {
 	case path == "server-load":
@@ -655,12 +690,19 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 	case path == "server-power":
 		title = "System Power"
 		power := make([]float64, len(samples))
+		label := "Power W"
 		for i, s := range samples {
 			power[i] = s.PowerW
+			if strings.TrimSpace(s.PowerSource) != "" {
+				label = fmt.Sprintf("Power W · %s", s.PowerSource)
+				if strings.TrimSpace(s.PowerMode) != "" {
+					label += fmt.Sprintf(" (%s)", s.PowerMode)
+				}
+			}
 		}
 		power = normalizePowerSeries(power)
 		datasets = [][]float64{power}
-		names = []string{"Power W"}
+		names = []string{label}
 		yMin = floatPtr(0)
 		yMax = autoMax120(power)

@@ -705,7 +747,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 	case strings.HasPrefix(path, "gpu/"):
 		idx, sub, ok := parseGPUChartPath(path)
 		if !ok {
-			return nil, nil, nil, "", nil, nil, false
+			return nil, nil, nil, "", nil, nil, false, false
 		}
 		switch sub {
 		case "load":
@@ -713,7 +755,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
 			mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
 			if util == nil && mem == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
 			names = []string{"Load %", "Mem %"}
@@ -723,7 +765,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Temperature"
 			temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
 			if temp == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{temp}
 			names = []string{"Temp °C"}
@@ -733,7 +775,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Core Clock"
 			clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
 			if clock == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{clock}
 			names = []string{"Core Clock MHz"}
@@ -742,7 +784,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Memory Clock"
 			clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
 			if clock == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{clock}
 			names = []string{"Memory Clock MHz"}
@@ -751,7 +793,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 			title = gpuDisplayLabel(idx) + " Power"
 			power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
 			if power == nil {
-				return nil, nil, nil, "", nil, nil, false
+				return nil, nil, nil, "", nil, nil, false, false
 			}
 			datasets = [][]float64{power}
 			names = []string{"Power W"}
@@ -759,10 +801,10 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][
 		}

 	default:
-		return nil, nil, nil, "", nil, nil, false
+		return nil, nil, nil, "", nil, nil, false, false
 	}

-	return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
+	return datasets, names, labels, title, yMin, yMax, stacked, len(datasets) > 0
 }

 func parseGPUChartPath(path string) (idx int, sub string, ok bool) {
@@ -928,6 +970,37 @@ func normalizePowerSeries(ds []float64) []float64 {
 	return out
 }

+// psuSlotsFromSamples collects every distinct PSU slot number that appears in
+// any sample and returns them in ascending order. The result is empty (but
+// non-nil) when no PSU readings are present.
+func psuSlotsFromSamples(samples []platform.LiveMetricSample) []int {
+	present := make(map[int]struct{})
+	for i := range samples {
+		for _, psu := range samples[i].PSUs {
+			present[psu.Slot] = struct{}{}
+		}
+	}
+
+	ordered := make([]int, 0, len(present))
+	for slot := range present {
+		ordered = append(ordered, slot)
+	}
+	sort.Ints(ordered)
+	return ordered
+}
+
+// psuStackedTotal returns the point-by-point sum of all PSU datasets (for
+// scale calculation).
+//
+// It returns nil when datasets is empty. The result is as long as the longest
+// dataset; shorter datasets simply contribute nothing past their end, so
+// ragged input cannot index out of range.
+func psuStackedTotal(datasets [][]float64) []float64 {
+	if len(datasets) == 0 {
+		return nil
+	}
+	// Size by the longest series rather than datasets[0]: a later, longer
+	// dataset would otherwise write past the end of total and panic.
+	n := 0
+	for _, ds := range datasets {
+		if len(ds) > n {
+			n = len(ds)
+		}
+	}
+	total := make([]float64, n)
+	for _, ds := range datasets {
+		for i, v := range ds {
+			total[i] += v
+		}
+	}
+	return total
+}
+
 func normalizeFanSeries(ds []float64) []float64 {
 	if len(ds) == 0 {
 		return nil
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -11,6 +11,7 @@ import (
 	"time"

 	"bee/audit/internal/platform"
+	"bee/audit/internal/schema"
 )

 func TestChartLegendNumber(t *testing.T) {
@@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
 	}
 }

+// TestBuildRuntimeToRAMRowShowsPartialCopyWarning checks that a "partial"
+// ToRAMStatus yields a WARNING row whose issue text mentions the partial or
+// staged RAM copy.
+func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) {
+	health := schema.RuntimeHealth{ToRAMStatus: "partial"}
+	got := buildRuntimeToRAMRow(health)
+	if got.Status != "WARNING" {
+		t.Fatalf("status=%q want WARNING", got.Status)
+	}
+	if !strings.Contains(got.Issue, "Partial or staged RAM copy detected") {
+		t.Fatalf("issue=%q", got.Issue)
+	}
+}
+
 func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 	samples := []platform.LiveMetricSample{
 		{
@@ -109,7 +120,7 @@ func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 		},
 	}

-	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, labels, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -153,7 +164,7 @@ func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
 		},
 	}

-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, _, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -198,7 +209,7 @@ func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
 		},
 	}

-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
+	datasets, names, _, title, _, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
 	if !ok {
 		t.Fatal("gpu-all-clock returned ok=false")
 	}
@@ -409,6 +420,49 @@ func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
 	}
 }

+// TestChartDataFromSamplesServerPowerUsesResolvedSystemPower checks that the
+// "server-power" chart plots the single resolved system power series (labelled
+// with its source and mode) rather than stacking the per-PSU readings.
+func TestChartDataFromSamplesServerPowerUsesResolvedSystemPower(t *testing.T) {
+	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
+	first := platform.LiveMetricSample{
+		Timestamp: start,
+		PSUs: []platform.PSUReading{
+			{Slot: 1, PowerW: 120},
+			{Slot: 2, PowerW: 130},
+		},
+		PowerW:      250,
+		PowerSource: "sdr_psu_input",
+		PowerMode:   "autotuned",
+	}
+	second := platform.LiveMetricSample{
+		Timestamp: start.Add(time.Minute),
+		PSUs: []platform.PSUReading{
+			{Slot: 1, PowerW: 140},
+			{Slot: 2, PowerW: 135},
+		},
+		PowerW:      275,
+		PowerSource: "sdr_psu_input",
+		PowerMode:   "autotuned",
+	}
+
+	datasets, names, _, title, _, _, stacked, ok := chartDataFromSamples("server-power", []platform.LiveMetricSample{first, second})
+	if !ok {
+		t.Fatal("expected server-power chart data")
+	}
+	if title != "System Power" {
+		t.Fatalf("title=%q", title)
+	}
+	if stacked {
+		t.Fatal("server-power should use resolved system power, not stacked PSU inputs")
+	}
+	if len(datasets) != 1 || len(names) != 1 {
+		t.Fatalf("datasets=%d names=%d want 1/1", len(datasets), len(names))
+	}
+	const wantName = "Power W · sdr_psu_input (autotuned)"
+	if names[0] != wantName {
+		t.Fatalf("names=%v", names)
+	}
+}
+
 func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
 	got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
 	want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -637,8 +691,14 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`href="/benchmark"`,
 		`id="benchmark-gpu-list"`,
 		`/api/gpu/nvidia`,
-		`/api/benchmark/nvidia/run`,
+		`/api/bee-bench/nvidia/perf/run`,
+		`/api/bee-bench/nvidia/power/run`,
+		`/api/bee-bench/nvidia/autotune/run`,
+		`/api/bee-bench/nvidia/autotune/status`,
 		`benchmark-run-nccl`,
+		`Run Performance Benchmark`,
+		`Run Power / Thermal Fit`,
+		`Autotune`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
@@ -649,7 +709,7 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	dir := t.TempDir()
 	exportDir := filepath.Join(dir, "export")
-	runDir := filepath.Join(exportDir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	runDir := filepath.Join(exportDir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -691,10 +751,10 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	body := rec.Body.String()
 	wantTime := result.GeneratedAt.Local().Format("2006-01-02 15:04:05")
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score by saved benchmark run and GPU.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
-		`GPU #1 — NVIDIA H100 PCIe`,
+		`GPU 0`,
+		`GPU 1`,
 		`#1`,
 		wantTime,
 		`1176.25`,
@@ -730,6 +790,26 @@ func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
 	}
 }

+func TestValidatePageRendersNvidiaFabricCardsInValidateMode(t *testing.T) {
+	handler := NewHandler(HandlerOptions{})
+	rec := httptest.NewRecorder()
+	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	body := rec.Body.String()
+	for _, needle := range []string{
+		`NVIDIA Interconnect (NCCL)`,
+		`Validate and Stress:`,
+		`NVIDIA Bandwidth (NVBandwidth)`,
+		`nvbandwidth runs all built-in tests without a time limit`,
+	} {
+		if !strings.Contains(body, needle) {
+			t.Fatalf("validate page missing %q: %s", needle, body)
+		}
+	}
+}
+
 func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
 	handler := NewHandler(HandlerOptions{})
 	rec := httptest.NewRecorder()
@@ -1094,6 +1174,7 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 	}
 	body := rec.Body.String()
 	for _, needle := range []string{
+		// Runtime Health card — LiveCD checks only
 		`Runtime Health`,
 		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
 		`Export Directory`,
@@ -1102,16 +1183,18 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 		`CUDA / ROCm`,
 		`Required Utilities`,
 		`Bee Services`,
-		`<td>CPU</td>`,
-		`<td>Memory</td>`,
-		`<td>Storage</td>`,
-		`<td>GPU</td>`,
 		`CUDA runtime is not ready for GPU SAT.`,
 		`Missing: nvidia-smi`,
 		`bee-nvidia=inactive`,
-		`cpu SAT: FAILED`,
-		`storage SAT: FAILED`,
-		`sat:nvidia`,
+		// Hardware Summary card — component health badges
+		`Hardware Summary`,
+		`>CPU<`,
+		`>Memory<`,
+		`>Storage<`,
+		`>GPU<`,
+		`>PSU<`,
+		`badge-warn`, // cpu Warning badge
+		`badge-err`,  // storage Critical badge
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("dashboard missing %q: %s", needle, body)
--- a/audit/internal/webui/stability.go
+++ b/audit/internal/webui/stability.go
@@ -7,14 +7,43 @@ import (
 	"time"
 )

+const (
+	recoverLoopMaxDelay   = 60 * time.Second
+	recoverLoopResetAfter = 30 * time.Second
+)
+
+// goRecoverLoop starts fn in a goroutine, restarting after panics.
+// restartDelay is the initial delay; successive panics double it up to
+// recoverLoopMaxDelay. The delay resets to restartDelay when the run that
+// just panicked had lasted at least recoverLoopResetAfter.
 func goRecoverLoop(name string, restartDelay time.Duration, fn func()) {
 	go func() {
+		delay := restartDelay
+		consecutive := 0
 		for {
-			if !runRecoverable(name, fn) {
+			start := time.Now()
+			panicked := runRecoverable(name, fn)
+			if !panicked {
 				return
 			}
-			if restartDelay > 0 {
-				time.Sleep(restartDelay)
+			consecutive++
+			if time.Since(start) >= recoverLoopResetAfter {
+				delay = restartDelay
+				consecutive = 1
+			}
+			slog.Warn("goroutine restarting after panic",
+				"component", name,
+				"consecutive_panics", consecutive,
+				"next_delay", delay,
+			)
+			if delay > 0 {
+				time.Sleep(delay)
+			}
+			if delay < recoverLoopMaxDelay {
+				delay *= 2
+				if delay > recoverLoopMaxDelay {
+					delay = recoverLoopMaxDelay
+				}
 			}
 		}
 	}()
--- a/audit/internal/webui/task_report.go
+++ b/audit/internal/webui/task_report.go
@@ -171,21 +171,17 @@ func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeli
 		}
 		return gpuDisplayLabel(idx) + " Overview", buf, true
 	}
-	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
+	datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples)
 	if !ok {
 		return "", nil, false
 	}
-	buf, err := renderMetricChartSVG(
-		title,
-		labels,
-		sampleTimes(samples),
-		datasets,
-		names,
-		yMin,
-		yMax,
-		chartCanvasHeightForPath(path, len(names)),
-		timeline,
-	)
+	var buf []byte
+	var err error
+	if stacked {
+		buf, err = renderStackedMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMax, chartCanvasHeightForPath(path, len(names)), timeline)
+	} else {
+		buf, err = renderMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMin, yMax, chartCanvasHeightForPath(path, len(names)), timeline)
+	}
 	if err != nil {
 		return "", nil, false
 	}
@@ -233,6 +229,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 	if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
 		b.WriteString(benchmarkCard)
 	}
+	if powerCard := renderTaskPowerResultsCard(report.Target, logText); powerCard != "" {
+		b.WriteString(powerCard)
+	}

 	if len(report.Charts) > 0 {
 		for _, chart := range report.Charts {
@@ -251,7 +250,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 }

 func renderTaskBenchmarkResultsCard(target, logText string) string {
-	if strings.TrimSpace(target) != "nvidia-benchmark" {
+	switch strings.TrimSpace(target) {
+	case "nvidia-bench-perf":
+	default:
 		return ""
 	}
 	resultPath := taskBenchmarkResultPath(logText)
@@ -263,7 +264,7 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 		return ""
 	}
 	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+		"Perf Results",
 		"Composite score for this benchmark task.",
 		"No benchmark results were saved for this task.",
 		columns,
@@ -271,15 +272,42 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 	)
 }

+func renderTaskPowerResultsCard(target, logText string) string {
+	if strings.TrimSpace(target) != "nvidia-bench-power" {
+		return ""
+	}
+	resultPath := taskBenchmarkResultPath(logText)
+	if strings.TrimSpace(resultPath) == "" {
+		return ""
+	}
+	raw, err := os.ReadFile(resultPath)
+	if err != nil {
+		return ""
+	}
+	var result platform.NvidiaPowerBenchResult
+	if err := json.Unmarshal(raw, &result); err != nil {
+		return ""
+	}
+	var b strings.Builder
+	b.WriteString(`<div class="card"><div class="card-head">Power Results</div><div class="card-body">`)
+	if len(result.RecommendedSlotOrder) > 0 {
+		b.WriteString(`<p style="margin-bottom:10px"><strong>Recommended slot order:</strong> ` + html.EscapeString(joinTaskIndices(result.RecommendedSlotOrder)) + `</p>`)
+	}
+	b.WriteString(`<table><tr><th>GPU</th><th>Status</th><th>Max Power</th><th>Applied Limit</th></tr>`)
+	for _, gpu := range result.GPUs {
+		fmt.Fprintf(&b, `<tr><td>GPU %d</td><td>%s</td><td>%.0f W</td><td>%.0f W</td></tr>`,
+			gpu.Index, html.EscapeString(gpu.Status), gpu.MaxObservedPowerW, gpu.AppliedPowerLimitW)
+	}
+	b.WriteString(`</table></div></div>`)
+	return b.String()
+}
+
 func taskBenchmarkResultPath(logText string) string {
 	archivePath := taskArchivePathFromLog(logText)
 	if archivePath == "" {
 		return ""
 	}
 	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
-	if runDir == archivePath {
-		return ""
-	}
 	return filepath.Join(runDir, "result.json")
 }

--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -32,7 +32,9 @@ const (
 var taskNames = map[string]string{
 	"nvidia":                 "NVIDIA SAT",
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
-	"nvidia-benchmark":       "NVIDIA Benchmark",
+	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
+	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
+	"nvidia-bench-autotune":  "NVIDIA Bee Bench Power Source Autotune",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -118,13 +120,18 @@ type taskParams struct {
 	StressMode         bool     `json:"stress_mode,omitempty"`
 	GPUIndices         []int    `json:"gpu_indices,omitempty"`
 	ExcludeGPUIndices  []int    `json:"exclude_gpu_indices,omitempty"`
+	StaggerGPUStart    bool     `json:"stagger_gpu_start,omitempty"`
 	SizeMB             int      `json:"size_mb,omitempty"`
 	Passes             int      `json:"passes,omitempty"`
 	Loader             string   `json:"loader,omitempty"`
 	BurnProfile        string   `json:"burn_profile,omitempty"`
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
+	BenchmarkKind      string   `json:"benchmark_kind,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
+	RampStep           int      `json:"ramp_step,omitempty"`
+	RampTotal          int      `json:"ramp_total,omitempty"`
+	RampRunID          string   `json:"ramp_run_id,omitempty"`
 	DisplayName        string   `json:"display_name,omitempty"`
 	Device             string   `json:"device,omitempty"` // for install
 	PlatformComponents []string `json:"platform_components,omitempty"`
@@ -151,6 +158,38 @@ type burnPreset struct {
 	DurationSec int
 }

+type nvidiaRampSpec struct {
+	DurationSec      int
+	StaggerSeconds   int
+	TotalDurationSec int
+}
+
+func resolveMemoryValidatePreset(profile string, stress bool) (sizeMB, passes int) {
+	switch strings.TrimSpace(strings.ToLower(profile)) {
+	case "overnight":
+		return 1024, 2
+	case "acceptance":
+		return 1024, 1
+	case "smoke":
+		return 256, 1
+	}
+	if stress {
+		return 512, 1
+	}
+	return 256, 1
+}
+
+func taskMayLeaveOrphanWorkers(target string) bool {
+	switch strings.TrimSpace(strings.ToLower(target)) {
+	case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-bandwidth", "nvidia-stress", "nvidia-compute", "nvidia-bench-perf",
+		"memory", "memory-stress", "cpu", "sat-stress", "platform-stress":
+		return true
+	default:
+		return false
+	}
+}
+
 func resolveBurnPreset(profile string) burnPreset {
 	switch profile {
 	case "overnight":
@@ -162,6 +201,45 @@ func resolveBurnPreset(profile string) burnPreset {
 	}
 }

+func resolveNvidiaRampPlan(profile string, enabled bool, selected []int) (nvidiaRampSpec, error) {
+	base := resolveBurnPreset(profile).DurationSec
+	plan := nvidiaRampSpec{
+		DurationSec:      base,
+		TotalDurationSec: base,
+	}
+	if !enabled {
+		return plan, nil
+	}
+	count := len(selected)
+	if count == 0 {
+		return nvidiaRampSpec{}, fmt.Errorf("staggered NVIDIA burn requires explicit GPU selection")
+	}
+	if count == 1 {
+		return plan, nil
+	}
+
+	switch profile {
+	case "acceptance":
+		plan.StaggerSeconds = 10 * 60
+		plan.TotalDurationSec = plan.DurationSec + plan.StaggerSeconds*(count-1)
+	case "overnight":
+		plan.StaggerSeconds = 60 * 60
+		plan.TotalDurationSec = 8 * 60 * 60
+		minTotal := count * 60 * 60
+		if plan.TotalDurationSec < minTotal {
+			plan.TotalDurationSec = minTotal
+		}
+		if plan.TotalDurationSec > 10*60*60 {
+			return nvidiaRampSpec{}, fmt.Errorf("overnight staggered NVIDIA burn supports at most 10 GPUs")
+		}
+		plan.DurationSec = plan.TotalDurationSec - plan.StaggerSeconds*(count-1)
+	default:
+		plan.StaggerSeconds = 2 * 60
+		plan.TotalDurationSec = plan.DurationSec + plan.StaggerSeconds*(count-1)
+	}
+	return plan, nil
+}
+
 func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
 	acceptanceCycles := []platform.PlatformStressCycle{
 		{LoadSec: 85, IdleSec: 5},
@@ -509,6 +587,7 @@ func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
 	if err := writeTaskReportArtifacts(t); err != nil {
 		appendJobLog(t.LogPath, "WARN: task report generation failed: "+err.Error())
 	}
+	j.closeLog()
 	if t.ErrMsg != "" {
 		taskSerialEvent(t, "finished with status="+t.Status+" error="+t.ErrMsg)
 		return
@@ -537,8 +616,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 	}
 	a := q.opts.App

+	recovered := len(j.lines) > 0
 	j.append(fmt.Sprintf("Starting %s...", t.Name))
-	if len(j.lines) > 0 {
+	if recovered {
 		j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
 	}

@@ -579,7 +659,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = 300
 		}
 		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
-	case "nvidia-benchmark":
+	case "nvidia-bench-perf":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
@@ -591,7 +671,32 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
 			RunNCCL:           t.params.RunNCCL,
 			ParallelGPUs:      t.params.ParallelGPUs,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
 		}, j.append)
+	case "nvidia-bench-power":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaPowerBenchCtx(ctx, app.DefaultBeeBenchPowerDir, platform.NvidiaBenchmarkOptions{
+			Profile:           t.params.BenchmarkProfile,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
+		}, j.append)
+	case "nvidia-bench-autotune":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
+			Profile: t.params.BenchmarkProfile,
+			SizeMB:  t.params.SizeMB,
+		}, t.params.BenchmarkKind, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -601,7 +706,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
-		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
+		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, rampPlan.StaggerSeconds, j.append)
 	case "nvidia-targeted-power":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -633,15 +749,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		dur := t.params.Duration
-		if t.params.BurnProfile != "" && dur <= 0 {
-			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
-		}
-		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
-			DurationSec: dur,
-			Loader:      platform.NvidiaStressLoaderNCCL,
-			GPUIndices:  t.params.GPUIndices,
-		}, j.append)
+		archive, err = a.RunNCCLTests(ctx, "", t.params.GPUIndices, j.append)
 	case "nvidia-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
@@ -651,21 +759,31 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if t.params.BurnProfile != "" && dur <= 0 {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
+		rampPlan, planErr := resolveNvidiaRampPlan(t.params.BurnProfile, t.params.StaggerGPUStart, t.params.GPUIndices)
+		if planErr != nil {
+			err = planErr
+			break
+		}
+		if t.params.BurnProfile != "" && t.params.StaggerGPUStart && dur <= 0 {
+			dur = rampPlan.DurationSec
+		}
+		if rampPlan.StaggerSeconds > 0 {
+			j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU; post-ramp hold: %ds; total runtime: %ds", rampPlan.StaggerSeconds, dur, rampPlan.TotalDurationSec))
+		}
 		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
 			DurationSec:       dur,
 			Loader:            t.params.Loader,
 			GPUIndices:        t.params.GPUIndices,
 			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			StaggerSeconds:    rampPlan.StaggerSeconds,
 		}, j.append)
 	case "memory":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
-		sizeMB, passes := 256, 1
-		if t.params.StressMode {
-			sizeMB, passes = 1024, 3
-		}
+		sizeMB, passes := resolveMemoryValidatePreset(t.params.BurnProfile, t.params.StressMode)
+		j.append(fmt.Sprintf("Memory validate preset: %d MB x %d pass(es)", sizeMB, passes))
 		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", sizeMB, passes, j.append)
 	case "storage":
 		if a == nil {
@@ -921,6 +1039,9 @@ func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request
 			if t.job != nil {
 				t.job.abort()
 			}
+			if taskMayLeaveOrphanWorkers(t.Target) {
+				platform.KillTestWorkers()
+			}
 			t.Status = TaskCancelled
 			t.DoneAt = &now
 			taskSerialEvent(t, "finished with status="+t.Status)
@@ -948,6 +1069,9 @@ func (h *handler) handleAPITasksKillWorkers(w http.ResponseWriter, _ *http.Reque
 			if t.job != nil {
 				t.job.abort()
 			}
+			if taskMayLeaveOrphanWorkers(t.Target) {
+				platform.KillTestWorkers()
+			}
 			t.Status = TaskCancelled
 			t.DoneAt = &now
 			taskSerialEvent(t, "finished with status="+t.Status)
@@ -1052,10 +1176,13 @@ func (q *taskQueue) loadLocked() {
 		q.assignTaskLogPathLocked(t)
 		if t.Status == TaskRunning {
 			// The task was interrupted by a bee-web restart. Child processes
-			// (e.g. bee-gpu-burn-worker) survive the restart in their own
-			// process groups and cannot be cancelled retroactively. Mark the
-			// task as failed so the user can decide whether to re-run it
-			// rather than blindly re-launching duplicate workers.
+			// (e.g. bee-gpu-burn-worker, dcgmi/nvvs) survive the restart in
+			// their own process groups. Kill any matching stale workers before
+			// marking the task failed so the next GPU test does not inherit a
+			// busy DCGM slot or duplicate workers.
+			if taskMayLeaveOrphanWorkers(t.Target) {
+				_ = platform.KillTestWorkers()
+			}
 			now := time.Now()
 			t.Status = TaskFailed
 			t.DoneAt = &now
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -366,7 +366,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	taskReportMetricsDBPath = metricsPath
 	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })

-	benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	benchmarkDir := filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -398,14 +398,14 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	task := &Task{
 		ID:           "task-bench",
-		Name:         "NVIDIA Benchmark",
-		Target:       "nvidia-benchmark",
+		Name:         "NVIDIA Bee Bench Perf",
+		Target:       "nvidia-bench-perf",
 		Status:       TaskDone,
 		CreatedAt:    time.Now().UTC().Add(-time.Minute),
 		ArtifactsDir: artifactsDir,
 	}
 	ensureTaskReportPaths(task)
-	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
+	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000.tar.gz") + "\n"
 	if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -420,9 +420,9 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	html := string(body)
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score for this benchmark task.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
+		`GPU 0`,
 		`1176.25`,
 	} {
 		if !strings.Contains(html, needle) {
@@ -491,6 +491,83 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }

+func TestResolveNvidiaRampPlan(t *testing.T) {
+	tests := []struct {
+		name     string
+		profile  string
+		enabled  bool
+		selected []int
+		want     nvidiaRampSpec
+		wantErr  string
+	}{
+		{
+			name:     "disabled uses base preset",
+			profile:  "acceptance",
+			selected: []int{0, 1},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, TotalDurationSec: 60 * 60},
+		},
+		{
+			name:     "smoke ramp uses two minute steps",
+			profile:  "smoke",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 5 * 60, StaggerSeconds: 2 * 60, TotalDurationSec: 9 * 60},
+		},
+		{
+			name:     "acceptance ramp uses ten minute steps",
+			profile:  "acceptance",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, StaggerSeconds: 10 * 60, TotalDurationSec: 80 * 60},
+		},
+		{
+			name:     "overnight stays at eight hours when possible",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2},
+			want:     nvidiaRampSpec{DurationSec: 6 * 60 * 60, StaggerSeconds: 60 * 60, TotalDurationSec: 8 * 60 * 60},
+		},
+		{
+			name:     "overnight extends to keep one hour after final gpu",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8},
+			want:     nvidiaRampSpec{DurationSec: 60 * 60, StaggerSeconds: 60 * 60, TotalDurationSec: 9 * 60 * 60},
+		},
+		{
+			name:     "overnight rejects impossible gpu count",
+			profile:  "overnight",
+			enabled:  true,
+			selected: []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
+			wantErr:  "at most 10 GPUs",
+		},
+		{
+			name:    "enabled requires explicit selection",
+			profile: "smoke",
+			enabled: true,
+			wantErr: "requires explicit GPU selection",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			got, err := resolveNvidiaRampPlan(tc.profile, tc.enabled, tc.selected)
+			if tc.wantErr != "" {
+				if err == nil || !strings.Contains(err.Error(), tc.wantErr) {
+					t.Fatalf("err=%v want substring %q", err, tc.wantErr)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("resolveNvidiaRampPlan error: %v", err)
+			}
+			if got != tc.want {
+				t.Fatalf("resolveNvidiaRampPlan(%q, %t, %v)=%+v want %+v", tc.profile, tc.enabled, tc.selected, got, tc.want)
+			}
+		})
+	}
+}
+
 func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
 	tests := []struct {
 		loader string
@@ -595,6 +672,36 @@ func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
 	}
 }

+func TestRunTaskUsesQuickPresetForMemoryValidate(t *testing.T) {
+	var gotSizeMB, gotPasses int
+	q := &taskQueue{
+		opts: &HandlerOptions{App: &app.App{}},
+	}
+	tk := &Task{
+		ID:        "mem-validate-1",
+		Name:      "Memory SAT",
+		Target:    "memory",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+		params:    taskParams{StressMode: true},
+	}
+	j := &jobState{}
+
+	orig := runMemoryAcceptancePackCtx
+	runMemoryAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, sizeMB, passes int, _ func(string)) (string, error) {
+		gotSizeMB = sizeMB
+		gotPasses = passes
+		return "/tmp/memory-validate.tar.gz", nil
+	}
+	defer func() { runMemoryAcceptancePackCtx = orig }()
+
+	q.runTask(tk, j, context.Background())
+
+	if gotSizeMB != 512 || gotPasses != 1 {
+		t.Fatalf("memory validate preset=%dMB x%d want 512MB x1", gotSizeMB, gotPasses)
+	}
+}
+
 func TestRunTaskBuildsSupportBundleWithoutApp(t *testing.T) {
 	dir := t.TempDir()
 	q := &taskQueue{
--- a/bible-local/docs/benchmark-clock-calibration.md
+++ b/bible-local/docs/benchmark-clock-calibration.md
@@ -1,5 +1,34 @@
 # Benchmark clock calibration research

+## Benchmark methodology versioning
+
+Every benchmark methodology change must bump the benchmark version constant in
+source code by exactly `+1`.
+
+Methodology change means any change that affects comparability of benchmark
+results, including for example:
+- phase durations or phase order
+- enabled/disabled precisions
+- fallback rules
+- normalization rules
+- score formulas or weights
+- degradation thresholds
+- power calibration logic
+- thermal/power penalty logic
+
+Requirements:
+- benchmark version must be stored in source code as an explicit version
+  constant, not inferred from git tag or build metadata
+- benchmark report must always print the benchmark version
+- `result.json` must always include the benchmark version
+- results from different benchmark versions must be treated as non-comparable by
+  default
+
+Purpose:
+- prevent accidental comparison of runs produced by different methodologies
+- make historical benchmark archives self-describing even when detached from git
+- force deliberate version bumps whenever scoring or execution semantics change
+
 ## Status
 In progress. Baseline data from production servers pending.

--- a/bible-local/docs/gpu-model-propagation.md
+++ b/bible-local/docs/gpu-model-propagation.md
@@ -0,0 +1,121 @@
+# GPU Model Name Propagation
+
+How GPU model names are detected, stored, and displayed throughout the project.
+
+---
+
+## Detection Sources
+
+There are **three separate pipelines** for GPU model names — they use different structs and don't share state.
+
+### Pipeline A — Live / SAT (nvidia-smi query at runtime)
+
+**File:** `audit/internal/platform/sat.go`
+
+- `ListNvidiaGPUs()` → `NvidiaGPU.Name` (field: `name`, from `nvidia-smi --query-gpu=index,name,...`)
+- `ListNvidiaGPUStatuses()` → `NvidiaGPUStatus.Name`
+- Used by: GPU selection UI, live metrics labels, burn/stress test logic
+
+### Pipeline B — Benchmark results
+
+**File:** `audit/internal/platform/benchmark.go`, line 124
+
+- `queryBenchmarkGPUInfo(selected)` → `benchmarkGPUInfo.Name`
+- Stored in `BenchmarkGPUResult.Name` (`json:"name,omitempty"`)
+- Used by: benchmark history table, benchmark report
+
+### Pipeline C — Hardware audit JSON (PCIe schema)
+
+**File:** `audit/internal/schema/hardware.go`
+
+- `HardwarePCIeDevice.Model *string` (field name is **Model**, not Name)
+- For AMD GPUs: populated by `audit/internal/collector/amdgpu.go` from `info.Product`
+- For NVIDIA GPUs: **NOT populated** by `audit/internal/collector/nvidia.go` — the NVIDIA enricher sets telemetry/status but skips the Model field
+- Used by: hardware summary page (`hwDescribeGPU` in `pages.go:487`)
+
+---
+
+## Key Inconsistency: NVIDIA PCIe Model is Never Set
+
+`audit/internal/collector/nvidia.go` — `enrichPCIeWithNVIDIAData()` enriches NVIDIA PCIe devices with telemetry and status but does **not** populate `HardwarePCIeDevice.Model`.
+
+This means:
+- Hardware summary page shows "Unknown GPU" for all NVIDIA devices (falls back at `pages.go:486`)
+- AMD GPUs do have their model populated
+
+The fix would be: copy `gpu.Name` from the SAT pipeline into `dev.Model` inside `enrichPCIeWithNVIDIAData`.
+
+---
+
+## Benchmark History "Unknown GPU" Issue
+
+**Symptom:** Benchmark history table shows "GPU #N — Unknown GPU" columns instead of real GPU model names.
+
+**Root cause:** `BenchmarkGPUResult.Name` has tag `json:"name,omitempty"`. If `queryBenchmarkGPUInfo()` fails (warns at `benchmark.go:126`) or returns empty names, the Name field is never set and is omitted from JSON. Loaded results have empty Name → falls back to "Unknown GPU" at `pages.go:2226, 2237`.
+
+This happens for:
+- Older result files saved before the `Name` field was added
+- Runs where nvidia-smi query failed before the benchmark started
+
+---
+
+## Fallback Strings — Current State
+
+| Location | File | Fallback string |
+|---|---|---|
+| Hardware summary (PCIe) | `pages.go:486` | `"Unknown GPU"` |
+| Benchmark report summary | `benchmark_report.go:43` | `"Unknown GPU"` |
+| Benchmark report scorecard | `benchmark_report.go:93` | `"Unknown"` ← inconsistent |
+| Benchmark report detail | `benchmark_report.go:122` | `"Unknown GPU"` |
+| Benchmark history per-GPU col | `pages.go:2226` | `"Unknown GPU"` |
+| Benchmark history parallel col | `pages.go:2237` | `"Unknown GPU"` |
+| SAT status file write | `sat.go:922` | `"unknown"` ← lowercase, inconsistent |
+| GPU selection API | `api.go:163` | `"GPU N"` (no "Unknown") |
+
+**Rule:** all UI fallbacks should use `"Unknown GPU"`. The two outliers are `benchmark_report.go:93` (`"Unknown"`) and `sat.go:922` (`"unknown"`).
+
+---
+
+## GPU Selection UI
+
+**File:** `audit/internal/webui/pages.go`
+
+- Source: `GET /api/gpus` → `api.go` → `ListNvidiaGPUs()` → live nvidia-smi
+- Render: `'GPU ' + gpu.index + ' — ' + gpu.name + ' · ' + mem`
+- Fallback: `gpu.name || 'GPU ' + idx` (JS, line ~1432)
+
+This always shows the correct model because it queries nvidia-smi live. It is **not** connected to benchmark result data.
+
+---
+
+## Data Flow Summary
+
+```
+nvidia-smi (live)
+  └─ ListNvidiaGPUs() → NvidiaGPU.Name
+       ├─ GPU selection UI (always correct)
+       ├─ Live metrics labels (charts_svg.go)
+       └─ SAT/burn status file (sat.go)
+
+nvidia-smi (at benchmark start)
+  └─ queryBenchmarkGPUInfo() → benchmarkGPUInfo.Name
+       └─ BenchmarkGPUResult.Name (json:"name,omitempty")
+            ├─ Benchmark report
+            └─ Benchmark history table columns
+
+nvidia-smi / lspci (audit collection)
+  └─ HardwarePCIeDevice.Model (NVIDIA: NOT populated; AMD: populated)
+       └─ Hardware summary page hwDescribeGPU()
+```
+
+---
+
+## Fixed Issues
+
+All previously open items are resolved (the sections above describe the behavior before these fixes):
+
+1. **NVIDIA PCIe Model** — `enrichPCIeWithNVIDIAData()` sets `dev.Model = &v` (`nvidia.go:78`).
+2. **Fallback consistency** — `sat.go` and `benchmark_report.go` both use `"Unknown GPU"`.
+3. **`tops_per_sm_per_ghz`** — computed in `benchmark.go` and stored in `BenchmarkGPUScore.TOPSPerSMPerGHz`.
+4. **`MultiprocessorCount`, `PowerLimitW`, `DefaultPowerLimitW`** — present in `benchmark_types.go`.
+5. **Old benchmark JSONs** — no fix possible for already-saved results with missing names (display-only issue).
--- a/bible-local/docs/iso-build-rules.md
+++ b/bible-local/docs/iso-build-rules.md
@@ -15,6 +15,41 @@ This applies to:
 - `iso/builder/config/package-lists/*.list.chroot`
 - Any package referenced in bootloader configs, hooks, or overlay scripts

+## Bootloader sync rule
+
+The ISO has two independent bootloader configs that must be kept in sync manually:
+
+| File | Used by |
+|------|---------|
+| `config/bootloaders/grub-efi/grub.cfg` | UEFI (all modern servers) |
+| `config/bootloaders/isolinux/live.cfg.in` | CSM / legacy BIOS (syslinux) |
+
+live-build does NOT derive one from the other. Any new boot entry, kernel parameter
+change, or new mode added to one file must be manually mirrored in the other.
+
+**Canonical entry list** (both files must have all of these):
+
+| Label | Key params |
+|-------|-----------|
+| normal (default) | `nomodeset bee.nvidia.mode=normal` + full param set |
+| load to RAM | `toram nomodeset bee.nvidia.mode=normal` + full param set |
+| GSP=off | `nomodeset bee.nvidia.mode=gsp-off` + full param set |
+| KMS | no `nomodeset`, `bee.nvidia.mode=normal` + full param set |
+| KMS + GSP=off | no `nomodeset`, `bee.nvidia.mode=gsp-off` + full param set |
+| fail-safe | `nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp` |
+
+**Full standard param set** (append after `@APPEND_LIVE@` / `nomodeset` flags):
+```
+net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always
+numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1
+nowatchdog nosoftlockup
+```
+(fail-safe is the exception — it deliberately uses minimal params.)
+
+**Historical note:** `grub-pc/` was mistakenly used instead of `grub-efi/` until v8.25.
+live-build reads `config/bootloaders/grub-efi/` for UEFI because the build is
+configured with `--bootloaders "grub-efi,syslinux"`. Directory `grub-pc` is ignored.
+
 ## Memtest rule

 Do not assume live-build's built-in memtest integration is sufficient for `bee`.
--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -1,12 +1,13 @@
 DEBIAN_VERSION=12
 DEBIAN_KERNEL_ABI=auto
 NVIDIA_DRIVER_VERSION=590.48.01
+NVIDIA_FABRICMANAGER_VERSION=590.48.01-1
 NCCL_VERSION=2.28.9-1
 NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
 NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
-CUBLAS_VERSION=13.0.2.14-1
+CUBLAS_VERSION=13.1.1.3-1
 CUDA_USERSPACE_VERSION=13.0.96-1
 DCGM_VERSION=4.5.3-1
 JOHN_JUMBO_COMMIT=67fcf9fe5a
@@ -21,3 +22,4 @@ HIPBLASLT_VERSION=0.10.0.60304-76~22.04
 COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
+MEMTEST_VERSION=6.10-4
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -23,16 +23,17 @@ lb config noauto \
    --bootloaders "grub-efi,syslinux" \
    --debian-installer none \
    --archive-areas "main contrib non-free non-free-firmware" \
-    --mirror-bootstrap "https://deb.debian.org/debian" \
-    --mirror-chroot "https://deb.debian.org/debian" \
-    --mirror-binary "https://deb.debian.org/debian" \
+    --mirror-bootstrap "http://mirror.mephi.ru/debian/" \
+    --mirror-chroot "http://mirror.mephi.ru/debian/" \
+    --mirror-binary "http://mirror.mephi.ru/debian/" \
    --security true \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
    --memtest memtest86+ \
    --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
    --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
-    --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
+    --bootappend-live "boot=live components video=1920x1080 console=ttyS0,115200n8 console=tty0 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
+    --debootstrap-options "--include=ca-certificates" \
    --apt-recommends false \
    --chroot-squashfs-compression-type zstd \
    "${@}"
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -33,9 +33,10 @@ typedef void *CUstream;
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
 #define MAX_STRESS_STREAMS 16
-#define MAX_CUBLAS_PROFILES 5
 #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
 #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
+#define MAX_SINGLE_PRECISION_STREAMS 4
+#define MAX_SINGLE_PRECISION_PROFILE_BUDGET_BYTES ((size_t)2u * 1024u * 1024u * 1024u)

 static const char *ptx_source =
    ".version 6.0\n"
@@ -297,6 +298,13 @@ static int choose_stream_count(int mp_count, int planned_profiles, size_t total_
    return stream_count;
 }

+static size_t clamp_single_precision_profile_budget(size_t profile_budget_bytes) {
+    /* Upper bound applied to a per-profile budget; smaller values pass through. */
+    const size_t cap = MAX_SINGLE_PRECISION_PROFILE_BUDGET_BYTES;
+
+    return profile_budget_bytes > cap ? cap : profile_budget_bytes;
+}
+
 static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
    if (!api->cuStreamDestroy) {
        return;
@@ -643,6 +651,20 @@ static const struct profile_desc k_profiles[] = {
        CUDA_R_16F,
        CUBLAS_COMPUTE_32F_FAST_16F,
    },
+    {
+        "int8_tensor",
+        "int8",
+        75,
+        1,
+        0,
+        0,
+        128,
+        CUDA_R_8I,
+        CUDA_R_8I,
+        CUDA_R_32I,
+        CUDA_R_32I,
+        CUBLAS_COMPUTE_32I,
+    },
    {
        "fp8_e4m3",
        "fp8",
@@ -689,6 +711,21 @@ static const struct profile_desc k_profiles[] = {
 #endif
 };

+#define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
+
+static int profile_allowed_for_run(const struct profile_desc *desc, int cc, const char *precision_filter) {
+    /* Disabled profiles and GPUs below the profile's min_cc never run. */
+    if (!desc->enabled || cc < desc->min_cc) {
+        return 0;
+    }
+    if (precision_filter != NULL) {
+        return strcmp(desc->block_label, precision_filter) == 0;
+    }
+    /* No filter (mixed/all): skip fp64/fp4, which are unstable on the current benchmark
+     * fleet and can abort the whole pass after earlier phases already collected telemetry. */
+    return !(strcmp(desc->block_label, "fp64") == 0 || strcmp(desc->block_label, "fp4") == 0);
+}
+
 static int load_cublaslt(struct cublaslt_api *api) {
    memset(api, 0, sizeof(*api));
    api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -759,10 +796,12 @@ static int check_cublas(const char *step, cublasStatus_t status) {
 static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    switch (type) {
        case CUDA_R_32F:
+        case CUDA_R_32I:
            return (size_t)(elements * 4u);
        case CUDA_R_16F:
        case CUDA_R_16BF:
            return (size_t)(elements * 2u);
+        case CUDA_R_8I:
        case CUDA_R_8F_E4M3:
        case CUDA_R_8F_E5M2:
            return (size_t)(elements);
@@ -775,6 +814,16 @@ static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    }
 }

+static cudaDataType_t matmul_scale_type(const struct profile_desc *desc) {
+    cudaDataType_t scale = CUDA_R_32F; /* default unless overridden below */
+    if (desc->compute_type == CUBLAS_COMPUTE_32I) {
+        scale = CUDA_R_32I; /* int32 compute */
+    } else if (desc->compute_type == CUBLAS_COMPUTE_64F) {
+        scale = CUDA_R_64F; /* fp64 compute */
+    }
+    return scale;
+}
+
 static size_t fp4_scale_bytes(uint64_t rows, uint64_t cols) {
    uint64_t row_tiles = (rows + 127u) / 128u;
    uint64_t col_tiles = (cols + 63u) / 64u;
@@ -881,11 +930,9 @@ static int prepare_profile(struct cublaslt_api *cublas,
                           CUstream stream,
                           size_t profile_budget_bytes,
                           struct prepared_profile *out) {
-    memset(out, 0, sizeof(*out));
-    out->desc = *desc;
-    out->stream = stream;
-
    size_t bytes_per_cell = 0;
+    size_t attempt_budget = profile_budget_bytes;
+
    bytes_per_cell += bytes_for_elements(desc->a_type, 1);
    bytes_per_cell += bytes_for_elements(desc->b_type, 1);
    bytes_per_cell += bytes_for_elements(desc->c_type, 1);
@@ -894,105 +941,115 @@ static int prepare_profile(struct cublaslt_api *cublas,
        return 0;
    }

-    uint64_t dim = choose_square_dim(profile_budget_bytes, bytes_per_cell, desc->min_multiple);
-    out->m = dim;
-    out->n = dim;
-    out->k = dim;
+    while (attempt_budget >= MIN_PROFILE_BUDGET_BYTES) {
+        memset(out, 0, sizeof(*out));
+        out->desc = *desc;
+        out->stream = stream;

-    size_t desired_workspace = profile_budget_bytes / 8u;
-    if (desired_workspace > 32u * 1024u * 1024u) {
-        desired_workspace = 32u * 1024u * 1024u;
-    }
-    desired_workspace = round_down_size(desired_workspace, 256u);
+        uint64_t dim = choose_square_dim(attempt_budget, bytes_per_cell, desc->min_multiple);
+        out->m = dim;
+        out->n = dim;
+        out->k = dim;

-    size_t a_bytes = 0;
-    size_t b_bytes = 0;
-    size_t c_bytes = 0;
-    size_t d_bytes = 0;
-    size_t scale_bytes = 0;
-    while (1) {
-        a_bytes = bytes_for_elements(desc->a_type, out->k * out->m);
-        b_bytes = bytes_for_elements(desc->b_type, out->k * out->n);
-        c_bytes = bytes_for_elements(desc->c_type, out->m * out->n);
-        d_bytes = bytes_for_elements(desc->d_type, out->m * out->n);
-        scale_bytes = profile_scale_bytes(desc, out->m, out->n, out->k);
+        size_t desired_workspace = attempt_budget / 8u;
+        if (desired_workspace > 32u * 1024u * 1024u) {
+            desired_workspace = 32u * 1024u * 1024u;
+        }
+        desired_workspace = round_down_size(desired_workspace, 256u);

-        size_t matrix_bytes = a_bytes + b_bytes + c_bytes + d_bytes + scale_bytes;
-        if (matrix_bytes <= profile_budget_bytes) {
-            size_t remaining = profile_budget_bytes - matrix_bytes;
-            out->workspace_size = desired_workspace;
-            if (out->workspace_size > remaining) {
-                out->workspace_size = round_down_size(remaining, 256u);
+        size_t a_bytes = 0;
+        size_t b_bytes = 0;
+        size_t c_bytes = 0;
+        size_t d_bytes = 0;
+        size_t scale_bytes = 0;
+        while (1) {
+            a_bytes = bytes_for_elements(desc->a_type, out->k * out->m);
+            b_bytes = bytes_for_elements(desc->b_type, out->k * out->n);
+            c_bytes = bytes_for_elements(desc->c_type, out->m * out->n);
+            d_bytes = bytes_for_elements(desc->d_type, out->m * out->n);
+            scale_bytes = profile_scale_bytes(desc, out->m, out->n, out->k);
+
+            size_t matrix_bytes = a_bytes + b_bytes + c_bytes + d_bytes + scale_bytes;
+            if (matrix_bytes <= attempt_budget) {
+                size_t remaining = attempt_budget - matrix_bytes;
+                out->workspace_size = desired_workspace;
+                if (out->workspace_size > remaining) {
+                    out->workspace_size = round_down_size(remaining, 256u);
+                }
+                break;
            }
-            break;
+
+            if (out->m <= (uint64_t)desc->min_multiple) {
+                break;
+            }
+            out->m -= (uint64_t)desc->min_multiple;
+            out->n = out->m;
+            out->k = out->m;
+        }
+        if (out->m < (uint64_t)desc->min_multiple) {
+            attempt_budget /= 2u;
+            continue;
        }

-        if (out->m <= (uint64_t)desc->min_multiple) {
-            return 0;
-        }
-        out->m -= (uint64_t)desc->min_multiple;
-        out->n = out->m;
-        out->k = out->m;
-    }
-
-    if (!alloc_filled(cuda, &out->a_dev, a_bytes, 0x11) ||
-        !alloc_filled(cuda, &out->b_dev, b_bytes, 0x11) ||
-        !alloc_filled(cuda, &out->c_dev, c_bytes, 0x00) ||
-        !alloc_filled(cuda, &out->d_dev, d_bytes, 0x00)) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    if (!check_cublas("cublasLtMatmulDescCreate",
-                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, CUDA_R_32F))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    cublasOperation_t transa = CUBLAS_OP_T;
-    cublasOperation_t transb = CUBLAS_OP_N;
-    if (!check_cublas("set TRANSA",
-                      cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                             CUBLASLT_MATMUL_DESC_TRANSA,
-                                                             &transa,
-                                                             sizeof(transa))) ||
-        !check_cublas("set TRANSB",
-                      cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                             CUBLASLT_MATMUL_DESC_TRANSB,
-                                                             &transb,
-                                                             sizeof(transb)))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    if (desc->needs_scalar_scale) {
-        float one = 1.0f;
-        if (!alloc_filled(cuda, &out->a_scale_dev, sizeof(one), 0x00) ||
-            !alloc_filled(cuda, &out->b_scale_dev, sizeof(one), 0x00)) {
+        if (!alloc_filled(cuda, &out->a_dev, a_bytes, 0x11) ||
+            !alloc_filled(cuda, &out->b_dev, b_bytes, 0x11) ||
+            !alloc_filled(cuda, &out->c_dev, c_bytes, 0x00) ||
+            !alloc_filled(cuda, &out->d_dev, d_bytes, 0x00)) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-        if (!device_upload(cuda, out->a_scale_dev, &one, sizeof(one)) ||
-            !device_upload(cuda, out->b_scale_dev, &one, sizeof(one))) {
+
+        cudaDataType_t scale_type = matmul_scale_type(desc);
+        if (!check_cublas("cublasLtMatmulDescCreate",
+                          cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, scale_type))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-        void *a_scale_ptr = (void *)(uintptr_t)out->a_scale_dev;
-        void *b_scale_ptr = (void *)(uintptr_t)out->b_scale_dev;
-        if (!check_cublas("set A scale ptr",
+
+        cublasOperation_t transa = CUBLAS_OP_T;
+        cublasOperation_t transb = CUBLAS_OP_N;
+        if (!check_cublas("set TRANSA",
                          cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                                 CUBLASLT_MATMUL_DESC_A_SCALE_POINTER,
-                                                                 &a_scale_ptr,
-                                                                 sizeof(a_scale_ptr))) ||
-            !check_cublas("set B scale ptr",
+                                                                 CUBLASLT_MATMUL_DESC_TRANSA,
+                                                                 &transa,
+                                                                 sizeof(transa))) ||
+            !check_cublas("set TRANSB",
                          cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
-                                                                 CUBLASLT_MATMUL_DESC_B_SCALE_POINTER,
-                                                                 &b_scale_ptr,
-                                                                 sizeof(b_scale_ptr)))) {
+                                                                 CUBLASLT_MATMUL_DESC_TRANSB,
+                                                                 &transb,
+                                                                 sizeof(transb)))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
-    }
+
+        if (desc->needs_scalar_scale) {
+            float one = 1.0f;
+            if (!alloc_filled(cuda, &out->a_scale_dev, sizeof(one), 0x00) ||
+                !alloc_filled(cuda, &out->b_scale_dev, sizeof(one), 0x00)) {
+                destroy_profile(cublas, cuda, out);
+                return 0;
+            }
+            if (!device_upload(cuda, out->a_scale_dev, &one, sizeof(one)) ||
+                !device_upload(cuda, out->b_scale_dev, &one, sizeof(one))) {
+                destroy_profile(cublas, cuda, out);
+                return 0;
+            }
+            void *a_scale_ptr = (void *)(uintptr_t)out->a_scale_dev;
+            void *b_scale_ptr = (void *)(uintptr_t)out->b_scale_dev;
+            if (!check_cublas("set A scale ptr",
+                              cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
+                                                                     CUBLASLT_MATMUL_DESC_A_SCALE_POINTER,
+                                                                     &a_scale_ptr,
+                                                                     sizeof(a_scale_ptr))) ||
+                !check_cublas("set B scale ptr",
+                              cublas->cublasLtMatmulDescSetAttribute(out->op_desc,
+                                                                     CUBLASLT_MATMUL_DESC_B_SCALE_POINTER,
+                                                                     &b_scale_ptr,
+                                                                     sizeof(b_scale_ptr)))) {
+                destroy_profile(cublas, cuda, out);
+                return 0;
+            }
+        }

 #if defined(CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3)
    if (desc->needs_block_scale) {
@@ -1032,78 +1089,94 @@ static int prepare_profile(struct cublaslt_api *cublas,
    }
 #endif

-    if (!check_cublas("create A layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->a_layout, desc->a_type, out->k, out->m, out->k)) ||
-        !check_cublas("create B layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->b_layout, desc->b_type, out->k, out->n, out->k)) ||
-        !check_cublas("create C layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->c_layout, desc->c_type, out->m, out->n, out->m)) ||
-        !check_cublas("create D layout",
-                      cublas->cublasLtMatrixLayoutCreate(&out->d_layout, desc->d_type, out->m, out->n, out->m))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    if (!check_cublas("create preference", cublas->cublasLtMatmulPreferenceCreate(&out->preference))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    if (out->workspace_size > 0) {
-        if (!alloc_filled(cuda, &out->workspace_dev, out->workspace_size, 0x00)) {
+        if (!check_cublas("create A layout",
+                          cublas->cublasLtMatrixLayoutCreate(&out->a_layout, desc->a_type, out->k, out->m, out->k)) ||
+            !check_cublas("create B layout",
+                          cublas->cublasLtMatrixLayoutCreate(&out->b_layout, desc->b_type, out->k, out->n, out->k)) ||
+            !check_cublas("create C layout",
+                          cublas->cublasLtMatrixLayoutCreate(&out->c_layout, desc->c_type, out->m, out->n, out->m)) ||
+            !check_cublas("create D layout",
+                          cublas->cublasLtMatrixLayoutCreate(&out->d_layout, desc->d_type, out->m, out->n, out->m))) {
            destroy_profile(cublas, cuda, out);
            return 0;
        }
+
+        if (!check_cublas("create preference", cublas->cublasLtMatmulPreferenceCreate(&out->preference))) {
+            destroy_profile(cublas, cuda, out);
+            return 0;
+        }
+
+        if (out->workspace_size > 0) {
+            if (!alloc_filled(cuda, &out->workspace_dev, out->workspace_size, 0x00)) {
+                destroy_profile(cublas, cuda, out);
+                return 0;
+            }
+        }
+
+        if (!check_cublas("set workspace",
+                          cublas->cublasLtMatmulPreferenceSetAttribute(
+                              out->preference,
+                              CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
+                              &out->workspace_size,
+                              sizeof(out->workspace_size)))) {
+            destroy_profile(cublas, cuda, out);
+            return 0;
+        }
+
+        int found = 0;
+        if (check_cublas("heuristic",
+                         cublas->cublasLtMatmulAlgoGetHeuristic(handle,
+                                                                out->op_desc,
+                                                                out->a_layout,
+                                                                out->b_layout,
+                                                                out->c_layout,
+                                                                out->d_layout,
+                                                                out->preference,
+                                                                1,
+                                                                &out->heuristic,
+                                                                &found)) &&
+            found > 0) {
+            out->ready = 1;
+            return 1;
+        }
+
+        destroy_profile(cublas, cuda, out);
+        attempt_budget = round_down_size(attempt_budget * 3u / 4u, 256u);
+        if (attempt_budget < MIN_PROFILE_BUDGET_BYTES) {
+            break;
+        }
    }

-    if (!check_cublas("set workspace",
-                      cublas->cublasLtMatmulPreferenceSetAttribute(
-                          out->preference,
-                          CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
-                          &out->workspace_size,
-                          sizeof(out->workspace_size)))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    int found = 0;
-    if (!check_cublas("heuristic",
-                      cublas->cublasLtMatmulAlgoGetHeuristic(handle,
-                                                             out->op_desc,
-                                                             out->a_layout,
-                                                             out->b_layout,
-                                                             out->c_layout,
-                                                             out->d_layout,
-                                                             out->preference,
-                                                             1,
-                                                             &out->heuristic,
-                                                             &found))) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-    if (found <= 0) {
-        destroy_profile(cublas, cuda, out);
-        return 0;
-    }
-
-    out->ready = 1;
-    return 1;
+    return 0;
 }

 static int run_cublas_profile(cublasLtHandle_t handle,
                              struct cublaslt_api *cublas,
                              struct prepared_profile *profile) {
+    int32_t alpha_i32 = 1;
+    int32_t beta_i32 = 0;
+    double alpha_f64 = 1.0;
+    double beta_f64 = 0.0;
    float alpha = 1.0f;
    float beta = 0.0f;
+    const void *alpha_ptr = &alpha;
+    const void *beta_ptr = &beta;
+    if (profile->desc.compute_type == CUBLAS_COMPUTE_32I) {
+        alpha_ptr = &alpha_i32;
+        beta_ptr = &beta_i32;
+    } else if (profile->desc.compute_type == CUBLAS_COMPUTE_64F) {
+        alpha_ptr = &alpha_f64;
+        beta_ptr = &beta_f64;
+    }
    return check_cublas(profile->desc.name,
                        cublas->cublasLtMatmul(handle,
                                               profile->op_desc,
-                                               &alpha,
+                                               alpha_ptr,
                                               (const void *)(uintptr_t)profile->a_dev,
                                               profile->a_layout,
                                               (const void *)(uintptr_t)profile->b_dev,
                                               profile->b_layout,
-                                               &beta,
+                                               beta_ptr,
                                               (const void *)(uintptr_t)profile->c_dev,
                                               profile->c_layout,
                                               (void *)(uintptr_t)profile->d_dev,
@@ -1121,9 +1194,10 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int cc_minor,
                               int seconds,
                               int size_mb,
+                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
    CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1133,11 +1207,12 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    int active = 0;
    int mp_count = 0;
    int stream_count = 1;
-    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int profile_count = PROFILE_COUNT;
    int prepared_count = 0;
    size_t requested_budget = 0;
    size_t total_budget = 0;
    size_t per_profile_budget = 0;
+    int budget_profiles = 0;

    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "cublasLt");
@@ -1158,8 +1233,9 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Count profiles matching the filter (for deciding what to run). */
    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
-        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+        if (profile_allowed_for_run(&k_profiles[i], cc, precision_filter)) {
            planned++;
        }
    }
@@ -1170,18 +1246,42 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Budget divisor. Intent: mixed phases split the budget across every
+     * runnable profile; a single-precision phase only across those matching
+     * precision_filter. NOTE: this loop applies precision_filter too, so
+     * planned_total always equals planned (it is not "regardless of filter"). */
+    int planned_total = 0;
+    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+        if (profile_allowed_for_run(&k_profiles[i], cc, precision_filter)) {
+            planned_total++;
+        }
+    }
+    if (planned_total < planned) {
+        planned_total = planned;
+    }
+    budget_profiles = planned_total;
+    if (precision_filter != NULL) {
+        budget_profiles = planned;
+    }
+    if (budget_profiles <= 0) {
+        budget_profiles = planned_total;
+    }
+
    requested_budget = (size_t)size_mb * 1024u * 1024u;
-    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (requested_budget < (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES;
    }
    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
-    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (total_budget < (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)budget_profiles * MIN_PROFILE_BUDGET_BYTES;
    }
    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
        cuda->cuStreamCreate &&
        cuda->cuStreamDestroy) {
-        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+        stream_count = choose_stream_count(mp_count, budget_profiles, total_budget, 1);
+    }
+    if (precision_filter != NULL && stream_count > MAX_SINGLE_PRECISION_STREAMS) {
+        stream_count = MAX_SINGLE_PRECISION_STREAMS;
    }
    if (stream_count > 1) {
        int created = 0;
@@ -1194,18 +1294,22 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }
    report->stream_count = stream_count;
-    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    per_profile_budget = total_budget / ((size_t)budget_profiles * (size_t)stream_count);
    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
    }
+    if (precision_filter != NULL) {
+        per_profile_budget = clamp_single_precision_profile_budget(per_profile_budget);
+    }
    report->buffer_mb = (int)(total_budget / (1024u * 1024u));
    append_detail(report->details,
                  sizeof(report->details),
-                  "requested_mb=%d actual_mb=%d streams=%d mp_count=%d per_worker_mb=%zu\n",
+                  "requested_mb=%d actual_mb=%d streams=%d mp_count=%d budget_profiles=%d per_worker_mb=%zu\n",
                  size_mb,
                  report->buffer_mb,
                  report->stream_count,
                  mp_count,
+                  budget_profiles,
                  per_profile_budget / (1024u * 1024u));

    for (int i = 0; i < profile_count; i++) {
@@ -1218,6 +1322,13 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
+        if (!profile_allowed_for_run(desc, cc, precision_filter)) {
+            append_detail(report->details,
+                          sizeof(report->details),
+                          "%s=SKIPPED benchmark_disabled\n",
+                          desc->name);
+            continue;
+        }
        for (int lane = 0; lane < stream_count; lane++) {
            CUstream stream = streams[lane];
            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
@@ -1335,10 +1446,29 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 }
 #endif

+static void print_stress_report(const struct stress_report *report, int device_index, int seconds) {
+    /* Emit the run summary as key=value lines on stdout. */
+    printf("device=%s\n", report->device);
+    printf("device_index=%d\n", device_index);
+    printf("compute_capability=%d.%d\n", report->cc_major, report->cc_minor);
+    printf("backend=%s\n", report->backend);
+    printf("duration_s=%d\n", seconds);
+    printf("buffer_mb=%d\n", report->buffer_mb);
+    printf("streams=%d\n", report->stream_count);
+    printf("iterations=%lu\n", report->iterations);
+    printf("checksum=%llu\n", (unsigned long long)report->checksum);
+    /* details is written verbatim; an empty string produces no output. */
+    fputs(report->details, stdout);
+    printf("status=OK\n");
+}
+
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
    int device_index = 0;
+    const char *precision_filter = NULL; /* NULL = all; else block_label to match */
+    const char *precision_plan = NULL;
+    const char *precision_plan_seconds = NULL;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
@@ -1346,8 +1476,16 @@ int main(int argc, char **argv) {
            size_mb = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
            device_index = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "--precision") == 0 && i + 1 < argc) {
+            precision_filter = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan") == 0 && i + 1 < argc) {
+            precision_plan = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan-seconds") == 0 && i + 1 < argc) {
+            precision_plan_seconds = argv[++i];
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
+            fprintf(stderr,
+                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]\n",
+                    argv[0]);
            return 2;
        }
    }
@@ -1407,26 +1545,94 @@ int main(int argc, char **argv) {
    int ok = 0;

 #if HAVE_CUBLASLT_HEADERS
-    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report);
+    if (precision_plan != NULL && precision_plan[0] != '\0') {
+        char *plan_copy = strdup(precision_plan);
+        char *plan_seconds_copy = NULL;
+        int phase_seconds[32] = {0};
+        int phase_seconds_count = 0;
+        int phase_ok = 0;
+        if (plan_copy == NULL) {
+            fprintf(stderr, "failed to allocate precision plan buffer\n");
+            return 1;
+        }
+        if (precision_plan_seconds != NULL && precision_plan_seconds[0] != '\0') {
+            plan_seconds_copy = strdup(precision_plan_seconds);
+            if (plan_seconds_copy == NULL) {
+                free(plan_copy);
+                fprintf(stderr, "failed to allocate precision plan seconds buffer\n");
+                return 1;
+            }
+            for (char *sec_token = strtok(plan_seconds_copy, ",");
+                 sec_token != NULL && phase_seconds_count < (int)(sizeof(phase_seconds) / sizeof(phase_seconds[0]));
+                 sec_token = strtok(NULL, ",")) {
+                while (*sec_token == ' ' || *sec_token == '\t') {
+                    sec_token++;
+                }
+                if (*sec_token == '\0') {
+                    continue;
+                }
+                phase_seconds[phase_seconds_count++] = atoi(sec_token);
+            }
+        }
+        int phase_idx = 0;
+        for (char *token = strtok(plan_copy, ","); token != NULL; token = strtok(NULL, ","), phase_idx++) {
+            while (*token == ' ' || *token == '\t') {
+                token++;
+            }
+            if (*token == '\0') {
+                continue;
+            }
+            const char *phase_name = token;
+            const char *phase_filter = token;
+            if (strcmp(token, "mixed") == 0 || strcmp(token, "all") == 0) {
+                phase_filter = NULL;
+            }
+            int phase_duration = seconds;
+            if (phase_idx < phase_seconds_count && phase_seconds[phase_idx] > 0) {
+                phase_duration = phase_seconds[phase_idx];
+            }
+            printf("phase_begin=%s\n", phase_name);
+            fflush(stdout);
+            memset(&report, 0, sizeof(report));
+            ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, phase_duration, size_mb, phase_filter, &report);
+            if (ok) {
+                print_stress_report(&report, device_index, phase_duration);
+                phase_ok = 1;
+            } else {
+                printf("phase_error=%s\n", phase_name);
+                if (report.details[0] != '\0') {
+                    printf("%s", report.details);
+                    if (report.details[strlen(report.details) - 1] != '\n') {
+                        printf("\n");
+                    }
+                }
+                printf("status=FAILED\n");
+            }
+            printf("phase_end=%s\n", phase_name);
+            fflush(stdout);
+        }
+        free(plan_seconds_copy);
+        free(plan_copy);
+        return phase_ok ? 0 : 1;
+    }
+    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, precision_filter, &report);
 #endif
    if (!ok) {
-        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report)) {
+        if (precision_filter != NULL) {
+            fprintf(stderr,
+                    "requested precision path unavailable: precision=%s device=%s cc=%d.%d\n",
+                    precision_filter,
+                    name,
+                    cc_major,
+                    cc_minor);
+            return 1;
+        }
+        int ptx_mb = size_mb;
+        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, ptx_mb, &report)) {
            return 1;
        }
    }

-    printf("device=%s\n", report.device);
-    printf("device_index=%d\n", device_index);
-    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
-    printf("backend=%s\n", report.backend);
-    printf("duration_s=%d\n", seconds);
-    printf("buffer_mb=%d\n", report.buffer_mb);
-    printf("streams=%d\n", report.stream_count);
-    printf("iterations=%lu\n", report.iterations);
-    printf("checksum=%llu\n", (unsigned long long)report.checksum);
-    if (report.details[0] != '\0') {
-        printf("%s", report.details);
-    }
-    printf("status=OK\n");
+    print_stress_report(&report, device_index, seconds);
    return 0;
 }
--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -161,6 +161,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}" \
@@ -175,6 +176,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}"
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -57,6 +57,7 @@ OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BUILD_VARIANT}"
 export BEE_GPU_VENDOR BEE_NVIDIA_MODULE_FLAVOR BUILD_VARIANT

 . "${BUILDER_DIR}/VERSIONS"
+export MEMTEST_VERSION
 export PATH="$PATH:/usr/local/go/bin"
 : "${BEE_REQUIRE_MEMTEST:=0}"

@@ -125,6 +126,37 @@ resolve_iso_version() {
    resolve_audit_version
 }

+# sync_builder_workdir SRC_DIR DST_DIR
+#
+# Mirror SRC_DIR into DST_DIR with "rsync -a --delete", skipping cache/,
+# chroot/, .build/ and *.iso / *.packages / *.contents / *.files entries.
+# Afterwards verify the staged bootloader layout: grub-efi/grub.cfg must be
+# present and grub-pc must be absent. Either violation prints an error to
+# stderr and exits the script with status 1.
+sync_builder_workdir() {
+    src_dir="$1"
+    dst_dir="$2"
+    mkdir -p "$dst_dir"
+
+    # A workdir reused from an older build may still carry
+    # config/bootloaders/grub-pc from before the source tree became grub-efi
+    # only. Drop the bootloader templates up front so nothing stale can leak
+    # into the new ISO.
+    rm -rf "$dst_dir/config/bootloaders"
+
+    # Gather the exclusions as this function's positional parameters so the
+    # rsync invocation itself stays on one line.
+    set -- \
+        --exclude='cache/' \
+        --exclude='chroot/' \
+        --exclude='.build/' \
+        --exclude='*.iso' \
+        --exclude='*.packages' \
+        --exclude='*.contents' \
+        --exclude='*.files'
+    rsync -a --delete "$@" "$src_dir/" "$dst_dir/"
+
+    [ -f "$dst_dir/config/bootloaders/grub-efi/grub.cfg" ] || {
+        echo "ERROR: staged workdir is missing config/bootloaders/grub-efi/grub.cfg" >&2
+        exit 1
+    }
+    [ ! -e "$dst_dir/config/bootloaders/grub-pc" ] || {
+        echo "ERROR: stale config/bootloaders/grub-pc remained in staged workdir" >&2
+        exit 1
+    }
+}
+
 iso_list_files() {
    iso_path="$1"

@@ -202,7 +234,7 @@ dump_memtest_debug() {

        echo "-- source bootloader templates --"
        for cfg in \
-            "${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
+            "${BUILDER_DIR}/config/bootloaders/grub-efi/grub.cfg" \
            "${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
            if [ -f "$cfg" ]; then
                echo "  file: $cfg"
@@ -465,6 +497,75 @@ validate_iso_memtest() {
    echo "=== memtest validation OK ==="
 }

+# Abort live boot validation: report MESSAGE on stderr, remove the extracted
+# temporary config copies, and exit the script with status 1.
+live_boot_validation_abort() {
+    echo "ERROR: $1" >&2
+    rm -f "$grub_cfg" "$isolinux_cfg"
+    exit 1
+}
+
+# validate_iso_live_boot_entries ISO_PATH
+#
+# Extract boot/grub/grub.cfg and isolinux/live.cfg from the ISO and check that
+# no @APPEND_LIVE@/@KERNEL_LIVE@/@INITRD_LIVE@ placeholder is left, that the
+# GRUB menu has the default and toram EASY-BEE entries, and that both
+# bootloaders carry boot=live (and boot=live ... toram) command lines.
+# Any failed check exits the script with status 1.
+validate_iso_live_boot_entries() {
+    iso_path="$1"
+    echo "=== validating live boot entries in ISO ==="
+
+    if [ ! -f "$iso_path" ]; then
+        echo "ERROR: ISO not found for live boot validation: $iso_path" >&2
+        exit 1
+    fi
+    if ! require_iso_reader "$iso_path" >/dev/null 2>&1; then
+        echo "ERROR: ISO reader unavailable for live boot validation" >&2
+        exit 1
+    fi
+
+    # Scratch files that receive the two bootloader configs from the ISO.
+    grub_cfg="$(mktemp)"
+    isolinux_cfg="$(mktemp)"
+
+    iso_read_member "$iso_path" boot/grub/grub.cfg "$grub_cfg" ||
+        live_boot_validation_abort "failed to read boot/grub/grub.cfg from ISO"
+    iso_read_member "$iso_path" isolinux/live.cfg "$isolinux_cfg" ||
+        live_boot_validation_abort "failed to read isolinux/live.cfg from ISO"
+
+    if grep -q '@APPEND_LIVE@\|@KERNEL_LIVE@\|@INITRD_LIVE@' "$grub_cfg" "$isolinux_cfg"; then
+        live_boot_validation_abort "unresolved live-build placeholders remain in ISO bootloader config"
+    fi
+
+    # GRUB: both menu entries and their kernel command lines.
+    grep -q 'menuentry "EASY-BEE"' "$grub_cfg" ||
+        live_boot_validation_abort "GRUB default EASY-BEE entry is missing"
+    grep -q 'menuentry "EASY-BEE -- load to RAM (toram)"' "$grub_cfg" ||
+        live_boot_validation_abort "GRUB toram entry is missing"
+    grep -q 'linux .*boot=live ' "$grub_cfg" ||
+        live_boot_validation_abort "GRUB live entry is missing boot=live"
+    grep -q 'linux .*boot=live .*toram ' "$grub_cfg" ||
+        live_boot_validation_abort "GRUB toram entry is missing boot=live or toram"
+
+    # isolinux: the append lines of the live and toram entries.
+    grep -q 'append .*boot=live ' "$isolinux_cfg" ||
+        live_boot_validation_abort "isolinux live entry is missing boot=live"
+    grep -q 'append .*boot=live .*toram ' "$isolinux_cfg" ||
+        live_boot_validation_abort "isolinux toram entry is missing boot=live or toram"
+
+    rm -f "$grub_cfg" "$isolinux_cfg"
+    echo "=== live boot validation OK ==="
+}
+
 validate_iso_nvidia_runtime() {
    iso_path="$1"
    [ "$BEE_GPU_VENDOR" = "nvidia" ] || return 0
@@ -541,6 +642,185 @@ label memtest
 EOF
 }

+# extract_live_grub_entry CFG
+#
+# Read the first "linux /live/..." and "initrd /live/..." lines from the GRUB
+# config CFG and publish their parts in shell variables:
+#   grub_kernel  - kernel path (second word of the linux line)
+#   grub_append  - everything after the kernel path on the linux line
+#   grub_initrd  - initrd path (second word of the initrd line)
+# Returns 1 if either line is missing or any of the three values is empty,
+# 0 otherwise.
+extract_live_grub_entry() {
+    cfg="$1"
+    live_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
+    live_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
+    [ -n "$live_linux" ] || return 1
+    [ -n "$live_initrd" ] || return 1
+
+    grub_kernel="$(printf '%s\n' "$live_linux" | awk '{print $2}')"
+    # Strip the "linux" keyword and the kernel path by pattern instead of
+    # counting single-space fields: the matcher above accepts indented lines
+    # and runs of whitespace, and on those a "cut -d' ' -f3-" would return
+    # the keyword and kernel path as part of the append string.
+    grub_append="$(printf '%s\n' "$live_linux" | awk '{ sub(/^[[:space:]]*linux[[:space:]]+[^[:space:]]+[[:space:]]*/, ""); print }')"
+    grub_initrd="$(printf '%s\n' "$live_initrd" | awk '{print $2}')"
+    [ -n "$grub_kernel" ] || return 1
+    [ -n "$grub_append" ] || return 1
+    [ -n "$grub_initrd" ] || return 1
+    return 0
+}
+
+# load_live_build_append LB_DIR
+#
+# Source LB_DIR/config/binary into the current shell and copy its
+# LB_BOOTAPPEND_LIVE value into live_build_append. Returns 1 when the file
+# does not exist or the variable ends up unset/empty, 0 otherwise.
+load_live_build_append() {
+    lb_dir="$1"
+    binary_cfg="$lb_dir/config/binary"
+    if [ ! -f "$binary_cfg" ]; then
+        return 1
+    fi
+
+    # live-build generates config/binary as a list of shell variable
+    # assignments (for example LB_BOOTAPPEND_LIVE="boot=live ..."), so it can
+    # be sourced directly.
+    # shellcheck disable=SC1090
+    . "$binary_cfg"
+
+    if [ -z "${LB_BOOTAPPEND_LIVE:-}" ]; then
+        return 1
+    fi
+    live_build_append="$LB_BOOTAPPEND_LIVE"
+    return 0
+}
+
+# extract_live_isolinux_entry CFG
+#
+# Pull the first "linux /live/...", "initrd /live/..." and "append ..." lines
+# out of the isolinux config CFG and publish:
+#   isolinux_kernel       - second word of the linux line
+#   isolinux_initrd_path  - second word of the initrd line
+#   isolinux_append       - the append line with its keyword stripped
+# Returns 1 if any line or derived value is empty, 0 otherwise.
+extract_live_isolinux_entry() {
+    cfg="$1"
+    isolinux_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
+    isolinux_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
+    isolinux_append="$(awk '/^[[:space:]]*append[[:space:]]+/ { sub(/^[[:space:]]*append[[:space:]]+/, ""); print; exit }' "$cfg")"
+    if [ -z "$isolinux_linux" ] || [ -z "$isolinux_initrd" ] || [ -z "$isolinux_append" ]; then
+        return 1
+    fi
+
+    # The path is the second whitespace-separated word of each matched line.
+    isolinux_kernel="$(printf '%s\n' "$isolinux_linux" | awk '{print $2}')"
+    isolinux_initrd_path="$(printf '%s\n' "$isolinux_initrd" | awk '{print $2}')"
+    if [ -z "$isolinux_kernel" ] || [ -z "$isolinux_initrd_path" ]; then
+        return 1
+    fi
+    return 0
+}
+
+# write_canonical_grub_cfg CFG KERNEL APPEND_LIVE INITRD
+#
+# Overwrite CFG with the canonical EASY-BEE GRUB menu: a default entry, a
+# toram entry, a memtest86+ entry (EFI chainloader or BIOS linux16) and, on
+# EFI, a firmware-settings entry. KERNEL, APPEND_LIVE and INITRD are expanded
+# into the linux/initrd lines; ${grub_platform} is escaped so it reaches the
+# file literally for GRUB to evaluate.
+#
+# No "echo" banner is written: the menu mirrors the grub-efi/grub.cfg
+# template, which dropped the ASCII-art banner because echo output renders
+# over the menu area when gfxterm is active.
+write_canonical_grub_cfg() {
+    cfg="$1"
+    kernel="$2"
+    append_live="$3"
+    initrd="$4"
+
+    cat > "$cfg" <<EOF
+source /boot/grub/config.cfg
+
+menuentry "EASY-BEE" {
+    linux   ${kernel} ${append_live} bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+    initrd  ${initrd}
+}
+
+menuentry "EASY-BEE -- load to RAM (toram)" {
+    linux   ${kernel} ${append_live} toram bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+    initrd  ${initrd}
+}
+
+
+if [ "\${grub_platform}" = "efi" ]; then
+    menuentry "Memory Test (memtest86+)" {
+        chainloader /boot/memtest86+x64.efi
+    }
+else
+    menuentry "Memory Test (memtest86+)" {
+        linux16 /boot/memtest86+x64.bin
+    }
+fi
+
+if [ "\${grub_platform}" = "efi" ]; then
+    menuentry "UEFI Firmware Settings" {
+        fwsetup
+    }
+fi
+EOF
+}
+
+# write_canonical_isolinux_cfg CFG KERNEL INITRD APPEND_LIVE
+#
+# Overwrite CFG with the canonical EASY-BEE isolinux menu: normal (default),
+# toram, GSP=off, KMS, KMS+GSP=off and fail-safe live entries plus a
+# memtest86+ entry. KERNEL, INITRD and APPEND_LIVE are expanded into every
+# live entry.
+#
+# Note the argument order: INITRD comes before APPEND_LIVE here, whereas
+# write_canonical_grub_cfg takes APPEND_LIVE before INITRD.
+#
+# NOTE(review): the heredoc is unquoted, so only ${...} references expand;
+# "@FLAVOUR@" in the label names is written to CFG literally. Confirm whether
+# anything substitutes it after this point or whether literal labels are
+# intended.
+write_canonical_isolinux_cfg() {
+    cfg="$1"
+    kernel="$2"
+    initrd="$3"
+    append_live="$4"
+
+    cat > "$cfg" <<EOF
+label live-@FLAVOUR@-normal
+    menu label ^EASY-BEE
+    menu default
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-toram
+    menu label EASY-BEE (^load to RAM)
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-gsp-off
+    menu label EASY-BEE (^NVIDIA GSP=off)
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-kms
+    menu label EASY-BEE (^KMS, no nomodeset)
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-kms-gsp-off
+    menu label EASY-BEE (KMS, ^GSP=off)
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-failsafe
+    menu label EASY-BEE (^fail-safe)
+    linux ${kernel}
+    initrd ${initrd}
+    append ${append_live} nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
+
+label memtest
+    menu label ^Memory Test (memtest86+)
+    linux /boot/memtest86+x64.bin
+EOF
+}
+
+# enforce_live_build_bootloader_assets LB_DIR
+#
+# Rewrite the bootloader configs that live-build left under LB_DIR/binary:
+#   - boot/grub/grub.cfg: copy config.cfg, theme.cfg and live-theme/ from the
+#     builder's grub-efi templates next to it, then regenerate the menu.
+#   - isolinux/live.cfg: regenerate the menu.
+# The boot append string comes from LB_BOOTAPPEND_LIVE when it can be loaded,
+# otherwise from the append text extracted out of the existing config. A
+# config whose live entry cannot be extracted is left untouched and a warning
+# is printed to stderr.
+enforce_live_build_bootloader_assets() {
+    lb_dir="$1"
+    grub_dir="$lb_dir/binary/boot/grub"
+    grub_cfg="$grub_dir/grub.cfg"
+    isolinux_cfg="$lb_dir/binary/isolinux/live.cfg"
+
+    load_live_build_append "$lb_dir" || {
+        echo "bootloader sync: WARNING: could not load LB_BOOTAPPEND_LIVE from $lb_dir/config/binary" >&2
+        # Empty value makes the ${live_build_append:-...} fallbacks below kick in.
+        live_build_append=""
+    }
+
+    if [ -f "$grub_cfg" ] && extract_live_grub_entry "$grub_cfg"; then
+        mkdir -p "$grub_dir/live-theme"
+        cp "${BUILDER_DIR}/config/bootloaders/grub-efi/config.cfg" "$grub_dir/config.cfg"
+        cp "${BUILDER_DIR}/config/bootloaders/grub-efi/theme.cfg" "$grub_dir/theme.cfg"
+        cp -R "${BUILDER_DIR}/config/bootloaders/grub-efi/live-theme/." "$grub_dir/live-theme/"
+        write_canonical_grub_cfg "$grub_cfg" "$grub_kernel" "${live_build_append:-$grub_append}" "$grub_initrd"
+        echo "bootloader sync: rewrote binary/boot/grub/grub.cfg with canonical EASY-BEE menu"
+    elif [ -f "$grub_cfg" ]; then
+        echo "bootloader sync: WARNING: could not extract live entry from $grub_cfg" >&2
+    fi
+
+    if [ -f "$isolinux_cfg" ] && extract_live_isolinux_entry "$isolinux_cfg"; then
+        write_canonical_isolinux_cfg "$isolinux_cfg" "$isolinux_kernel" "$isolinux_initrd_path" "${live_build_append:-$isolinux_append}"
+        echo "bootloader sync: rewrote binary/isolinux/live.cfg with canonical EASY-BEE menu"
+    elif [ -f "$isolinux_cfg" ]; then
+        echo "bootloader sync: WARNING: could not extract live entry from $isolinux_cfg" >&2
+    fi
+}
+
 copy_memtest_from_deb() {
    deb="$1"
    dst_boot="$2"
@@ -775,6 +1055,7 @@ run_optional_step_sh() {
        return 0
    fi

+    mkdir -p "${LOG_DIR}" 2>/dev/null || true
    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== optional step: ${step_name} ==="
@@ -798,13 +1079,14 @@ start_build_log
 # install them on the fly so NVIDIA modules and ISO kernel always match.
 if [ -z "${DEBIAN_KERNEL_ABI}" ] || [ "${DEBIAN_KERNEL_ABI}" = "auto" ]; then
    echo "=== refreshing apt index to detect current kernel ABI ==="
-    apt-get update -qq
+    apt-get update -qq || echo "WARNING: apt-get update failed, trying cached index"
    DEBIAN_KERNEL_ABI=$(apt-cache depends linux-image-amd64 2>/dev/null \
        | awk '/Depends:.*linux-image-[0-9]/{print $2}' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+-[0-9]+' \
        | head -1)
    if [ -z "${DEBIAN_KERNEL_ABI}" ]; then
        echo "ERROR: could not auto-detect kernel ABI from apt-cache" >&2
+        echo "Hint: set DEBIAN_KERNEL_ABI=x.y.z-N in iso/builder/VERSIONS to skip auto-detection" >&2
        exit 1
    fi
    echo "=== kernel ABI: ${DEBIAN_KERNEL_ABI} ==="
@@ -873,9 +1155,37 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then

    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"

+    echo "=== bee-gpu-burn FP4 header probe ==="
+    fp4_type_match="$(grep -Rsnm 1 'CUDA_R_4F_E2M1' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    fp4_scale_match="$(grep -Rsnm 1 'CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    if [ -n "$fp4_type_match" ]; then
+        echo "fp4_header_symbol=present"
+        echo "$fp4_type_match"
+    else
+        echo "fp4_header_symbol=missing"
+    fi
+    if [ -n "$fp4_scale_match" ]; then
+        echo "fp4_scale_mode_symbol=present"
+        echo "$fp4_scale_match"
+    else
+        echo "fp4_scale_mode_symbol=missing"
+    fi
+
    GPU_STRESS_NEED_BUILD=1
-    if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then
+    if [ -f "$GPU_BURN_WORKER_BIN" ]; then
        GPU_STRESS_NEED_BUILD=0
+        for dep in \
+            "${BUILDER_DIR}/bee-gpu-stress.c" \
+            "${BUILDER_DIR}/VERSIONS"; do
+            if [ "$dep" -nt "$GPU_BURN_WORKER_BIN" ]; then
+                GPU_STRESS_NEED_BUILD=1
+                break
+            fi
+        done
+        if [ "$GPU_STRESS_NEED_BUILD" = "0" ] && \
+            find "${CUBLAS_CACHE}/include" "${CUBLAS_CACHE}/lib" -type f -newer "$GPU_BURN_WORKER_BIN" | grep -q .; then
+            GPU_STRESS_NEED_BUILD=1
+        fi
    fi

    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
@@ -889,21 +1199,19 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    else
        echo "=== bee-gpu-burn worker up to date, skipping build ==="
    fi
+    echo "=== bee-gpu-burn compiled profile probe ==="
+    if grep -aq 'fp4_e2m1' "$GPU_BURN_WORKER_BIN"; then
+        echo "fp4_profile_string=present"
+    else
+        echo "fp4_profile_string=missing"
+    fi
 fi

 echo "=== preparing staged overlay (${BUILD_VARIANT}) ==="
 mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"

 # Sync builder config into variant work dir, preserving lb cache.
-rsync -a --delete \
-    --exclude='cache/' \
-    --exclude='chroot/' \
-    --exclude='.build/' \
-    --exclude='*.iso' \
-    --exclude='*.packages' \
-    --exclude='*.contents' \
-    --exclude='*.files' \
-    "${BUILDER_DIR}/" "${BUILD_WORK_DIR}/"
+sync_builder_workdir "${BUILDER_DIR}" "${BUILD_WORK_DIR}"

 # Share deb package cache across variants.
 # Restore: populate work dir cache from shared cache before build.
@@ -917,86 +1225,6 @@ elif [ -d "${LB_PKG_CACHE}" ] && [ "$(ls -A "${LB_PKG_CACHE}" 2>/dev/null)" ]; t
    rsync -a "${LB_PKG_CACHE}/" "${BUILD_WORK_DIR}/cache/packages.chroot/"
 fi

-if [ "$BEE_GPU_VENDOR" != "nvidia" ] || [ "$BEE_NVIDIA_MODULE_FLAVOR" != "proprietary" ]; then
-    cat > "${BUILD_WORK_DIR}/config/bootloaders/grub-pc/grub.cfg" <<'EOF'
-source /boot/grub/config.cfg
-
-echo ""
-echo "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗"
-echo "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝"
-echo "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗"
-echo "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝"
-echo "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗"
-echo "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝"
-echo "  Hardware Audit LiveCD"
-echo ""
-
-menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
-    initrd  @INITRD_LIVE@
-}
-
-submenu "EASY-BEE (advanced options) -->" {
-    menuentry "EASY-BEE — KMS (no nomodeset)" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
-        initrd  @INITRD_LIVE@
-    }
-
-    menuentry "EASY-BEE — fail-safe" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
-        initrd  @INITRD_LIVE@
-    }
-}
-
-if [ "${grub_platform}" = "efi" ]; then
-    menuentry "Memory Test (memtest86+)" {
-        chainloader /boot/memtest86+x64.efi
-    }
-else
-    menuentry "Memory Test (memtest86+)" {
-        linux16 /boot/memtest86+x64.bin
-    }
-fi
-
-if [ "${grub_platform}" = "efi" ]; then
-    menuentry "UEFI Firmware Settings" {
-        fwsetup
-    }
-fi
-EOF
-
-    cat > "${BUILD_WORK_DIR}/config/bootloaders/isolinux/live.cfg.in" <<'EOF'
-label live-@FLAVOUR@-normal
-    menu label ^EASY-BEE
-    menu default
-    linux @LINUX@
-    initrd @INITRD@
-    append @APPEND_LIVE@
-
-label live-@FLAVOUR@-kms
-    menu label EASY-BEE (^graphics/KMS)
-    linux @LINUX@
-    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms
-
-label live-@FLAVOUR@-toram
-    menu label EASY-BEE (^load to RAM)
-    linux @LINUX@
-    initrd @INITRD@
-    append @APPEND_LIVE@ toram
-
-label live-@FLAVOUR@-failsafe
-    menu label EASY-BEE (^fail-safe)
-    linux @LINUX@
-    initrd @INITRD@
-    append @APPEND_LIVE@ memtest noapic noapm nodma nomce nolapic nosmp vga=normal
-
-label memtest
-    menu label ^Memory Test (memtest86+)
-    linux /boot/memtest86+x64.bin
-EOF
-fi
-
 rsync -a "${OVERLAY_DIR}/" "${OVERLAY_STAGE_DIR}/"
 rm -f \
    "${OVERLAY_STAGE_DIR}/etc/bee-ssh-password-fallback" \
@@ -1225,6 +1453,7 @@ fi
 # --- substitute version placeholders in package list and archive ---
 if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    sed -i \
+        -e "s/%%NVIDIA_FABRICMANAGER_VERSION%%/${NVIDIA_FABRICMANAGER_VERSION}/g" \
        -e "s/%%DCGM_VERSION%%/${DCGM_VERSION}/g" \
        "${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
 elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
@@ -1267,10 +1496,18 @@ BEE_GPU_VENDOR_UPPER="$(echo "${BUILD_VARIANT}" | tr 'a-z-' 'A-Z_')"
 export BEE_GPU_VENDOR_UPPER

 cd "${LB_DIR}"
-run_step_sh "live-build clean" "80-lb-clean" "lb clean 2>&1 | tail -3"
+run_step_sh "live-build clean" "80-lb-clean" "lb clean --all 2>&1 | tail -3"
 run_step_sh "live-build config" "81-lb-config" "lb config 2>&1 | tail -5"
 dump_memtest_debug "pre-build" "${LB_DIR}"
 run_step_sh "live-build build" "90-lb-build" "lb build 2>&1"
+echo "=== enforcing canonical bootloader assets ==="
+enforce_live_build_bootloader_assets "${LB_DIR}"
+reset_live_build_stage "${LB_DIR}" "binary_checksums"
+reset_live_build_stage "${LB_DIR}" "binary_iso"
+reset_live_build_stage "${LB_DIR}" "binary_zsync"
+run_step_sh "rebuild live-build checksums after bootloader sync" "91b-lb-checksums" "lb binary_checksums 2>&1"
+run_step_sh "rebuild ISO after bootloader sync" "91c-lb-binary-iso" "lb binary_iso 2>&1"
+run_step_sh "rebuild zsync after bootloader sync" "91d-lb-zsync" "lb binary_zsync 2>&1"

 # --- persist deb package cache back to shared location ---
 # This allows the second variant to reuse all downloaded packages.
@@ -1295,6 +1532,7 @@ if [ -f "$ISO_RAW" ]; then
        fi
    fi
    validate_iso_memtest "$ISO_RAW"
+    validate_iso_live_boot_entries "$ISO_RAW"
    validate_iso_nvidia_runtime "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
--- a/iso/builder/config/bootloaders/grub-efi/config.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/config.cfg
@@ -23,9 +23,9 @@ insmod serial
 serial --unit=0 --speed=115200 --word=8 --parity=no --stop=1

 insmod gfxterm
-insmod png
-
-source /boot/grub/theme.cfg

 terminal_input console serial
 terminal_output gfxterm serial
+
+insmod png
+source /boot/grub/theme.cfg
--- a/iso/builder/config/bootloaders/grub-efi/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/grub.cfg
@@ -0,0 +1,28 @@
+# EASY-BEE GRUB menu template.
+# @KERNEL_LIVE@, @APPEND_LIVE@ and @INITRD_LIVE@ are placeholders; they must
+# be replaced with real values before this file ends up on the ISO.
+source /boot/grub/config.cfg
+
+# Default entry: live boot with the EASY-BEE kernel parameters.
+menuentry "EASY-BEE" {
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+    initrd  @INITRD_LIVE@
+}
+
+# Same parameters as the default entry, with "toram" added.
+menuentry "EASY-BEE -- load to RAM (toram)" {
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.display=kms bee.nvidia.mode=normal pci=realloc net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+    initrd  @INITRD_LIVE@
+}
+
+
+# memtest86+: chainload the EFI binary on EFI platforms, otherwise boot the
+# .bin image with linux16.
+if [ "${grub_platform}" = "efi" ]; then
+    menuentry "Memory Test (memtest86+)" {
+        chainloader /boot/memtest86+x64.efi
+    }
+else
+    menuentry "Memory Test (memtest86+)" {
+        linux16 /boot/memtest86+x64.bin
+    }
+fi
+
+# Firmware setup entry, offered on EFI platforms only.
+if [ "${grub_platform}" = "efi" ]; then
+    menuentry "UEFI Firmware Settings" {
+        fwsetup
+    }
+fi
--- a/iso/builder/config/bootloaders/grub-efi/live-theme/bee-logo.png
+++ b/iso/builder/config/bootloaders/grub-efi/live-theme/bee-logo.png
--- a/iso/builder/config/bootloaders/grub-efi/live-theme/theme.txt
+++ b/iso/builder/config/bootloaders/grub-efi/live-theme/theme.txt
@@ -5,6 +5,13 @@ title-text: ""
 message-font: "Unifont Regular 16"
 terminal-font: "Unifont Regular 16"

+# Bee logo near the top of the screen. Theme components must open with a
+# leading "+" (as the label and boot_menu components in this file do);
+# without it the line is not parsed as a component.
+# NOTE(review): "50%-200" centers the image only if bee-logo.png is 400 px
+# wide - confirm against the asset.
++ image {
+        top = 4%
+        left = 50%-200
+        file = "bee-logo.png"
+}
+
 #help bar at the bottom
 + label {
        top = 100%-50
@@ -21,17 +28,17 @@ terminal-font: "Unifont Regular 16"
 + boot_menu {
        left = 20%
        width = 60%
-        top = 62%
-        height = 38%-80
+        top = 65%
+        height = 35%-80
        item_color = "#c88000"
        item_font = "Unifont Regular 16"
        selected_item_color= "#f5a800"
        selected_item_font = "Unifont Regular 16"
-        item_height = 16
-        item_padding = 0
+        item_height = 20
+        item_padding = 2
        item_spacing = 4
        icon_width = 0
-        icon_heigh = 0
+        icon_height = 0
        item_icon_space = 0
 }

--- a/iso/builder/config/bootloaders/grub-efi/theme.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/theme.cfg
@@ -1,7 +1,7 @@
 set color_normal=light-gray/black
 set color_highlight=yellow/black

-if [ -e /boot/grub/splash.png ]; then
+if [ -e /boot/grub/live-theme/theme.txt ]; then
    set theme=/boot/grub/live-theme/theme.txt
 else
    set menu_color_normal=yellow/black
--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -1,49 +0,0 @@
-source /boot/grub/config.cfg
-
-echo ""
-echo "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗"
-echo "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝"
-echo "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗"
-echo "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝"
-echo "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗"
-echo "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝"
-echo "  Hardware Audit LiveCD"
-echo ""
-
-menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
-    initrd  @INITRD_LIVE@
-}
-
-submenu "EASY-BEE (advanced options) -->" {
-    menuentry "EASY-BEE — GSP=off" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
-        initrd  @INITRD_LIVE@
-    }
-
-    menuentry "EASY-BEE — KMS (no nomodeset)" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
-        initrd  @INITRD_LIVE@
-    }
-
-    menuentry "EASY-BEE — fail-safe" {
-        linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
-        initrd  @INITRD_LIVE@
-    }
-}
-
-if [ "${grub_platform}" = "efi" ]; then
-    menuentry "Memory Test (memtest86+)" {
-        chainloader /boot/memtest86+x64.efi
-    }
-else
-    menuentry "Memory Test (memtest86+)" {
-        linux16 /boot/memtest86+x64.bin
-    }
-fi
-
-if [ "${grub_platform}" = "efi" ]; then
-    menuentry "UEFI Firmware Settings" {
-        fwsetup
-    }
-fi
--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -3,37 +3,37 @@ label live-@FLAVOUR@-normal
    menu default
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=normal
-
-label live-@FLAVOUR@-kms
-    menu label EASY-BEE (^graphics/KMS)
-    linux @LINUX@
-    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=normal
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup

 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ toram bee.nvidia.mode=normal
+    append @APPEND_LIVE@ toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup

 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup

-label live-@FLAVOUR@-kms-gsp-off
-    menu label EASY-BEE (g^raphics/KMS, GSP=off)
+label live-@FLAVOUR@-kms
+    menu label EASY-BEE (^KMS, no nomodeset)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.display=kms bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
+
+label live-@FLAVOUR@-kms-gsp-off
+    menu label EASY-BEE (KMS, ^GSP=off)
+    linux @LINUX@
+    initrd @INITRD@
+    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup

 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0

 label memtest
    menu label ^Memory Test (memtest86+)
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -25,6 +25,7 @@ ensure_bee_console_user() {
 ensure_bee_console_user

 # Enable common bee services
+systemctl enable bee-hpc-tuning.service
 systemctl enable bee-network.service
 systemctl enable bee-preflight.service
 systemctl enable bee-audit.service
@@ -42,6 +43,7 @@ systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
 # Enable GPU-vendor specific services
 if [ "$GPU_VENDOR" = "nvidia" ]; then
    systemctl enable nvidia-dcgm.service 2>/dev/null || true
+    systemctl enable nvidia-fabricmanager.service 2>/dev/null || true
    systemctl enable bee-nvidia.service
 elif [ "$GPU_VENDOR" = "amd" ]; then
    # ROCm symlinks (packages install to /opt/rocm-*/bin/)
@@ -55,13 +57,16 @@ fi
 # nogpu: no GPU services needed

 # Ensure scripts are executable
+chmod +x /usr/local/bin/bee-hpc-tuning  2>/dev/null || true
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
 chmod +x /usr/local/bin/bee            2>/dev/null || true
 chmod +x /usr/local/bin/bee-log-run    2>/dev/null || true
-chmod +x /usr/local/bin/bee-selfheal      2>/dev/null || true
-chmod +x /usr/local/bin/bee-boot-status  2>/dev/null || true
+chmod +x /usr/local/bin/bee-selfheal        2>/dev/null || true
+chmod +x /usr/local/bin/bee-boot-status    2>/dev/null || true
+chmod +x /usr/local/bin/bee-install        2>/dev/null || true
+chmod +x /usr/local/bin/bee-remount-medium 2>/dev/null || true
 if [ "$GPU_VENDOR" = "nvidia" ]; then
    chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
    chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true
--- a/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
+++ b/iso/builder/config/hooks/normal/9001-wallpaper.hook.chroot
@@ -1,117 +0,0 @@
-#!/bin/sh
-# 9001-wallpaper.hook.chroot — generate /usr/share/bee/wallpaper.png inside chroot
-set -e
-echo "=== generating bee wallpaper ==="
-mkdir -p /usr/share/bee
-
-python3 - <<'PYEOF'
-from PIL import Image, ImageDraw, ImageFont, ImageFilter
-import os
-
-W, H = 1920, 1080
-
-ASCII_ART = [
-    "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗",
-    "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝",
-    "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗",
-    "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝",
-    "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗",
-    "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝",
-]
-SUBTITLE = "  Hardware Audit LiveCD"
-
-FG = (0xF6, 0xD0, 0x47)
-FG_DIM = (0xD4, 0xA9, 0x1C)
-SHADOW = (0x5E, 0x47, 0x05)
-SUB = (0x96, 0x7A, 0x17)
-BG = (0x05, 0x05, 0x05)
-
-MONO_FONT_CANDIDATES = [
-    '/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf',
-    '/usr/share/fonts/truetype/liberation2/LiberationMono-Bold.ttf',
-    '/usr/share/fonts/truetype/liberation/LiberationMono-Bold.ttf',
-    '/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf',
-]
-SUB_FONT_CANDIDATES = [
-    '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',
-    '/usr/share/fonts/truetype/liberation2/LiberationSans-Bold.ttf',
-    '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf',
-    '/usr/share/fonts/truetype/freefont/FreeSansBold.ttf',
-]
-
-
-def load_font(candidates, size):
-    for path in candidates:
-        if os.path.exists(path):
-            return ImageFont.truetype(path, size)
-    return ImageFont.load_default()
-
-
-def mono_metrics(font):
-    probe = Image.new('L', (W, H), 0)
-    draw = ImageDraw.Draw(probe)
-    char_w = int(round(draw.textlength("M", font=font)))
-    bb = draw.textbbox((0, 0), "Mg", font=font)
-    char_h = bb[3] - bb[1]
-    return char_w, char_h
-
-
-def render_ascii_mask(font, lines, char_w, char_h, line_gap):
-    width = max(len(line) for line in lines) * char_w
-    height = len(lines) * char_h + line_gap * (len(lines) - 1)
-    mask = Image.new('L', (width, height), 0)
-    draw = ImageDraw.Draw(mask)
-    for row, line in enumerate(lines):
-        y = row * (char_h + line_gap)
-        for col, ch in enumerate(line):
-            if ch == ' ':
-                continue
-            x = col * char_w
-            draw.text((x, y), ch, font=font, fill=255)
-    return mask
-
-
-img = Image.new('RGB', (W, H), BG)
-draw = ImageDraw.Draw(img)
-
-# Soft amber glow under the logo without depending on font rendering.
-glow = Image.new('RGBA', (W, H), (0, 0, 0, 0))
-glow_draw = ImageDraw.Draw(glow)
-glow_draw.ellipse((360, 250, 1560, 840), fill=(180, 120, 10, 56))
-glow_draw.ellipse((520, 340, 1400, 760), fill=(255, 190, 40, 36))
-glow = glow.filter(ImageFilter.GaussianBlur(60))
-img = Image.alpha_composite(img.convert('RGBA'), glow)
-
-TARGET_LOGO_W = 400
-max_chars = max(len(line) for line in ASCII_ART)
-_probe_font = load_font(MONO_FONT_CANDIDATES, 64)
-_probe_cw, _ = mono_metrics(_probe_font)
-font_size_logo = max(6, int(64 * TARGET_LOGO_W / (_probe_cw * max_chars)))
-font_logo = load_font(MONO_FONT_CANDIDATES, font_size_logo)
-char_w, char_h = mono_metrics(font_logo)
-logo_mask = render_ascii_mask(font_logo, ASCII_ART, char_w, char_h, 2)
-logo_w, logo_h = logo_mask.size
-logo_x = (W - logo_w) // 2
-logo_y = 380
-
-sh_off = max(1, font_size_logo // 6)
-shadow_mask = logo_mask.filter(ImageFilter.GaussianBlur(1))
-img.paste(SHADOW, (logo_x + sh_off * 2, logo_y + sh_off * 2), shadow_mask)
-img.paste(FG_DIM, (logo_x + sh_off, logo_y + sh_off), logo_mask)
-img.paste(FG, (logo_x, logo_y), logo_mask)
-
-font_sub = load_font(SUB_FONT_CANDIDATES, 30)
-sub_bb = draw.textbbox((0, 0), SUBTITLE, font=font_sub)
-sub_x = (W - (sub_bb[2] - sub_bb[0])) // 2
-sub_y = logo_y + logo_h + 48
-draw = ImageDraw.Draw(img)
-draw.text((sub_x + 2, sub_y + 2), SUBTITLE, font=font_sub, fill=(35, 28, 6))
-draw.text((sub_x, sub_y), SUBTITLE, font=font_sub, fill=SUB)
-
-img = img.convert('RGB')
-
-img.save('/usr/share/bee/wallpaper.png', optimize=True)
-print('wallpaper written: /usr/share/bee/wallpaper.png')
-PYEOF
-
-echo "=== wallpaper done ==="
--- a/iso/builder/config/hooks/normal/9011-toram-rsync.hook.chroot
+++ b/iso/builder/config/hooks/normal/9011-toram-rsync.hook.chroot
@@ -0,0 +1,47 @@
+#!/bin/sh
+# 9011-toram-rsync.hook.chroot
+#
+# Adds rsync to the initramfs so that live-boot's toram code takes the
+# rsync --progress path instead of the silent "cp -a" fallback.
+#
+# live-boot's 9990-toram-todisk.sh already contains:
+#   if [ -x /bin/rsync ]; then
+#       rsync -a --progress ... 1>/dev/console
+#   else
+#       cp -a ...   # no output
+#   fi
+#
+# We install an initramfs-tools hook that calls copy_exec /usr/bin/rsync,
+# which copies the binary + all shared-library dependencies into the initrd.
+
+set -e
+
+HOOK_DIR="/etc/initramfs-tools/hooks"
+HOOK="${HOOK_DIR}/bee-rsync"
+
+mkdir -p "${HOOK_DIR}"
+
+cat > "${HOOK}" << 'EOF'
+#!/bin/sh
+# initramfs hook: include rsync for live-boot toram progress output
+PREREQ=""
+prereqs() { echo "$PREREQ"; }
+case "$1" in prereqs) prereqs; exit 0 ;; esac
+
+. /usr/share/initramfs-tools/hook-functions
+
+if [ -x /usr/bin/rsync ]; then
+    copy_exec /usr/bin/rsync /bin
+fi
+EOF
+
+chmod +x "${HOOK}"
+
+echo "9011-toram-rsync: installed initramfs hook at ${HOOK}"
+
+# Rebuild the existing initramfs images so the hook takes effect in the ISO's
+# initrd.img. "-k all" avoids guessing a kernel version by parsing ls output
+# and also covers images that ship more than one kernel.
+echo "9011-toram-rsync: rebuilding initramfs for all installed kernels"
+update-initramfs -u -k all
+echo "9011-toram-rsync: done"
--- a/iso/builder/config/hooks/normal/9100-memtest.hook.binary
+++ b/iso/builder/config/hooks/normal/9100-memtest.hook.binary
@@ -5,6 +5,8 @@ set -e

 : "${BEE_REQUIRE_MEMTEST:=0}"

+# memtest86+ 6.x uses memtest86+.bin (no x64 suffix) for the BIOS binary,
+# while 5.x used memtest86+x64.bin. We normalise both to x64 names in the ISO.
 MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
 BINARY_BOOT_DIR="binary/boot"
 GRUB_CFG="binary/boot/grub/grub.cfg"
@@ -24,15 +26,23 @@ fail_or_warn() {
    return 0
 }

+# grub.cfg and live.cfg may not exist yet when binary hooks run — live-build
+# creates them after this hook (lb binary_grub-efi / lb binary_syslinux).
+# The template already has memtest entries hardcoded, so a missing config file
+# here is not an error; validate_iso_memtest() checks the final ISO instead.
+warn_only() {
+    log "WARNING: $1"
+}
+
 copy_memtest_file() {
    src="$1"
-    base="$(basename "$src")"
-    dst="${BINARY_BOOT_DIR}/${base}"
+    dst_name="${2:-$(basename "$src")}"
+    dst="${BINARY_BOOT_DIR}/${dst_name}"

    [ -f "$src" ] || return 1
    mkdir -p "${BINARY_BOOT_DIR}"
    cp "$src" "$dst"
-    log "copied ${base} from ${src}"
+    log "copied ${dst_name} from ${src}"
 }

 extract_memtest_from_deb() {
@@ -41,14 +51,44 @@ extract_memtest_from_deb() {

    log "extracting memtest payload from ${deb}"
    dpkg-deb -x "$deb" "$tmpdir"
-    for f in ${MEMTEST_FILES}; do
-        if [ -f "${tmpdir}/boot/${f}" ]; then
-            copy_memtest_file "${tmpdir}/boot/${f}"
-        fi
-    done
+
+    # EFI binary: both 5.x and 6.x use memtest86+x64.efi
+    if [ -f "${tmpdir}/boot/memtest86+x64.efi" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.efi"
+    fi
+
+    # BIOS binary: 5.x = memtest86+x64.bin, 6.x = memtest86+.bin
+    if [ -f "${tmpdir}/boot/memtest86+x64.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.bin"
+    elif [ -f "${tmpdir}/boot/memtest86+.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+.bin" "memtest86+x64.bin"
+    fi
+
    rm -rf "$tmpdir"
 }

+download_and_extract_memtest() {
+    tmpdl="$(mktemp -d)"
+    if [ -n "${MEMTEST_VERSION:-}" ]; then
+        pkg_spec="memtest86+=${MEMTEST_VERSION}"
+    else
+        pkg_spec="memtest86+"
+    fi
+    log "downloading ${pkg_spec} from apt"
+    if ! ( cd "$tmpdl" && apt-get download "$pkg_spec" 2>/dev/null ); then
+        log "apt download failed, retrying after apt-get update"
+        apt-get update -qq >/dev/null 2>&1 || true
+        ( cd "$tmpdl" && apt-get download "$pkg_spec" 2>/dev/null ) || true
+    fi
+    deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
+    if [ -n "$deb" ]; then
+        extract_memtest_from_deb "$deb"
+    else
+        log "apt download of memtest86+ failed"
+    fi
+    rm -rf "$tmpdl"
+}
+
 ensure_memtest_binaries() {
    missing=0
    for f in ${MEMTEST_FILES}; do
@@ -56,10 +96,15 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 1. Try files already placed by lb binary_memtest or chroot
    for root in chroot/boot /boot; do
        for f in ${MEMTEST_FILES}; do
            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
        done
+        # 6.x BIOS binary may lack x64 in name — copy with normalised name
+        if [ ! -f "${BINARY_BOOT_DIR}/memtest86+x64.bin" ]; then
+            copy_memtest_file "${root}/memtest86+.bin" "memtest86+x64.bin" || true
+        fi
    done

    missing=0
@@ -68,6 +113,7 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 2. Try apt package cache (may be empty if lb binary_memtest already purged)
    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
        [ -d "$root" ] || continue
        deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
@@ -76,6 +122,15 @@ ensure_memtest_binaries() {
        break
    done

+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
+    done
+    [ "$missing" -eq 1 ] || return 0
+
+    # 3. Fallback: download fresh from apt (lb binary_memtest purges the cache)
+    download_and_extract_memtest
+
    missing=0
    for f in ${MEMTEST_FILES}; do
        if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
@@ -88,7 +143,7 @@ ensure_memtest_binaries() {

 ensure_grub_entry() {
    [ -f "$GRUB_CFG" ] || {
-        fail_or_warn "missing ${GRUB_CFG}"
+        warn_only "missing ${GRUB_CFG} (will be created by lb binary_grub-efi from template)"
        return 0
    }

@@ -114,7 +169,7 @@ EOF

 ensure_isolinux_entry() {
    [ -f "$ISOLINUX_CFG" ] || {
-        fail_or_warn "missing ${ISOLINUX_CFG}"
+        warn_only "missing ${ISOLINUX_CFG} (will be created by lb binary_syslinux from template)"
        return 0
    }

--- a/iso/builder/config/package-lists/bee-nvidia.list.chroot
+++ b/iso/builder/config/package-lists/bee-nvidia.list.chroot
@@ -5,6 +5,7 @@
 # DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with
 # CUDA 13 userspace, so install the CUDA 13 build plus proprietary components
 # explicitly.
+nvidia-fabricmanager=%%NVIDIA_FABRICMANAGER_VERSION%%
 datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
--- a/iso/builder/config/package-lists/bee.list.chroot
+++ b/iso/builder/config/package-lists/bee.list.chroot
@@ -3,6 +3,7 @@ dmidecode
 smartmontools
 nvme-cli
 pciutils
+rsync
 ipmitool
 util-linux
 e2fsprogs
--- a/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
+++ b/iso/overlay/etc/systemd/system/bee-hpc-tuning.service
@@ -0,0 +1,14 @@
+[Unit]
+Description=Bee: HPC tuning (CPU governor, C-states)
+After=local-fs.target
+Before=bee-nvidia.service bee-audit.service
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-hpc-tuning.log /usr/local/bin/bee-hpc-tuning
+StandardOutput=journal
+StandardError=journal
+RemainAfterExit=yes
+
+[Install]
+WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/bee-web.service
+++ b/iso/overlay/etc/systemd/system/bee-web.service
@@ -10,6 +10,8 @@ RestartSec=3
 StandardOutput=journal
 StandardError=journal
 LimitMEMLOCK=infinity
+# No MemoryMax: bee-web spawns GPU test subprocesses (dcgmproftester etc.)
+# that legitimately use several GB; a cgroup limit kills them via OOM.
 # Keep the web server responsive during GPU/CPU stress (children inherit nice+10
 # via Setpriority in runCmdJob, but the bee-web parent stays at 0).
 Nice=0
--- a/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
+++ b/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
@@ -0,0 +1,154 @@
+#!/bin/sh
+# bee-dcgmproftester-staggered — run dcgmproftester on the selected NVIDIA GPUs,
+# one background worker per GPU, pausing --stagger-seconds between launches.
+#
+# A worker launched earlier gets a longer duration (one extra stagger interval
+# per GPU launched after it), so all workers are due to finish at about the
+# same time. The exit status is 0 only if every worker exited successfully.
+set -eu
+
+# Deliberately not named SECONDS: bash and ksh treat SECONDS as a timer that
+# keeps counting up, which would inflate the duration after each stagger sleep
+# if /bin/sh is ever provided by one of those shells.
+DURATION_SECONDS=300
+STAGGER_SECONDS=180
+DEVICES=""
+EXCLUDE=""
+
+usage() {
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+# is_uint VALUE — succeed only if VALUE is a non-empty string of decimal digits.
+is_uint() {
+    case "${1:-}" in
+        ''|*[!0-9]*) return 1 ;;
+    esac
+    return 0
+}
+
+# normalize_list CSV — print CSV with whitespace and empty items removed,
+# sorted numerically and de-duplicated.
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+# contains_csv NEEDLE CSV — succeed if NEEDLE is one of the items of CSV.
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+# resolve_dcgmproftester — print the path of the first dcgmproftester binary
+# found in PATH (unversioned name first, then 13, 12, 11); fail if none exists.
+resolve_dcgmproftester() {
+    for candidate in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
+        if command -v "${candidate}" >/dev/null 2>&1; then
+            command -v "${candidate}"
+            return 0
+        fi
+    done
+    return 1
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SECONDS="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+# Both values feed shell arithmetic below; reject anything that is not a plain
+# non-negative integer up front instead of failing in the middle of a run.
+is_uint "${DURATION_SECONDS}" || usage
+is_uint "${STAGGER_SECONDS}" || usage
+
+PROF=$(resolve_dcgmproftester) || { echo "dcgmproftester not found in PATH" >&2; exit 1; }
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+# Drop excluded indexes while keeping the order of the selection.
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+echo "loader=dcgmproftester-staggered"
+echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"
+
+# Same setting as bee-gpu-burn: make CUDA enumerate GPUs by PCI bus ID so the
+# nvidia-smi index placed in CUDA_VISIBLE_DEVICES addresses the intended GPU.
+export CUDA_DEVICE_ORDER="PCI_BUS_ID"
+
+TMP_DIR=$(mktemp -d)
+WORKERS=""
+
+# abort_run STATUS — signal handler: stop workers that are still running, then
+# exit with STATUS (the EXIT trap below still removes TMP_DIR).
+abort_run() {
+    for spec in ${WORKERS}; do
+        kill "${spec%%:*}" 2>/dev/null || true
+    done
+    exit "$1"
+}
+
+trap 'rm -rf "${TMP_DIR}"' EXIT
+trap 'abort_run 130' INT
+trap 'abort_run 143' TERM
+
+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    gpu_pos=$((gpu_pos + 1))
+    log="${TMP_DIR}/gpu-${id}.log"
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+    gpu_seconds=$(( DURATION_SECONDS + extra_sec ))
+    echo "starting gpu ${id} seconds=${gpu_seconds}"
+    CUDA_VISIBLE_DEVICES="${id}" "${PROF}" --no-dcgm-validation -t 1004 -d "${gpu_seconds}" >"${log}" 2>&1 &
+    pid=$!
+    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
+done
+
+status=0
+for spec in ${WORKERS}; do
+    pid=${spec%%:*}
+    rest=${spec#*:}
+    id=${rest%%:*}
+    log=${rest#*:}
+    if wait "${pid}"; then
+        echo "gpu ${id} finished: OK"
+    else
+        rc=$?
+        echo "gpu ${id} finished: FAILED rc=${rc}"
+        status=1
+    fi
+    sed "s/^/[gpu ${id}] /" "${log}" || true
+done
+
+exit "${status}"
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -2,13 +2,17 @@
 set -eu

 SECONDS=5
+STAGGER_SECONDS=0
 SIZE_MB=0
 DEVICES=""
 EXCLUDE=""
+PRECISION=""
+PRECISION_PLAN=""
+PRECISION_PLAN_SECONDS=""
 WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"

 usage() {
-    echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]" >&2
    exit 2
 }

@@ -25,9 +29,13 @@ contains_csv() {
 while [ "$#" -gt 0 ]; do
    case "$1" in
        --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
        --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        --precision) [ "$#" -ge 2 ] || usage; PRECISION="$2"; shift 2 ;;
+        --precision-plan) [ "$#" -ge 2 ] || usage; PRECISION_PLAN="$2"; shift 2 ;;
+        --precision-plan-seconds) [ "$#" -ge 2 ] || usage; PRECISION_PLAN_SECONDS="$2"; shift 2 ;;
        *) usage ;;
    esac
 done
@@ -61,14 +69,18 @@ done

 echo "loader=bee-gpu-burn"
 echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"

 export CUDA_DEVICE_ORDER="PCI_BUS_ID"

 TMP_DIR=$(mktemp -d)
 trap 'rm -rf "${TMP_DIR}"' EXIT INT TERM

+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
 WORKERS=""
 for id in $(echo "${FINAL}" | tr ',' ' '); do
+    gpu_pos=$((gpu_pos + 1))
    log="${TMP_DIR}/gpu-${id}.log"
    gpu_size_mb="${SIZE_MB}"
    if [ "${gpu_size_mb}" -le 0 ] 2>/dev/null; then
@@ -79,11 +91,22 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
            gpu_size_mb=512
        fi
    fi
-    echo "starting gpu ${id} size=${gpu_size_mb}MB"
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+    gpu_seconds=$(( SECONDS + extra_sec ))
+    echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
+    precision_arg=""
+    [ -n "${PRECISION}" ] && precision_arg="--precision ${PRECISION}"
+    precision_plan_arg=""
+    [ -n "${PRECISION_PLAN}" ] && precision_plan_arg="--precision-plan ${PRECISION_PLAN}"
+    precision_plan_seconds_arg=""
+    [ -n "${PRECISION_PLAN_SECONDS}" ] && precision_plan_seconds_arg="--precision-plan-seconds ${PRECISION_PLAN_SECONDS}"
    CUDA_VISIBLE_DEVICES="${id}" \
-        "${WORKER}" --device 0 --seconds "${SECONDS}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
+        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} ${precision_plan_arg} ${precision_plan_seconds_arg} >"${log}" 2>&1 &
    pid=$!
    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
 done

 status=0
--- a/iso/overlay/usr/local/bin/bee-hpc-tuning
+++ b/iso/overlay/usr/local/bin/bee-hpc-tuning
@@ -0,0 +1,43 @@
+#!/bin/sh
+# bee-hpc-tuning — boot-time HPC tuning for repeatable benchmark results.
+# Started by bee-hpc-tuning.service.
+
+log() { echo "[bee-hpc-tuning] $*"; }
+
+# ── CPU governor ────────────────────────────────────────────────────────────
+# Put every core on the "performance" governor by writing straight to its
+# cpufreq scaling_governor file (cpupower is not available).
+set_ok=0
+set_failed=0
+for node in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
+    # An unmatched glob stays literal, so skip anything that is not a file.
+    [ -f "$node" ] || continue
+    if echo performance > "$node" 2>/dev/null; then
+        set_ok=$((set_ok + 1))
+    else
+        set_failed=$((set_failed + 1))
+    fi
+done
+
+# Summarise the governor pass in a single log line.
+if [ "$set_ok" -eq 0 ] && [ "$set_failed" -eq 0 ]; then
+    log "WARN: no cpufreq scaling_governor paths found (C-state governor or HW-controlled)"
+elif [ "$set_ok" -eq 0 ]; then
+    log "WARN: failed to set CPU governor on ${set_failed} core(s)"
+elif [ "$set_failed" -eq 0 ]; then
+    log "CPU governor set to performance on ${set_ok} core(s)"
+else
+    log "WARN: CPU governor: ${set_ok} OK, ${set_failed} failed"
+fi
+
+# ── Transparent Huge Pages ───────────────────────────────────────────────────
+# The kernel command line is expected to set transparent_hugepage=always;
+# nothing is changed here, the current setting is only logged.
+thp_node=/sys/kernel/mm/transparent_hugepage/enabled
+if [ ! -f "$thp_node" ]; then
+    log "WARN: transparent_hugepage sysfs path not found"
+else
+    log "transparent_hugepage: $(cat "$thp_node" 2>/dev/null)"
+fi
+
+log "done"
--- a/iso/overlay/usr/local/bin/bee-install
+++ b/iso/overlay/usr/local/bin/bee-install
@@ -65,6 +65,9 @@ done
 SQUASHFS="/run/live/medium/live/filesystem.squashfs"
 if [ ! -f "$SQUASHFS" ]; then
    echo "ERROR: squashfs not found at $SQUASHFS" >&2
+    echo "  The live medium may have been disconnected." >&2
+    echo "  Reconnect the disc and run:  bee-remount-medium --wait" >&2
+    echo "  Then re-run bee-install." >&2
    exit 1
 fi

@@ -162,10 +165,59 @@ log "  Mounted."
 log "--- Step 5/7: Unpacking filesystem (this takes 10-20 minutes) ---"
 log "  Source: $SQUASHFS"
 log "  Target: $MOUNT_ROOT"
-unsquashfs -f -d "$MOUNT_ROOT" "$SQUASHFS" 2>&1 | \
-    grep -E '^\[|^inod|^created|^extract' | \
-    while read -r line; do log "  $line"; done || true
-log "  Unpack complete."
+
+# unsquashfs does not support resume, so retry the entire unpack step if the
+# source medium disappears mid-copy (e.g. CD physically disconnected).
+UNPACK_ATTEMPTS=0
+UNPACK_MAX=5
+while true; do
+    UNPACK_ATTEMPTS=$(( UNPACK_ATTEMPTS + 1 ))
+    if [ "$UNPACK_ATTEMPTS" -gt "$UNPACK_MAX" ]; then
+        die "Unpack failed $UNPACK_MAX times — giving up. Check the disc and logs."
+    fi
+    [ "$UNPACK_ATTEMPTS" -gt 1 ] && log "  Retry attempt $UNPACK_ATTEMPTS / $UNPACK_MAX ..."
+
+    # Re-check squashfs is reachable before each attempt
+    if [ ! -f "$SQUASHFS" ]; then
+        log "  SOURCE LOST: $SQUASHFS not found."
+        log "  Reconnect the disc and run 'bee-remount-medium --wait' in another terminal,"
+        log "  then press Enter here to retry."
+        read -r _
+        continue
+    fi
+
+    # wipe partial unpack so unsquashfs starts clean
+    if [ "$UNPACK_ATTEMPTS" -gt 1 ]; then
+        log "  Cleaning partial unpack from $MOUNT_ROOT ..."
+        # keep the mount point itself but remove its contents
+        find "$MOUNT_ROOT" -mindepth 1 -maxdepth 1 -exec rm -rf {} + 2>/dev/null || true
+    fi
+
+    UNPACK_OK=0
+    unsquashfs -f -d "$MOUNT_ROOT" "$SQUASHFS" 2>&1 | \
+        grep -E '^\[|^inod|^created|^extract|^ERROR|failed' | \
+        while IFS= read -r line; do log "  $line"; done || UNPACK_OK=$?
+
+    # Check squashfs is still reachable (gone = disc pulled during copy)
+    if [ ! -f "$SQUASHFS" ]; then
+        log "  WARNING: source medium lost during unpack — will retry after remount."
+        log "  Run 'bee-remount-medium --wait' in another terminal, then press Enter."
+        read -r _
+        continue
+    fi
+
+    # Verify the unpack produced a usable root (presence of /etc is a basic check)
+    if [ -d "${MOUNT_ROOT}/etc" ]; then
+        log "  Unpack complete."
+        break
+    else
+        log "  WARNING: unpack produced no /etc — squashfs may be corrupt or incomplete."
+        if [ "$UNPACK_ATTEMPTS" -lt "$UNPACK_MAX" ]; then
+            log "  Retrying in 5 s ..."
+            sleep 5
+        fi
+    fi
+done

 # ------------------------------------------------------------------
 log "--- Step 6/7: Configuring installed system ---"
--- a/iso/overlay/usr/local/bin/bee-john-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-john-gpu-stress
@@ -2,6 +2,7 @@
 set -eu

 DURATION_SEC=300
+STAGGER_SECONDS=0
 DEVICES=""
 EXCLUDE=""
 FORMAT=""
@@ -12,7 +13,7 @@ export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
 export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

 usage() {
-    echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
    exit 2
 }

@@ -118,6 +119,7 @@ ensure_opencl_ready() {
 while [ "$#" -gt 0 ]; do
    case "$1" in
        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
        --format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
@@ -170,6 +172,7 @@ done
 echo "loader=john"
 echo "selected_gpus=${FINAL}"
 echo "john_devices=${JOHN_DEVICES}"
+echo "stagger_seconds=${STAGGER_SECONDS}"

 cd "${JOHN_DIR}"

@@ -232,14 +235,21 @@ trap cleanup EXIT INT TERM
 echo "format=${CHOSEN_FORMAT}"
 echo "target_seconds=${DURATION_SEC}"
 echo "slice_seconds=${TEST_SLICE_SECONDS}"
-DEADLINE=$(( $(date +%s) + DURATION_SEC ))
+TOTAL_DEVICES=$(echo "${JOHN_DEVICES}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
 _first=1
+pos=0
 for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
+    pos=$((pos + 1))
    [ "${_first}" = "1" ] || sleep 3
    _first=0
-    run_john_loop "${opencl_id}" "${DEADLINE}" &
+    extra_sec=$(( STAGGER_SECONDS * (TOTAL_DEVICES - pos) ))
+    deadline=$(( $(date +%s) + DURATION_SEC + extra_sec ))
+    run_john_loop "${opencl_id}" "${deadline}" &
    pid=$!
    PIDS="${PIDS} ${pid}"
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${pos}" -lt "${TOTAL_DEVICES}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
 done
 FAIL=0
 for pid in ${PIDS}; do
--- a/iso/overlay/usr/local/bin/bee-nvidia-load
+++ b/iso/overlay/usr/local/bin/bee-nvidia-load
@@ -21,8 +21,13 @@ read_nvidia_modules_flavor() {

 log "kernel: $(uname -r)"

-# Skip if no NVIDIA GPU present (PCI vendor 10de)
-if ! lspci -nn 2>/dev/null | grep -qi '10de:'; then
+# Skip if no NVIDIA display/compute GPU is present.
+# Match only display-class PCI functions (0300 VGA, 0302 3D controller) from vendor 10de.
+have_nvidia_gpu() {
+    lspci -Dn 2>/dev/null | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ { found=1; exit } END { exit(found ? 0 : 1) }'
+}
+
+if ! have_nvidia_gpu; then
    log "no NVIDIA GPU detected — skipping module load"
    exit 0
 fi
@@ -253,6 +258,22 @@ else
    log "WARN: nvidia-smi not found — cannot enable persistence mode"
 fi

+# Start or refresh Fabric Manager after the NVIDIA stack is ready. On NVSwitch
+# systems CUDA/DCGM can report "system not yet initialized" until fabric
+# training completes under nvidia-fabricmanager.
+if command -v systemctl >/dev/null 2>&1 && systemctl list-unit-files --no-legend 2>/dev/null | grep -q '^nvidia-fabricmanager\.service'; then
+    if systemctl restart nvidia-fabricmanager.service >/dev/null 2>&1; then
+        log "nvidia-fabricmanager restarted"
+    elif systemctl start nvidia-fabricmanager.service >/dev/null 2>&1; then
+        log "nvidia-fabricmanager started"
+    else
+        log "WARN: failed to start nvidia-fabricmanager.service"
+        systemctl status nvidia-fabricmanager.service --no-pager 2>&1 | sed 's/^/  fabricmanager: /' || true
+    fi
+else
+    log "WARN: nvidia-fabricmanager.service not installed"
+fi
+
 # Start DCGM host engine so dcgmi can discover GPUs.
 # nv-hostengine must run after the NVIDIA modules and device nodes are ready.
 # If it started too early (for example via systemd before bee-nvidia-load), it can
--- a/iso/overlay/usr/local/bin/bee-nvidia-recover
+++ b/iso/overlay/usr/local/bin/bee-nvidia-recover
@@ -0,0 +1,229 @@
+#!/bin/sh
+# bee-nvidia-recover — drain NVIDIA clients, then reset a GPU or reload drivers.
+#
+# Exit status: 0 on success, 2 on a usage error, non-zero when the GPU reset,
+# a module unload or the driver reload failed.
+
+set -u
+
+# log MESSAGE... — print a progress message tagged with the script name.
+log() {
+    echo "[bee-nvidia-recover] $*"
+}
+
+# log_blocker TEXT... — report a service or process that is being stopped
+# because it may hold the GPU.
+log_blocker() {
+    echo "[bee-nvidia-recover] blocker: $*"
+}
+
+# usage — print the command synopsis on stdout.
+usage() {
+    cat <<'EOF'
+usage:
+  bee-nvidia-recover restart-drivers
+  bee-nvidia-recover reset-gpu <index>
+EOF
+}
+
+# unit_exists UNIT — succeed when "systemctl cat" can resolve the unit.
+unit_exists() {
+    systemctl cat "$1" >/dev/null 2>&1
+}
+
+# unit_is_active UNIT — succeed when systemd reports the unit as active.
+unit_is_active() {
+    systemctl is-active --quiet "$1" 2>/dev/null
+}
+
+# stop_unit_if_active UNIT — stop the unit when it is active.
+# Returns 0 when a stop was issued, 1 when the unit was not active.
+stop_unit_if_active() {
+    unit="$1"
+    if unit_is_active "$unit"; then
+        log "stopping $unit"
+        systemctl stop "$unit"
+        return 0
+    fi
+    return 1
+}
+
+# start_unit_if_marked UNIT MARKER — start UNIT when MARKER is "1" and the
+# unit exists.
+start_unit_if_marked() {
+    unit="$1"
+    marker="$2"
+    if [ "$marker" = "1" ] && unit_exists "$unit"; then
+        log "starting $unit"
+        systemctl start "$unit"
+    fi
+}
+
+# wait_for_process_exit NAME — poll once per second until no process named
+# exactly NAME is left; gives up and returns 1 on the 15th check.
+wait_for_process_exit() {
+    name="$1"
+    tries=0
+    while pgrep -x "$name" >/dev/null 2>&1; do
+        tries=$((tries + 1))
+        if [ "$tries" -ge 15 ]; then
+            log "WARN: $name is still running after stop request"
+            return 1
+        fi
+        sleep 1
+    done
+    return 0
+}
+
+# kill_pattern PATTERN — log every process whose command line matches
+# PATTERN, send SIGTERM and, one second later, SIGKILL.
+kill_pattern() {
+    pattern="$1"
+    if pgrep -f "$pattern" >/dev/null 2>&1; then
+        pgrep -af "$pattern" 2>/dev/null | while IFS= read -r line; do
+            [ -n "$line" ] || continue
+            log_blocker "$line"
+        done
+        log "killing processes matching: $pattern"
+        pkill -TERM -f "$pattern" >/dev/null 2>&1 || true
+        sleep 1
+        pkill -KILL -f "$pattern" >/dev/null 2>&1 || true
+    fi
+}
+
+# drain_gpu_clients — stop everything that may keep the NVIDIA devices open:
+# display managers, Fabric Manager, the DCGM host engine and known GPU tools.
+# Sets display_was_active / fabric_was_active to 1 for the units it stopped
+# so that restore_gpu_clients can start them again.
+drain_gpu_clients() {
+    display_was_active=0
+    fabric_was_active=0
+
+    for unit in display-manager.service lightdm.service; do
+        if unit_exists "$unit" && stop_unit_if_active "$unit"; then
+            log_blocker "service $unit"
+            display_was_active=1
+        fi
+    done
+
+    if unit_exists nvidia-fabricmanager.service && stop_unit_if_active nvidia-fabricmanager.service; then
+        log_blocker "service nvidia-fabricmanager.service"
+        fabric_was_active=1
+    fi
+
+    if pgrep -x nv-hostengine >/dev/null 2>&1; then
+        # List by exact process name (-x), like the checks around it. An
+        # anchored -f pattern is matched against the whole command line and
+        # finds nothing once the daemon runs with a path or arguments.
+        pgrep -a -x nv-hostengine 2>/dev/null | while IFS= read -r line; do
+            [ -n "$line" ] || continue
+            log_blocker "$line"
+        done
+        log "stopping nv-hostengine"
+        pkill -TERM -x nv-hostengine >/dev/null 2>&1 || true
+        wait_for_process_exit nv-hostengine || pkill -KILL -x nv-hostengine >/dev/null 2>&1 || true
+    fi
+
+    for pattern in \
+        "nvidia-smi" \
+        "dcgmi" \
+        "nvvs" \
+        "dcgmproftester" \
+        "all_reduce_perf" \
+        "nvtop" \
+        "bee-gpu-burn" \
+        "bee-john-gpu-stress" \
+        "bee-nccl-gpu-stress" \
+        "Xorg" \
+        "Xwayland"; do
+        kill_pattern "$pattern"
+    done
+}
+
+# restore_gpu_clients — re-enable persistence mode and bring back what
+# drain_gpu_clients stopped. Fabric Manager is started before the DCGM host
+# engine, the same order bee-nvidia-load uses.
+restore_gpu_clients() {
+    if command -v nvidia-smi >/dev/null 2>&1; then
+        if nvidia-smi -pm 1 >/dev/null 2>&1; then
+            log "enabled NVIDIA persistence mode"
+        else
+            log "WARN: failed to enable NVIDIA persistence mode"
+        fi
+    fi
+
+    start_unit_if_marked nvidia-fabricmanager.service "${fabric_was_active:-0}"
+
+    if command -v nv-hostengine >/dev/null 2>&1 && ! pgrep -x nv-hostengine >/dev/null 2>&1; then
+        log "starting nv-hostengine"
+        nv-hostengine
+    fi
+
+    start_unit_if_marked display-manager.service "${display_was_active:-0}"
+    if [ "${display_was_active:-0}" = "1" ] && unit_exists lightdm.service && ! unit_is_active lightdm.service; then
+        start_unit_if_marked lightdm.service "1"
+    fi
+}
+
+# restart_drivers — drain GPU clients, unload the NVIDIA kernel modules,
+# remove the device nodes and reload the stack through bee-nvidia-load.
+# Returns non-zero when a module could not be unloaded or the reload failed;
+# clients are restored either way.
+restart_drivers() {
+    recover_rc=0
+    drain_gpu_clients
+    for mod in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
+        if lsmod | awk '{print $1}' | grep -qx "$mod"; then
+            log "unloading module $mod"
+            if ! rmmod "$mod"; then
+                log "WARN: failed to unload module $mod"
+                recover_rc=1
+            fi
+        fi
+    done
+    rm -f /dev/nvidiactl /dev/nvidia-uvm /dev/nvidia-uvm-tools /dev/nvidia[0-9]* 2>/dev/null || true
+    log "reloading NVIDIA driver stack"
+    if ! /usr/local/bin/bee-nvidia-load; then
+        log "WARN: bee-nvidia-load failed"
+        recover_rc=1
+    fi
+    restore_gpu_clients
+    return "$recover_rc"
+}
+
+# reset_gpu INDEX — drain GPU clients, reset the GPU with "nvidia-smi -r"
+# and restore the clients. Returns the exit status of the reset.
+reset_gpu() {
+    index="$1"
+    recover_rc=0
+    drain_gpu_clients
+    log "resetting GPU $index"
+    nvidia-smi -r -i "$index" || recover_rc=$?
+    if [ "$recover_rc" -ne 0 ]; then
+        log "WARN: nvidia-smi reset of GPU $index failed (exit $recover_rc)"
+    fi
+    restore_gpu_clients
+    return "$recover_rc"
+}
+
+# Dispatch on the sub-command; the script exits with the status of the
+# selected action.
+cmd="${1:-}"
+case "$cmd" in
+    restart-drivers)
+        restart_drivers
+        exit "$?"
+        ;;
+    reset-gpu)
+        if [ "$#" -ne 2 ]; then
+            usage >&2
+            exit 2
+        fi
+        reset_gpu "$2"
+        exit "$?"
+        ;;
+    *)
+        usage >&2
+        exit 2
+        ;;
+esac
--- a/iso/overlay/usr/local/bin/bee-openbox-session
+++ b/iso/overlay/usr/local/bin/bee-openbox-session
@@ -9,9 +9,10 @@ xset s noblank

 # Set desktop background.
 if [ -f /usr/share/bee/wallpaper.png ]; then
-    feh --bg-fill /usr/share/bee/wallpaper.png
+    # Center the wallpaper unscaled and paint the uncovered area black.
+    feh --bg-center --image-bg '#000000' /usr/share/bee/wallpaper.png
 else
-    xsetroot -solid '#f6c90e'
+    xsetroot -solid '#000000'
 fi

 tint2 &
--- a/iso/overlay/usr/local/bin/bee-remount-medium
+++ b/iso/overlay/usr/local/bin/bee-remount-medium
@@ -0,0 +1,125 @@
+#!/bin/bash
+# bee-remount-medium — find and remount the live ISO medium to /run/live/medium
+#
+# Run this after reconnecting the ISO source disc (USB/CD) if the live medium
+# was lost and /run/live/medium/live/filesystem.squashfs is missing.
+#
+# Usage: bee-remount-medium [--wait]
+#   --wait  keep retrying every 5 seconds until the medium is found (useful
+#           while physically reconnecting the device)
+#
+# Exit status: 0 when the medium was mounted, 1 when none was found, 2 on an
+# unknown argument.
+
+set -euo pipefail
+
+MEDIUM_DIR="/run/live/medium"
+SQUASHFS_REL="live/filesystem.squashfs"
+WAIT_MODE=0
+
+# log MESSAGE... — print a message prefixed with the current time.
+log() { echo "[$(date +%H:%M:%S)] $*"; }
+# die MESSAGE... — log an error on stderr and exit with status 1.
+die() { log "ERROR: $*" >&2; exit 1; }
+
+# usage — print the command-line help.
+usage() {
+    echo "Usage: bee-remount-medium [--wait]"
+    echo "  Finds and remounts the live ISO medium to $MEDIUM_DIR"
+    echo "  --wait  retry every 5 s until a medium with squashfs is found"
+}
+
+for arg in "$@"; do
+    case "$arg" in
+        --wait|-w) WAIT_MODE=1 ;;
+        --help|-h)
+            usage
+            exit 0 ;;
+        *)
+            # Reject typos such as "--wiat" instead of silently running
+            # without the requested mode.
+            echo "bee-remount-medium: unknown argument: $arg" >&2
+            usage >&2
+            exit 2 ;;
+    esac
+done
+
+# find_candidates — print one block device per line that may hold the live
+# medium: every optical drive, plus sd*/vd* disks and partitions whose parent
+# disk is flagged removable in sysfs.
+find_candidates() {
+    # CD/DVD drives
+    for dev in /dev/sr* /dev/scd*; do
+        [ -b "$dev" ] && echo "$dev"
+    done
+    # USB/removable disks and partitions
+    for dev in /dev/sd* /dev/vd*; do
+        [ -b "$dev" ] || continue
+        local removable
+        local base
+        base=$(basename "$dev")
+        # Strip the partition number so sda1 is looked up as sda; /sys/block
+        # is queried by whole-disk name.
+        removable=$(cat "/sys/block/${base%%[0-9]*}/removable" 2>/dev/null || echo 0)
+        [ "$removable" = "1" ] && echo "$dev"
+    done
+    # Both loops may end on a failed test; report success regardless.
+    return 0
+}
+
+# try_mount DEVICE — probe DEVICE read-only on a temporary mount point and,
+# when it carries $SQUASHFS_REL, mount it read-only on $MEDIUM_DIR.
+# Returns 0 only when that final mount succeeded.
+try_mount() {
+    local dev="$1"
+    local tmpdir
+    tmpdir=$(mktemp -d /tmp/bee-probe-XXXXXX)
+    if mount -o ro "$dev" "$tmpdir" 2>/dev/null; then
+        if [ -f "${tmpdir}/${SQUASHFS_REL}" ]; then
+            # Unmount probe mount and mount properly onto live path
+            umount "$tmpdir" 2>/dev/null || true
+            rmdir "$tmpdir"  2>/dev/null || true
+            # Unmount whatever is currently on MEDIUM_DIR (may be empty/stale)
+            umount "$MEDIUM_DIR" 2>/dev/null || true
+            mkdir -p "$MEDIUM_DIR"
+            if mount -o ro "$dev" "$MEDIUM_DIR"; then
+                log "Mounted $dev on $MEDIUM_DIR"
+                return 0
+            else
+                log "Mount of $dev on $MEDIUM_DIR failed"
+                return 1
+            fi
+        fi
+        umount "$tmpdir" 2>/dev/null || true
+    fi
+    rmdir "$tmpdir" 2>/dev/null || true
+    return 1
+}
+
+# attempt — probe every candidate device once; returns 0 as soon as one of
+# them has been mounted on $MEDIUM_DIR, 1 when none qualified.
+attempt() {
+    log "Scanning for ISO medium..."
+    for dev in $(find_candidates); do
+        log "  Trying $dev ..."
+        if try_mount "$dev"; then
+            local sq="${MEDIUM_DIR}/${SQUASHFS_REL}"
+            log "SUCCESS: squashfs available at $sq ($(du -sh "$sq" | cut -f1))"
+            return 0
+        fi
+    done
+    return 1
+}
+
+if [ "$WAIT_MODE" = "1" ]; then
+    log "Waiting for live medium (press Ctrl+C to abort)..."
+    while true; do
+        if attempt; then
+            exit 0
+        fi
+        log "  Not found — retrying in 5 s (reconnect the disc now)"
+        sleep 5
+    done
+else
+    attempt || die "No ISO medium with ${SQUASHFS_REL} found. Reconnect the disc and re-run, or use --wait."
+fi
--- a/iso/overlay/usr/local/bin/bee-selfheal
+++ b/iso/overlay/usr/local/bin/bee-selfheal
@@ -14,7 +14,8 @@ log() {
 }

 have_nvidia_gpu() {
-    lspci -nn 2>/dev/null | grep -qi '10de:'
+    # True only for vendor 10de functions of class 0300 (VGA) or 0302 (3D controller).
+    lspci -Dn 2>/dev/null | awk '$3 ~ /^10de:/ && $2 ~ /^03(00|02):$/ { hit = 1; exit } END { exit !hit }'
 }

 service_active() {
--- a/iso/overlay/usr/share/bee/wallpaper.png
+++ b/iso/overlay/usr/share/bee/wallpaper.png
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# deploy.sh — build the bee binary and deploy it to a remote host over SSH.
+#
+# Usage: scripts/deploy.sh [HOST]
+#   HOST  address of the target machine; prompted for when omitted.
+#
+# The binary is uploaded to /tmp on the target, moved into place with sudo,
+# and the bee services are restarted.
+set -euo pipefail
+
+REMOTE_USER="bee"
+REMOTE_BIN="/usr/local/bin/bee"
+LOCAL_BIN="audit/bee"
+SERVICES="bee-audit bee-web"
+
+# The build and LOCAL_BIN use paths relative to the repository root, so
+# switch there regardless of the directory the script was started from.
+cd "$(dirname "${BASH_SOURCE[0]}")/.."
+
+# --- IP ---
+if [[ $# -ge 1 ]]; then
+    HOST="$1"
+else
+    read -rp "IP адрес хоста: " HOST
+fi
+[[ -z "$HOST" ]] && { echo "Ошибка: IP не указан" >&2; exit 1; }
+
+# --- SSH options ---
+# Host keys are neither verified nor recorded: with StrictHostKeyChecking=no
+# alone, ssh disables password authentication as soon as a host's key differs
+# from the one saved in known_hosts.
+SSH_OPTS=(-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=10)
+
+# Check whether key-based login works; otherwise a password is needed.
+SSH_PASS=""
+if ! ssh "${SSH_OPTS[@]}" -o BatchMode=yes "${REMOTE_USER}@${HOST}" true 2>/dev/null; then
+    if command -v sshpass &>/dev/null; then
+        read -rsp "Пароль для ${REMOTE_USER}@${HOST}: " SSH_PASS
+        echo
+        # Pass the password through the SSHPASS environment variable (-e)
+        # instead of -p so that it does not show up in the process list.
+        export SSHPASS="$SSH_PASS"
+        SSH_CMD=(sshpass -e ssh "${SSH_OPTS[@]}")
+        SCP_CMD=(sshpass -e scp "${SSH_OPTS[@]}")
+    else
+        echo "sshpass не установлен. Введите пароль вручную при запросе (или установите SSH-ключ)."
+        SSH_CMD=(ssh "${SSH_OPTS[@]}")
+        SCP_CMD=(scp "${SSH_OPTS[@]}")
+    fi
+else
+    SSH_CMD=(ssh "${SSH_OPTS[@]}")
+    SCP_CMD=(scp "${SSH_OPTS[@]}")
+fi
+
+REMOTE="${REMOTE_USER}@${HOST}"
+
+# --- Build ---
+echo "==> Сборка бинарника..."
+(
+    cd audit
+    VERSION=$(sh ./scripts/resolve-version.sh 2>/dev/null || echo "dev")
+    CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
+        go build -ldflags "-X main.Version=${VERSION}" -o bee ./cmd/bee
+)
+echo "    OK: $(ls -lh "${LOCAL_BIN}" | awk '{print $5, $9}')"
+
+# --- Deploy ---
+echo "==> Копирование на ${REMOTE}..."
+"${SCP_CMD[@]}" "${LOCAL_BIN}" "${REMOTE}:/tmp/bee-new"
+
+echo "==> Замена бинарника и перезапуск сервисов..."
+# The heredoc is unquoted on purpose: REMOTE_BIN and SERVICES are expanded
+# locally before the script is sent to the remote bash.
+"${SSH_CMD[@]}" "$REMOTE" bash -s <<EOF
+set -e
+sudo mv /tmp/bee-new ${REMOTE_BIN}
+sudo chmod +x ${REMOTE_BIN}
+sudo systemctl restart ${SERVICES}
+sleep 2
+systemctl status ${SERVICES} --no-pager -l
+EOF
+
+echo "==> Готово."