fix(webui): repair audit actions and CPU burn flow - v3.15

fix(stress): label loaders and improve john opencl diagnostics
fix(iso): validate memtest with xorriso fallback
2026-04-01 08:19:11 +03:00 · 2026-04-01 07:31:52 +03:00 · 2026-04-01 07:24:05 +03:00 · 2026-04-01 07:14:53 +03:00 · 2026-04-01 07:04:48 +03:00 · 2026-03-31 22:28:26 +03:00
61 changed files with 4610 additions and 590 deletions
--- a/PLAN.md
+++ b/PLAN.md
@@ -343,9 +343,9 @@ Planned code shape:
 - `bee tui` can rerun the audit manually
 - `bee tui` can export the latest audit JSON to removable media
 - `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
+- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-burn`
 - SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
+- Memory SAT runtime defaults can be overridden via `BEE_MEMTESTER_*`
 - removable export requires explicit target selection, mount, confirmation, copy, and cleanup

 ### 2.6 — Vendor utilities and optional assets
--- a/audit/cmd/bee/main.go
+++ b/audit/cmd/bee/main.go
@@ -356,6 +356,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 	fs := flag.NewFlagSet("sat", flag.ContinueOnError)
 	fs.SetOutput(stderr)
 	duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
+	diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
 	if err := fs.Parse(args[1:]); err != nil {
 		if err == flag.ErrHelp {
 			return 0
@@ -370,7 +371,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 	target := args[0]
 	if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
 		fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
-		fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>]")
+		fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]")
 		return 2
 	}

@@ -382,7 +383,12 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 	logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
 	switch target {
 	case "nvidia":
-		archive, err = application.RunNvidiaAcceptancePack("", logLine)
+		level := *diagLevel
+		if level > 0 {
+			_, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine)
+		} else {
+			archive, err = application.RunNvidiaAcceptancePack("", logLine)
+		}
 	case "memory":
 		archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
 	case "storage":
--- a/audit/go.mod
+++ b/audit/go.mod
@@ -1,6 +1,6 @@
 module bee/audit

-go 1.24.0
+go 1.25.0

 replace reanimator/chart => ../internal/chart

@@ -13,5 +13,14 @@ require (
 	github.com/dustin/go-humanize v1.0.1 // indirect
 	github.com/go-analyze/bulk v0.1.3 // indirect
 	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
 	golang.org/x/image v0.24.0 // indirect
+	golang.org/x/sys v0.42.0 // indirect
+	modernc.org/libc v1.70.0 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+	modernc.org/sqlite v1.48.0 // indirect
 )
--- a/audit/go.sum
+++ b/audit/go.sum
@@ -8,11 +8,30 @@ github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00
 github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
 github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
 github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
 golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
 golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
+golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
+modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
+modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -107,6 +107,7 @@ func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 type satRunner interface {
 	RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
 	RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 	RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 	RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
@@ -114,10 +115,13 @@ type satRunner interface {
 	DetectGPUVendor() string
 	ListAMDGPUs() ([]platform.AMDGPUInfo, error)
 	RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 	RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
+	RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
 	RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 }

@@ -505,6 +509,17 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
 	return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
 }

+func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
+	return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
+}
+
+func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaStressPack(ctx, baseDir, opts, logFunc)
+}
+
 func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
 	return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
 }
@@ -577,6 +592,20 @@ func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
 	return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
 }

+func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
+}
+
+func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
+}
+
 func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
 	return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
 }
@@ -611,6 +640,13 @@ func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platfor
 	return a.sat.RunFanStressTest(ctx, baseDir, opts)
 }

+func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
+}
+
 func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
 	path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
 	body := "Results: " + path
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -120,14 +120,15 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 }

 type fakeSAT struct {
-	runNvidiaFn      func(string) (string, error)
-	runMemoryFn      func(string) (string, error)
-	runStorageFn     func(string) (string, error)
-	runCPUFn         func(string, int) (string, error)
-	detectVendorFn   func() string
-	listAMDGPUsFn    func() ([]platform.AMDGPUInfo, error)
-	runAMDPackFn     func(string) (string, error)
-	listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
+	runNvidiaFn       func(string) (string, error)
+	runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error)
+	runMemoryFn       func(string) (string, error)
+	runStorageFn      func(string) (string, error)
+	runCPUFn          func(string, int) (string, error)
+	detectVendorFn    func() string
+	listAMDGPUsFn     func() ([]platform.AMDGPUInfo, error)
+	runAMDPackFn      func(string) (string, error)
+	listNvidiaGPUsFn  func() ([]platform.NvidiaGPU, error)
 }

 func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
@@ -138,6 +139,13 @@ func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir s
 	return f.runNvidiaFn(baseDir)
 }

+func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
+	if f.runNvidiaStressFn != nil {
+		return f.runNvidiaStressFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
 func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
 	if f.listNvidiaGPUsFn != nil {
 		return f.listNvidiaGPUsFn()
@@ -181,6 +189,14 @@ func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(
 	return "", nil
 }

+func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
 func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
 	return "", nil
 }
@@ -195,6 +211,10 @@ func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStr
 	return "", nil
 }

+func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.PlatformStressOptions, _ func(string)) (string, error) {
+	return "", nil
+}
+
 func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
 	return "", nil
 }
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -78,48 +78,56 @@ func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {

 // sampleAMDGPUMetrics queries rocm-smi for live GPU metrics.
 func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
-	// --showtemp --showuse --showpower --csv — one row per GPU
 	out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
 	if err != nil {
 		return nil, err
 	}
-	var rows []GPUMetricRow
-	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
-		line = strings.TrimSpace(line)
-		if line == "" || strings.HasPrefix(strings.ToLower(line), "device") {
-			continue
-		}
-		// CSV format: device,temp_c,gpu_use%,mem_use%,power_w (order may vary by rocm-smi version)
-		// We parse by column header from the first line.
-		parts := strings.Split(line, ",")
-		if len(parts) < 2 {
-			continue
-		}
-		idx := len(rows)
-		row := GPUMetricRow{GPUIndex: idx}
-		// rocm-smi CSV columns vary; extract what we can
-		for i, p := range parts {
-			p = strings.TrimSpace(p)
-			switch {
-			case i == 0:
-				// device index like "card0" or "0"
-			case strings.Contains(strings.ToLower(p), "n/a"):
-				// skip N/A
-			default:
-				// Try to match by position heuristic: temp, use%, memuse%, power
-				v := parseGPUFloat(p)
-				switch {
-				case i == 1 && row.TempC == 0:
-					row.TempC = v
-				case i == 2 && row.UsagePct == 0:
-					row.UsagePct = v
-				case i == 3 && row.MemUsagePct == 0:
-					row.MemUsagePct = v
-				case i == 4 && row.PowerW == 0:
-					row.PowerW = v
+	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
+	if len(lines) < 2 {
+		return nil, fmt.Errorf("rocm-smi: insufficient output")
+	}
+
+	// Parse header to find column indices by name.
+	headers := strings.Split(lines[0], ",")
+	colIdx := func(keywords ...string) int {
+		for i, h := range headers {
+			hl := strings.ToLower(strings.TrimSpace(h))
+			for _, kw := range keywords {
+				if strings.Contains(hl, kw) {
+					return i
 				}
 			}
 		}
+		return -1
+	}
+	idxTemp := colIdx("sensor edge", "temperature (c)", "temp")
+	idxUse := colIdx("gpu use (%)")
+	idxMem := colIdx("vram%", "memory allocated")
+	idxPow := colIdx("average graphics package power", "power (w)")
+
+	var rows []GPUMetricRow
+	for _, line := range lines[1:] {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		parts := strings.Split(line, ",")
+		idx := len(rows)
+		row := GPUMetricRow{GPUIndex: idx}
+		get := func(i int) float64 {
+			if i < 0 || i >= len(parts) {
+				return 0
+			}
+			v := strings.TrimSpace(parts[i])
+			if strings.EqualFold(v, "n/a") {
+				return 0
+			}
+			return parseGPUFloat(v)
+		}
+		row.TempC = get(idxTemp)
+		row.UsagePct = get(idxUse)
+		row.MemUsagePct = get(idxMem)
+		row.PowerW = get(idxPow)
 		rows = append(rows, row)
 	}
 	if len(rows) == 0 {
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -304,6 +304,7 @@ func classifyLiveTempGroup(chip, name string) string {
 		strings.Contains(text, "x86_pkg_temp"),
 		strings.Contains(text, "tctl"),
 		strings.Contains(text, "tdie"),
+		strings.Contains(text, "tccd"),
 		strings.Contains(text, "cpu"),
 		strings.Contains(text, "peci"):
 		return "cpu"
--- a/audit/internal/platform/network.go
+++ b/audit/internal/platform/network.go
@@ -134,7 +134,17 @@ func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
 		if len(fields) == 0 {
 			continue
 		}
-		args := append([]string{"route", "add"}, fields...)
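+		// Strip flags echoed by `ip route show` before replaying the route. NOTE(review): ip-route(8) lists onlink/pervasive as valid NHFLAGS for add — confirm dropping them is intended.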
+		filtered := fields[:0]
+		for _, f := range fields {
+			switch f {
+			case "linkdown", "dead", "onlink", "pervasive":
+				// skip
+			default:
+				filtered = append(filtered, f)
+			}
+		}
+		args := append([]string{"route", "add"}, filtered...)
 		if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
 			detail := strings.TrimSpace(string(raw))
 			if detail != "" {
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -0,0 +1,205 @@
+package platform
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts NvidiaStressOptions, logFunc func(string)) (string, error) {
+	normalizeNvidiaStressOptions(&opts)
+
+	job, err := buildNvidiaStressJob(opts)
+	if err != nil {
+		return "", err
+	}
+
+	return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
+		job,
+		{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
+	}, logFunc)
+}
+
+func nvidiaStressArchivePrefix(loader string) string {
+	switch strings.TrimSpace(strings.ToLower(loader)) {
+	case NvidiaStressLoaderJohn:
+		return "gpu-nvidia-john"
+	case NvidiaStressLoaderNCCL:
+		return "gpu-nvidia-nccl"
+	default:
+		return "gpu-nvidia-burn"
+	}
+}
+
+func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
+	selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
+	if err != nil {
+		return satJob{}, err
+	}
+
+	loader := strings.TrimSpace(strings.ToLower(opts.Loader))
+	switch loader {
+	case "", NvidiaStressLoaderBuiltin:
+		cmd := []string{
+			"bee-gpu-burn",
+			"--seconds", strconv.Itoa(opts.DurationSec),
+			"--size-mb", strconv.Itoa(opts.SizeMB),
+		}
+		if len(selected) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(selected))
+		}
+		return satJob{
+			name:       "03-bee-gpu-burn.log",
+			cmd:        cmd,
+			collectGPU: true,
+			gpuIndices: selected,
+		}, nil
+	case NvidiaStressLoaderJohn:
+		cmd := []string{
+			"bee-john-gpu-stress",
+			"--seconds", strconv.Itoa(opts.DurationSec),
+		}
+		if len(selected) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(selected))
+		}
+		return satJob{
+			name:       "03-john-gpu-stress.log",
+			cmd:        cmd,
+			collectGPU: true,
+			gpuIndices: selected,
+		}, nil
+	case NvidiaStressLoaderNCCL:
+		cmd := []string{
+			"bee-nccl-gpu-stress",
+			"--seconds", strconv.Itoa(opts.DurationSec),
+		}
+		if len(selected) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(selected))
+		}
+		return satJob{
+			name:       "03-bee-nccl-gpu-stress.log",
+			cmd:        cmd,
+			collectGPU: true,
+			gpuIndices: selected,
+		}, nil
+	default:
+		return satJob{}, fmt.Errorf("unknown NVIDIA stress loader %q", opts.Loader)
+	}
+}
+
+func normalizeNvidiaStressOptions(opts *NvidiaStressOptions) {
+	if opts.DurationSec <= 0 {
+		opts.DurationSec = 300
+	}
+	if opts.SizeMB <= 0 {
+		opts.SizeMB = 64
+	}
+	switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
+	case "", NvidiaStressLoaderBuiltin:
+		opts.Loader = NvidiaStressLoaderBuiltin
+	case NvidiaStressLoaderJohn:
+		opts.Loader = NvidiaStressLoaderJohn
+	case NvidiaStressLoaderNCCL:
+		opts.Loader = NvidiaStressLoaderNCCL
+	default:
+		opts.Loader = NvidiaStressLoaderBuiltin
+	}
+	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
+	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
+}
+
+func resolveNvidiaGPUSelection(include, exclude []int) ([]int, error) {
+	all, err := listNvidiaGPUIndices()
+	if err != nil {
+		return nil, err
+	}
+	if len(all) == 0 {
+		return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
+	}
+
+	selected := all
+	if len(include) > 0 {
+		want := make(map[int]struct{}, len(include))
+		for _, idx := range include {
+			want[idx] = struct{}{}
+		}
+		selected = selected[:0]
+		for _, idx := range all {
+			if _, ok := want[idx]; ok {
+				selected = append(selected, idx)
+			}
+		}
+	}
+	if len(exclude) > 0 {
+		skip := make(map[int]struct{}, len(exclude))
+		for _, idx := range exclude {
+			skip[idx] = struct{}{}
+		}
+		filtered := selected[:0]
+		for _, idx := range selected {
+			if _, ok := skip[idx]; ok {
+				continue
+			}
+			filtered = append(filtered, idx)
+		}
+		selected = filtered
+	}
+	if len(selected) == 0 {
+		return nil, fmt.Errorf("no NVIDIA GPUs selected after applying filters")
+	}
+	out := append([]int(nil), selected...)
+	sort.Ints(out)
+	return out, nil
+}
+
+func listNvidiaGPUIndices() ([]int, error) {
+	out, err := satExecCommand("nvidia-smi", "--query-gpu=index", "--format=csv,noheader,nounits").Output()
+	if err != nil {
+		return nil, fmt.Errorf("nvidia-smi: %w", err)
+	}
+	var indices []int
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		idx, err := strconv.Atoi(line)
+		if err != nil {
+			continue
+		}
+		indices = append(indices, idx)
+	}
+	return dedupeSortedIndices(indices), nil
+}
+
+func dedupeSortedIndices(values []int) []int {
+	if len(values) == 0 {
+		return nil
+	}
+	seen := make(map[int]struct{}, len(values))
+	out := make([]int, 0, len(values))
+	for _, value := range values {
+		if value < 0 {
+			continue
+		}
+		if _, ok := seen[value]; ok {
+			continue
+		}
+		seen[value] = struct{}{}
+		out = append(out, value)
+	}
+	sort.Ints(out)
+	return out
+}
+
+func joinIndexList(values []int) string {
+	parts := make([]string, 0, len(values))
+	for _, value := range values {
+		parts = append(parts, strconv.Itoa(value))
+	}
+	return strings.Join(parts, ",")
+}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -0,0 +1,528 @@
+package platform
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"encoding/csv"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// PlatformStressCycle defines one load+idle cycle.
+type PlatformStressCycle struct {
+	LoadSec int // seconds of simultaneous CPU+GPU stress
+	IdleSec int // seconds of idle monitoring after load cut
+}
+
+// PlatformStressOptions controls the thermal cycling test.
+type PlatformStressOptions struct {
+	Cycles []PlatformStressCycle
+}
+
+// platformStressRow is one telemetry sample; collectPhase records one per one-second tick.
+type platformStressRow struct {
+	ElapsedSec   float64
+	Cycle        int
+	Phase        string // "load" | "idle"
+	CPULoadPct   float64
+	MaxCPUTempC  float64
+	MaxGPUTempC  float64
+	SysPowerW    float64
+	FanMinRPM    float64
+	FanMaxRPM    float64
+	GPUThrottled bool
+}
+
+// RunPlatformStress runs repeated load+idle thermal cycling.
+// Each cycle starts CPU (stressapptest) and GPU stress simultaneously,
+// runs for LoadSec, then cuts load abruptly and monitors for IdleSec.
+func (s *System) RunPlatformStress(
+	ctx context.Context,
+	baseDir string,
+	opts PlatformStressOptions,
+	logFunc func(string),
+) (string, error) {
+	if logFunc == nil {
+		logFunc = func(string) {}
+	}
+	if len(opts.Cycles) == 0 {
+		return "", fmt.Errorf("no cycles defined")
+	}
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
+	}
+
+	stamp := time.Now().UTC().Format("20060102-150405")
+	runDir := filepath.Join(baseDir, "platform-stress-"+stamp)
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir run dir: %w", err)
+	}
+
+	vendor := s.DetectGPUVendor()
+	logFunc(fmt.Sprintf("Platform Thermal Cycling — %d cycle(s), GPU vendor: %s", len(opts.Cycles), vendor))
+
+	var rows []platformStressRow
+	start := time.Now()
+
+	var analyses []cycleAnalysis
+
+	for i, cycle := range opts.Cycles {
+		if ctx.Err() != nil {
+			break
+		}
+		cycleNum := i + 1
+		logFunc(fmt.Sprintf("--- Cycle %d/%d: load=%ds, idle=%ds ---", cycleNum, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec))
+
+		// ── LOAD PHASE ───────────────────────────────────────────────────────
+		loadCtx, loadCancel := context.WithTimeout(ctx, time.Duration(cycle.LoadSec)*time.Second)
+		var wg sync.WaitGroup
+
+		// CPU stress
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			cpuCmd, err := buildCPUStressCmd(loadCtx)
+			if err != nil {
+				logFunc("CPU stress: " + err.Error())
+				return
+			}
+			_ = cpuCmd.Wait() // exits when loadCtx times out (SIGKILL)
+		}()
+
+		// GPU stress
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			gpuCmd := buildGPUStressCmd(loadCtx, vendor)
+			if gpuCmd == nil {
+				return
+			}
+			_ = gpuCmd.Wait()
+		}()
+
+		// Monitor the load phase inline: collectPhase blocks here until loadCtx is done.
+		loadRows := collectPhase(loadCtx, cycleNum, "load", start)
+		for _, r := range loadRows {
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, loadRows...)
+		loadCancel()
+		wg.Wait()
+
+		if len(loadRows) > 0 {
+			logFunc(fmt.Sprintf("Cycle %d load ended (%.0fs)", cycleNum, loadRows[len(loadRows)-1].ElapsedSec))
+		}
+
+		// ── IDLE PHASE ───────────────────────────────────────────────────────
+		idleCtx, idleCancel := context.WithTimeout(ctx, time.Duration(cycle.IdleSec)*time.Second)
+		idleRows := collectPhase(idleCtx, cycleNum, "idle", start)
+		for _, r := range idleRows {
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, idleRows...)
+		idleCancel()
+
+		// Per-cycle analysis
+		an := analyzePlatformCycle(loadRows, idleRows)
+		analyses = append(analyses, an)
+		logFunc(fmt.Sprintf("Cycle %d: maxCPU=%.1f°C maxGPU=%.1f°C power=%.0fW throttled=%v fanDrop=%.0f%%",
+			cycleNum, an.maxCPUTemp, an.maxGPUTemp, an.maxPower, an.throttled, an.fanDropPct))
+	}
+
+	// Write CSV
+	csvData := writePlatformCSV(rows)
+	_ = os.WriteFile(filepath.Join(runDir, "metrics.csv"), csvData, 0644)
+
+	// Write summary
+	summary := writePlatformSummary(opts, analyses)
+	logFunc("--- Summary ---")
+	for _, line := range strings.Split(summary, "\n") {
+		if line != "" {
+			logFunc(line)
+		}
+	}
+	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
+
+	// Pack tar.gz
+	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
+	if err := packPlatformDir(runDir, archivePath); err != nil {
+		return "", fmt.Errorf("pack archive: %w", err)
+	}
+	_ = os.RemoveAll(runDir)
+	return archivePath, nil
+}
+
+// collectPhase samples live metrics every second until ctx is done.
+func collectPhase(ctx context.Context, cycle int, phase string, testStart time.Time) []platformStressRow {
+	var rows []platformStressRow
+	ticker := time.NewTicker(time.Second)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return rows
+		case <-ticker.C:
+			sample := SampleLiveMetrics()
+			rows = append(rows, sampleToPlatformRow(sample, cycle, phase, testStart))
+		}
+	}
+}
+
+func sampleToPlatformRow(s LiveMetricSample, cycle int, phase string, testStart time.Time) platformStressRow {
+	r := platformStressRow{
+		ElapsedSec: time.Since(testStart).Seconds(),
+		Cycle:      cycle,
+		Phase:      phase,
+		CPULoadPct: s.CPULoadPct,
+		SysPowerW:  s.PowerW,
+	}
+	for _, t := range s.Temps {
+		switch t.Group {
+		case "cpu":
+			if t.Celsius > r.MaxCPUTempC {
+				r.MaxCPUTempC = t.Celsius
+			}
+		case "gpu":
+			if t.Celsius > r.MaxGPUTempC {
+				r.MaxGPUTempC = t.Celsius
+			}
+		}
+	}
+	for _, g := range s.GPUs {
+		if g.TempC > r.MaxGPUTempC {
+			r.MaxGPUTempC = g.TempC
+		}
+	}
+	if len(s.Fans) > 0 {
+		r.FanMinRPM = s.Fans[0].RPM
+		r.FanMaxRPM = s.Fans[0].RPM
+		for _, f := range s.Fans[1:] {
+			if f.RPM < r.FanMinRPM {
+				r.FanMinRPM = f.RPM
+			}
+			if f.RPM > r.FanMaxRPM {
+				r.FanMaxRPM = f.RPM
+			}
+		}
+	}
+	return r
+}
+
+func formatPlatformRow(r platformStressRow) string {
+	throttle := ""
+	if r.GPUThrottled {
+		throttle = " THROTTLE"
+	}
+	fans := ""
+	if r.FanMinRPM > 0 {
+		fans = fmt.Sprintf(" fans=%.0f-%.0fRPM", r.FanMinRPM, r.FanMaxRPM)
+	}
+	return fmt.Sprintf("[%5.0fs] cycle=%d phase=%-4s cpu=%.0f%% cpuT=%.1f°C gpuT=%.1f°C pwr=%.0fW%s%s",
+		r.ElapsedSec, r.Cycle, r.Phase, r.CPULoadPct, r.MaxCPUTempC, r.MaxGPUTempC, r.SysPowerW, fans, throttle)
+}
+
+func analyzePlatformCycle(loadRows, idleRows []platformStressRow) cycleAnalysis {
+	var an cycleAnalysis
+	for _, r := range loadRows {
+		if r.MaxCPUTempC > an.maxCPUTemp {
+			an.maxCPUTemp = r.MaxCPUTempC
+		}
+		if r.MaxGPUTempC > an.maxGPUTemp {
+			an.maxGPUTemp = r.MaxGPUTempC
+		}
+		if r.SysPowerW > an.maxPower {
+			an.maxPower = r.SysPowerW
+		}
+		if r.GPUThrottled {
+			an.throttled = true
+		}
+	}
+	// Fan RPM at cut = mean of (min+max)/2 over the last (up to) 5 load rows that reported a fan reading
+	if n := len(loadRows); n > 0 {
+		window := loadRows
+		if n > 5 {
+			window = loadRows[n-5:]
+		}
+		var sum float64
+		var cnt int
+		for _, r := range window {
+			if r.FanMinRPM > 0 {
+				sum += (r.FanMinRPM + r.FanMaxRPM) / 2
+				cnt++
+			}
+		}
+		if cnt > 0 {
+			an.fanAtCutAvg = sum / float64(cnt)
+		}
+	}
+	// Lowest fan midpoint among idle rows within 15s of the last load sample (starts from the at-cut average)
+	an.fanMin15s = an.fanAtCutAvg
+	var cutElapsed float64
+	if len(loadRows) > 0 {
+		cutElapsed = loadRows[len(loadRows)-1].ElapsedSec
+	}
+	for _, r := range idleRows {
+		if r.ElapsedSec > cutElapsed+15 {
+			break
+		}
+		avg := (r.FanMinRPM + r.FanMaxRPM) / 2
+		if avg > 0 && (an.fanMin15s == 0 || avg < an.fanMin15s) {
+			an.fanMin15s = avg
+		}
+	}
+	if an.fanAtCutAvg > 0 {
+		an.fanDropPct = (an.fanAtCutAvg - an.fanMin15s) / an.fanAtCutAvg * 100
+	}
+	return an
+}
+
+type cycleAnalysis struct {
+	maxCPUTemp  float64
+	maxGPUTemp  float64
+	maxPower    float64
+	throttled   bool
+	fanAtCutAvg float64
+	fanMin15s   float64
+	fanDropPct  float64
+}
+
+// writePlatformSummary renders the plain-text report for a thermal-cycling run.
+//
+// One section is emitted per entry of analyses, followed by an overall verdict:
+// FAIL when any cycle throttled, otherwise WARN when any cycle dropped fan speed
+// by more than 20% within 15s of the load cut, otherwise PASS.
+//
+// analyses[i] is expected to describe opts.Cycles[i]. If analyses is longer than
+// opts.Cycles, the surplus sections are still printed, just without the
+// load/idle durations, instead of panicking on an out-of-range index.
+func writePlatformSummary(opts PlatformStressOptions, analyses []cycleAnalysis) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "Platform Thermal Cycling — %d cycle(s)\n", len(opts.Cycles))
+	fmt.Fprintf(&b, "%s\n\n", strings.Repeat("=", 48))
+
+	totalThrottle := 0
+	totalFanWarn := 0
+	for i, an := range analyses {
+		if i < len(opts.Cycles) {
+			cycle := opts.Cycles[i]
+			fmt.Fprintf(&b, "Cycle %d/%d (load=%ds, idle=%ds)\n", i+1, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec)
+		} else {
+			// No matching cycle configuration; keep the section but omit the durations.
+			fmt.Fprintf(&b, "Cycle %d/%d\n", i+1, len(opts.Cycles))
+		}
+		fmt.Fprintf(&b, "  Max CPU temp: %.1f°C\n", an.maxCPUTemp)
+		fmt.Fprintf(&b, "  Max GPU temp: %.1f°C\n", an.maxGPUTemp)
+		fmt.Fprintf(&b, "  Max sys power: %.0f W\n", an.maxPower)
+		if an.throttled {
+			b.WriteString("  Throttle: DETECTED\n")
+			totalThrottle++
+		} else {
+			b.WriteString("  Throttle: none\n")
+		}
+		// Fan lines are only meaningful when fan telemetry was available at the load cut.
+		if an.fanAtCutAvg > 0 {
+			fmt.Fprintf(&b, "  Fan at load cut: %.0f RPM avg\n", an.fanAtCutAvg)
+			fmt.Fprintf(&b, "  Fan min (first 15s idle): %.0f RPM (drop %.0f%%)\n", an.fanMin15s, an.fanDropPct)
+			if an.fanDropPct > 20 {
+				b.WriteString("  Fan response: WARN — fast spindown (>20% drop in 15s)\n")
+				totalFanWarn++
+			} else {
+				b.WriteString("  Fan response: OK\n")
+			}
+		}
+		b.WriteString("\n")
+	}
+
+	fmt.Fprintf(&b, "%s\n", strings.Repeat("=", 48))
+	switch {
+	case totalThrottle > 0:
+		fmt.Fprintf(&b, "Overall: FAIL — throttle detected in %d/%d cycles\n", totalThrottle, len(analyses))
+	case totalFanWarn > 0:
+		fmt.Fprintf(&b, "Overall: WARN — fast fan spindown in %d/%d cycles (cooling recovery risk)\n", totalFanWarn, len(analyses))
+	default:
+		b.WriteString("Overall: PASS\n")
+	}
+	return b.String()
+}
+
+// writePlatformCSV serialises the sampled rows as CSV: one header record followed
+// by one record per row. Load, temperatures and power use one decimal, fan speeds
+// are written without decimals, and the throttle flag is encoded as "0"/"1".
+func writePlatformCSV(rows []platformStressRow) []byte {
+	var out bytes.Buffer
+	cw := csv.NewWriter(&out)
+	fixed := func(v float64, prec int) string {
+		return strconv.FormatFloat(v, 'f', prec, 64)
+	}
+
+	header := []string{
+		"elapsed_sec", "cycle", "phase",
+		"cpu_load_pct", "max_cpu_temp_c", "max_gpu_temp_c",
+		"sys_power_w", "fan_min_rpm", "fan_max_rpm", "gpu_throttled",
+	}
+	// The destination is an in-memory buffer, so write errors are ignored here.
+	_ = cw.Write(header)
+
+	for _, row := range rows {
+		flag := "0"
+		if row.GPUThrottled {
+			flag = "1"
+		}
+		record := []string{
+			fixed(row.ElapsedSec, 1),
+			strconv.Itoa(row.Cycle),
+			row.Phase,
+			fixed(row.CPULoadPct, 1),
+			fixed(row.MaxCPUTempC, 1),
+			fixed(row.MaxGPUTempC, 1),
+			fixed(row.SysPowerW, 1),
+			fixed(row.FanMinRPM, 0),
+			fixed(row.FanMaxRPM, 0),
+			flag,
+		}
+		_ = cw.Write(record)
+	}
+	cw.Flush()
+	return out.Bytes()
+}
+
+// buildCPUStressCmd locates stressapptest, starts it at nice 15 and returns the
+// already-running command. The requested run time is a full day (86400s) so that
+// cancelling ctx, not the tool's own timer, decides when the load stops.
+// The thread count (-m) and memory size in MB (-M) are appended only when the
+// respective helper returns a positive value.
+// An error is returned when the binary cannot be found or fails to start.
+func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
+	bin, lookErr := satLookPath("stressapptest")
+	if lookErr != nil {
+		return nil, fmt.Errorf("stressapptest not found: %w", lookErr)
+	}
+
+	args := []string{"-s", "86400", "-W", "--cc_test"}
+	threads := platformStressCPUThreads()
+	if threads > 0 {
+		args = append(args, "-m", strconv.Itoa(threads))
+	}
+	memMB := platformStressMemoryMB()
+	if memMB > 0 {
+		args = append(args, "-M", strconv.Itoa(memMB))
+	}
+
+	cmd := exec.CommandContext(ctx, bin, args...)
+	// Output of the load generator is discarded.
+	cmd.Stdout, cmd.Stderr = nil, nil
+	startErr := startLowPriorityCmd(cmd, 15)
+	if startErr != nil {
+		return nil, fmt.Errorf("stressapptest start: %w", startErr)
+	}
+	return cmd, nil
+}
+
+// buildGPUStressCmd dispatches to the vendor-specific GPU load launcher.
+// vendor is lower-cased and compared with "amd" and "nvidia"; any other value
+// yields nil, which callers treat as "no GPU load" (CPU-only cycling still runs).
+func buildGPUStressCmd(ctx context.Context, vendor string) *exec.Cmd {
+	normalized := strings.ToLower(vendor)
+	if normalized == "amd" {
+		return buildAMDGPUStressCmd(ctx)
+	}
+	if normalized == "nvidia" {
+		return buildNvidiaGPUStressCmd(ctx)
+	}
+	return nil
+}
+
+// buildAMDGPUStressCmd writes an RVS GST action file to /tmp/bee-platform-gst.conf
+// and starts `rvs -c <file>` at nice 10, returning the running command.
+// The configured duration (86400000, i.e. 24h if RVS reads it as milliseconds)
+// is deliberately far longer than any cycle; cancelling ctx stops the load.
+//
+// nil is returned when rvs cannot be resolved, the config cannot be written, or
+// the process fails to start, so callers never receive a command that is not
+// actually running.
+func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd {
+	rvsArgs, err := resolveRVSCommand()
+	if err != nil || len(rvsArgs) == 0 {
+		return nil
+	}
+	rvsPath := rvsArgs[0]
+	cfg := `actions:
+- name: gst_platform
+  device: all
+  module: gst
+  parallel: true
+  duration: 86400000
+  copy_matrix: false
+  target_stress: 90
+  matrix_size_a: 8640
+  matrix_size_b: 8640
+  matrix_size_c: 8640
+`
+	cfgFile := "/tmp/bee-platform-gst.conf"
+	// Without the action file rvs has nothing to run; treat it like a missing tool.
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		return nil
+	}
+	cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	// A command that failed to start has a nil Process; do not hand it to callers.
+	if err := startLowPriorityCmd(cmd, 10); err != nil {
+		return nil
+	}
+	return cmd
+}
+
+// buildNvidiaGPUStressCmd starts the NVIDIA GPU load generator at nice 10 and
+// returns the running command. bee-gpu-burn is preferred; bee-gpu-stress is used
+// as a fallback when the former is not found. The tool is asked to run for
+// 86400 seconds so that cancelling ctx decides when the load stops.
+//
+// nil is returned when neither binary is available or the process fails to
+// start, so callers never receive a command that is not actually running.
+func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd {
+	path, err := satLookPath("bee-gpu-burn")
+	if err != nil {
+		path, err = satLookPath("bee-gpu-stress")
+	}
+	if err != nil {
+		return nil
+	}
+	cmd := exec.CommandContext(ctx, path, "--seconds", "86400", "--size-mb", "64")
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	// A command that failed to start has a nil Process; do not hand it to callers.
+	if err := startLowPriorityCmd(cmd, 10); err != nil {
+		return nil
+	}
+	return cmd
+}
+
+// startLowPriorityCmd starts cmd and then tries to set the new process's nice
+// value to nice. Only the start error is reported; a failure to change the
+// priority is ignored (best effort).
+func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
+	startErr := cmd.Start()
+	if startErr == nil && cmd.Process != nil {
+		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
+	}
+	return startErr
+}
+
+// platformStressCPUThreads picks the worker thread count for the CPU load.
+// A positive BEE_PLATFORM_STRESS_THREADS value wins. Otherwise the count is
+// derived from runtime.NumCPU(): 1 thread for up to 2 CPUs, one CPU of headroom
+// for up to 8 CPUs, and two CPUs of headroom beyond that.
+func platformStressCPUThreads() int {
+	override := envInt("BEE_PLATFORM_STRESS_THREADS", 0)
+	if override > 0 {
+		return override
+	}
+	total := runtime.NumCPU()
+	if total <= 2 {
+		return 1
+	}
+	headroom := 2
+	if total <= 8 {
+		headroom = 1
+	}
+	return total - headroom
+}
+
+// platformStressMemoryMB picks the memory size, in MB, for the CPU/memory load.
+// A positive BEE_PLATFORM_STRESS_MB value wins. Otherwise it is 60% of
+// freeMemBytes(), but never less than 1024; 0 is returned when the free-memory
+// figure is not positive.
+// NOTE(review): the 1024 MB floor can exceed 60% of free memory on small hosts —
+// confirm that is intended.
+func platformStressMemoryMB() int {
+	override := envInt("BEE_PLATFORM_STRESS_MB", 0)
+	if override > 0 {
+		return override
+	}
+	available := freeMemBytes()
+	if available <= 0 {
+		return 0
+	}
+	const floorMB = 1024
+	sixtyPct := int((available * 60) / 100 / (1024 * 1024))
+	if sixtyPct >= floorMB {
+		return sixtyPct
+	}
+	return floorMB
+}
+
+// packPlatformDir archives the files directly inside dir into a gzip-compressed
+// tarball at dest. Each entry is stored as "<base name of dir>/<file name>" with
+// mode 0644 and the current time as its modification time. Subdirectories are
+// skipped, and a file that cannot be read is left out instead of aborting the
+// archive (best effort).
+//
+// The tar, gzip and file writers are closed innermost-first and the first close
+// error is returned: a failed flush means the archive on disk is truncated, so
+// it must not be reported as success.
+func packPlatformDir(dir, dest string) (err error) {
+	f, err := os.Create(dest)
+	if err != nil {
+		return err
+	}
+	gz := gzip.NewWriter(f)
+	tw := tar.NewWriter(gz)
+	defer func() {
+		for _, closeFn := range []func() error{tw.Close, gz.Close, f.Close} {
+			if cerr := closeFn(); cerr != nil && err == nil {
+				err = cerr
+			}
+		}
+	}()
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return err
+	}
+	base := filepath.Base(dir)
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+		data, readErr := os.ReadFile(filepath.Join(dir, e.Name()))
+		if readErr != nil {
+			// Best effort: an unreadable file should not cost us the rest of the bundle.
+			continue
+		}
+		hdr := &tar.Header{
+			// Tar member names always use forward slashes.
+			Name:    filepath.ToSlash(filepath.Join(base, e.Name())),
+			Size:    int64(len(data)),
+			Mode:    0644,
+			ModTime: time.Now(),
+		}
+		if err := tw.WriteHeader(hdr); err != nil {
+			return err
+		}
+		if _, err := tw.Write(data); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/audit/internal/platform/platform_stress_test.go
+++ b/audit/internal/platform/platform_stress_test.go
@@ -0,0 +1,34 @@
+package platform
+
+import (
+	"runtime"
+	"testing"
+)
+
+// TestPlatformStressCPUThreadsOverride checks that BEE_PLATFORM_STRESS_THREADS
+// takes precedence over the CPU-count heuristic.
+func TestPlatformStressCPUThreadsOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
+	got := platformStressCPUThreads()
+	if got == 7 {
+		return
+	}
+	t.Fatalf("platformStressCPUThreads=%d want 7", got)
+}
+
+// TestPlatformStressCPUThreadsDefaultLeavesHeadroom checks the heuristic used when
+// no override is set: at least one thread, never more than NumCPU, and strictly
+// fewer than NumCPU on machines with more than two CPUs.
+func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
+	got := platformStressCPUThreads()
+	cpus := runtime.NumCPU()
+	switch {
+	case got < 1:
+		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
+	case got > cpus:
+		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, cpus)
+	case cpus > 2 && got >= cpus:
+		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, cpus)
+	}
+}
+
+// TestPlatformStressMemoryMBOverride checks that BEE_PLATFORM_STRESS_MB is
+// returned verbatim when set to a positive value.
+func TestPlatformStressMemoryMBOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
+	got := platformStressMemoryMB()
+	if got == 8192 {
+		return
+	}
+	t.Fatalf("platformStressMemoryMB=%d want 8192", got)
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -136,7 +136,10 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
 		tools = append(tools, s.CheckTools([]string{
 			"nvidia-smi",
 			"nvidia-bug-report.sh",
-			"bee-gpu-stress",
+			"bee-gpu-burn",
+			"bee-john-gpu-stress",
+			"bee-nccl-gpu-stress",
+			"all_reduce_perf",
 		})...)
 	case "amd":
 		tool := ToolStatus{Name: "rocm-smi"}
@@ -176,8 +179,8 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe
 			health.DriverReady = true
 		}

-		if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
-			out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
+		if _, lookErr := exec.LookPath("bee-gpu-burn"); lookErr == nil {
+			out, err := exec.Command("bee-gpu-burn", "--seconds", "1", "--size-mb", "1").CombinedOutput()
 			if err == nil {
 				health.CUDAReady = true
 			} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -33,6 +33,10 @@ var (
 		"/opt/rocm/libexec/rocm_smi/rocm_smi.py",
 		"/opt/rocm-*/libexec/rocm_smi/rocm_smi.py",
 	}
+	// rvsExecutableGlobs lists the fallback locations probed for the rvs binary
+	// when it is not found via PATH (see resolveRVSCommand).
+	rvsExecutableGlobs = []string{
+		"/opt/rocm/bin/rvs",
+		"/opt/rocm-*/bin/rvs",
+	}
 )

 // streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
@@ -132,6 +136,54 @@ func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFu
 	}, logFunc)
 }

+// RunAMDMemIntegrityPack runs the official RVS MEM module as a validate-style memory integrity test.
+//
+// It writes the RVS action file to /tmp/bee-amd-mem.conf and runs, as the
+// "gpu-amd-mem" acceptance pack: rocm-smi, `rvs -c <config>`, and a final
+// rocm-smi CSV snapshot of temperature, power and memory use. The return values
+// are those of runAcceptancePackCtx. An error is returned early when the AMD
+// runtime is not ready or the config file cannot be written.
+//
+// NOTE(review): copy_matrix/target_stress/matrix_size look like GST-style keys —
+// confirm the MEM module honours (or at least ignores) them.
+func (s *System) RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-mem.conf"
+	cfg := `actions:
+- name: mem_integrity
+  device: all
+  module: mem
+  parallel: true
+  duration: 60000
+  copy_matrix: false
+  target_stress: 90
+  matrix_size: 8640
+`
+	// rvs would otherwise run against a missing or stale config and fail obscurely.
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		return "", fmt.Errorf("write rvs config %s: %w", cfgFile, err)
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-mem", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rvs-mem.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "03-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
+// RunAMDMemBandwidthPack runs AMD's memory/interconnect bandwidth-oriented tools.
+//
+// It writes the RVS action file to /tmp/bee-amd-babel.conf and runs, as the
+// "gpu-amd-bandwidth" acceptance pack: rocm-smi, rocm-bandwidth-test,
+// `rvs -c <config>` (babel module), and a final rocm-smi CSV snapshot. The
+// return values are those of runAcceptancePackCtx. An error is returned early
+// when the AMD runtime is not ready or the config file cannot be written.
+func (s *System) RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-babel.conf"
+	cfg := `actions:
+- name: babel_mem_bw
+  device: all
+  module: babel
+  parallel: true
+  copy_matrix: true
+  target_stress: 90
+  matrix_size: 134217728
+`
+	// rvs would otherwise run against a missing or stale config and fail obscurely.
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		return "", fmt.Errorf("write rvs config %s: %w", cfgFile, err)
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-bandwidth", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
+		{name: "03-rvs-babel.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
 // RunAMDStressPack runs an AMD GPU burn-in pack.
 // Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
 func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
@@ -142,14 +194,36 @@ func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationS
 	if err := ensureAMDRuntimeReady(); err != nil {
 		return "", err
 	}
-	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
+	// Render the GST action file (copy_matrix is false; see amdStressRVSConfig) and write it where rvs can read it.
+	rvsCfg := amdStressRVSConfig(seconds)
+	cfgFile := "/tmp/bee-amd-gst.conf"
+	_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
+
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", amdStressJobs(seconds, cfgFile), logFunc)
+}
+
+// amdStressRVSConfig renders the RVS action file for the AMD stress pack: a single
+// GST action on all devices with copy_matrix disabled, target_stress 90 and
+// 8640-sized matrices. The duration field is written as seconds*1000.
+func amdStressRVSConfig(seconds int) string {
+	const tmpl = `actions:
+- name: gst_stress
+  device: all
+  module: gst
+  parallel: true
+  duration: %d
+  copy_matrix: false
+  target_stress: 90
+  matrix_size_a: 8640
+  matrix_size_b: 8640
+  matrix_size_c: 8640
+`
+	durationField := seconds * 1000
+	return fmt.Sprintf(tmpl, durationField)
+}
+
+// amdStressJobs returns the ordered job list of the AMD stress pack: a rocm-smi
+// snapshot, rocm-bandwidth-test, the RVS GST run driven by cfgFile (its log name
+// carries the duration in seconds), and a closing rocm-smi CSV snapshot.
+func amdStressJobs(seconds int, cfgFile string) []satJob {
+	return []satJob{
 		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
 		{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
-		{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
-			"bash", "-lc",
-			fmt.Sprintf("end=$((SECONDS+%d)); while [ \"$SECONDS\" -lt \"$end\" ]; do rocm-smi --showtemp --showpower --csv; sleep 1; done", seconds),
-		}},
-	}, logFunc)
+		{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
+	}
 }

 // ListNvidiaGPUs returns GPUs visible to nvidia-smi.
@@ -351,14 +425,12 @@ type satStats struct {
 }

+// nvidiaSATJobs returns the ordered job list of the NVIDIA acceptance pack:
+// nvidia-smi -q, two dmidecode dumps, nvidia-bug-report.sh and a short
+// bee-gpu-burn run. The burn step uses fixed parameters (5 seconds, 64 MB); the
+// former BEE_GPU_STRESS_SECONDS / BEE_GPU_STRESS_SIZE_MB overrides are no longer read.
 func nvidiaSATJobs() []satJob {
-	seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
-	sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
 	return []satJob{
 		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
 		{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
 		{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
-		{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
+		{name: "05-bee-gpu-burn.log", cmd: []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}},
 	}
 }

@@ -606,10 +678,27 @@ func resolveSATCommand(cmd []string) ([]string, error) {
 	if len(cmd) == 0 {
 		return nil, errors.New("empty SAT command")
 	}
-	if cmd[0] != "rocm-smi" {
-		return cmd, nil
+	switch cmd[0] {
+	case "rocm-smi":
+		return resolveROCmSMICommand(cmd[1:]...)
+	case "rvs":
+		return resolveRVSCommand(cmd[1:]...)
 	}
-	return resolveROCmSMICommand(cmd[1:]...)
+	path, err := satLookPath(cmd[0])
+	if err != nil {
+		return nil, fmt.Errorf("%s not found in PATH: %w", cmd[0], err)
+	}
+	return append([]string{path}, cmd[1:]...), nil
+}
+
+// resolveRVSCommand returns the argv for running rvs with args appended.
+// The binary is looked up via satLookPath first; if that fails, the first
+// existing path matched by rvsExecutableGlobs is used. An error is returned
+// when neither lookup finds the binary.
+func resolveRVSCommand(args ...string) ([]string, error) {
+	bin, lookErr := satLookPath("rvs")
+	if lookErr != nil {
+		located := false
+		for _, candidate := range expandExistingPaths(rvsExecutableGlobs) {
+			bin, located = candidate, true
+			break
+		}
+		if !located {
+			return nil, errors.New("rvs not found in PATH or under /opt/rocm")
+		}
+	}
+	return append([]string{bin}, args...), nil
 }

 func resolveROCmSMICommand(args ...string) ([]string, error) {
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -130,26 +130,21 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		stats.OK++
 	}

-	// loadPhase runs bee-gpu-stress for durSec; sampler stamps phaseName on each row.
+	// loadPhase runs bee-gpu-burn for durSec; sampler stamps phaseName on each row.
 	loadPhase := func(phaseName, stepName string, durSec int) {
 		if ctx.Err() != nil {
 			return
 		}
 		setPhase(phaseName)
-		var env []string
-		if len(opts.GPUIndices) > 0 {
-			ids := make([]string, len(opts.GPUIndices))
-			for i, idx := range opts.GPUIndices {
-				ids[i] = strconv.Itoa(idx)
-			}
-			env = []string{"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ",")}
-		}
 		cmd := []string{
-			"bee-gpu-stress",
+			"bee-gpu-burn",
 			"--seconds", strconv.Itoa(durSec),
 			"--size-mb", strconv.Itoa(opts.SizeMB),
 		}
-		out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, env, nil)
+		if len(opts.GPUIndices) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(dedupeSortedIndices(opts.GPUIndices)))
+		}
+		out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, nil, nil)
 		_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
 		if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
 			fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
@@ -322,7 +317,10 @@ func sampleFanSpeeds() ([]FanReading, error) {
 }

 // parseFanSpeeds parses "ipmitool sdr type Fan" output.
-// Line format: "FAN1             | 2400.000   | RPM        | ok"
+// Handles two formats:
+//
+//	Old: "FAN1 | 2400.000 | RPM | ok"           (value in col[1], unit in col[2])
+//	New: "FAN1 | 41h | ok | 29.1 | 4340 RPM"   (value+unit combined in last col)
 func parseFanSpeeds(raw string) []FanReading {
 	var fans []FanReading
 	for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
@@ -330,25 +328,39 @@ func parseFanSpeeds(raw string) []FanReading {
 		if len(parts) < 2 {
 			continue
 		}
-		unit := ""
-		if len(parts) >= 3 {
-			unit = strings.TrimSpace(parts[2])
+		name := strings.TrimSpace(parts[0])
+		// Find the first field that contains "RPM" (either as a standalone unit or inline)
+		rpmVal := 0.0
+		found := false
+		for _, p := range parts[1:] {
+			p = strings.TrimSpace(p)
+			if !strings.Contains(strings.ToUpper(p), "RPM") {
+				continue
+			}
+			if strings.EqualFold(p, "RPM") {
+				continue // unit-only column in old format; value is in previous field
+			}
+			val, err := parseFanRPMValue(p)
+			if err == nil {
+				rpmVal = val
+				found = true
+				break
+			}
 		}
-		valStr := strings.TrimSpace(parts[1])
-		if !strings.EqualFold(unit, "RPM") && !strings.Contains(strings.ToUpper(valStr), "RPM") {
+		// Old format: unit "RPM" is in col[2], value is in col[1]
+		if !found && len(parts) >= 3 && strings.EqualFold(strings.TrimSpace(parts[2]), "RPM") {
+			valStr := strings.TrimSpace(parts[1])
+			if !strings.EqualFold(valStr, "na") && !strings.EqualFold(valStr, "disabled") && valStr != "" {
+				if val, err := parseFanRPMValue(valStr); err == nil {
+					rpmVal = val
+					found = true
+				}
+			}
+		}
+		if !found {
 			continue
 		}
-		if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
-			continue
-		}
-		val, err := parseFanRPMValue(valStr)
-		if err != nil {
-			continue
-		}
-		fans = append(fans, FanReading{
-			Name: strings.TrimSpace(parts[0]),
-			RPM:  val,
-		})
+		fans = append(fans, FanReading{Name: name, RPM: rpmVal})
 	}
 	return fans
 }
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"testing"
 )

@@ -30,21 +31,59 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
 	if len(jobs) != 5 {
 		t.Fatalf("jobs=%d want 5", len(jobs))
 	}
-	if got := jobs[4].cmd[0]; got != "bee-gpu-stress" {
-		t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
+	if got := jobs[4].cmd[0]; got != "bee-gpu-burn" {
+		t.Fatalf("gpu stress command=%q want bee-gpu-burn", got)
 	}
 	if got := jobs[3].cmd[1]; got != "--output-file" {
 		t.Fatalf("bug report flag=%q want --output-file", got)
 	}
 }

-func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
-	t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
-	t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
+// TestAMDStressConfigUsesSingleGSTAction pins the rendered RVS stress config: a GST
+// action only, copy_matrix disabled, the duration applied exactly once, and all
+// three matrix sizes present.
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
+	t.Parallel()
 
+	rendered := amdStressRVSConfig(123)
+	switch {
+	case !strings.Contains(rendered, "module: gst"):
+		t.Fatalf("config missing gst module:\n%s", rendered)
+	case strings.Contains(rendered, "module: mem"):
+		t.Fatalf("config should not include mem module:\n%s", rendered)
+	case !strings.Contains(rendered, "copy_matrix: false"):
+		t.Fatalf("config should use copy_matrix=false:\n%s", rendered)
+	case strings.Count(rendered, "duration: 123000") != 1:
+		t.Fatalf("config should apply duration once:\n%s", rendered)
+	}
+	required := [...]string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"}
+	for i := range required {
+		if strings.Contains(rendered, required[i]) {
+			continue
+		}
+		t.Fatalf("config missing %s:\n%s", required[i], rendered)
+	}
+}
+
+// TestAMDStressJobsIncludeBandwidthAndGST checks the shape of the AMD stress job
+// list: four jobs, rocm-bandwidth-test second, and rvs third with the supplied
+// config path as its third argument.
+func TestAMDStressJobsIncludeBandwidthAndGST(t *testing.T) {
+	t.Parallel()
+
+	const cfgPath = "/tmp/test-amd-gst.conf"
+	jobs := amdStressJobs(300, cfgPath)
+	if n := len(jobs); n != 4 {
+		t.Fatalf("jobs=%d want 4", n)
+	}
+	bandwidth, gst := jobs[1].cmd, jobs[2].cmd
+	if bandwidth[0] != "rocm-bandwidth-test" {
+		t.Fatalf("jobs[1]=%q want rocm-bandwidth-test", bandwidth[0])
+	}
+	if gst[0] != "rvs" {
+		t.Fatalf("jobs[2]=%q want rvs", gst[0])
+	}
+	if gst[2] != cfgPath {
+		t.Fatalf("jobs[2] cfg=%q want /tmp/test-amd-gst.conf", gst[2])
+	}
+}
+
+func TestNvidiaSATJobsUseBuiltinBurnDefaults(t *testing.T) {
 	jobs := nvidiaSATJobs()
 	got := jobs[4].cmd
-	want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
+	want := []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}
 	if len(got) != len(want) {
 		t.Fatalf("cmd len=%d want %d", len(got), len(want))
 	}
@@ -55,6 +94,93 @@ func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
 	}
 }

+// TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices checks that the John loader
+// is selected and that excluded GPUs are removed from the detected set (0,1,2
+// minus 1 yields --devices 0,2).
+//
+// The test replaces the package-level satExecCommand hook, so it must not run in
+// parallel: concurrent tests would race on the hook and could restore each
+// other's stub instead of the real implementation.
+func TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices(t *testing.T) {
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			// Pretend three GPUs with indices 0, 1 and 2 are present.
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec:       600,
+		Loader:            NvidiaStressLoaderJohn,
+		ExcludeGPUIndices: []int{1},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-john-gpu-stress", "--seconds", "600", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
+// TestBuildNvidiaStressJobUsesNCCLLoader checks that the NCCL loader is selected
+// and that an explicit, unsorted GPU selection (2,0) is emitted sorted as
+// --devices 0,2.
+//
+// The test replaces the package-level satExecCommand hook, so it must not run in
+// parallel: concurrent tests would race on the hook and could restore each
+// other's stub instead of the real implementation.
+func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			// Pretend three GPUs with indices 0, 1 and 2 are present.
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec: 120,
+		Loader:      NvidiaStressLoaderNCCL,
+		GPUIndices:  []int{2, 0},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-nccl-gpu-stress", "--seconds", "120", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
+// TestNvidiaStressArchivePrefixByLoader pins the archive prefix chosen for each
+// loader, including the empty loader which maps to the builtin prefix.
+func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
+	t.Parallel()
+
+	type prefixCase struct {
+		loader string
+		want   string
+	}
+	cases := []prefixCase{
+		{NvidiaStressLoaderBuiltin, "gpu-nvidia-burn"},
+		{NvidiaStressLoaderJohn, "gpu-nvidia-john"},
+		{NvidiaStressLoaderNCCL, "gpu-nvidia-nccl"},
+		{"", "gpu-nvidia-burn"},
+	}
+	for i := range cases {
+		tc := cases[i]
+		got := nvidiaStressArchivePrefix(tc.loader)
+		if got == tc.want {
+			continue
+		}
+		t.Fatalf("loader=%q prefix=%q want %q", tc.loader, got, tc.want)
+	}
+}
+
 func TestEnvIntFallback(t *testing.T) {
 	os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
 	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -80,8 +206,8 @@ func TestClassifySATResult(t *testing.T) {
 	}{
 		{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
 		{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
-		{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
-		{name: "cuda not ready", job: "bee-gpu-stress", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
+		{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
 	}

 	for _, tt := range tests {
@@ -130,6 +256,44 @@ func TestResolveROCmSMICommandFromPATH(t *testing.T) {
 	}
 }

+// TestResolveSATCommandUsesLookPathForGenericTools checks that a tool without a
+// dedicated resolver has its first argv element replaced by the path reported by
+// satLookPath while the remaining arguments are kept.
+func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
+	savedLookPath := satLookPath
+	t.Cleanup(func() { satLookPath = savedLookPath })
+	satLookPath = func(file string) (string, error) {
+		if file != "stress-ng" {
+			return "", exec.ErrNotFound
+		}
+		return "/usr/bin/stress-ng", nil
+	}
+
+	resolved, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	if err != nil {
+		t.Fatalf("resolveSATCommand error: %v", err)
+	}
+	if n := len(resolved); n != 3 {
+		t.Fatalf("cmd len=%d want 3 (%v)", n, resolved)
+	}
+	if first := resolved[0]; first != "/usr/bin/stress-ng" {
+		t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", first)
+	}
+}
+
+// TestResolveSATCommandFailsForMissingGenericTool checks that a tool satLookPath
+// cannot find yields an error naming the tool.
+func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
+	savedLookPath := satLookPath
+	t.Cleanup(func() { satLookPath = savedLookPath })
+	satLookPath = func(string) (string, error) {
+		return "", exec.ErrNotFound
+	}
+
+	_, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	switch {
+	case err == nil:
+		t.Fatal("expected error")
+	case !strings.Contains(err.Error(), "stress-ng not found in PATH"):
+		t.Fatalf("error=%q", err)
+	}
+}
+
 func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
 	tmp := t.TempDir()
 	execPath := filepath.Join(tmp, "opt", "rocm", "bin", "rocm-smi")
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -51,6 +51,20 @@ type ToolStatus struct {
 	OK   bool
 }

+// Loader identifiers accepted in NvidiaStressOptions.Loader.
+const (
+	NvidiaStressLoaderBuiltin = "builtin" // archives are prefixed "gpu-nvidia-burn"; an empty loader gets the same prefix
+	NvidiaStressLoaderJohn    = "john"    // runs bee-john-gpu-stress; archives are prefixed "gpu-nvidia-john"
+	NvidiaStressLoaderNCCL    = "nccl"    // runs bee-nccl-gpu-stress; archives are prefixed "gpu-nvidia-nccl"
+)
+
+// NvidiaStressOptions configures an NVIDIA GPU stress run (consumed by buildNvidiaStressJob).
+type NvidiaStressOptions struct {
+	DurationSec       int    // run length in seconds; passed to the loader as --seconds
+	SizeMB            int    // presumably the buffer size for the builtin loader's --size-mb — TODO confirm
+	Loader            string // one of the NvidiaStressLoader* constants
+	GPUIndices        []int  // explicit GPU selection; emitted in sorted order via --devices
+	ExcludeGPUIndices []int  // GPU indices removed from the detected set before --devices is built
+}
+
 func New() *System {
 	return &System{}
 }
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -4,19 +4,25 @@ import (
 	"bufio"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
+	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"strings"
 	"sync/atomic"
+	"syscall"
 	"time"

 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 )

+// ansiEscapeRE matches terminal escape sequences that should be stripped from
+// subprocess output before it is shown in the browser:
+//   - CSI sequences in the ECMA-48 shape: ESC [ parameter bytes (0x30-0x3F),
+//     intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E). This also
+//     covers private-mode sequences such as ESC[?25l (hide cursor) that progress
+//     bars emit.
+//   - character-set designations: ESC ( X and ESC ) X
+//   - the two-byte sequences ESC A, ESC B, ESC C and ESC D
+var ansiEscapeRE = regexp.MustCompile(`\x1b\[[0-?]*[ -/]*[@-~]|\x1b[()][A-Z0-9]|\x1b[DABC]`)
+
 // ── Job ID counter ────────────────────────────────────────────────────────────

 var jobCounter atomic.Uint64
@@ -91,11 +97,25 @@ func runCmdJob(j *jobState, cmd *exec.Cmd) {
 		j.finish(err.Error())
 		return
 	}
+	// Lower the CPU scheduling priority of stress/audit subprocesses to nice+10
+	// so the X server and kernel interrupt handling remain responsive under load
+	// (prevents KVM/IPMI graphical console from freezing during GPU stress tests).
+	if cmd.Process != nil {
+		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, 10)
+	}

 	go func() {
 		scanner := bufio.NewScanner(pr)
 		for scanner.Scan() {
-			j.append(scanner.Text())
+			// Split on \r to handle progress-bar style output (e.g. \r overwrites)
+			// and strip ANSI escape codes so logs are readable in the browser.
+			parts := strings.Split(scanner.Text(), "\r")
+			for _, part := range parts {
+				line := ansiEscapeRE.ReplaceAllString(part, "")
+				if line != "" {
+					j.append(line)
+				}
+			}
 		}
 	}()

@@ -153,20 +173,22 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		}

 		var body struct {
-			Duration    int    `json:"duration"`
-			DiagLevel   int    `json:"diag_level"`
-			GPUIndices  []int  `json:"gpu_indices"`
-			Profile     string `json:"profile"`
-			DisplayName string `json:"display_name"`
+			Duration          int    `json:"duration"`
+			DiagLevel         int    `json:"diag_level"`
+			GPUIndices        []int  `json:"gpu_indices"`
+			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+			Loader            string `json:"loader"`
+			Profile           string `json:"profile"`
+			DisplayName       string `json:"display_name"`
 		}
-		if r.ContentLength > 0 {
-			_ = json.NewDecoder(r.Body).Decode(&body)
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
 		}

-		name := taskNames[target]
-		if name == "" {
-			name = target
-		}
+		name := taskDisplayName(target, body.Profile, body.Loader)
 		t := &Task{
 			ID:        newJobID("sat-" + target),
 			Name:      name,
@@ -174,11 +196,13 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			Status:    TaskPending,
 			CreatedAt: time.Now(),
 			params: taskParams{
-				Duration:    body.Duration,
-				DiagLevel:   body.DiagLevel,
-				GPUIndices:  body.GPUIndices,
-				BurnProfile: body.Profile,
-				DisplayName: body.DisplayName,
+				Duration:          body.Duration,
+				DiagLevel:         body.DiagLevel,
+				GPUIndices:        body.GPUIndices,
+				ExcludeGPUIndices: body.ExcludeGPUIndices,
+				Loader:            body.Loader,
+				BurnProfile:       body.Profile,
+				DisplayName:       body.DisplayName,
 			},
 		}
 		if strings.TrimSpace(body.DisplayName) != "" {
@@ -405,6 +429,58 @@ func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request)
 	})
 }

+// handleAPIExportUSBTargets responds with the removable targets reported by the
+// app as JSON. A nil list is replaced by an empty one before it is written.
+// It answers 503 when no app is configured and 500 when listing fails.
+func (h *handler) handleAPIExportUSBTargets(w http.ResponseWriter, _ *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	targets, err := h.opts.App.ListRemovableTargets()
+	switch {
+	case err != nil:
+		writeError(w, http.StatusInternalServerError, err.Error())
+	case targets == nil:
+		writeJSON(w, []platform.RemovableTarget{})
+	default:
+		writeJSON(w, targets)
+	}
+}
+
+// handleAPIExportUSBAudit exports the latest audit result to the removable target
+// given in the JSON request body. It answers 503 when no app is configured, 400
+// when the body cannot be decoded or names no device, and 500 when the export
+// fails; on success the export's message is returned with status "ok".
+func (h *handler) handleAPIExportUSBAudit(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+
+	var target platform.RemovableTarget
+	decodeErr := json.NewDecoder(r.Body).Decode(&target)
+	if decodeErr != nil || target.Device == "" {
+		writeError(w, http.StatusBadRequest, "device is required")
+		return
+	}
+
+	result, exportErr := h.opts.App.ExportLatestAuditResult(target)
+	if exportErr != nil {
+		writeError(w, http.StatusInternalServerError, exportErr.Error())
+		return
+	}
+	payload := map[string]string{"status": "ok", "message": result.Body}
+	writeJSON(w, payload)
+}
+
+// handleAPIExportUSBBundle exports the support bundle to the removable target
+// given in the JSON request body. It answers 503 when no app is configured, 400
+// when the body cannot be decoded or names no device, and 500 when the export
+// fails; on success the export's message is returned with status "ok".
+func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+
+	var target platform.RemovableTarget
+	decodeErr := json.NewDecoder(r.Body).Decode(&target)
+	if decodeErr != nil || target.Device == "" {
+		writeError(w, http.StatusBadRequest, "device is required")
+		return
+	}
+
+	result, exportErr := h.opts.App.ExportSupportBundleResult(target)
+	if exportErr != nil {
+		writeError(w, http.StatusInternalServerError, exportErr.Error())
+		return
+	}
+	payload := map[string]string{"status": "ok", "message": result.Body}
+	writeJSON(w, payload)
+}
+
 // ── GPU presence ──────────────────────────────────────────────────────────────

 func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
@@ -588,59 +664,37 @@ func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request)

 // ── Metrics SSE ───────────────────────────────────────────────────────────────

+// handleAPIMetricsLatest writes the most recent metric sample as JSON, or an
+// empty JSON object when no sample is available yet. A marshalling failure is
+// answered with 500.
+func (h *handler) handleAPIMetricsLatest(w http.ResponseWriter, r *http.Request) {
+	payload := []byte("{}")
+	if sample, ok := h.latestMetric(); ok {
+		encoded, err := json.Marshal(sample)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		payload = encoded
+	}
+	w.Header().Set("Content-Type", "application/json")
+	_, _ = w.Write(payload)
+}
+
 func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
 	if !sseStart(w) {
 		return
 	}
-	ticker := time.NewTicker(time.Second)
+	ticker := time.NewTicker(1 * time.Second)
 	defer ticker.Stop()
 	for {
 		select {
 		case <-r.Context().Done():
 			return
 		case <-ticker.C:
-			sample := platform.SampleLiveMetrics()
-
-			// Feed server ring buffers
-			for _, t := range sample.Temps {
-				switch t.Group {
-				case "cpu":
-					h.pushNamedMetricRing(&h.cpuTempRings, t.Name, t.Celsius)
-				case "ambient":
-					h.pushNamedMetricRing(&h.ambientTempRings, t.Name, t.Celsius)
-				}
+			sample, ok := h.latestMetric()
+			if !ok {
+				continue
 			}
-			h.ringPower.push(sample.PowerW)
-			h.ringCPULoad.push(sample.CPULoadPct)
-			h.ringMemLoad.push(sample.MemLoadPct)
-
-			// Feed fan ring buffers (grow on first sight)
-			h.ringsMu.Lock()
-			for i, fan := range sample.Fans {
-				for len(h.ringFans) <= i {
-					h.ringFans = append(h.ringFans, newMetricsRing(120))
-					h.fanNames = append(h.fanNames, fan.Name)
-				}
-				h.ringFans[i].push(float64(fan.RPM))
-			}
-			// Feed per-GPU ring buffers (grow on first sight)
-			for _, gpu := range sample.GPUs {
-				idx := gpu.GPUIndex
-				for len(h.gpuRings) <= idx {
-					h.gpuRings = append(h.gpuRings, &gpuRings{
-						Temp:    newMetricsRing(120),
-						Util:    newMetricsRing(120),
-						MemUtil: newMetricsRing(120),
-						Power:   newMetricsRing(120),
-					})
-				}
-				h.gpuRings[idx].Temp.push(gpu.TempC)
-				h.gpuRings[idx].Util.push(gpu.UsagePct)
-				h.gpuRings[idx].MemUtil.push(gpu.MemUsagePct)
-				h.gpuRings[idx].Power.push(gpu.PowerW)
-			}
-			h.ringsMu.Unlock()
-
 			b, err := json.Marshal(sample)
 			if err != nil {
 				continue
@@ -652,6 +706,46 @@ func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request)
 	}
 }

+// feedRings records a single live sample in the handler's in-memory history:
+// the named CPU/ambient temperature rings, the aggregate power/CPU/memory
+// rings, and the per-fan and per-GPU rings. Fan and GPU rings are created the
+// first time a fan position or GPU index is seen.
+func (h *handler) feedRings(sample platform.LiveMetricSample) {
+	for _, reading := range sample.Temps {
+		if reading.Group == "cpu" {
+			h.pushNamedMetricRing(&h.cpuTempRings, reading.Name, reading.Celsius)
+		} else if reading.Group == "ambient" {
+			h.pushNamedMetricRing(&h.ambientTempRings, reading.Name, reading.Celsius)
+		}
+	}
+
+	h.ringPower.push(sample.PowerW)
+	h.ringCPULoad.push(sample.CPULoadPct)
+	h.ringMemLoad.push(sample.MemLoadPct)
+
+	// The fan and GPU ring slices may grow below, so they are updated under ringsMu.
+	h.ringsMu.Lock()
+	for pos := range sample.Fans {
+		fan := sample.Fans[pos]
+		// Grow on first sight; the name recorded for a slot is the first fan seen there.
+		for pos >= len(h.ringFans) {
+			h.ringFans = append(h.ringFans, newMetricsRing(120))
+			h.fanNames = append(h.fanNames, fan.Name)
+		}
+		h.ringFans[pos].push(float64(fan.RPM))
+	}
+	for _, gpu := range sample.GPUs {
+		slot := gpu.GPUIndex
+		// Rings are addressed by GPU index, so fill any gap up to that index.
+		for slot >= len(h.gpuRings) {
+			fresh := &gpuRings{
+				Temp:    newMetricsRing(120),
+				Util:    newMetricsRing(120),
+				MemUtil: newMetricsRing(120),
+				Power:   newMetricsRing(120),
+			}
+			h.gpuRings = append(h.gpuRings, fresh)
+		}
+		rings := h.gpuRings[slot]
+		rings.Temp.push(gpu.TempC)
+		rings.Util.push(gpu.UsagePct)
+		rings.MemUtil.push(gpu.MemUsagePct)
+		rings.Power.push(gpu.PowerW)
+	}
+	h.ringsMu.Unlock()
+}
+
 func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
 	if name == "" {
 		return
@@ -788,3 +882,108 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	}
 	return nil
 }
+
+// ── Display / Screen Resolution ───────────────────────────────────────────────
+
+// displayMode is one resolution listed for an xrandr output, in the JSON shape
+// produced from parseXrandrOutput's result.
+type displayMode struct {
+	Output  string `json:"output"`  // name of the output this mode belongs to
+	Mode    string `json:"mode"`    // resolution text captured from the xrandr mode line (WxH)
+	Current bool   `json:"current"` // true when the mode line contained "*"
+}
+
+// displayInfo groups the modes parsed for a single connected xrandr output.
+type displayInfo struct {
+	Output  string        `json:"output"`  // output name captured from the "<name> connected" line
+	Modes   []displayMode `json:"modes"`   // modes in the order xrandr listed them; nil if none matched
+	Current string        `json:"current"` // WxH of the mode flagged "*", or "" if none was flagged
+}
+
+// xrandrOutputRE matches the header line of a connected output and captures
+// the output name (the leading run of non-space characters).
+var xrandrOutputRE = regexp.MustCompile(`^(\S+)\s+connected`)
+
+// xrandrModeRE matches a mode line: exactly three leading whitespace
+// characters followed by WxH and a whitespace character; it captures WxH.
+var xrandrModeRE = regexp.MustCompile(`^\s{3}(\d+x\d+)\s`)
+
+// xrandrCurrentRE matches a literal "*" anywhere in a line; parseXrandrOutput
+// treats a mode line containing it as the output's current mode.
+var xrandrCurrentRE = regexp.MustCompile(`\*`)
+
+// parseXrandrOutput extracts connected outputs and their modes from the text
+// printed by `xrandr`. A line matching xrandrOutputRE starts a new output;
+// the following lines matching xrandrModeRE become its modes, and a mode line
+// containing "*" is also recorded as the output's current mode. Lines seen
+// before the first connected output are ignored. The result is nil when no
+// connected output is found.
+func parseXrandrOutput(out string) []displayInfo {
+	var outputs []displayInfo
+	for _, ln := range strings.Split(out, "\n") {
+		if hdr := xrandrOutputRE.FindStringSubmatch(ln); hdr != nil {
+			outputs = append(outputs, displayInfo{Output: hdr[1]})
+			continue
+		}
+		if len(outputs) == 0 {
+			continue
+		}
+		res := xrandrModeRE.FindStringSubmatch(ln)
+		if res == nil {
+			continue
+		}
+		// Take the pointer afresh on every line: the append above may move the slice.
+		last := &outputs[len(outputs)-1]
+		active := xrandrCurrentRE.MatchString(ln)
+		last.Modes = append(last.Modes, displayMode{Output: last.Output, Mode: res[1], Current: active})
+		if active {
+			last.Current = res[1]
+		}
+	}
+	return outputs
+}
+
+func xrandrCommand(args ...string) *exec.Cmd {
+	cmd := exec.Command("xrandr", args...)
+	env := append([]string{}, os.Environ()...)
+	hasDisplay := false
+	hasXAuthority := false
+	for _, kv := range env {
+		if strings.HasPrefix(kv, "DISPLAY=") && strings.TrimPrefix(kv, "DISPLAY=") != "" {
+			hasDisplay = true
+		}
+		if strings.HasPrefix(kv, "XAUTHORITY=") && strings.TrimPrefix(kv, "XAUTHORITY=") != "" {
+			hasXAuthority = true
+		}
+	}
+	if !hasDisplay {
+		env = append(env, "DISPLAY=:0")
+	}
+	if !hasXAuthority {
+		env = append(env, "XAUTHORITY=/home/bee/.Xauthority")
+	}
+	cmd.Env = env
+	return cmd
+}
+
+// handleAPIDisplayResolutions runs `xrandr` with no arguments and passes the
+// parsed outputs and modes to writeJSON. If the command fails, writeError is
+// called with http.StatusInternalServerError and the exec error text.
+func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
+	raw, err := xrandrCommand().Output()
+	if err == nil {
+		writeJSON(w, parseXrandrOutput(string(raw)))
+		return
+	}
+	writeError(w, http.StatusInternalServerError, "xrandr: "+err.Error())
+}
+
+// displaySetModeRE and displaySetOutputRE whitelist the two user-supplied
+// values handed to xrandr: a WxH mode, and an output name made only of
+// letters, digits, "_" and "-". They are compiled once at package scope
+// rather than on every request.
+var (
+	displaySetModeRE   = regexp.MustCompile(`^\d+x\d+$`)
+	displaySetOutputRE = regexp.MustCompile(`^[A-Za-z0-9_\-]+$`)
+)
+
+// handleAPIDisplaySet switches one output to the requested mode by running
+// `xrandr --output <output> --mode <mode>`.
+//
+// The JSON request body must carry non-empty "output" and "mode" fields; both
+// are validated against the whitelists above before being passed to xrandr.
+// Invalid input is rejected with http.StatusBadRequest, a failing xrandr run
+// with http.StatusInternalServerError. On success the accepted output and
+// mode are echoed back with status "ok".
+func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		Output string `json:"output"`
+		Mode   string `json:"mode"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Output == "" || req.Mode == "" {
+		writeError(w, http.StatusBadRequest, "output and mode are required")
+		return
+	}
+	// Validate mode looks like WxH to prevent injection
+	if !displaySetModeRE.MatchString(req.Mode) {
+		writeError(w, http.StatusBadRequest, "invalid mode format")
+		return
+	}
+	// Validate output name (no special chars)
+	if !displaySetOutputRE.MatchString(req.Output) {
+		writeError(w, http.StatusBadRequest, "invalid output name")
+		return
+	}
+	out, err := xrandrCommand("--output", req.Output, "--mode", req.Mode).CombinedOutput()
+	if err != nil {
+		msg := strings.TrimSpace(string(out))
+		if msg == "" {
+			// xrandr printed nothing (for example it could not be started),
+			// so fall back to the exec error instead of an empty message.
+			msg = err.Error()
+		}
+		writeError(w, http.StatusInternalServerError, "xrandr: "+msg)
+		return
+	}
+	writeJSON(w, map[string]string{"status": "ok", "output": req.Output, "mode": req.Mode})
+}
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -0,0 +1,64 @@
+package webui
+
+import (
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"bee/audit/internal/app"
+)
+
+func TestXrandrCommandAddsDefaultX11Env(t *testing.T) {
+	t.Setenv("DISPLAY", "")
+	t.Setenv("XAUTHORITY", "")
+
+	cmd := xrandrCommand("--query")
+
+	var hasDisplay bool
+	var hasXAuthority bool
+	for _, kv := range cmd.Env {
+		if kv == "DISPLAY=:0" {
+			hasDisplay = true
+		}
+		if kv == "XAUTHORITY=/home/bee/.Xauthority" {
+			hasXAuthority = true
+		}
+	}
+	if !hasDisplay {
+		t.Fatalf("DISPLAY not injected: %v", cmd.Env)
+	}
+	if !hasXAuthority {
+		t.Fatalf("XAUTHORITY not injected: %v", cmd.Env)
+	}
+}
+
+// TestHandleAPISATRunDecodesBodyWithoutContentLength posts a JSON body whose
+// ContentLength is -1 (unknown length) to the CPU SAT handler and expects the
+// body to be decoded anyway: exactly one task is enqueued and it carries the
+// "smoke" burn profile from the request.
+func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
+	// Start from an empty global queue and restore the previous tasks afterwards
+	// so other tests in the package are not affected.
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/sat/cpu/run", strings.NewReader(`{"profile":"smoke"}`))
+	// Simulate a request with no Content-Length (e.g. chunked transfer).
+	req.ContentLength = -1
+	rec := httptest.NewRecorder()
+
+	h.handleAPISATRun("cpu").ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	// Inspect the queue under its lock; the deferred unlock also covers t.Fatalf.
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
+	}
+	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
+		t.Fatalf("burn profile=%q want smoke", got)
+	}
+}
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -0,0 +1,317 @@
+package webui
+
+import (
+	"database/sql"
+	"encoding/csv"
+	"io"
+	"strconv"
+	"time"
+
+	"bee/audit/internal/platform"
+	_ "modernc.org/sqlite"
+)
+
+// metricsDBPath is the filesystem path of the SQLite metrics database file.
+// NOTE(review): not referenced in this part of the file — presumably the
+// value passed to openMetricsDB; confirm at the call site.
+const metricsDBPath = "/appdata/bee/metrics.db"
+
+// MetricsDB persists live metric samples to SQLite. Values are created with
+// openMetricsDB and released with Close.
+type MetricsDB struct {
+	db *sql.DB // handle opened by openMetricsDB (pool capped at one connection)
+}
+
+// openMetricsDB opens (or creates) the metrics database at the given path and
+// makes sure the schema exists. The pool is capped at one connection, so all
+// statements issued through the returned MetricsDB share that connection.
+//
+// WAL journaling and a 5000 ms busy timeout are requested through `_pragma=`
+// DSN parameters, which is the form the modernc.org/sqlite driver imported by
+// this file documents. The `_journal` / `_busy_timeout` keys belong to
+// mattn/go-sqlite3 and are not among this driver's documented parameters.
+//
+// On a schema error the handle is closed again and the error is returned.
+func openMetricsDB(path string) (*MetricsDB, error) {
+	dsn := path + "?_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)"
+	db, err := sql.Open("sqlite", dsn)
+	if err != nil {
+		return nil, err
+	}
+	db.SetMaxOpenConns(1)
+	if err := initMetricsSchema(db); err != nil {
+		_ = db.Close() // the schema error is the one worth reporting
+		return nil, err
+	}
+	return &MetricsDB{db: db}, nil
+}
+
+// initMetricsSchema creates the four metrics tables if they do not exist yet:
+// sys_metrics (one row per timestamp), gpu_metrics (keyed by ts and
+// gpu_index), fan_metrics and temp_metrics (both keyed by ts and name).
+// ts is the integer value written by Write (Unix seconds).
+//
+// Note that temp_metrics is keyed by (ts, name) only: two readings that share
+// a name but differ in grp within the same second occupy the same key, and
+// Write's INSERT OR REPLACE keeps only the last one.
+func initMetricsSchema(db *sql.DB) error {
+	_, err := db.Exec(`
+CREATE TABLE IF NOT EXISTS sys_metrics (
+  ts           INTEGER NOT NULL,
+  cpu_load_pct REAL,
+  mem_load_pct REAL,
+  power_w      REAL,
+  PRIMARY KEY (ts)
+);
+CREATE TABLE IF NOT EXISTS gpu_metrics (
+  ts            INTEGER NOT NULL,
+  gpu_index     INTEGER NOT NULL,
+  temp_c        REAL,
+  usage_pct     REAL,
+  mem_usage_pct REAL,
+  power_w       REAL,
+  PRIMARY KEY (ts, gpu_index)
+);
+CREATE TABLE IF NOT EXISTS fan_metrics (
+  ts   INTEGER NOT NULL,
+  name TEXT NOT NULL,
+  rpm  REAL,
+  PRIMARY KEY (ts, name)
+);
+CREATE TABLE IF NOT EXISTS temp_metrics (
+  ts      INTEGER NOT NULL,
+  name    TEXT NOT NULL,
+  grp     TEXT NOT NULL,
+  celsius REAL,
+  PRIMARY KEY (ts, name)
+);
+`)
+	return err
+}
+
+// Write stores one sample in a single transaction: one sys_metrics row plus
+// one row per GPU, fan and temperature reading, all stamped with the sample's
+// Unix-second timestamp. Rows use INSERT OR REPLACE, so writing a second
+// sample for the same second overwrites the rows with matching keys. The
+// first failing statement aborts the transaction and its error is returned.
+func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
+	stamp := s.Timestamp.Unix()
+
+	tx, err := m.db.Begin()
+	if err != nil {
+		return err
+	}
+	// Undo partial work on any early return; the result is ignored because
+	// Rollback after a successful Commit merely reports the tx as done.
+	defer func() { _ = tx.Rollback() }()
+
+	if _, err = tx.Exec(
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
+		stamp, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+	); err != nil {
+		return err
+	}
+
+	for i := range s.GPUs {
+		gpu := s.GPUs[i]
+		if _, err = tx.Exec(
+			`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`,
+			stamp, gpu.GPUIndex, gpu.TempC, gpu.UsagePct, gpu.MemUsagePct, gpu.PowerW,
+		); err != nil {
+			return err
+		}
+	}
+
+	for i := range s.Fans {
+		fan := s.Fans[i]
+		if _, err = tx.Exec(
+			`INSERT OR REPLACE INTO fan_metrics(ts,name,rpm) VALUES(?,?,?)`,
+			stamp, fan.Name, fan.RPM,
+		); err != nil {
+			return err
+		}
+	}
+
+	for i := range s.Temps {
+		reading := s.Temps[i]
+		if _, err = tx.Exec(
+			`INSERT OR REPLACE INTO temp_metrics(ts,name,grp,celsius) VALUES(?,?,?,?)`,
+			stamp, reading.Name, reading.Group, reading.Celsius,
+		); err != nil {
+			return err
+		}
+	}
+
+	return tx.Commit()
+}
+
+// LoadRecent returns up to n samples in chronological order (oldest first).
+func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n)
+}
+
+// LoadAll returns all persisted samples in chronological order (oldest first).
+//
+// The rows are requested newest-first, the same order LoadRecent uses,
+// because loadSamples reverses what it reads before returning. Asking for
+// ascending order would yield a newest-first result and an inverted
+// timestamp range for the GPU/fan/temperature lookups. No bind arguments are
+// passed: the statement has no placeholders.
+func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC`)
+}
+
+// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
+//
+// query must select (ts, cpu_load_pct, mem_load_pct, power_w) from
+// sys_metrics ordered by ts, in either direction; the returned samples are
+// always oldest-first. GPU, fan and temperature rows between the oldest and
+// newest selected timestamps are then loaded and attached to the sample with
+// the same ts. Within a sample, GPUs are ordered by gpu_index and fans and
+// temperatures by name, so repeated loads produce identical output.
+//
+// Rows that fail to scan (for example a NULL in a REAL column) are skipped,
+// and a failing detail query only leaves that part of the samples empty. An
+// iteration error on the sys_metrics query is returned. It returns nil, nil
+// when the query yields no rows.
+func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
+	rows, err := m.db.Query(query, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	type sysRow struct {
+		ts            int64
+		cpu, mem, pwr float64
+	}
+	var sysRows []sysRow
+	for rows.Next() {
+		var r sysRow
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+			continue // best effort: skip rows that cannot be decoded
+		}
+		sysRows = append(sysRows, r)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	if len(sysRows) == 0 {
+		return nil, nil
+	}
+
+	// Normalize to chronological order; callers may have queried newest-first.
+	if last := len(sysRows) - 1; sysRows[0].ts > sysRows[last].ts {
+		for i, j := 0, last; i < j; i, j = i+1, j-1 {
+			sysRows[i], sysRows[j] = sysRows[j], sysRows[i]
+		}
+	}
+
+	// Timestamp range covered by the selected rows, used for the detail queries.
+	minTS := sysRows[0].ts
+	maxTS := sysRows[len(sysRows)-1].ts
+
+	// Each detail result set is closed before the next query is issued:
+	// openMetricsDB caps the pool at a single connection.
+
+	// GPU rows, grouped by timestamp in gpu_index order.
+	gpusByTS := map[int64][]platform.GPUMetricRow{}
+	gRows, err := m.db.Query(
+		`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
+		minTS, maxTS,
+	)
+	if err == nil {
+		for gRows.Next() {
+			var ts int64
+			var g platform.GPUMetricRow
+			if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil {
+				gpusByTS[ts] = append(gpusByTS[ts], g)
+			}
+		}
+		_ = gRows.Close()
+	}
+
+	// Fan rows, grouped by timestamp in name order.
+	fansByTS := map[int64][]platform.FanReading{}
+	fRows, err := m.db.Query(
+		`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=? ORDER BY ts,name`, minTS, maxTS,
+	)
+	if err == nil {
+		for fRows.Next() {
+			var ts int64
+			var name string
+			var rpm float64
+			if err := fRows.Scan(&ts, &name, &rpm); err == nil {
+				fansByTS[ts] = append(fansByTS[ts], platform.FanReading{Name: name, RPM: rpm})
+			}
+		}
+		_ = fRows.Close()
+	}
+
+	// Temperature rows, grouped by timestamp in name order.
+	tempsByTS := map[int64][]platform.TempReading{}
+	tRows, err := m.db.Query(
+		`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=? ORDER BY ts,name`, minTS, maxTS,
+	)
+	if err == nil {
+		for tRows.Next() {
+			var ts int64
+			var t platform.TempReading
+			if err := tRows.Scan(&ts, &t.Name, &t.Group, &t.Celsius); err == nil {
+				tempsByTS[ts] = append(tempsByTS[ts], t)
+			}
+		}
+		_ = tRows.Close()
+	}
+
+	// Stitch the per-timestamp detail rows onto their sys_metrics row. A
+	// timestamp without detail rows keeps nil slices.
+	samples := make([]platform.LiveMetricSample, len(sysRows))
+	for i, r := range sysRows {
+		samples[i] = platform.LiveMetricSample{
+			Timestamp:  time.Unix(r.ts, 0).UTC(),
+			CPULoadPct: r.cpu,
+			MemLoadPct: r.mem,
+			PowerW:     r.pwr,
+			GPUs:       gpusByTS[r.ts],
+			Fans:       fansByTS[r.ts],
+			Temps:      tempsByTS[r.ts],
+		}
+	}
+	return samples, nil
+}
+
+// ExportCSV writes all sys+gpu data as CSV to w.
+//
+// Each output row is one (timestamp, GPU) pair from sys_metrics LEFT JOINed
+// with gpu_metrics; timestamps without GPU rows get one row with the five GPU
+// columns empty. Fan and temperature tables are not exported. Rows that fail
+// to scan are skipped. An error is returned if the query fails, if iterating
+// the result set ends with an error (so a truncated export is not reported
+// as success), or if writing to w fails.
+func (m *MetricsDB) ExportCSV(w io.Writer) error {
+	rows, err := m.db.Query(`
+		SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
+		       g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w
+		FROM sys_metrics s
+		LEFT JOIN gpu_metrics g ON g.ts = s.ts
+		ORDER BY s.ts, g.gpu_index
+	`)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	cw := csv.NewWriter(w)
+	header := []string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"}
+	if err := cw.Write(header); err != nil {
+		return err
+	}
+	for rows.Next() {
+		var ts int64
+		var cpu, mem, pwr float64
+		var gpuIdx sql.NullInt64
+		var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64
+		if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil {
+			continue // best effort: skip rows that cannot be decoded
+		}
+		row := []string{
+			strconv.FormatInt(ts, 10),
+			strconv.FormatFloat(cpu, 'f', 2, 64),
+			strconv.FormatFloat(mem, 'f', 2, 64),
+			strconv.FormatFloat(pwr, 'f', 1, 64),
+		}
+		if gpuIdx.Valid {
+			row = append(row,
+				strconv.FormatInt(gpuIdx.Int64, 10),
+				strconv.FormatFloat(gpuTemp.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
+			)
+		} else {
+			// No GPU row joined for this timestamp: keep the column count stable.
+			row = append(row, "", "", "", "", "")
+		}
+		if err := cw.Write(row); err != nil {
+			return err
+		}
+	}
+	// Flush what was produced even if iteration failed, then report the
+	// iteration error in preference to a write error.
+	cw.Flush()
+	if err := rows.Err(); err != nil {
+		return err
+	}
+	return cw.Error()
+}
+
+// Close releases the underlying database handle. Any error reported by the
+// close is deliberately discarded.
+func (m *MetricsDB) Close() {
+	_ = m.db.Close()
+}
+
+func nullFloat(v float64) sql.NullFloat64 {
+	return sql.NullFloat64{Float64: v, Valid: true}
+}
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -61,7 +61,8 @@ tbody tr:hover td{background:rgba(0,0,0,.03)}
 .badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
 .badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
 /* Output terminal */
-.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all}
+.terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
+.terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
 /* Forms */
 .form-row{margin-bottom:14px}
 .form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
@@ -187,6 +188,16 @@ func renderPage(page string, opts HandlerOptions) string {
 		body +
 		`</div></div>` +
 		renderAuditModal() +
+		`<script>
+// Add copy button to every .terminal on the page
+document.querySelectorAll('.terminal').forEach(function(t){
+  var w=document.createElement('div');w.className='terminal-wrap';
+  t.parentNode.insertBefore(w,t);w.appendChild(t);
+  var btn=document.createElement('button');btn.className='terminal-copy';btn.textContent='Copy';
+  btn.onclick=function(){navigator.clipboard.writeText(t.textContent).then(function(){btn.textContent='Copied!';setTimeout(function(){btn.textContent='Copy';},1500);});};
+  w.appendChild(btn);
+});
+</script>` +
 		`</body></html>`
 }

@@ -194,12 +205,83 @@ func renderPage(page string, opts HandlerOptions) string {

+// renderDashboard concatenates the dashboard sections in display order: the
+// audit status banner, the hardware summary card, the health card and the
+// live metrics charts.
 func renderDashboard(opts HandlerOptions) string {
 	var b strings.Builder
+	b.WriteString(renderAuditStatusBanner(opts))
 	b.WriteString(renderHardwareSummaryCard(opts))
 	b.WriteString(renderHealthCard(opts))
 	b.WriteString(renderMetrics())
 	return b.String()
 }

+// renderAuditStatusBanner returns the markup and script for a banner that
+// reports a running hardware audit and reloads the page when it completes.
+//
+// The banner starts hidden. The inline script polls /api/tasks every 3
+// seconds (and once immediately), looks at the first task whose target is
+// "audit", and then:
+//   - shows the banner while that task is pending or running;
+//   - reloads the page once the task is done, but only if this page view saw
+//     it pending/running first, so an already-finished audit never triggers
+//     a reload loop;
+//   - shows the error and stops polling if the task failed;
+//   - hides the banner otherwise.
+//
+// The markup is static, so opts is currently unused; the parameter is kept so
+// callers stay unchanged. The banner's inline styles live in one style
+// attribute: HTML ignores a repeated attribute on the same element, so a
+// second style="..." would silently drop the bottom margin.
+func renderAuditStatusBanner(opts HandlerOptions) string {
+	return `<div id="audit-banner" class="alert alert-warn" style="display:none;margin-bottom:16px">
+  <span id="audit-banner-text">&#9654; Hardware audit is running — page will refresh automatically when complete.</span>
+  <a href="/tasks" style="margin-left:12px;font-size:12px">View in Tasks</a>
+</div>
+<script>
+(function(){
+var _auditPoll = null;
+var _auditSeenRunning = false;
+
+function pollAuditTask() {
+  fetch('/api/tasks').then(function(r){ return r.json(); }).then(function(tasks){
+    if (!tasks) return;
+    var audit = null;
+    for (var i = 0; i < tasks.length; i++) {
+      if (tasks[i].target === 'audit') { audit = tasks[i]; break; }
+    }
+    var banner = document.getElementById('audit-banner');
+    var txt = document.getElementById('audit-banner-text');
+    if (!audit) {
+      if (banner) banner.style.display = 'none';
+      return;
+    }
+    if (audit.status === 'running' || audit.status === 'pending') {
+      _auditSeenRunning = true;
+      if (banner) {
+        banner.style.display = '';
+        var label = audit.status === 'pending' ? 'pending\u2026' : 'running\u2026';
+        if (txt) txt.textContent = '\u25b6 Hardware audit ' + label + ' \u2014 page will refresh when complete.';
+      }
+    } else if (audit.status === 'done' && _auditSeenRunning) {
+      // Audit just finished — reload to show fresh hardware data.
+      clearInterval(_auditPoll);
+      if (banner) {
+        if (txt) txt.textContent = '\u2713 Audit complete \u2014 reloading\u2026';
+        banner.style.background = 'var(--ok-bg,#fcfff5)';
+        banner.style.color = 'var(--ok-fg,#2c662d)';
+      }
+      setTimeout(function(){ window.location.reload(); }, 800);
+    } else if (audit.status === 'failed') {
+      _auditSeenRunning = false;
+      if (banner) {
+        banner.style.display = '';
+        banner.style.background = 'var(--crit-bg,#fff6f6)';
+        banner.style.color = 'var(--crit-fg,#9f3a38)';
+        if (txt) txt.textContent = '\u2717 Audit failed: ' + (audit.error||'unknown error');
+        clearInterval(_auditPoll);
+      }
+    } else {
+      if (banner) banner.style.display = 'none';
+    }
+  }).catch(function(){});
+}
+
+_auditPoll = setInterval(pollAuditTask, 3000);
+pollAuditTask();
+})();
+</script>`
+}
+
+// renderAudit returns the Audit Viewer card: an iframe showing /viewer plus
+// an "Actions" button that calls openAuditModal().
 func renderAudit() string {
 	return `<div class="card"><div class="card-head">Audit Viewer <button class="btn btn-sm btn-secondary" style="margin-left:auto" onclick="openAuditModal()">Actions</button></div><div class="card-body" style="padding:0"><iframe class="viewer-frame" src="/viewer" title="Audit viewer"></iframe></div></div>`
 }
@@ -207,7 +289,7 @@ func renderAudit() string {
 func renderHardwareSummaryCard(opts HandlerOptions) string {
 	data, err := loadSnapshot(opts.AuditPath)
 	if err != nil {
-		return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><span class="badge badge-unknown">No audit data</span></div></div>`
+		return `<div class="card"><div class="card-head">Hardware Summary</div><div class="card-body"><button class="btn btn-primary" onclick="auditModalRun()">&#9654; Run Audit</button></div></div>`
 	}
 	// Parse just enough fields for the summary banner
 	var snap struct {
@@ -392,12 +474,6 @@ func renderMetrics() string {
  </div>
 </div>

-<div class="card" style="margin-bottom:16px">
-  <div class="card-head">Temperature — GPUs</div>
-  <div class="card-body" style="padding:8px">
-    <img id="chart-server-temp-gpu" src="/api/metrics/chart/server-temp-gpu.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
-  </div>
-</div>

 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Temperature — Ambient Sensors</div>
@@ -413,81 +489,53 @@ func renderMetrics() string {
  </div>
 </div>

-<div class="card" style="margin-bottom:16px">
+<div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
  <div class="card-head">Server — Fan RPM</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-fans" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
-    <div id="sys-table" style="margin-top:8px;font-size:12px"></div>
  </div>
 </div>

-<div id="gpu-charts"></div>
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">GPU — Compute Load</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-gpu-all-load" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
+  </div>
+</div>
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">GPU — Memory Load</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-gpu-all-memload" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
+  </div>
+</div>
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">GPU — Power</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-gpu-all-power" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
+  </div>
+</div>
+<div class="card" style="margin-bottom:16px">
+  <div class="card-head">GPU — Temperature</div>
+  <div class="card-body" style="padding:8px">
+    <img id="chart-gpu-all-temp" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
+  </div>
+</div>

 <script>
-let knownGPUs = [];
-
 function refreshCharts() {
  const t = '?t=' + Date.now();
-  ['chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans'].forEach(id => {
+  ['chart-server-load','chart-server-temp-cpu','chart-server-temp-gpu','chart-server-temp-ambient','chart-server-power','chart-server-fans',
+   'chart-gpu-all-load','chart-gpu-all-memload','chart-gpu-all-power','chart-gpu-all-temp'].forEach(id => {
    const el = document.getElementById(id);
    if (el) el.src = el.src.split('?')[0] + t;
  });
-  knownGPUs.forEach(idx => {
-    ['load','power'].forEach(kind => {
-      const el = document.getElementById('chart-gpu-' + idx + '-' + kind);
-      if (el) el.src = el.src.split('?')[0] + t;
-    });
-  });
 }
-setInterval(refreshCharts, 2000);
+setInterval(refreshCharts, 3000);

-const es = new EventSource('/api/metrics/stream');
-es.addEventListener('metrics', e => {
-  const d = JSON.parse(e.data);
-
-  // Add GPU chart cards as GPUs appear
-  (d.gpus||[]).forEach(g => {
-    if (knownGPUs.includes(g.index)) return;
-    knownGPUs.push(g.index);
-    const div = document.createElement('div');
-    div.className = 'card';
-    div.style.marginBottom = '16px';
-    div.innerHTML =
-      '<div class="card-head">GPU ' + g.index + ' — Load</div>' +
-      '<div class="card-body" style="padding:8px">' +
-        '<img id="chart-gpu-' + g.index + '-load" src="/api/metrics/chart/gpu/' + g.index + '-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' load">' +
-      '</div>' +
-      '<div class="card-head">GPU ' + g.index + ' — Power</div>' +
-      '<div class="card-body" style="padding:8px">' +
-        '<img id="chart-gpu-' + g.index + '-power" src="/api/metrics/chart/gpu/' + g.index + '-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU ' + g.index + ' power">' +
-        '<div id="gpu-table-' + g.index + '" style="margin-top:8px;font-size:12px"></div>' +
-      '</div>';
-    document.getElementById('gpu-charts').appendChild(div);
-  });
-
-  // Update numeric tables
-  let sysHTML = '';
-  (d.temps||[]).filter(t => t.group === 'cpu').forEach(t => {
-    sysHTML += '<tr><td>'+t.name+'</td><td>'+t.celsius.toFixed(1)+'°C</td></tr>';
-  });
-  if (d.cpu_load_pct) sysHTML += '<tr><td>CPU Load</td><td>'+d.cpu_load_pct.toFixed(1)+'%</td></tr>';
-  if (d.mem_load_pct) sysHTML += '<tr><td>Mem Load</td><td>'+d.mem_load_pct.toFixed(1)+'%</td></tr>';
-  (d.fans||[]).forEach(f => sysHTML += '<tr><td>'+f.name+'</td><td>'+f.rpm+' RPM</td></tr>');
-  if (d.power_w) sysHTML += '<tr><td>Power</td><td>'+d.power_w.toFixed(0)+' W</td></tr>';
-  const st = document.getElementById('sys-table');
-  if (st) st.innerHTML = sysHTML ? '<table>'+sysHTML+'</table>' : '<p style="color:var(--muted)">No sensor data (ipmitool/sensors required)</p>';
-
-  (d.gpus||[]).forEach(g => {
-    const t = document.getElementById('gpu-table-' + g.index);
-    if (!t) return;
-    t.innerHTML = '<table>' +
-      '<tr><td>Temp</td><td>'+g.temp_c+'°C</td>' +
-      '<td>Load</td><td>'+g.usage_pct+'%</td>' +
-      '<td>Mem</td><td>'+g.mem_usage_pct+'%</td>' +
-      '<td>Power</td><td>'+g.power_w+' W</td></tr></table>';
-  });
-});
-es.onerror = () => {};
+fetch('/api/metrics/latest').then(r => r.json()).then(d => {
+  const fanCard = document.getElementById('card-server-fans');
+  if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
+}).catch(() => {});
 </script>`
 }

@@ -511,7 +559,11 @@ func renderValidate() string {
 		renderSATCard("memory", "Memory", "") +
 		renderSATCard("storage", "Storage", "") +
 		renderSATCard("cpu", "CPU", `<div class="form-row"><label>Duration (seconds)</label><input type="number" id="sat-cpu-dur" value="60" min="10"></div>`) +
-		renderSATCard("amd", "AMD GPU", "") +
+		renderSATCard("amd", "AMD GPU", `<div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
+<button id="sat-btn-amd-mem" class="btn" type="button" onclick="runSAT('amd-mem')">MEM Integrity</button>
+<button id="sat-btn-amd-bandwidth" class="btn" type="button" onclick="runSAT('amd-bandwidth')">MEM Bandwidth</button>
+</div>
+<p style="color:var(--muted);font-size:12px;margin:0">Additional AMD memory diagnostics: RVS MEM for integrity and BABEL + rocm-bandwidth-test for memory/interconnect bandwidth.</p>`) +
 		`</div>
 <div id="sat-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Test Output <span id="sat-title"></span></div>
@@ -522,7 +574,7 @@ let satES = null;
 function runSAT(target) {
  if (satES) { satES.close(); satES = null; }
  const body = {};
-  const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+  const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
  body.display_name = labels[target] || ('Validate ' + target);
  if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
  if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -541,7 +593,7 @@ function runSAT(target) {
 }
 function runAllSAT() {
  const cycles = Math.max(1, parseInt(document.getElementById('sat-cycles').value)||1);
-  const targets = ['nvidia','memory','storage','cpu','amd'];
+  const targets = ['nvidia','memory','storage','cpu','amd','amd-mem','amd-bandwidth'];
  const total = targets.length * cycles;
  let enqueued = 0;
  const status = document.getElementById('sat-all-status');
@@ -553,7 +605,7 @@ function runAllSAT() {
    const btn = document.getElementById('sat-btn-' + target);
    if (btn && btn.disabled) { enqueueNext(cycle, idx+1); return; }
    const body = {};
-    const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU'};
+    const labels = {nvidia:'Validate GPU', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
    body.display_name = labels[target] || ('Validate ' + target);
    if (target === 'nvidia') body.diag_level = parseInt(document.getElementById('sat-nvidia-level').value)||1;
    if (target === 'cpu') body.duration = parseInt(document.getElementById('sat-cpu-dur').value)||60;
@@ -571,6 +623,8 @@ function runAllSAT() {
 fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
    if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
    if (!gp.amd) disableSATCard('amd', 'No AMD GPU detected');
+    if (!gp.amd) disableSATCard('amd-mem', 'No AMD GPU detected');
+    if (!gp.amd) disableSATCard('amd-bandwidth', 'No AMD GPU detected');
 });
 function disableSATCard(id, reason) {
    const btn = document.getElementById('sat-btn-' + id);
@@ -604,18 +658,21 @@ func renderBurn() string {
 	return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at maximum load. Repeated or prolonged use may reduce hardware lifespan (storage endurance, GPU wear). Use only when necessary.</div>
 <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 <div class="card"><div class="card-head">Burn Profile</div><div class="card-body">
-<div class="form-row" style="max-width:320px"><label>Preset</label><select id="burn-profile"><option value="smoke">Smoke: 5 minutes</option><option value="acceptance">Acceptance: 1 hour</option><option value="overnight">Overnight: 8 hours</option></select></div>
-<p style="color:var(--muted);font-size:12px">Applied to all tests on this page. NVIDIA uses mapped DCGM levels: smoke=quick, acceptance=targeted stress, overnight=extended stress.</p>
+<div class="form-row" style="max-width:320px"><label>Preset</label><select id="burn-profile"><option value="smoke" selected>Smoke: quick check (~5 min CPU / DCGM level 1)</option><option value="acceptance">Acceptance: 1 hour (DCGM level 3)</option><option value="overnight">Overnight: 8 hours (DCGM level 4)</option></select></div>
+<p style="color:var(--muted);font-size:12px">Applied to all tests on this page. NVIDIA SAT on the Validate page still uses DCGM. NVIDIA GPU Stress on this page uses the selected stress loader for the preset duration.</p>
 </div></div>
 <div class="grid3">
 <div class="card"><div class="card-head">NVIDIA GPU Stress</div><div class="card-body">
-<button id="sat-btn-nvidia" class="btn btn-primary" onclick="runBurnIn('nvidia')">&#9654; Start NVIDIA Stress</button>
+<div class="form-row"><label>Load Tool</label><select id="nvidia-stress-loader"><option value="builtin" selected>bee-gpu-burn</option><option value="nccl">NCCL all_reduce_perf</option><option value="john">John the Ripper jumbo (OpenCL)</option></select></div>
+<div class="form-row"><label>Exclude GPU indices</label><input type="text" id="nvidia-stress-exclude" placeholder="e.g. 1,3"></div>
+<p style="color:var(--muted);font-size:12px;margin-bottom:8px"><code>bee-gpu-burn</code> runs on all detected NVIDIA GPUs by default. <code>NCCL all_reduce_perf</code> is useful for multi-GPU / interconnect load. Use exclusions only when one or more cards must be skipped.</p>
+<button id="sat-btn-nvidia-stress" class="btn btn-primary" onclick="runBurnIn('nvidia-stress')">&#9654; Start NVIDIA Stress</button>
 </div></div>
 <div class="card"><div class="card-head">CPU Stress</div><div class="card-body">
 <button class="btn btn-primary" onclick="runBurnIn('cpu')">&#9654; Start CPU Stress</button>
 </div></div>
 <div class="card"><div class="card-head">AMD GPU Stress</div><div class="card-body">
-<p style="color:var(--muted);font-size:12px;margin-bottom:8px">Requires ROCm tools (rocm-bandwidth-test). Missing tools reported as UNSUPPORTED.</p>
+<p style="color:var(--muted);font-size:12px;margin-bottom:8px">Runs ROCm compute stress together with VRAM copy/load activity via RVS GST and records a separate <code>rocm-bandwidth-test</code> snapshot. Missing tools reported as UNSUPPORTED.</p>
 <button id="sat-btn-amd-stress" class="btn btn-primary" onclick="runBurnIn('amd-stress')">&#9654; Start AMD Stress</button>
 </div></div>
 <div class="card"><div class="card-head">Memory Stress</div><div class="card-body">
@@ -626,6 +683,10 @@ func renderBurn() string {
 <p style="color:var(--muted);font-size:12px;margin-bottom:8px">Google stressapptest saturates CPU, memory and cache buses simultaneously. Env: <code>BEE_SAT_STRESS_SECONDS</code> (default 300), <code>BEE_SAT_STRESS_MB</code> (default auto).</p>
 <button class="btn btn-primary" onclick="runBurnIn('sat-stress')">&#9654; Start SAT Stress</button>
 </div></div>
+<div class="card"><div class="card-head">Platform Thermal Cycling</div><div class="card-body">
+<p style="color:var(--muted);font-size:12px;margin-bottom:8px">Runs CPU + GPU stress simultaneously across multiple load/idle cycles with varying durations. Detects cooling systems that fail to recover under repeated load cycles. Smoke: 2 cycles ~5 min. Acceptance: 4 cycles ~25 min.</p>
+<button class="btn btn-primary" onclick="runBurnIn('platform-stress')">&#9654; Start Thermal Cycling</button>
+</div></div>
 </div>
 <div id="bi-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Output <span id="bi-title"></span></div>
@@ -633,11 +694,24 @@ func renderBurn() string {
 </div>
 <script>
 let biES = null;
+// parseGPUIndexList turns a comma-separated string into an array of GPU
+// indices. Blank entries and anything that is not a non-negative integer
+// are dropped.
+function parseGPUIndexList(raw) {
+  const indices = [];
+  for (const part of (raw || '').split(',')) {
+    const token = part.trim();
+    if (token === '') continue;
+    const num = Number(token);
+    if (Number.isInteger(num) && num >= 0) indices.push(num);
+  }
+  return indices;
+}
 function runBurnIn(target) {
  if (biES) { biES.close(); biES = null; }
  const body = { profile: document.getElementById('burn-profile').value || 'smoke' };
+  if (target === 'nvidia-stress') {
+    body.loader = document.getElementById('nvidia-stress-loader').value || 'builtin';
+    body.exclude_gpu_indices = parseGPUIndexList(document.getElementById('nvidia-stress-exclude').value);
+  }
  document.getElementById('bi-output').style.display='block';
-  document.getElementById('bi-title').textContent = '— ' + target + ' [' + body.profile + ']';
+  const loaderLabel = body.loader ? ' / ' + body.loader : '';
+  document.getElementById('bi-title').textContent = '— ' + target + loaderLabel + ' [' + body.profile + ']';
  const term = document.getElementById('bi-terminal');
  term.textContent = 'Enqueuing ' + target + ' stress...\n';
  fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(body)})
@@ -652,7 +726,7 @@ function runBurnIn(target) {
 </script>
 <script>
 fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
-    if (!gp.nvidia) disableSATCard('nvidia', 'No NVIDIA GPU detected');
+    if (!gp.nvidia) disableSATCard('nvidia-stress', 'No NVIDIA GPU detected');
    if (!gp.amd) disableSATCard('amd-stress', 'No AMD GPU detected');
 });
 function disableSATCard(id, reason) {
@@ -852,12 +926,79 @@ func renderExport(exportDir string) string {
 	return `<div class="grid2">
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
-<a class="btn btn-primary" href="/export/support.tar.gz">⬇ Download Support Bundle</a>
+<a class="btn btn-primary" href="/export/support.tar.gz">&#8595; Download Support Bundle</a>
 </div></div>
 <div class="card"><div class="card-head">Export Files</div><div class="card-body">
 <table><tr><th>File</th></tr>` + rows.String() + `</table>
 </div></div>
-</div>`
+</div>
+
+<div class="card" style="margin-top:16px">
+  <div class="card-head">Export to USB
+    <button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
+  </div>
+  <div class="card-body">
+    <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
+    <div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
+    <div id="usb-targets" style="margin-top:12px"></div>
+    <div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
+  </div>
+</div>
+<script>
+(function(){
+// esc makes a value safe to interpolate into HTML text or a double-quoted
+// attribute; null and undefined become the empty string.
+function esc(v) {
+  return String(v == null ? '' : v)
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
+// usbRefresh rescans removable targets and rebuilds the table of export buttons.
+function usbRefresh() {
+  document.getElementById('usb-status').textContent = 'Scanning...';
+  document.getElementById('usb-targets').innerHTML = '';
+  document.getElementById('usb-msg').textContent = '';
+  fetch('/api/export/usb').then(r=>r.json()).then(targets => {
+    const st = document.getElementById('usb-status');
+    const ct = document.getElementById('usb-targets');
+    if (!targets || targets.length === 0) {
+      st.textContent = 'No removable USB devices found.';
+      return;
+    }
+    st.textContent = targets.length + ' device(s) found:';
+    ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
+      targets.map(t => {
+        // The target object is embedded in the onclick attribute, so its JSON
+        // must be attribute-escaped: a raw double quote would end the attribute
+        // and leave the button with a broken handler.
+        const payload = esc(JSON.stringify(t));
+        return '<tr>' +
+          '<td style="font-family:monospace">'+esc(t.device)+'</td>' +
+          '<td>'+esc(t.fs_type)+'</td>' +
+          '<td>'+esc(t.size)+'</td>' +
+          '<td>'+esc(t.label)+'</td>' +
+          '<td style="font-size:12px;color:var(--muted)">'+esc(t.model)+'</td>' +
+          '<td style="white-space:nowrap">' +
+            '<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+payload+')">Audit JSON</button> ' +
+            '<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+payload+')">Support Bundle</button>' +
+          '</td></tr>';
+      }).join('') + '</table>';
+  }).catch(e => {
+    document.getElementById('usb-status').textContent = 'Error: ' + e;
+  });
+}
+// usbExport posts the chosen target to /api/export/usb/<type> and reports the
+// outcome in the #usb-msg element.
+window.usbExport = function(type, target) {
+  const msg = document.getElementById('usb-msg');
+  msg.style.color = 'var(--muted)';
+  msg.textContent = 'Exporting to ' + (target.device||'') + '...';
+  fetch('/api/export/usb/'+type, {
+    method: 'POST',
+    headers: {'Content-Type':'application/json'},
+    body: JSON.stringify(target)
+  }).then(r=>r.json()).then(d => {
+    if (d.error) { msg.style.color='var(--err,red)'; msg.textContent = 'Error: '+d.error; return; }
+    msg.style.color = 'var(--ok,green)';
+    msg.textContent = d.message || 'Done.';
+  }).catch(e => {
+    msg.style.color = 'var(--err,red)';
+    msg.textContent = 'Error: '+e;
+  });
+};
+window.usbRefresh = usbRefresh;
+usbRefresh();
+})();
+</script>`
 }

 func listExportFiles(exportDir string) ([]string, error) {
@@ -883,6 +1024,56 @@ func listExportFiles(exportDir string) ([]string, error) {
 	return entries, nil
 }

+// ── Display Resolution ────────────────────────────────────────────────────────
+
+// renderDisplayInline returns the HTML/JS fragment for the display resolution
+// controls. The embedded script fetches /api/display/resolutions, renders one
+// mode selector plus an Apply button per output, and posts {output, mode} to
+// /api/display/set when Apply is clicked.
+//
+// NOTE(review): output and mode names are concatenated into the markup and
+// into an inline onclick handler without escaping — confirm the API only
+// returns values that are safe for that.
+func renderDisplayInline() string {
+	return `<div id="display-status" style="color:var(--muted);font-size:13px;margin-bottom:12px">Loading displays...</div>
+<div id="display-controls"></div>
+<script>
+(function(){
+function loadDisplays() {
+  fetch('/api/display/resolutions').then(r=>r.json()).then(displays => {
+    const status = document.getElementById('display-status');
+    const ctrl = document.getElementById('display-controls');
+    if (!displays || displays.length === 0) {
+      status.textContent = 'No connected displays found or xrandr not available.';
+      return;
+    }
+    status.textContent = '';
+    ctrl.innerHTML = displays.map(d => {
+      const opts = (d.modes||[]).map(m =>
+        '<option value="'+m.mode+'"'+(m.current?' selected':'')+'>'+m.mode+(m.current?' (current)':'')+'</option>'
+      ).join('');
+      return '<div style="margin-bottom:12px">'
+        +'<span style="font-weight:600;margin-right:8px">'+d.output+'</span>'
+        +'<span style="color:var(--muted);font-size:12px;margin-right:12px">Current: '+d.current+'</span>'
+        +'<select id="res-sel-'+d.output+'" style="margin-right:8px">'+opts+'</select>'
+        +'<button class="btn btn-sm btn-primary" onclick="applyResolution(\''+d.output+'\')">Apply</button>'
+        +'</div>';
+    }).join('');
+  }).catch(()=>{
+    document.getElementById('display-status').textContent = 'xrandr not available on this system.';
+  });
+}
+window.applyResolution = function(output) {
+  const sel = document.getElementById('res-sel-'+output);
+  if (!sel) return;
+  const mode = sel.value;
+  const btn = sel.nextElementSibling;
+  btn.disabled = true;
+  btn.textContent = 'Applying...';
+  fetch('/api/display/set', {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify({output:output,mode:mode})})
+    .then(r=>r.json()).then(d=>{
+      if (d.error) { alert('Error: '+d.error); }
+      loadDisplays();
+    }).catch(e=>{ alert('Error: '+e); })
+    .finally(()=>{ btn.disabled=false; btn.textContent='Apply'; });
+};
+loadDisplays();
+})();
+</script>`
+}
+
 // ── Tools ─────────────────────────────────────────────────────────────────────

 func renderTools() string {
@@ -934,6 +1125,9 @@ function installToRAM() {
 <div class="card"><div class="card-head">Services</div><div class="card-body">` +
 		renderServicesInline() + `</div></div>

+<div class="card"><div class="card-head">Display Resolution</div><div class="card-body">` +
+		renderDisplayInline() + `</div></div>
+
 <script>
 function checkTools() {
  document.getElementById('tools-table').innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -72,29 +72,36 @@ func (r *metricsRing) snapshot() ([]float64, []string) {
 	defer r.mu.Unlock()
 	v := make([]float64, len(r.vals))
 	copy(v, r.vals)
-	now := time.Now()
 	labels := make([]string, len(r.times))
+	if len(r.times) == 0 {
+		return v, labels
+	}
+	sameDay := timestampsSameLocalDay(r.times)
 	for i, t := range r.times {
-		labels[i] = relAgeLabel(now.Sub(t))
+		labels[i] = formatTimelineLabel(t.Local(), sameDay)
 	}
 	return v, labels
 }

-func relAgeLabel(age time.Duration) string {
-	if age <= 0 {
-		return "0"
+func timestampsSameLocalDay(times []time.Time) bool {
+	if len(times) == 0 {
+		return true
 	}
-	if age < time.Hour {
-		m := int(age.Minutes())
-		if m == 0 {
-			return "-1m"
+	first := times[0].Local()
+	for _, t := range times[1:] {
+		local := t.Local()
+		if local.Year() != first.Year() || local.YearDay() != first.YearDay() {
+			return false
 		}
-		return fmt.Sprintf("-%dm", m)
 	}
-	if age < 24*time.Hour {
-		return fmt.Sprintf("-%dh", int(age.Hours()))
+	return true
+}
+
+func formatTimelineLabel(ts time.Time, sameDay bool) string {
+	if sameDay {
+		return ts.Format("15:04")
 	}
-	return fmt.Sprintf("-%dd", int(age.Hours()/24))
+	return ts.Format("01-02 15:04")
 }

 // gpuRings holds per-GPU ring buffers.
@@ -132,6 +139,10 @@ type handler struct {
 	// per-GPU rings (index = GPU index)
 	gpuRings []*gpuRings
 	ringsMu  sync.Mutex
+	latestMu sync.RWMutex
+	latest   *platform.LiveMetricSample
+	// metrics persistence (nil if DB unavailable)
+	metricsDB *MetricsDB
 	// install job (at most one at a time)
 	installJob *jobState
 	installMu  sync.Mutex
@@ -158,11 +169,27 @@ func NewHandler(opts HandlerOptions) http.Handler {
 		ringMemLoad: newMetricsRing(120),
 		ringPower:   newMetricsRing(120),
 	}
+
+	// Open metrics DB and pre-fill ring buffers from history.
+	if db, err := openMetricsDB(metricsDBPath); err == nil {
+		h.metricsDB = db
+		if samples, err := db.LoadRecent(120); err == nil {
+			for _, s := range samples {
+				h.feedRings(s)
+			}
+			if len(samples) > 0 {
+				h.setLatestMetric(samples[len(samples)-1])
+			}
+		}
+	}
+	h.startMetricsCollector()
+
 	globalQueue.startWorker(&opts)
 	mux := http.NewServeMux()

 	// ── Infrastructure ──────────────────────────────────────────────────────
 	mux.HandleFunc("GET /healthz", h.handleHealthz)
+	mux.HandleFunc("GET /api/ready", h.handleReady)

 	// ── Existing read-only endpoints (preserved for compatibility) ──────────
 	mux.HandleFunc("GET /audit.json", h.handleAuditJSON)
@@ -179,13 +206,17 @@ func NewHandler(opts HandlerOptions) http.Handler {

 	// SAT
 	mux.HandleFunc("POST /api/sat/nvidia/run", h.handleAPISATRun("nvidia"))
+	mux.HandleFunc("POST /api/sat/nvidia-stress/run", h.handleAPISATRun("nvidia-stress"))
 	mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory"))
 	mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
 	mux.HandleFunc("POST /api/sat/cpu/run", h.handleAPISATRun("cpu"))
 	mux.HandleFunc("POST /api/sat/amd/run", h.handleAPISATRun("amd"))
+	mux.HandleFunc("POST /api/sat/amd-mem/run", h.handleAPISATRun("amd-mem"))
+	mux.HandleFunc("POST /api/sat/amd-bandwidth/run", h.handleAPISATRun("amd-bandwidth"))
 	mux.HandleFunc("POST /api/sat/amd-stress/run", h.handleAPISATRun("amd-stress"))
 	mux.HandleFunc("POST /api/sat/memory-stress/run", h.handleAPISATRun("memory-stress"))
 	mux.HandleFunc("POST /api/sat/sat-stress/run", h.handleAPISATRun("sat-stress"))
+	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)

@@ -211,10 +242,17 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	// Export
 	mux.HandleFunc("GET /api/export/list", h.handleAPIExportList)
 	mux.HandleFunc("POST /api/export/bundle", h.handleAPIExportBundle)
+	mux.HandleFunc("GET /api/export/usb", h.handleAPIExportUSBTargets)
+	mux.HandleFunc("POST /api/export/usb/audit", h.handleAPIExportUSBAudit)
+	mux.HandleFunc("POST /api/export/usb/bundle", h.handleAPIExportUSBBundle)

 	// Tools
 	mux.HandleFunc("GET /api/tools/check", h.handleAPIToolsCheck)

+	// Display
+	mux.HandleFunc("GET /api/display/resolutions", h.handleAPIDisplayResolutions)
+	mux.HandleFunc("POST /api/display/set", h.handleAPIDisplaySet)
+
 	// GPU presence
 	mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)

@@ -230,9 +268,11 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/install/run", h.handleAPIInstallRun)
 	mux.HandleFunc("GET /api/install/stream", h.handleAPIInstallStream)

-	// Metrics — SSE stream of live sensor data + server-side SVG charts
+	// Metrics — SSE stream of live sensor data + server-side SVG charts + CSV export
 	mux.HandleFunc("GET /api/metrics/stream", h.handleAPIMetricsStream)
+	mux.HandleFunc("GET /api/metrics/latest", h.handleAPIMetricsLatest)
 	mux.HandleFunc("GET /api/metrics/chart/", h.handleMetricsChartSVG)
+	mux.HandleFunc("GET /api/metrics/export.csv", h.handleAPIMetricsExportCSV)

 	// Reanimator chart static assets (viewer template expects /static/*)
 	mux.Handle("GET /static/", http.StripPrefix("/static/", web.Static()))
@@ -244,6 +284,37 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	return mux
 }

+// startMetricsCollector launches a background goroutine that takes one live
+// metric sample per second, feeds it into the ring buffers, records it as the
+// latest sample, and writes it to the metrics DB when one is open.
+//
+// The loop has no exit condition, so the goroutine (and its ticker) live for
+// the rest of the process.
+func (h *handler) startMetricsCollector() {
+	go func() {
+		ticker := time.NewTicker(1 * time.Second)
+		defer ticker.Stop()
+		for range ticker.C {
+			sample := platform.SampleLiveMetrics()
+			h.feedRings(sample)
+			h.setLatestMetric(sample)
+			if h.metricsDB != nil {
+				// Persistence is best-effort: a failed write is dropped and
+				// sampling continues.
+				_ = h.metricsDB.Write(sample)
+			}
+		}
+	}()
+}
+
+// setLatestMetric stores a private copy of sample as the most recent reading,
+// guarded by latestMu.
+func (h *handler) setLatestMetric(sample platform.LiveMetricSample) {
+	h.latestMu.Lock()
+	defer h.latestMu.Unlock()
+	stored := new(platform.LiveMetricSample)
+	*stored = sample
+	h.latest = stored
+}
+
+// latestMetric returns a copy of the most recently stored sample. The boolean
+// is false (and the sample is the zero value) when nothing has been stored yet.
+func (h *handler) latestMetric() (platform.LiveMetricSample, bool) {
+	h.latestMu.RLock()
+	defer h.latestMu.RUnlock()
+	if cur := h.latest; cur != nil {
+		return *cur, true
+	}
+	return platform.LiveMetricSample{}, false
+}
+
 // ListenAndServe starts the HTTP server.
 func ListenAndServe(addr string, opts HandlerOptions) error {
 	return http.ListenAndServe(addr, NewHandler(opts))
@@ -371,6 +442,20 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	path := strings.TrimPrefix(r.URL.Path, "/api/metrics/chart/")
 	path = strings.TrimSuffix(path, ".svg")

+	if h.metricsDB != nil {
+		if datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path); ok {
+			buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
+			if err != nil {
+				http.Error(w, err.Error(), http.StatusInternalServerError)
+				return
+			}
+			w.Header().Set("Content-Type", "image/svg+xml")
+			w.Header().Set("Cache-Control", "no-store")
+			_, _ = w.Write(buf)
+			return
+		}
+	}
+
 	var datasets [][]float64
 	var names []string
 	var labels []string
@@ -448,7 +533,80 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		yMin = floatPtr(0)
 		yMax = autoMax120(datasets...)

-	// ── GPU sub-charts ────────────────────────────────────────────────────
+	// ── Combined GPU charts (all GPUs on one chart) ───────────────────────
+	case path == "gpu-all-load":
+		title = "GPU Compute Load"
+		h.ringsMu.Lock()
+		for idx, gr := range h.gpuRings {
+			if gr == nil {
+				continue
+			}
+			vUtil, l := gr.Util.snapshot()
+			datasets = append(datasets, vUtil)
+			names = append(names, fmt.Sprintf("GPU %d", idx))
+			if len(labels) == 0 {
+				labels = l
+			}
+		}
+		h.ringsMu.Unlock()
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-memload":
+		title = "GPU Memory Load"
+		h.ringsMu.Lock()
+		for idx, gr := range h.gpuRings {
+			if gr == nil {
+				continue
+			}
+			vMem, l := gr.MemUtil.snapshot()
+			datasets = append(datasets, vMem)
+			names = append(names, fmt.Sprintf("GPU %d", idx))
+			if len(labels) == 0 {
+				labels = l
+			}
+		}
+		h.ringsMu.Unlock()
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-power":
+		title = "GPU Power"
+		h.ringsMu.Lock()
+		for idx, gr := range h.gpuRings {
+			if gr == nil {
+				continue
+			}
+			vPow, l := gr.Power.snapshot()
+			datasets = append(datasets, vPow)
+			names = append(names, fmt.Sprintf("GPU %d", idx))
+			if len(labels) == 0 {
+				labels = l
+			}
+		}
+		h.ringsMu.Unlock()
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "gpu-all-temp":
+		title = "GPU Temperature"
+		h.ringsMu.Lock()
+		for idx, gr := range h.gpuRings {
+			if gr == nil {
+				continue
+			}
+			vTemp, l := gr.Temp.snapshot()
+			datasets = append(datasets, vTemp)
+			names = append(names, fmt.Sprintf("GPU %d", idx))
+			if len(labels) == 0 {
+				labels = l
+			}
+		}
+		h.ringsMu.Unlock()
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	// ── Per-GPU sub-charts ────────────────────────────────────────────────
 	case strings.HasPrefix(path, "gpu/"):
 		rest := strings.TrimPrefix(path, "gpu/")
 		// rest is either "{idx}-load", "{idx}-temp", "{idx}-power", or legacy "{idx}"
@@ -512,6 +670,259 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	_, _ = w.Write(buf)
 }

+// chartDataFromDB loads every persisted sample and builds the chart inputs for
+// path. Results are, in order: datasets, series names, x-axis labels, title,
+// yMin, yMax and ok. ok is false when loading fails, when there is no history,
+// or when chartDataFromSamples reports false for the path. h.metricsDB must be
+// non-nil.
+func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+	history, loadErr := h.metricsDB.LoadAll()
+	if loadErr == nil && len(history) > 0 {
+		return chartDataFromSamples(path, history)
+	}
+	return nil, nil, nil, "", nil, nil, false
+}
+
+// chartDataFromSamples converts a sample history into the inputs for one chart.
+// path selects the chart: "server-load", "server-temp" / "server-temp-cpu",
+// "server-temp-gpu", "server-temp-ambient", "server-power", "server-fans",
+// "gpu-all-load", "gpu-all-memload", "gpu-all-power", "gpu-all-temp", or
+// "gpu/{idx}[-load|-temp|-power]".
+//
+// It returns, in order: datasets, series names, x-axis labels, title, yMin,
+// yMax and ok. ok is false for an unknown path, for a per-GPU chart whose GPU
+// index never appears in samples, and whenever no dataset was produced.
+func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
+	var datasets [][]float64
+	var names []string
+	var title string
+	var yMin, yMax *float64
+	labels := sampleTimeLabels(samples)
+
+	switch {
+	case path == "server-load":
+		title = "CPU / Memory Load"
+		cpu := make([]float64, len(samples))
+		mem := make([]float64, len(samples))
+		for i, s := range samples {
+			cpu[i] = s.CPULoadPct
+			mem[i] = s.MemLoadPct
+		}
+		datasets = [][]float64{cpu, mem}
+		names = []string{"CPU Load %", "Mem Load %"}
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	// "server-temp" is served with the same data as "server-temp-cpu".
+	case path == "server-temp", path == "server-temp-cpu":
+		title = "CPU Temperature"
+		datasets, names = namedTempDatasets(samples, "cpu")
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-temp-gpu":
+		title = "GPU Temperature"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-temp-ambient":
+		title = "Ambient / Other Sensors"
+		datasets, names = namedTempDatasets(samples, "ambient")
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case path == "server-power":
+		title = "System Power"
+		power := make([]float64, len(samples))
+		for i, s := range samples {
+			power[i] = s.PowerW
+		}
+		datasets = [][]float64{power}
+		names = []string{"Power W"}
+		yMin, yMax = autoBounds120(power)
+
+	case path == "server-fans":
+		title = "Fan RPM"
+		datasets, names = namedFanDatasets(samples)
+		yMin, yMax = autoBounds120(datasets...)
+
+	case path == "gpu-all-load":
+		title = "GPU Compute Load"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-memload":
+		title = "GPU Memory Load"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+		yMin = floatPtr(0)
+		yMax = floatPtr(100)
+
+	case path == "gpu-all-power":
+		title = "GPU Power"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+		yMin, yMax = autoBounds120(datasets...)
+
+	case path == "gpu-all-temp":
+		title = "GPU Temperature"
+		datasets, names = gpuDatasets(samples, func(g platform.GPUMetricRow) float64 { return g.TempC })
+		yMin = floatPtr(0)
+		yMax = autoMax120(datasets...)
+
+	case strings.HasPrefix(path, "gpu/"):
+		// rest is "{idx}-{sub}" or a bare "{idx}"; split at the last dash.
+		rest := strings.TrimPrefix(path, "gpu/")
+		sub := ""
+		if i := strings.LastIndex(rest, "-"); i > 0 {
+			sub = rest[i+1:]
+			rest = rest[:i]
+		}
+		// The Sscanf result is discarded, so a non-numeric index leaves idx
+		// at 0 and the request is treated as GPU 0.
+		idx := 0
+		fmt.Sscanf(rest, "%d", &idx)
+		switch sub {
+		case "load":
+			title = fmt.Sprintf("GPU %d Load", idx)
+			util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct })
+			mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct })
+			if util == nil && mem == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))}
+			names = []string{"Load %", "Mem %"}
+			yMin = floatPtr(0)
+			yMax = floatPtr(100)
+		case "temp":
+			title = fmt.Sprintf("GPU %d Temperature", idx)
+			temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
+			if temp == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{temp}
+			names = []string{"Temp °C"}
+			yMin = floatPtr(0)
+			yMax = autoMax120(temp)
+		default:
+			// Any other suffix, including none, yields the power chart.
+			title = fmt.Sprintf("GPU %d Power", idx)
+			power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+			if power == nil {
+				return nil, nil, nil, "", nil, nil, false
+			}
+			datasets = [][]float64{power}
+			names = []string{"Power W"}
+			yMin, yMax = autoBounds120(power)
+		}
+
+	default:
+		return nil, nil, nil, "", nil, nil, false
+	}
+
+	// A known path with no series (e.g. no matching sensors) is reported as
+	// not ok.
+	return datasets, names, labels, title, yMin, yMax, len(datasets) > 0
+}
+
+// sampleTimeLabels returns one local-time label per sample. The label format
+// is chosen once for the whole series, depending on whether every timestamp
+// falls on the same local day.
+func sampleTimeLabels(samples []platform.LiveMetricSample) []string {
+	count := len(samples)
+	out := make([]string, count)
+	if count == 0 {
+		return out
+	}
+	stamps := make([]time.Time, 0, count)
+	for i := range samples {
+		stamps = append(stamps, samples[i].Timestamp)
+	}
+	oneDay := timestampsSameLocalDay(stamps)
+	for i, ts := range stamps {
+		out[i] = formatTimelineLabel(ts.Local(), oneDay)
+	}
+	return out
+}
+
+// namedTempDatasets builds one series per temperature sensor in group.
+// Sensor names are listed in first-seen order; each series has one value per
+// sample and stays 0 where the sensor is missing from that sample.
+func namedTempDatasets(samples []platform.LiveMetricSample, group string) ([][]float64, []string) {
+	var names []string
+	known := map[string]bool{}
+	for i := range samples {
+		for _, reading := range samples[i].Temps {
+			if reading.Group != group || known[reading.Name] {
+				continue
+			}
+			known[reading.Name] = true
+			names = append(names, reading.Name)
+		}
+	}
+
+	datasets := make([][]float64, 0, len(names))
+	for _, sensor := range names {
+		series := make([]float64, len(samples))
+		for i := range samples {
+			for _, reading := range samples[i].Temps {
+				if reading.Group != group || reading.Name != sensor {
+					continue
+				}
+				// Only the first matching reading in a sample is used.
+				series[i] = reading.Celsius
+				break
+			}
+		}
+		datasets = append(datasets, series)
+	}
+	return datasets, names
+}
+
+// namedFanDatasets builds one RPM series per fan. Fan names are listed in
+// first-seen order; each series has one value per sample and stays 0 where the
+// fan is missing from that sample.
+func namedFanDatasets(samples []platform.LiveMetricSample) ([][]float64, []string) {
+	var names []string
+	known := map[string]bool{}
+	for i := range samples {
+		for _, fan := range samples[i].Fans {
+			if known[fan.Name] {
+				continue
+			}
+			known[fan.Name] = true
+			names = append(names, fan.Name)
+		}
+	}
+
+	datasets := make([][]float64, 0, len(names))
+	for _, wanted := range names {
+		series := make([]float64, len(samples))
+		for i := range samples {
+			for _, fan := range samples[i].Fans {
+				if fan.Name != wanted {
+					continue
+				}
+				// Only the first matching fan entry in a sample is used.
+				series[i] = fan.RPM
+				break
+			}
+		}
+		datasets = append(datasets, series)
+	}
+	return datasets, names
+}
+
+// gpuDatasets builds one series per GPU index found in samples, in first-seen
+// order, using pick to extract the plotted value from each GPU row. The
+// returned names are "GPU <index>" and line up with the datasets.
+func gpuDatasets(samples []platform.LiveMetricSample, pick func(platform.GPUMetricRow) float64) ([][]float64, []string) {
+	var order []int
+	known := map[int]bool{}
+	for i := range samples {
+		for _, row := range samples[i].GPUs {
+			if known[row.GPUIndex] {
+				continue
+			}
+			known[row.GPUIndex] = true
+			order = append(order, row.GPUIndex)
+		}
+	}
+
+	datasets := make([][]float64, 0, len(order))
+	names := make([]string, 0, len(order))
+	for _, gpu := range order {
+		if series := gpuDatasetByIndex(samples, gpu, pick); series != nil {
+			datasets = append(datasets, series)
+			names = append(names, fmt.Sprintf("GPU %d", gpu))
+		}
+	}
+	return datasets, names
+}
+
+// gpuDatasetByIndex extracts, for the GPU with index idx, one value per sample
+// via pick. Samples that lack that GPU contribute 0. It returns nil when the
+// GPU does not appear in any sample.
+func gpuDatasetByIndex(samples []platform.LiveMetricSample, idx int, pick func(platform.GPUMetricRow) float64) []float64 {
+	series := make([]float64, len(samples))
+	hits := 0
+	for i := range samples {
+		for _, row := range samples[i].GPUs {
+			if row.GPUIndex != idx {
+				continue
+			}
+			series[i] = pick(row)
+			hits++
+			break
+		}
+	}
+	if hits == 0 {
+		return nil
+	}
+	return series
+}
+
+// coalesceDataset returns ds unchanged when it is non-nil, and a zero-filled
+// slice of length n otherwise.
+func coalesceDataset(ds []float64, n int) []float64 {
+	if ds == nil {
+		return make([]float64, n)
+	}
+	return ds
+}
+
 // floatPtr returns a pointer to a float64 value.
 func floatPtr(v float64) *float64 { return &v }

@@ -532,6 +943,47 @@ func autoMax120(datasets ...[]float64) *float64 {
 	return &v
 }

+// autoBounds120 derives Y-axis bounds from the values in datasets.
+//
+// It returns (nil, nil) when there are no values at all, and (0, nil) when the
+// largest value is not positive. Otherwise the observed range is padded by 20%
+// of its span on both sides, with the lower bound clamped at 0. A flat series
+// uses 10% of its value as the span.
+func autoBounds120(datasets ...[]float64) (*float64, *float64) {
+	var lo, hi float64
+	seen := false
+	for _, series := range datasets {
+		for _, v := range series {
+			switch {
+			case !seen:
+				lo, hi, seen = v, v, true
+			case v < lo:
+				lo = v
+			case v > hi:
+				hi = v
+			}
+		}
+	}
+	if !seen {
+		return nil, nil
+	}
+	if hi <= 0 {
+		return floatPtr(0), nil
+	}
+
+	spread := hi - lo
+	if spread <= 0 {
+		// Flat series: fall back to a fraction of the value so the line does
+		// not sit on the chart edge.
+		spread = hi * 0.1
+		if spread <= 0 {
+			spread = 1
+		}
+	}
+	margin := spread * 0.2
+	bottom := lo - margin
+	if bottom < 0 {
+		bottom = 0
+	}
+	return floatPtr(bottom), floatPtr(hi + margin)
+}
+
 // renderChartSVG renders a line chart SVG with a fixed Y-axis range.
 func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
 	n := len(labels)
@@ -544,6 +996,16 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
 			datasets[i] = make([]float64, n)
 		}
 	}
+	// Append global min/avg/max to title.
+	mn, avg, mx := globalStats(datasets)
+	if mx > 0 {
+		title = fmt.Sprintf("%s    ↓%s  ~%s  ↑%s",
+			title,
+			chartLegendNumber(mn),
+			chartLegendNumber(avg),
+			chartLegendNumber(mx),
+		)
+	}
 	title = sanitizeChartText(title)
 	names = sanitizeChartTexts(names)
 	sparse := sanitizeChartTexts(sparseLabels(labels, 6))
@@ -552,6 +1014,9 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
 	opt.Title = gocharts.TitleOption{Text: title}
 	opt.XAxis.Labels = sparse
 	opt.Legend = gocharts.LegendOption{SeriesNames: names}
+	opt.Symbol = gocharts.SymbolNone
+	// Right padding: reserve space for the MarkLine label (library recommendation).
+	opt.Padding = gocharts.NewBox(20, 20, 80, 20)
 	if yMin != nil || yMax != nil {
 		opt.YAxis = []gocharts.YAxisOption{{
 			Min:            yMin,
@@ -560,6 +1025,12 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
 		}}
 	}

+	// Add a single peak mark line on the series that holds the global maximum.
+	peakIdx, _ := globalPeakSeries(datasets)
+	if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
+		opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
+	}
+
 	p := gocharts.NewPainter(gocharts.PainterOptions{
 		OutputFormat: gocharts.ChartOutputSVG,
 		Width:        1400,
@@ -571,6 +1042,48 @@ func renderChartSVG(title string, datasets [][]float64, names []string, labels [
 	return p.Bytes()
 }

+// globalPeakSeries returns the index of the series containing the largest
+// positive value across all datasets, and that value (-1, 0 if none is > 0).
+func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
+	idx = -1
+	for i, ds := range datasets {
+		for _, v := range ds {
+			if v > peak {
+				peak = v
+				idx = i
+			}
+		}
+	}
+	return idx, peak
+}
+
+// globalStats returns min, average, and max across all values in all datasets (all 0 when empty).
+func globalStats(datasets [][]float64) (mn, avg, mx float64) {
+	var sum float64
+	var count int
+	first := true
+	for _, ds := range datasets {
+		for _, v := range ds {
+			if first {
+				mn, mx = v, v
+				first = false
+			}
+			if v < mn {
+				mn = v
+			}
+			if v > mx {
+				mx = v
+			}
+			sum += v
+			count++
+		}
+	}
+	if count > 0 {
+		avg = sum / float64(count)
+	}
+	return mn, avg, mx
+}
+
 func sanitizeChartText(s string) string {
 	if s == "" {
 		return ""
@@ -652,8 +1165,69 @@ func sparseLabels(labels []string, n int) []string {
 	return out
 }

+func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
+	if h.metricsDB == nil {
+		http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
+		return
+	}
+	w.Header().Set("Content-Type", "text/csv; charset=utf-8")
+	w.Header().Set("Content-Disposition", `attachment; filename="bee-metrics.csv"`)
+	w.Header().Set("Cache-Control", "no-store")
+	_ = h.metricsDB.ExportCSV(w)
+}
+
 // ── Page handler ─────────────────────────────────────────────────────────────

+func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Cache-Control", "no-store")
+	if _, err := os.Stat(h.opts.AuditPath); err != nil {
+		w.WriteHeader(http.StatusServiceUnavailable)
+		_, _ = w.Write([]byte("starting"))
+		return
+	}
+	w.WriteHeader(http.StatusOK)
+	_, _ = w.Write([]byte("ready"))
+}
+
+const loadingPageHTML = `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<title>EASY-BEE</title>
+<style>
+*{margin:0;padding:0;box-sizing:border-box}
+html,body{height:100%;background:#0f1117;display:flex;align-items:center;justify-content:center;font-family:'Courier New',monospace;color:#e2e8f0}
+.logo{font-size:13px;line-height:1.4;color:#f6c90e;margin-bottom:48px;white-space:pre}
+.spinner{width:48px;height:48px;border:4px solid #2d3748;border-top-color:#f6c90e;border-radius:50%;animation:spin .8s linear infinite;margin:0 auto 24px}
+@keyframes spin{to{transform:rotate(360deg)}}
+.status{font-size:14px;color:#a0aec0;letter-spacing:.05em}
+</style>
+</head>
+<body>
+<div style="text-align:center">
+  <div class="logo">  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗
+  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝
+  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗
+  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝
+  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗
+  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝</div>
+  <div class="spinner"></div>
+  <div class="status" id="s">Starting up...</div>
+</div>
+<script>
+function probe(){
+  fetch('/api/ready',{cache:'no-store'})
+    .then(function(r){
+      if(r.ok){window.location.replace('/');}
+      else{setTimeout(probe,1000);}
+    })
+    .catch(function(){setTimeout(probe,1000);});
+}
+probe();
+</script>
+</body>
+</html>`
+
 func (h *handler) handlePage(w http.ResponseWriter, r *http.Request) {
 	page := strings.TrimPrefix(r.URL.Path, "/")
 	if page == "" {
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -7,6 +7,9 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
+
+	"bee/audit/internal/platform"
 )

 func TestChartLegendNumber(t *testing.T) {
@@ -31,6 +34,61 @@ func TestChartLegendNumber(t *testing.T) {
 	}
 }

+func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
+	samples := []platform.LiveMetricSample{
+		{
+			Timestamp:  time.Now().Add(-3 * time.Minute),
+			CPULoadPct: 10,
+			MemLoadPct: 20,
+			PowerW:     300,
+			GPUs: []platform.GPUMetricRow{
+				{GPUIndex: 0, UsagePct: 90, MemUsagePct: 5, PowerW: 120, TempC: 50},
+			},
+		},
+		{
+			Timestamp:  time.Now().Add(-2 * time.Minute),
+			CPULoadPct: 30,
+			MemLoadPct: 40,
+			PowerW:     320,
+			GPUs: []platform.GPUMetricRow{
+				{GPUIndex: 0, UsagePct: 95, MemUsagePct: 7, PowerW: 125, TempC: 51},
+			},
+		},
+		{
+			Timestamp:  time.Now().Add(-1 * time.Minute),
+			CPULoadPct: 50,
+			MemLoadPct: 60,
+			PowerW:     340,
+			GPUs: []platform.GPUMetricRow{
+				{GPUIndex: 0, UsagePct: 97, MemUsagePct: 9, PowerW: 130, TempC: 52},
+			},
+		},
+	}
+
+	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	if !ok {
+		t.Fatal("chartDataFromSamples returned ok=false")
+	}
+	if title != "GPU Power" {
+		t.Fatalf("title=%q", title)
+	}
+	if len(names) != 1 || names[0] != "GPU 0" {
+		t.Fatalf("names=%v", names)
+	}
+	if len(labels) != len(samples) {
+		t.Fatalf("labels len=%d want %d", len(labels), len(samples))
+	}
+	if len(datasets) != 1 || len(datasets[0]) != len(samples) {
+		t.Fatalf("datasets shape=%v", datasets)
+	}
+	if got := datasets[0][0]; got != 120 {
+		t.Fatalf("datasets[0][0]=%v want 120", got)
+	}
+	if got := datasets[0][2]; got != 130 {
+		t.Fatalf("datasets[0][2]=%v want 130", got)
+	}
+}
+
 func TestRootRendersDashboard(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
@@ -78,6 +136,33 @@ func TestRootRendersDashboard(t *testing.T) {
 	}
 }

+func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
+	dir := t.TempDir()
+	exportDir := filepath.Join(dir, "export")
+	if err := os.MkdirAll(exportDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	handler := NewHandler(HandlerOptions{
+		Title:     "Bee Hardware Audit",
+		AuditPath: filepath.Join(dir, "missing-audit.json"),
+		ExportDir: exportDir,
+	})
+
+	rec := httptest.NewRecorder()
+	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	body := rec.Body.String()
+	if !strings.Contains(body, `Run Audit`) {
+		t.Fatalf("dashboard missing run audit button: %s", body)
+	}
+	if strings.Contains(body, `No audit data`) {
+		t.Fatalf("dashboard still shows empty audit badge: %s", body)
+	}
+}
+
 func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -8,10 +8,12 @@ import (
 	"os"
 	"path/filepath"
 	"sort"
+	"strings"
 	"sync"
 	"time"

 	"bee/audit/internal/app"
+	"bee/audit/internal/platform"
 )

 // Task statuses.
@@ -23,19 +25,58 @@ const (
 	TaskCancelled = "cancelled"
 )

-// taskNames maps target → human-readable name.
+// taskNames maps target → human-readable name for validate (SAT) runs.
 var taskNames = map[string]string{
-	"nvidia":         "NVIDIA SAT",
-	"memory":         "Memory SAT",
-	"storage":        "Storage SAT",
-	"cpu":            "CPU SAT",
-	"amd":            "AMD GPU SAT",
-	"amd-stress":     "AMD GPU Burn-in",
-	"memory-stress":  "Memory Burn-in",
-	"sat-stress":     "SAT Stress (stressapptest)",
-	"audit":          "Audit",
-	"install":        "Install to Disk",
-	"install-to-ram": "Install to RAM",
+	"nvidia":          "NVIDIA SAT",
+	"nvidia-stress":   "NVIDIA GPU Stress",
+	"memory":          "Memory SAT",
+	"storage":         "Storage SAT",
+	"cpu":             "CPU SAT",
+	"amd":             "AMD GPU SAT",
+	"amd-mem":         "AMD GPU MEM Integrity",
+	"amd-bandwidth":   "AMD GPU MEM Bandwidth",
+	"amd-stress":      "AMD GPU Burn-in",
+	"memory-stress":   "Memory Burn-in",
+	"sat-stress":      "SAT Stress (stressapptest)",
+	"platform-stress": "Platform Thermal Cycling",
+	"audit":           "Audit",
+	"install":         "Install to Disk",
+	"install-to-ram":  "Install to RAM",
+}
+
+// burnNames maps target → human-readable name when a burn profile is set.
+var burnNames = map[string]string{
+	"nvidia": "NVIDIA Burn-in",
+	"memory": "Memory Burn-in",
+	"cpu":    "CPU Burn-in",
+	"amd":    "AMD GPU Burn-in",
+}
+
+func nvidiaStressTaskName(loader string) string {
+	switch strings.TrimSpace(strings.ToLower(loader)) {
+	case platform.NvidiaStressLoaderJohn:
+		return "NVIDIA GPU Stress (John/OpenCL)"
+	case platform.NvidiaStressLoaderNCCL:
+		return "NVIDIA GPU Stress (NCCL)"
+	default:
+		return "NVIDIA GPU Stress (bee-gpu-burn)"
+	}
+}
+
+func taskDisplayName(target, profile, loader string) string {
+	name := taskNames[target]
+	if profile != "" {
+		if n, ok := burnNames[target]; ok {
+			name = n
+		}
+	}
+	if target == "nvidia-stress" {
+		name = nvidiaStressTaskName(loader)
+	}
+	if name == "" {
+		name = target
+	}
+	return name
 }

 // Task represents one unit of work in the queue.
@@ -58,12 +99,14 @@ type Task struct {

 // taskParams holds optional parameters parsed from the run request.
 type taskParams struct {
-	Duration    int    `json:"duration,omitempty"`
-	DiagLevel   int    `json:"diag_level,omitempty"`
-	GPUIndices  []int  `json:"gpu_indices,omitempty"`
-	BurnProfile string `json:"burn_profile,omitempty"`
-	DisplayName string `json:"display_name,omitempty"`
-	Device      string `json:"device,omitempty"` // for install
+	Duration          int    `json:"duration,omitempty"`
+	DiagLevel         int    `json:"diag_level,omitempty"`
+	GPUIndices        []int  `json:"gpu_indices,omitempty"`
+	ExcludeGPUIndices []int  `json:"exclude_gpu_indices,omitempty"`
+	Loader            string `json:"loader,omitempty"`
+	BurnProfile       string `json:"burn_profile,omitempty"`
+	DisplayName       string `json:"display_name,omitempty"`
+	Device            string `json:"device,omitempty"` // for install
 }

 type persistedTask struct {
@@ -96,6 +139,34 @@ func resolveBurnPreset(profile string) burnPreset {
 	}
 }

+func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
+	switch profile {
+	case "overnight":
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+			{LoadSec: 600, IdleSec: 30},
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+			{LoadSec: 600, IdleSec: 30},
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+		}}
+	case "acceptance":
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 300, IdleSec: 60},
+			{LoadSec: 300, IdleSec: 30},
+			{LoadSec: 300, IdleSec: 60},
+			{LoadSec: 300, IdleSec: 30},
+		}}
+	default: // smoke
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 90, IdleSec: 60},
+			{LoadSec: 90, IdleSec: 30},
+		}}
+	}
+}
+
 // taskQueue manages a priority-ordered list of tasks and runs them one at a time.
 type taskQueue struct {
 	mu        sync.Mutex
@@ -124,6 +195,15 @@ var (
 	runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 		return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
 	}
+	runAMDMemIntegrityPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemIntegrityPackCtx(ctx, baseDir, logFunc)
+	}
+	runAMDMemBandwidthPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemBandwidthPackCtx(ctx, baseDir, logFunc)
+	}
+	runNvidiaStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
+		return a.RunNvidiaStressPackCtx(ctx, baseDir, opts, logFunc)
+	}
 	runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 		return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
 	}
@@ -365,6 +445,17 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		} else {
 			archive, err = a.RunNvidiaAcceptancePack("", j.append)
 		}
+	case "nvidia-stress":
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
+			DurationSec:       dur,
+			Loader:            t.params.Loader,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+		}, j.append)
 	case "memory":
 		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
 	case "storage":
@@ -377,9 +468,14 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		if dur <= 0 {
 			dur = 60
 		}
+		j.append(fmt.Sprintf("CPU stress duration: %ds", dur))
 		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
 	case "amd":
 		archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
+	case "amd-mem":
+		archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
+	case "amd-bandwidth":
+		archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
 	case "amd-stress":
 		dur := t.params.Duration
 		if t.params.BurnProfile != "" && dur <= 0 {
@@ -398,6 +494,9 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
 		}
 		archive, err = runSATStressPackCtx(a, ctx, "", dur, j.append)
+	case "platform-stress":
+		opts := resolvePlatformStressPreset(t.params.BurnProfile)
+		archive, err = a.RunPlatformStress(ctx, "", opts, j.append)
 	case "audit":
 		result, e := a.RunAuditNow(q.opts.RuntimeMode)
 		if e != nil {
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -95,6 +95,23 @@ func TestResolveBurnPreset(t *testing.T) {
 	}
 }

+func TestTaskDisplayNameUsesNvidiaStressLoader(t *testing.T) {
+	tests := []struct {
+		loader string
+		want   string
+	}{
+		{loader: "", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
+		{loader: "builtin", want: "NVIDIA GPU Stress (bee-gpu-burn)"},
+		{loader: "john", want: "NVIDIA GPU Stress (John/OpenCL)"},
+		{loader: "nccl", want: "NVIDIA GPU Stress (NCCL)"},
+	}
+	for _, tc := range tests {
+		if got := taskDisplayName("nvidia-stress", "acceptance", tc.loader); got != tc.want {
+			t.Fatalf("taskDisplayName(loader=%q)=%q want %q", tc.loader, got, tc.want)
+		}
+	}
+}
+
 func TestRunTaskHonorsCancel(t *testing.T) {
 	t.Parallel()

@@ -154,3 +171,34 @@ func TestRunTaskHonorsCancel(t *testing.T) {
 		t.Fatal("runTask did not return after cancel")
 	}
 }
+
+func TestRunTaskUsesBurnProfileDurationForCPU(t *testing.T) {
+	t.Parallel()
+
+	var gotDuration int
+	q := &taskQueue{
+		opts: &HandlerOptions{App: &app.App{}},
+	}
+	tk := &Task{
+		ID:        "cpu-burn-1",
+		Name:      "CPU Burn-in",
+		Target:    "cpu",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+		params:    taskParams{BurnProfile: "smoke"},
+	}
+	j := &jobState{}
+
+	orig := runCPUAcceptancePackCtx
+	runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, durationSec int, _ func(string)) (string, error) {
+		gotDuration = durationSec
+		return "/tmp/cpu-burn.tar.gz", nil
+	}
+	defer func() { runCPUAcceptancePackCtx = orig }()
+
+	q.runTask(tk, j, context.Background())
+
+	if gotDuration != 5*60 {
+		t.Fatalf("duration=%d want %d", gotDuration, 5*60)
+	}
+}
--- a/bible-local/architecture/runtime-flows.md
+++ b/bible-local/architecture/runtime-flows.md
@@ -81,9 +81,9 @@ build-in-container.sh [--authorized-keys /path/to/keys]
  7. `build-cublas.sh`:
       a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
       b. verify packages against repo `Packages.gz`
-       c. extract headers for `bee-gpu-stress` build
+       c. extract headers for `bee-gpu-burn` worker build
       d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
-  8. build `bee-gpu-stress` against extracted cuBLASLt/cudart headers
+  8. build `bee-gpu-burn` worker against extracted cuBLASLt/cudart headers
  9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
  10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
  11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
@@ -104,7 +104,7 @@ Build host notes:
  1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
  2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
 - NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
-- `bee-gpu-stress` must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
+- `bee-gpu-burn` worker must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
 - The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
 - The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
 - The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
@@ -153,18 +153,17 @@ Current validation state:
 Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.

 Acceptance flows:
-- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + mixed-precision `bee-gpu-stress`
+- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-burn`
+- NVIDIA GPU burn-in can use either `bee-gpu-burn` or `bee-john-gpu-stress` (John the Ripper jumbo via OpenCL)
 - `bee sat memory` → `memtester` archive
 - `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
 - SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
-- `bee-gpu-stress` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
+- `bee-gpu-burn` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
  - Ampere: `fp16` + `fp32`/TF32 tensor-core load
  - Ada / Hopper: add `fp8`
  - Blackwell+: add `fp4`
  - PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
 - Runtime overrides:
-  - `BEE_GPU_STRESS_SECONDS`
-  - `BEE_GPU_STRESS_SIZE_MB`
  - `BEE_MEMTESTER_SIZE_MB`
  - `BEE_MEMTESTER_PASSES`

@@ -179,6 +178,6 @@ Web UI: Acceptance Tests page → Run Test button
 ```

 **Critical invariants:**
-- `bee-gpu-stress` uses `exec.CommandContext` — killed on job context cancel.
+- `bee-gpu-burn` / `bee-john-gpu-stress` use `exec.CommandContext` — killed on job context cancel.
 - Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
 - SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
--- a/bible-local/architecture/system-overview.md
+++ b/bible-local/architecture/system-overview.md
@@ -21,8 +21,8 @@ Fills gaps where Redfish/logpile is blind:
 - Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
 - Machine-readable health summary derived from collector verdicts
 - Operator-triggered acceptance tests for NVIDIA, memory, and storage
-- NVIDIA SAT includes both diagnostic collection and mixed-precision GPU stress via `bee-gpu-stress`
-- `bee-gpu-stress` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
+- NVIDIA SAT includes diagnostic collection plus a lightweight in-image GPU stress step via `bee-gpu-burn`
+- `bee-gpu-burn` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
 - Automatic boot audit with operator-facing local console and SSH access
 - NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
 - SSH access (OpenSSH) always available for inspection and debugging
@@ -70,7 +70,7 @@ Fills gaps where Redfish/logpile is blind:
 | SSH | OpenSSH server |
 | NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
 | NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
-| GPU stress backend | `bee-gpu-stress` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
+| GPU stress backend | `bee-gpu-burn` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
 | Builder | Debian 12 host/VM or Debian 12 container image |

 ## Operator UX
--- a/bible-local/decisions/2026-03-05-nvidia-proprietary-driver.md
+++ b/bible-local/decisions/2026-03-05-nvidia-proprietary-driver.md
@@ -18,6 +18,8 @@ Use the official proprietary NVIDIA `.run` installer for both kernel modules and
 - Kernel modules and nvidia-smi come from a single verified source.
 - NVIDIA publishes `.sha256sum` alongside each installer — download and verify before use.
 - Driver version pinned in `iso/builder/VERSIONS` as `NVIDIA_DRIVER_VERSION`.
+- DCGM must track the CUDA user-mode driver major version exposed by `nvidia-smi`.
+- For NVIDIA driver branch `590` with CUDA `13.x`, use DCGM 4 package family `datacenter-gpu-manager-4-cuda13`; legacy `datacenter-gpu-manager` 3.x does not provide a working path for this stack.
 - Build process: download `.run`, extract, compile `kernel/` sources against `linux-lts-dev`.
 - Modules cached in `dist/nvidia-<version>-<kver>/` — rebuild only on version or kernel change.
 - ISO size increases by ~50MB for .ko files + nvidia-smi.
--- a/bible-local/docs/iso-build-rules.md
+++ b/bible-local/docs/iso-build-rules.md
@@ -13,9 +13,10 @@ Use one of:

 This applies to:
 - `iso/builder/config/package-lists/*.list.chroot`
-- Any package referenced in `grub.cfg`, hooks, or overlay scripts (e.g. file paths like `/boot/memtest86+x64.bin`)
+- Any package referenced in bootloader configs, hooks, or overlay scripts

-## Example of what goes wrong without this
+## Memtest rule

-`memtest86+` in Debian bookworm installs `/boot/memtest86+x64.bin`, not `/boot/memtest86+.bin`.
-Guessing the filename caused a broken GRUB entry that only surfaced at boot time, after a full rebuild.
+Prefer live-build's built-in memtest integration over custom hooks or hardcoded
+bootloader paths. If you ever need to reference memtest files manually, verify
+the exact package file list first for the target Debian release.
--- a/iso/README.md
+++ b/iso/README.md
@@ -48,6 +48,7 @@ sh iso/builder/build-in-container.sh --cache-dir /path/to/cache
 - The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
 - The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
 - Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
+- The NVIDIA variant installs DCGM 4 packages matched to the CUDA user-mode driver major version. For driver branch `590` / CUDA `13.x`, the package family is `datacenter-gpu-manager-4-cuda13` rather than legacy `datacenter-gpu-manager`.
 - Override the container platform only if you know why:

 ```sh
--- a/iso/builder/Dockerfile
+++ b/iso/builder/Dockerfile
@@ -23,6 +23,16 @@ RUN apt-get update -qq && apt-get install -y \
    gcc \
    make \
    perl \
+    pkg-config \
+    yasm \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libgmp-dev \
+    libpcap-dev \
+    libsqlite3-dev \
+    libcurl4-openssl-dev \
+    ocl-icd-opencl-dev \
    linux-headers-amd64 \
    && rm -rf /var/lib/apt/lists/*

--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -8,8 +8,16 @@ NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
 CUBLAS_VERSION=13.0.2.14-1
 CUDA_USERSPACE_VERSION=13.0.96-1
-DCGM_VERSION=3.3.9
+DCGM_VERSION=4.5.2-1
+JOHN_JUMBO_COMMIT=67fcf9fe5a
 ROCM_VERSION=6.3.4
 ROCM_SMI_VERSION=7.4.0.60304-76~22.04
+ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
+ROCM_VALIDATION_SUITE_VERSION=1.1.0.60304-76~22.04
+ROCBLAS_VERSION=4.3.0.60304-76~22.04
+ROCRAND_VERSION=3.2.0.60304-76~22.04
+HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04
+HIPBLASLT_VERSION=0.10.0.60304-76~22.04
+COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -29,9 +29,9 @@ lb config noauto \
    --security true \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
-    --memtest none \
-    --iso-volume "EASY-BEE" \
-    --iso-application "EASY-BEE" \
+    --memtest memtest86+ \
+    --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
+    --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
    --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
    --apt-recommends false \
    --chroot-squashfs-compression-type zstd \
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -29,8 +29,14 @@ typedef void *CUfunction;
 typedef void *CUstream;

 #define CU_SUCCESS 0
+#define CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
+#define MAX_STRESS_STREAMS 16
+#define MAX_CUBLAS_PROFILES 5
+#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
+#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
+#define STRESS_LAUNCH_DEPTH 8

 static const char *ptx_source =
    ".version 6.0\n"
@@ -97,6 +103,9 @@ typedef CUresult (*cuLaunchKernel_fn)(CUfunction,
                                      CUstream,
                                      void **,
                                      void **);
+typedef CUresult (*cuMemGetInfo_fn)(size_t *, size_t *);
+typedef CUresult (*cuStreamCreate_fn)(CUstream *, unsigned int);
+typedef CUresult (*cuStreamDestroy_fn)(CUstream);
 typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
 typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);

@@ -118,6 +127,9 @@ struct cuda_api {
    cuModuleLoadDataEx_fn cuModuleLoadDataEx;
    cuModuleGetFunction_fn cuModuleGetFunction;
    cuLaunchKernel_fn cuLaunchKernel;
+    cuMemGetInfo_fn cuMemGetInfo;
+    cuStreamCreate_fn cuStreamCreate;
+    cuStreamDestroy_fn cuStreamDestroy;
    cuGetErrorName_fn cuGetErrorName;
    cuGetErrorString_fn cuGetErrorString;
 };
@@ -128,9 +140,10 @@ struct stress_report {
    int cc_major;
    int cc_minor;
    int buffer_mb;
+    int stream_count;
    unsigned long iterations;
    uint64_t checksum;
-    char details[1024];
+    char details[16384];
 };

 static int load_symbol(void *lib, const char *name, void **out) {
@@ -144,7 +157,7 @@ static int load_cuda(struct cuda_api *api) {
    if (!api->lib) {
        return 0;
    }
-    return
+    if (!(
        load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
        load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
        load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
@@ -160,7 +173,17 @@ static int load_cuda(struct cuda_api *api) {
        load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
        load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
        load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
-        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel);
+        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel))) {
+        dlclose(api->lib);
+        memset(api, 0, sizeof(*api));
+        return 0;
+    }
+    load_symbol(api->lib, "cuMemGetInfo_v2", (void **)&api->cuMemGetInfo);
+    load_symbol(api->lib, "cuStreamCreate", (void **)&api->cuStreamCreate);
+    if (!load_symbol(api->lib, "cuStreamDestroy_v2", (void **)&api->cuStreamDestroy)) {
+        load_symbol(api->lib, "cuStreamDestroy", (void **)&api->cuStreamDestroy);
+    }
+    return 1;
 }

 static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
@@ -193,14 +216,12 @@ static double now_seconds(void) {
    return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
 }

-#if HAVE_CUBLASLT_HEADERS
 static size_t round_down_size(size_t value, size_t multiple) {
    if (multiple == 0 || value < multiple) {
        return value;
    }
    return value - (value % multiple);
 }
-#endif

 static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *major, int *minor) {
    int cc_major = 0;
@@ -220,6 +241,75 @@ static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *maj
    return 1;
 }

+static int query_multiprocessor_count(struct cuda_api *api, CUdevice dev, int *count) {
+    int mp_count = 0;
+    if (!check_rc(api,
+                  "cuDeviceGetAttribute(multiprocessors)",
+                  api->cuDeviceGetAttribute(&mp_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev))) {
+        return 0;
+    }
+    *count = mp_count;
+    return 1;
+}
+
+static size_t clamp_budget_to_free_memory(struct cuda_api *api, size_t requested_bytes) {
+    size_t free_bytes = 0;
+    size_t total_bytes = 0;
+    size_t max_bytes = requested_bytes;
+
+    if (!api->cuMemGetInfo) {
+        return requested_bytes;
+    }
+    if (api->cuMemGetInfo(&free_bytes, &total_bytes) != CU_SUCCESS || free_bytes == 0) {
+        return requested_bytes;
+    }
+
+    max_bytes = (free_bytes * 9u) / 10u;
+    if (max_bytes < (size_t)4u * 1024u * 1024u) {
+        max_bytes = (size_t)4u * 1024u * 1024u;
+    }
+    if (requested_bytes > max_bytes) {
+        return max_bytes;
+    }
+    return requested_bytes;
+}
+
+static int choose_stream_count(int mp_count, int planned_profiles, size_t total_budget, int have_streams) {
+    int stream_count = 1;
+    if (!have_streams || mp_count <= 0 || planned_profiles <= 0) {
+        return 1;
+    }
+
+    stream_count = mp_count / 8;
+    if (stream_count < 2) {
+        stream_count = 2;
+    }
+    if (stream_count > MAX_STRESS_STREAMS) {
+        stream_count = MAX_STRESS_STREAMS;
+    }
+
+    while (stream_count > 1) {
+        size_t per_stream_budget = total_budget / ((size_t)planned_profiles * (size_t)stream_count);
+        if (per_stream_budget >= MIN_STREAM_BUDGET_BYTES) {
+            break;
+        }
+        stream_count--;
+    }
+    return stream_count;
+}
+
+static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
+    if (!api->cuStreamDestroy) {
+        return;
+    }
+    for (int i = 0; i < count; i++) {
+        if (streams[i]) {
+            api->cuStreamDestroy(streams[i]);
+            streams[i] = NULL;
+        }
+    }
+}
+
 #if HAVE_CUBLASLT_HEADERS
 static void append_detail(char *buf, size_t cap, const char *fmt, ...) {
    size_t len = strlen(buf);
@@ -242,12 +332,19 @@ static int run_ptx_fallback(struct cuda_api *api,
                            int size_mb,
                            struct stress_report *report) {
    CUcontext ctx = NULL;
-    CUdeviceptr device_mem = 0;
    CUmodule module = NULL;
    CUfunction kernel = NULL;
    uint32_t sample[256];
-    uint32_t words = 0;
+    CUdeviceptr device_mem[MAX_STRESS_STREAMS] = {0};
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
+    uint32_t words[MAX_STRESS_STREAMS] = {0};
+    uint32_t rounds[MAX_STRESS_STREAMS] = {0};
+    void *params[MAX_STRESS_STREAMS][3];
+    size_t bytes_per_stream[MAX_STRESS_STREAMS] = {0};
    unsigned long iterations = 0;
+    int mp_count = 0;
+    int stream_count = 1;
+    int launches_per_wave = 0;

    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "driver-ptx");
@@ -260,64 +357,109 @@ static int run_ptx_fallback(struct cuda_api *api,
        return 0;
    }

-    size_t bytes = (size_t)size_mb * 1024u * 1024u;
-    if (bytes < 4u * 1024u * 1024u) {
-        bytes = 4u * 1024u * 1024u;
+    size_t requested_bytes = (size_t)size_mb * 1024u * 1024u;
+    if (requested_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        requested_bytes = MIN_PROFILE_BUDGET_BYTES;
    }
-    if (bytes > (size_t)1024u * 1024u * 1024u) {
-        bytes = (size_t)1024u * 1024u * 1024u;
+    size_t total_bytes = clamp_budget_to_free_memory(api, requested_bytes);
+    if (total_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        total_bytes = MIN_PROFILE_BUDGET_BYTES;
    }
-    words = (uint32_t)(bytes / sizeof(uint32_t));
+    report->buffer_mb = (int)(total_bytes / (1024u * 1024u));

-    if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem, bytes))) {
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (query_multiprocessor_count(api, dev, &mp_count) &&
+        api->cuStreamCreate &&
+        api->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, 1, total_bytes, 1);
    }
-    if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem, 0, bytes))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(api, "cuStreamCreate", api->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(api, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
    }
+    report->stream_count = stream_count;
+
+    for (int lane = 0; lane < stream_count; lane++) {
+        size_t slice = total_bytes / (size_t)stream_count;
+        if (lane == stream_count - 1) {
+            slice = total_bytes - ((size_t)lane * (total_bytes / (size_t)stream_count));
+        }
+        slice = round_down_size(slice, sizeof(uint32_t));
+        if (slice < MIN_PROFILE_BUDGET_BYTES) {
+            slice = MIN_PROFILE_BUDGET_BYTES;
+        }
+        bytes_per_stream[lane] = slice;
+        words[lane] = (uint32_t)(slice / sizeof(uint32_t));
+
+        if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem[lane], slice))) {
+            goto fail;
+        }
+        if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem[lane], 0, slice))) {
+            goto fail;
+        }
+        rounds[lane] = 2048;
+        params[lane][0] = &device_mem[lane];
+        params[lane][1] = &words[lane];
+        params[lane][2] = &rounds[lane];
+    }
+
    if (!check_rc(api,
                  "cuModuleLoadDataEx",
                  api->cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
    }
    if (!check_rc(api, "cuModuleGetFunction", api->cuModuleGetFunction(&kernel, module, "burn"))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
    }

    unsigned int threads = 256;
-    unsigned int blocks = (unsigned int)((words + threads - 1) / threads);
-    uint32_t rounds = 1024;
-    void *params[] = {&device_mem, &words, &rounds};

    double start = now_seconds();
    double deadline = start + (double)seconds;
    while (now_seconds() < deadline) {
-        if (!check_rc(api,
-                      "cuLaunchKernel",
-                      api->cuLaunchKernel(kernel, blocks, 1, 1, threads, 1, 1, 0, NULL, params, NULL))) {
-            api->cuMemFree(device_mem);
-            api->cuCtxDestroy(ctx);
-            return 0;
+        launches_per_wave = 0;
+        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+            int launched_this_batch = 0;
+            for (int lane = 0; lane < stream_count; lane++) {
+                unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
+                if (!check_rc(api,
+                              "cuLaunchKernel",
+                              api->cuLaunchKernel(kernel,
+                                                  blocks,
+                                                  1,
+                                                  1,
+                                                  threads,
+                                                  1,
+                                                  1,
+                                                  0,
+                                                  streams[lane],
+                                                  params[lane],
+                                                  NULL))) {
+                    goto fail;
+                }
+                launches_per_wave++;
+                launched_this_batch++;
+            }
+            if (launched_this_batch <= 0) {
+                break;
+            }
        }
-        iterations++;
+        if (launches_per_wave <= 0) {
+            goto fail;
+        }
+        if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
+            goto fail;
+        }
+        iterations += (unsigned long)launches_per_wave;
    }

-    if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
-    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem, sizeof(sample)))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem[0], sizeof(sample)))) {
+        goto fail;
    }

    for (size_t i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
@@ -326,12 +468,34 @@ static int run_ptx_fallback(struct cuda_api *api,
    report->iterations = iterations;
    snprintf(report->details,
             sizeof(report->details),
-             "profile_int32_fallback=OK iterations=%lu\n",
+             "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
+             size_mb,
+             report->buffer_mb,
+             report->stream_count,
+             STRESS_LAUNCH_DEPTH,
+             bytes_per_stream[0] / (1024u * 1024u),
             iterations);

-    api->cuMemFree(device_mem);
+    for (int lane = 0; lane < stream_count; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, stream_count);
    api->cuCtxDestroy(ctx);
    return 1;
+
+fail:
+    for (int lane = 0; lane < MAX_STRESS_STREAMS; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, MAX_STRESS_STREAMS);
+    if (ctx) {
+        api->cuCtxDestroy(ctx);
+    }
+    return 0;
 }

 #if HAVE_CUBLASLT_HEADERS
@@ -418,6 +582,7 @@ struct profile_desc {

 struct prepared_profile {
    struct profile_desc desc;
+    CUstream stream;
    cublasLtMatmulDesc_t op_desc;
    cublasLtMatrixLayout_t a_layout;
    cublasLtMatrixLayout_t b_layout;
@@ -617,8 +782,8 @@ static uint64_t choose_square_dim(size_t budget_bytes, size_t bytes_per_cell, in
    if (dim < (uint64_t)multiple) {
        dim = (uint64_t)multiple;
    }
-    if (dim > 8192u) {
-        dim = 8192u;
+    if (dim > 65536u) {
+        dim = 65536u;
    }
    return dim;
 }
@@ -704,10 +869,12 @@ static int prepare_profile(struct cublaslt_api *cublas,
                           cublasLtHandle_t handle,
                           struct cuda_api *cuda,
                           const struct profile_desc *desc,
+                           CUstream stream,
                           size_t profile_budget_bytes,
                           struct prepared_profile *out) {
    memset(out, 0, sizeof(*out));
    out->desc = *desc;
+    out->stream = stream;

    size_t bytes_per_cell = 0;
    bytes_per_cell += bytes_for_elements(desc->a_type, 1);
@@ -935,7 +1102,7 @@ static int run_cublas_profile(cublasLtHandle_t handle,
                                               &profile->heuristic.algo,
                                               (void *)(uintptr_t)profile->workspace_dev,
                                               profile->workspace_size,
-                                               (cudaStream_t)0));
+                                               profile->stream));
 }

 static int run_cublaslt_stress(struct cuda_api *cuda,
@@ -947,13 +1114,22 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int size_mb,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[sizeof(k_profiles) / sizeof(k_profiles[0])];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
    uint16_t sample[256];
    int cc = cc_major * 10 + cc_minor;
    int planned = 0;
    int active = 0;
+    int mp_count = 0;
+    int stream_count = 1;
+    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int prepared_count = 0;
+    int wave_launches = 0;
+    size_t requested_budget = 0;
+    size_t total_budget = 0;
+    size_t per_profile_budget = 0;

    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "cublasLt");
@@ -986,16 +1162,46 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

-    size_t total_budget = (size_t)size_mb * 1024u * 1024u;
-    if (total_budget < (size_t)planned * 4u * 1024u * 1024u) {
-        total_budget = (size_t)planned * 4u * 1024u * 1024u;
+    requested_budget = (size_t)size_mb * 1024u * 1024u;
+    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
    }
-    size_t per_profile_budget = total_budget / (size_t)planned;
-    if (per_profile_budget < 4u * 1024u * 1024u) {
-        per_profile_budget = 4u * 1024u * 1024u;
+    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
+    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
    }
+    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
+        cuda->cuStreamCreate &&
+        cuda->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+    }
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(cuda, "cuStreamCreate", cuda->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(cuda, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
+    }
+    report->stream_count = stream_count;
+    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
+        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
+    }
+    report->buffer_mb = (int)(total_budget / (1024u * 1024u));
+    append_detail(report->details,
+                  sizeof(report->details),
+                  "requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
+                  size_mb,
+                  report->buffer_mb,
+                  report->stream_count,
+                  STRESS_LAUNCH_DEPTH,
+                  mp_count,
+                  per_profile_budget / (1024u * 1024u));

-    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+    for (int i = 0; i < profile_count; i++) {
        const struct profile_desc *desc = &k_profiles[i];
        if (!(desc->enabled && cc >= desc->min_cc)) {
            append_detail(report->details,
@@ -1005,63 +1211,87 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
-        if (prepare_profile(&cublas, handle, cuda, desc, per_profile_budget, &prepared[i])) {
-            active++;
-            append_detail(report->details,
-                          sizeof(report->details),
-                          "%s=READY dim=%llux%llux%llu block=%s\n",
-                          desc->name,
-                          (unsigned long long)prepared[i].m,
-                          (unsigned long long)prepared[i].n,
-                          (unsigned long long)prepared[i].k,
-                          desc->block_label);
-        } else {
-            append_detail(report->details, sizeof(report->details), "%s=SKIPPED unsupported\n", desc->name);
+        for (int lane = 0; lane < stream_count; lane++) {
+            CUstream stream = streams[lane];
+            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
+                break;
+            }
+            if (prepare_profile(&cublas, handle, cuda, desc, stream, per_profile_budget, &prepared[prepared_count])) {
+                active++;
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=READY dim=%llux%llux%llu block=%s stream=%d\n",
+                              desc->name,
+                              lane,
+                              (unsigned long long)prepared[prepared_count].m,
+                              (unsigned long long)prepared[prepared_count].n,
+                              (unsigned long long)prepared[prepared_count].k,
+                              desc->block_label,
+                              lane);
+                prepared_count++;
+            } else {
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=SKIPPED unsupported\n",
+                              desc->name,
+                              lane);
+            }
        }
    }

    if (active <= 0) {
        cublas.cublasLtDestroy(handle);
+        destroy_streams(cuda, streams, stream_count);
        cuda->cuCtxDestroy(ctx);
        return 0;
    }

    double deadline = now_seconds() + (double)seconds;
    while (now_seconds() < deadline) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
-            if (!prepared[i].ready) {
-                continue;
-            }
-            if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
-                append_detail(report->details,
-                              sizeof(report->details),
-                              "%s=FAILED runtime\n",
-                              prepared[i].desc.name);
-                for (size_t j = 0; j < sizeof(prepared) / sizeof(prepared[0]); j++) {
-                    destroy_profile(&cublas, cuda, &prepared[j]);
+        wave_launches = 0;
+        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+            int launched_this_batch = 0;
+            for (int i = 0; i < prepared_count; i++) {
+                if (!prepared[i].ready) {
+                    continue;
                }
-                cublas.cublasLtDestroy(handle);
-                cuda->cuCtxDestroy(ctx);
-                return 0;
+                if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
+                    append_detail(report->details,
+                                  sizeof(report->details),
+                                  "%s=FAILED runtime\n",
+                                  prepared[i].desc.name);
+                    for (int j = 0; j < prepared_count; j++) {
+                        destroy_profile(&cublas, cuda, &prepared[j]);
+                    }
+                    cublas.cublasLtDestroy(handle);
+                    destroy_streams(cuda, streams, stream_count);
+                    cuda->cuCtxDestroy(ctx);
+                    return 0;
+                }
+                prepared[i].iterations++;
+                report->iterations++;
+                wave_launches++;
+                launched_this_batch++;
            }
-            prepared[i].iterations++;
-            report->iterations++;
-            if (now_seconds() >= deadline) {
+            if (launched_this_batch <= 0) {
                break;
            }
        }
-    }
-
-    if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
-            destroy_profile(&cublas, cuda, &prepared[i]);
+        if (wave_launches <= 0) {
+            break;
+        }
+        if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
+            for (int i = 0; i < prepared_count; i++) {
+                destroy_profile(&cublas, cuda, &prepared[i]);
+            }
+            cublas.cublasLtDestroy(handle);
+            destroy_streams(cuda, streams, stream_count);
+            cuda->cuCtxDestroy(ctx);
+            return 0;
        }
-        cublas.cublasLtDestroy(handle);
-        cuda->cuCtxDestroy(ctx);
-        return 0;
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        if (!prepared[i].ready) {
            continue;
        }
@@ -1072,7 +1302,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                      prepared[i].iterations);
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        if (prepared[i].ready) {
            if (check_rc(cuda, "cuMemcpyDtoH", cuda->cuMemcpyDtoH(sample, prepared[i].d_dev, sizeof(sample)))) {
                for (size_t j = 0; j < sizeof(sample) / sizeof(sample[0]); j++) {
@@ -1083,10 +1313,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        destroy_profile(&cublas, cuda, &prepared[i]);
    }
    cublas.cublasLtDestroy(handle);
+    destroy_streams(cuda, streams, stream_count);
    cuda->cuCtxDestroy(ctx);
    return 1;
 }
@@ -1095,13 +1326,16 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
+    int device_index = 0;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
            size_mb = atoi(argv[++i]);
+        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
+            device_index = atoi(argv[++i]);
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N]\n", argv[0]);
+            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
            return 2;
        }
    }
@@ -1111,6 +1345,9 @@ int main(int argc, char **argv) {
    if (size_mb <= 0) {
        size_mb = 64;
    }
+    if (device_index < 0) {
+        device_index = 0;
+    }

    struct cuda_api cuda;
    if (!load_cuda(&cuda)) {
@@ -1133,8 +1370,13 @@ int main(int argc, char **argv) {
        return 1;
    }

+    if (device_index >= count) {
+        fprintf(stderr, "device index %d out of range (found %d CUDA device(s))\n", device_index, count);
+        return 1;
+    }
+
    CUdevice dev = 0;
-    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, 0))) {
+    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, device_index))) {
        return 1;
    }

@@ -1162,10 +1404,12 @@ int main(int argc, char **argv) {
    }

    printf("device=%s\n", report.device);
+    printf("device_index=%d\n", device_index);
    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
    printf("backend=%s\n", report.backend);
    printf("duration_s=%d\n", seconds);
    printf("buffer_mb=%d\n", report.buffer_mb);
+    printf("streams=%d\n", report.stream_count);
    printf("iterations=%lu\n", report.iterations);
    printf("checksum=%llu\n", (unsigned long long)report.checksum);
    if (report.details[0] != '\0') {
--- a/iso/builder/build-cublas.sh
+++ b/iso/builder/build-cublas.sh
@@ -1,9 +1,9 @@
 #!/bin/sh
-# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-stress.
+# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-burn worker.
 #
 # Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
 # verifies them against Packages.gz, and extracts the small subset we need:
-#   - headers for compiling bee-gpu-stress against cuBLASLt
+#   - headers for compiling bee-gpu-burn worker against cuBLASLt
 #   - runtime libs for libcublas, libcublasLt, libcudart inside the ISO

 set -e
--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -12,6 +12,7 @@ CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
 AUTH_KEYS=""
 REBUILD_IMAGE=0
 CLEAN_CACHE=0
+VARIANT="all"

 . "${BUILDER_DIR}/VERSIONS"

@@ -34,14 +35,23 @@ while [ $# -gt 0 ]; do
            REBUILD_IMAGE=1
            shift
            ;;
+        --variant)
+            VARIANT="$2"
+            shift 2
+            ;;
        *)
            echo "unknown arg: $1" >&2
-            echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys]" >&2
+            # List every variant the validation case accepts, including nogpu.
+            echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|amd|nogpu|all]" >&2
            exit 1
            ;;
    esac
 done

+# Validate --variant up front: anything other than nvidia, amd, nogpu or all
+# is reported on stderr and aborts with status 1.
+case "$VARIANT" in
+    nvidia|amd|nogpu|all) ;;
+    *) echo "unknown variant: $VARIANT (expected nvidia, amd, nogpu, or all)" >&2; exit 1 ;;
+esac
+
 if [ "$CLEAN_CACHE" = "1" ]; then
    echo "=== cleaning build cache: ${CACHE_DIR} ==="
    rm -rf "${CACHE_DIR:?}/go-build" \
@@ -49,8 +59,10 @@ if [ "$CLEAN_CACHE" = "1" ]; then
           "${CACHE_DIR:?}/tmp" \
           "${CACHE_DIR:?}/bee" \
           "${CACHE_DIR:?}/lb-packages"
-    echo "=== cleaning live-build work dir: ${REPO_ROOT}/dist/live-build-work ==="
-    rm -rf "${REPO_ROOT}/dist/live-build-work"
+    echo "=== cleaning live-build work dirs ==="
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-amd"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nogpu"
    echo "=== caches cleared, proceeding with build ==="
 fi

@@ -108,34 +120,75 @@ else
    echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
 fi

-set -- \
-    run --rm --privileged \
-    --platform "${BUILDER_PLATFORM}" \
-    -v "${REPO_ROOT}:/work" \
-    -v "${CACHE_DIR}:/cache" \
-    -e BEE_CONTAINER_BUILD=1 \
-    -e GOCACHE=/cache/go-build \
-    -e GOMODCACHE=/cache/go-mod \
-    -e TMPDIR=/cache/tmp \
-    -e BEE_CACHE_DIR=/cache/bee \
-    -w /work \
-    "${IMAGE_REF}" \
-    sh /work/iso/builder/build.sh
-
-if [ -n "$AUTH_KEYS" ]; then
-    set -- run --rm --privileged \
-        --platform "${BUILDER_PLATFORM}" \
-        -v "${REPO_ROOT}:/work" \
-        -v "${CACHE_DIR}:/cache" \
-        -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
+# Print the container "run" argument string for variant $1 on stdout.
+# When AUTH_KEYS is set the string also carries the read-only authorized-keys
+# mount and the --authorized-keys flag for build.sh.
+# NOTE(review): nothing in the visible part of this script calls this helper;
+# run_variant spells out its own argument list. The echoed string is unquoted,
+# so paths containing whitespace would not survive word splitting. Confirm
+# whether this function is still needed.
+build_run_args() {
+    _variant="$1"
+    _auth_arg=""
+    if [ -n "$AUTH_KEYS" ]; then
+        _auth_arg="--authorized-keys /tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    fi
+    echo "run --rm --privileged \
+        --platform ${BUILDER_PLATFORM} \
+        -v ${REPO_ROOT}:/work \
+        -v ${CACHE_DIR}:/cache \
+        ${AUTH_KEYS:+-v ${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro} \
        -e BEE_CONTAINER_BUILD=1 \
        -e GOCACHE=/cache/go-build \
        -e GOMODCACHE=/cache/go-mod \
        -e TMPDIR=/cache/tmp \
        -e BEE_CACHE_DIR=/cache/bee \
        -w /work \
-        "${IMAGE_REF}" \
-        sh /work/iso/builder/build.sh --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
-fi
+        ${IMAGE_REF} \
+        sh /work/iso/builder/build.sh --variant ${_variant} ${_auth_arg}"
+}

-"$CONTAINER_TOOL" "$@"
+# Run iso/builder/build.sh for variant $1 inside the builder container.
+# When AUTH_KEYS is set, AUTH_KEYS_DIR is mounted read-only at
+# /tmp/bee-authkeys and build.sh additionally receives --authorized-keys.
+# The argument list is assembled once in "$@" (positional parameters are
+# local to the function) so both cases share the common options.
+# Returns the exit status of the container tool.
+run_variant() {
+    _v="$1"
+    echo "=== building variant: ${_v} ==="
+
+    set -- run --rm --privileged \
+        --platform "${BUILDER_PLATFORM}" \
+        -v "${REPO_ROOT}:/work" \
+        -v "${CACHE_DIR}:/cache"
+    if [ -n "$AUTH_KEYS" ]; then
+        set -- "$@" -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro"
+    fi
+    set -- "$@" \
+        -e BEE_CONTAINER_BUILD=1 \
+        -e GOCACHE=/cache/go-build \
+        -e GOMODCACHE=/cache/go-mod \
+        -e TMPDIR=/cache/tmp \
+        -e BEE_CACHE_DIR=/cache/bee \
+        -w /work \
+        "${IMAGE_REF}" \
+        sh /work/iso/builder/build.sh --variant "${_v}"
+    if [ -n "$AUTH_KEYS" ]; then
+        set -- "$@" --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    fi
+
+    "$CONTAINER_TOOL" "$@"
+}
+
+# Build the requested variant; "all" builds nvidia, amd and nogpu in that order.
+case "$VARIANT" in
+    all)
+        for _each_variant in nvidia amd nogpu; do
+            run_variant "${_each_variant}"
+        done
+        ;;
+    nvidia|amd|nogpu)
+        run_variant "$VARIANT"
+        ;;
+esac
--- a/iso/builder/build-john.sh
+++ b/iso/builder/build-john.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+# build-john.sh — build John the Ripper jumbo with OpenCL support for the LiveCD.
+#
+# Downloads a pinned source snapshot from the official openwall/john repository,
+# builds it inside the builder container, and caches the resulting run/ tree.
+#
+# Usage: build-john.sh <john-commit> <dist-dir>
+#   john-commit  ref of the openwall/john snapshot to fetch (used in the archive URL)
+#   dist-dir     directory that receives john-<john-commit>/run
+#
+# The source tarball is kept under ${BEE_CACHE_DIR:-<dist-dir>/cache}/john-downloads.
+# Exits 0 when a cached build already exists, non-zero on any failure.
+
+set -e
+
+JOHN_COMMIT="$1"
+DIST_DIR="$2"
+
+if [ -z "$JOHN_COMMIT" ] || [ -z "$DIST_DIR" ]; then
+    echo "usage: $0 <john-commit> <dist-dir>" >&2
+    exit 1
+fi
+
+echo "=== John the Ripper jumbo ${JOHN_COMMIT} ==="
+
+CACHE_DIR="${DIST_DIR}/john-${JOHN_COMMIT}"
+CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
+DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/john-downloads"
+SRC_TAR="${DOWNLOAD_CACHE_DIR}/john-${JOHN_COMMIT}.tar.gz"
+SRC_URL="https://github.com/openwall/john/archive/${JOHN_COMMIT}.tar.gz"
+
+if [ -x "${CACHE_DIR}/run/john" ] && [ -f "${CACHE_DIR}/run/john.conf" ]; then
+    echo "=== john cached, skipping build ==="
+    echo "run dir: ${CACHE_DIR}/run"
+    exit 0
+fi
+
+mkdir -p "${DOWNLOAD_CACHE_DIR}"
+if [ ! -f "${SRC_TAR}" ]; then
+    echo "=== downloading john source snapshot ==="
+    # Download to a temporary name and rename on success: wget -O creates the
+    # output file even when the transfer fails, and a truncated tarball left at
+    # ${SRC_TAR} would be mistaken for a valid cached download on the next run.
+    SRC_TAR_PART="${SRC_TAR}.part.$$"
+    if ! wget --show-progress -O "${SRC_TAR_PART}" "${SRC_URL}"; then
+        rm -f "${SRC_TAR_PART}"
+        echo "ERROR: failed to download ${SRC_URL}" >&2
+        exit 1
+    fi
+    mv "${SRC_TAR_PART}" "${SRC_TAR}"
+fi
+
+BUILD_TMP=$(mktemp -d)
+# Clean up on exit; turn INT/TERM into an exit so the EXIT trap runs and the
+# script does not keep going after the signal handler returns.
+trap 'rm -rf "${BUILD_TMP}"' EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+cd "${BUILD_TMP}"
+if ! tar xf "${SRC_TAR}"; then
+    # Drop a tarball that cannot be unpacked so the next run downloads it again.
+    rm -f "${SRC_TAR}"
+    echo "ERROR: failed to unpack ${SRC_TAR}" >&2
+    exit 1
+fi
+SRC_DIR=$(find . -maxdepth 1 -type d -name 'john-*' | head -1)
+[ -n "${SRC_DIR}" ] || { echo "ERROR: john source directory not found" >&2; exit 1; }
+
+cd "${SRC_DIR}/src"
+echo "=== configuring john ==="
+./configure
+echo "=== building john ==="
+make clean >/dev/null 2>&1 || true
+make -j"$(nproc)"
+
+mkdir -p "${CACHE_DIR}"
+# Remove any leftover run/ tree first: with an existing destination directory
+# cp -a would nest the copy as run/run instead of replacing it.
+rm -rf "${CACHE_DIR:?}/run"
+cp -a "../run" "${CACHE_DIR}/run"
+chmod +x "${CACHE_DIR}/run/john"
+
+echo "=== john build complete ==="
+echo "run dir: ${CACHE_DIR}/run"
--- a/iso/builder/build-nccl-tests.sh
+++ b/iso/builder/build-nccl-tests.sh
@@ -9,6 +9,7 @@
 #
 # Output layout:
 #   $CACHE_DIR/bin/all_reduce_perf
+#   $CACHE_DIR/lib/libcudart.so* copied from the nvcc toolchain used to build nccl-tests

 set -e

@@ -30,7 +31,7 @@ CACHE_DIR="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
 CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
 DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"

-if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ]; then
+if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ] && [ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' 2>/dev/null | wc -l)" -gt 0 ]; then
    echo "=== nccl-tests cached, skipping build ==="
    echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
    exit 0
@@ -52,6 +53,23 @@ echo "nvcc: $NVCC"
 CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
 echo "CUDA_HOME: $CUDA_HOME"

+# Print the first candidate directory under ${CUDA_HOME} that directly holds
+# a regular file matching libcudart.so*, and return 0. Return 1 when none of
+# the candidates qualifies.
+find_cudart_dir() {
+    for dir in \
+        "${CUDA_HOME}/targets/x86_64-linux/lib" \
+        "${CUDA_HOME}/targets/x86_64-linux/lib/stubs" \
+        "${CUDA_HOME}/lib64" \
+        "${CUDA_HOME}/lib"; do
+        [ -d "$dir" ] || continue
+        if find "$dir" -maxdepth 1 -name 'libcudart.so*' -type f | grep -q .; then
+            printf '%s\n' "$dir"
+            return 0
+        fi
+    done
+    return 1
+}
+
+CUDART_DIR="$(find_cudart_dir)" || { echo "ERROR: libcudart.so* not found under ${CUDA_HOME}"; exit 1; }
+echo "cudart dir: $CUDART_DIR"
+
 # Download libnccl-dev for nccl.h
 REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian${DEBIAN_VERSION}/x86_64"
 DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
@@ -136,6 +154,11 @@ mkdir -p "${CACHE_DIR}/bin"
 cp "./build/all_reduce_perf" "${CACHE_DIR}/bin/all_reduce_perf"
 chmod +x "${CACHE_DIR}/bin/all_reduce_perf"

+mkdir -p "${CACHE_DIR}/lib"
+find "${CUDART_DIR}" -maxdepth 1 -name 'libcudart.so*' -type f -exec cp -a {} "${CACHE_DIR}/lib/" \;
+[ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' -type f | wc -l)" -gt 0 ] || { echo "ERROR: libcudart runtime copy failed"; exit 1; }
+
 echo "=== nccl-tests build complete ==="
 echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
 ls -lh "${CACHE_DIR}/bin/all_reduce_perf"
+ls -lh "${CACHE_DIR}/lib/"libcudart.so* 2>/dev/null || true
--- a/iso/builder/build-nvidia-module.sh
+++ b/iso/builder/build-nvidia-module.sh
@@ -10,7 +10,7 @@
 # Output layout:
 #   $CACHE_DIR/modules/   — nvidia*.ko files
 #   $CACHE_DIR/bin/       — nvidia-smi, nvidia-debugdump
-#   $CACHE_DIR/lib/       — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
+#   $CACHE_DIR/lib/       — libnvidia-ml.so*, libcuda.so*, OpenCL-related libs

 set -e

@@ -133,7 +133,14 @@ fi
 # Copy ALL userspace library files.
 # libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
 # (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
+for lib in \
+    libnvidia-ml \
+    libcuda \
+    libnvidia-ptxjitcompiler \
+    libnvidia-opencl \
+    libnvidia-compiler \
+    libnvidia-nvvm \
+    libnvidia-fatbinaryloader; do
    count=0
    for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
        cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
@@ -150,7 +157,14 @@ ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
 [ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }

 # Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
+for lib in \
+    libnvidia-ml \
+    libcuda \
+    libnvidia-ptxjitcompiler \
+    libnvidia-opencl \
+    libnvidia-compiler \
+    libnvidia-nvvm \
+    libnvidia-fatbinaryloader; do
    versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
    [ -n "$versioned" ] || continue
    base=$(basename "$versioned")
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -13,19 +13,29 @@ BUILDER_DIR="${REPO_ROOT}/iso/builder"
 OVERLAY_DIR="${REPO_ROOT}/iso/overlay"
 DIST_DIR="${REPO_ROOT}/dist"
 VENDOR_DIR="${REPO_ROOT}/iso/vendor"
-BUILD_WORK_DIR="${DIST_DIR}/live-build-work"
-OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage"
 CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
 AUTH_KEYS=""
+BEE_GPU_VENDOR="nvidia"

 # parse args
 while [ $# -gt 0 ]; do
    case "$1" in
        --authorized-keys) AUTH_KEYS="$2"; shift 2 ;;
+        --variant) BEE_GPU_VENDOR="$2"; shift 2 ;;
        *) echo "unknown arg: $1"; exit 1 ;;
    esac
 done

+case "$BEE_GPU_VENDOR" in
+    nvidia|amd|nogpu) ;;
+    *) echo "unknown variant: $BEE_GPU_VENDOR (expected nvidia, amd, or nogpu)" >&2; exit 1 ;;
+esac
+
+BUILD_WORK_DIR="${DIST_DIR}/live-build-work-${BEE_GPU_VENDOR}"
+OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BEE_GPU_VENDOR}"
+
+export BEE_GPU_VENDOR
+
 . "${BUILDER_DIR}/VERSIONS"
 export PATH="$PATH:/usr/local/go/bin"

@@ -101,8 +111,231 @@ resolve_iso_version() {
    resolve_audit_version
 }

+# Print the member paths of an ISO image, one per line.
+#   $1 — path to the ISO image
+# bsdtar is preferred; xorriso is the fallback, restricted to regular files and
+# with the leading "/" stripped from each line by sed.
+# Exit status: that of bsdtar, or of the xorriso|sed pipeline (i.e. sed's);
+# 127 when neither tool is installed.
+# NOTE(review): confirm that xorriso emits bare, unquoted paths for this -find
+# invocation — callers match the output with anchored patterns like '^boot/...'.
+iso_list_files() {
+    iso_path="$1"
+
+    if command -v bsdtar >/dev/null 2>&1; then
+        bsdtar -tf "$iso_path"
+    elif command -v xorriso >/dev/null 2>&1; then
+        xorriso -indev "$iso_path" -find / -type f -print 2>/dev/null | sed 's#^/##'
+    else
+        return 127
+    fi
+}
+
+# Write the content of one ISO member to stdout.
+#   $1 — path to the ISO image
+#   $2 — member path inside the ISO, without a leading slash
+# bsdtar is preferred; xorriso is the fallback and is given "/$2".
+# Exit status: that of the tool that ran, or 127 when neither is installed.
+# NOTE(review): verify the xorriso -cat invocation against the installed
+# xorriso version; its stderr is discarded, so a rejected option is silent.
+iso_extract_file() {
+    iso_path="$1"
+    iso_member="$2"
+
+    if command -v bsdtar >/dev/null 2>&1; then
+        bsdtar -xOf "$iso_path" "$iso_member"
+    elif command -v xorriso >/dev/null 2>&1; then
+        xorriso -osirrox on -indev "$iso_path" -cat "/$iso_member" 2>/dev/null
+    else
+        return 127
+    fi
+}
+
+# Succeed when an ISO reader (bsdtar or xorriso) is on PATH; otherwise abort
+# the build through memtest_fail.
+#   $1 — optional ISO path, forwarded to memtest_fail for its debug dump
+require_iso_reader() {
+    if command -v bsdtar >/dev/null 2>&1 || command -v xorriso >/dev/null 2>&1; then
+        return 0
+    fi
+    memtest_fail "ISO reader is required for validation/debug (expected bsdtar or xorriso)" "${1:-}"
+}
+
+# Print a diagnostic report about memtest86+ integration at a given build phase.
+#   $1 — phase label (e.g. "pre-build"); spaces and slashes become "_" in the log name
+#   $2 — optional live-build work dir; when it exists its package lists, chroot/boot,
+#        binary/boot and package cache are inspected
+#   $3 — optional ISO path; when it exists its file list and boot configs are inspected
+# The report goes to stdout and, when LOG_DIR is an existing directory, is also
+# written to ${LOG_DIR}/memtest-<phase>.log via tee.
+dump_memtest_debug() {
+    phase="$1"
+    lb_dir="${2:-}"
+    iso_path="${3:-}"
+    phase_slug="$(printf '%s' "${phase}" | tr ' /' '__')"
+    memtest_log="${LOG_DIR:-}/memtest-${phase_slug}.log"
+
+    # The whole report is produced in a subshell so it can be piped as one stream.
+    (
+        echo "=== memtest debug: ${phase} ==="
+
+        echo "-- auto/config --"
+        if [ -f "${BUILDER_DIR}/auto/config" ]; then
+            grep -n -- '--memtest' "${BUILDER_DIR}/auto/config" || echo "  (no --memtest line found)"
+        else
+            echo "  (missing ${BUILDER_DIR}/auto/config)"
+        fi
+
+        echo "-- source bootloader templates --"
+        for cfg in \
+            "${BUILDER_DIR}/config/bootloaders/grub-pc/grub.cfg" \
+            "${BUILDER_DIR}/config/bootloaders/isolinux/live.cfg.in"; do
+            if [ -f "$cfg" ]; then
+                echo "  file: $cfg"
+                grep -n 'Memory Test\|memtest' "$cfg" || echo "    (no memtest lines)"
+            fi
+        done
+
+        if [ -n "$lb_dir" ] && [ -d "$lb_dir" ]; then
+            echo "-- live-build workdir package lists --"
+            for pkg in \
+                "$lb_dir/config/package-lists/bee.list.chroot" \
+                "$lb_dir/config/package-lists/bee-gpu.list.chroot" \
+                "$lb_dir/config/package-lists/bee-nvidia.list.chroot"; do
+                if [ -f "$pkg" ]; then
+                    echo "  file: $pkg"
+                    grep -n 'memtest' "$pkg" || echo "    (no memtest lines)"
+                fi
+            done
+
+            echo "-- live-build chroot/boot --"
+            if [ -d "$lb_dir/chroot/boot" ]; then
+                find "$lb_dir/chroot/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/chroot/boot)"
+            fi
+
+            echo "-- live-build binary/boot --"
+            if [ -d "$lb_dir/binary/boot" ]; then
+                find "$lb_dir/binary/boot" -maxdepth 1 -name 'memtest*' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/binary/boot)"
+            fi
+
+            echo "-- live-build package cache --"
+            if [ -d "$lb_dir/cache/packages.chroot" ]; then
+                find "$lb_dir/cache/packages.chroot" -maxdepth 1 -name 'memtest86+*.deb' -print | sed 's/^/  /' || true
+            else
+                echo "  (missing $lb_dir/cache/packages.chroot)"
+            fi
+        fi
+
+        if [ -n "$iso_path" ] && [ -f "$iso_path" ]; then
+            echo "-- ISO memtest files --"
+            # grep must be the last pipeline stage: a pipeline's status is that of its
+            # final command, so with sed last the "no memtest files" fallback never ran.
+            iso_list_files "$iso_path" | sed 's/^/  /' | grep 'memtest' || echo "  (no memtest files in ISO)"
+
+            echo "-- ISO GRUB memtest lines --"
+            iso_extract_file "$iso_path" boot/grub/grub.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo "  (no memtest lines in boot/grub/grub.cfg)"
+
+            echo "-- ISO isolinux memtest lines --"
+            iso_extract_file "$iso_path" isolinux/live.cfg 2>/dev/null | grep -n 'Memory Test\|memtest' || echo "  (no memtest lines in isolinux/live.cfg)"
+        fi
+
+        echo "=== end memtest debug: ${phase} ==="
+    ) | {
+        # Mirror the report into the per-phase log only when the log dir exists.
+        if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ]; then
+            tee "${memtest_log}"
+        else
+            cat
+        fi
+    }
+}
+
+# Report a memtest-related build failure and terminate the script.
+#   $1 — error message (printed as "ERROR: <message>")
+#   $2 — optional ISO path handed to dump_memtest_debug
+# Both the error line and the "failure" debug dump go to stderr; exits with 1.
+memtest_fail() {
+    msg="$1"
+    iso_path="${2:-}"
+    {
+        echo "ERROR: ${msg}"
+        dump_memtest_debug "failure" "${LB_DIR:-}" "$iso_path"
+    } >&2
+    exit 1
+}
+
+# Verify that a built ISO ships memtest86+ and exposes it in both boot menus.
+#   $1 — path to the ISO image
+# Checks, in order: the ISO exists, an ISO reader is available, the BIOS and EFI
+# memtest binaries are listed in the ISO, and boot/grub/grub.cfg and
+# isolinux/live.cfg contain the expected menu entry and binary paths.
+# Any failed check aborts the build through memtest_fail (exit status 1).
+validate_iso_memtest() {
+    iso_path="$1"
+    echo "=== validating memtest in ISO ==="
+
+    [ -f "$iso_path" ] || memtest_fail "ISO not found for validation: $iso_path" "$iso_path"
+    require_iso_reader "$iso_path"
+
+    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.bin$' || {
+        memtest_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin" "$iso_path"
+    }
+    iso_list_files "$iso_path" | grep -q '^boot/memtest86+x64\.efi$' || {
+        memtest_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi" "$iso_path"
+    }
+
+    grub_cfg="$(mktemp)"
+    isolinux_cfg="$(mktemp)"
+    memtest_error=""
+
+    # Record only the first failing check; memtest_fail exits the script, so it is
+    # called after the temp files are removed to avoid leaking them on failure.
+    if ! iso_extract_file "$iso_path" boot/grub/grub.cfg > "$grub_cfg"; then
+        memtest_error="failed to extract boot/grub/grub.cfg from ISO"
+    elif ! iso_extract_file "$iso_path" isolinux/live.cfg > "$isolinux_cfg"; then
+        memtest_error="failed to extract isolinux/live.cfg from ISO"
+    elif ! grep -q 'Memory Test (memtest86+)' "$grub_cfg"; then
+        memtest_error="GRUB menu entry for memtest is missing"
+    elif ! grep -q '/boot/memtest86+x64\.efi' "$grub_cfg"; then
+        memtest_error="GRUB memtest EFI path is missing"
+    elif ! grep -q '/boot/memtest86+x64\.bin' "$grub_cfg"; then
+        memtest_error="GRUB memtest BIOS path is missing"
+    elif ! grep -q 'Memory Test (memtest86+)' "$isolinux_cfg"; then
+        memtest_error="isolinux menu entry for memtest is missing"
+    elif ! grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg"; then
+        memtest_error="isolinux memtest path is missing"
+    fi
+
+    rm -f "$grub_cfg" "$isolinux_cfg"
+    [ -z "$memtest_error" ] || memtest_fail "$memtest_error" "$iso_path"
+    echo "=== memtest validation OK ==="
+}
+
 AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
 ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"
+ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64"
+LOG_DIR="${DIST_DIR}/${ISO_BASENAME}.logs"
+LOG_ARCHIVE="${DIST_DIR}/${ISO_BASENAME}.logs.tar.gz"
+ISO_OUT="${DIST_DIR}/${ISO_BASENAME}.iso"
+LOG_OUT="${LOG_DIR}/build.log"
+
+# Tear down build logging, archive the log directory, and exit.
+#   $1 — exit status to propagate (defaults to $? at the time of the call)
+# Installed as the EXIT/INT/TERM/HUP trap handler by start_build_log.
+cleanup_build_log() {
+    status="${1:-$?}"
+    # Reset the traps first so the exit at the end does not re-enter this handler.
+    trap - EXIT INT TERM HUP
+
+    if [ "${BUILD_LOG_ACTIVE:-0}" = "1" ]; then
+        BUILD_LOG_ACTIVE=0
+        # Restore stdout/stderr from fds 3/4, then close the saved copies.
+        exec 1>&3 2>&4
+        exec 3>&- 4>&-
+        # Wait for the background tee (if its PID was recorded) before removing the FIFO.
+        if [ -n "${BUILD_TEE_PID:-}" ]; then
+            wait "${BUILD_TEE_PID}" 2>/dev/null || true
+        fi
+        rm -f "${BUILD_LOG_PIPE}"
+    fi
+
+    # Best-effort: replace any previous archive with a fresh tarball of LOG_DIR.
+    if [ -n "${LOG_DIR:-}" ] && [ -d "${LOG_DIR}" ] && command -v tar >/dev/null 2>&1; then
+        rm -f "${LOG_ARCHIVE}"
+        tar -czf "${LOG_ARCHIVE}" -C "${DIST_DIR}" "$(basename "${LOG_DIR}")" 2>/dev/null || true
+    fi
+
+    exit "${status}"
+}
+
+# Start capturing all further stdout/stderr of the build into ${LOG_OUT}.
+# Recreates LOG_DIR from scratch, removes any stale LOG_ARCHIVE, and routes
+# output through a FIFO read by a background tee, so it still reaches the
+# original stdout. The original stdout/stderr are saved on fds 3/4 for
+# cleanup_build_log, which is installed as the exit/signal handler.
+# Exits with 1 when tee is not available.
+start_build_log() {
+    command -v tee >/dev/null 2>&1 || {
+        echo "ERROR: tee is required for build logging" >&2
+        exit 1
+    }
+
+    rm -rf "${LOG_DIR}"
+    rm -f "${LOG_ARCHIVE}"
+    mkdir -p "${LOG_DIR}"
+    BUILD_LOG_PIPE="$(mktemp -u "${TMPDIR:-/tmp}/bee-build-log.XXXXXX")"
+    mkfifo "${BUILD_LOG_PIPE}"
+
+    exec 3>&1 4>&2
+    tee "${LOG_OUT}" < "${BUILD_LOG_PIPE}" &
+    BUILD_TEE_PID=$!
+    exec > "${BUILD_LOG_PIPE}" 2>&1
+    BUILD_LOG_ACTIVE=1
+
+    # Signals get explicit 128+N statuses: inside a signal trap "$?" is the status
+    # of whatever command last finished, which may be 0 and would make an
+    # interrupted build look successful.
+    trap 'cleanup_build_log "$?"' EXIT
+    trap 'cleanup_build_log 129' HUP
+    trap 'cleanup_build_log 130' INT
+    trap 'cleanup_build_log 143' TERM
+
+    echo "=== build log dir: ${LOG_DIR} ==="
+    echo "=== build log: ${LOG_OUT} ==="
+    echo "=== build log archive: ${LOG_ARCHIVE} ==="
+}
+
+start_build_log

 # Auto-detect kernel ABI: refresh apt index, then query current linux-image-amd64 dependency.
 # If headers for the detected ABI are not yet installed (kernel updated since image build),
@@ -132,7 +365,7 @@ if [ ! -d "/usr/src/linux-headers-${KVER}" ]; then
    apt-get install -y "linux-headers-${KVER}"
 fi

-echo "=== bee ISO build ==="
+echo "=== bee ISO build (variant: ${BEE_GPU_VENDOR}) ==="
 echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERSION}"
 echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}, ISO version: ${ISO_VERSION_EFFECTIVE}"
 echo ""
@@ -141,8 +374,8 @@ echo "=== syncing git submodules ==="
 git -C "${REPO_ROOT}" submodule update --init --recursive

 # --- compile bee binary (static, Linux amd64) ---
+# Shared between variants — built once, reused on second pass.
 BEE_BIN="${DIST_DIR}/bee-linux-amd64"
-GPU_STRESS_BIN="${DIST_DIR}/bee-gpu-stress-linux-amd64"
 NEED_BUILD=1
 if [ -f "$BEE_BIN" ]; then
    NEWEST_SRC=$(find "${REPO_ROOT}/audit" -name '*.go' -newer "$BEE_BIN" | head -1)
@@ -172,37 +405,41 @@ else
    echo "=== bee binary up to date, skipping build ==="
 fi

-echo ""
-echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
-sh "${BUILDER_DIR}/build-cublas.sh" \
-    "${CUBLAS_VERSION}" \
-    "${CUDA_USERSPACE_VERSION}" \
-    "${NCCL_CUDA_VERSION}" \
-    "${DIST_DIR}"
+# --- NVIDIA-only build steps ---
+GPU_BURN_WORKER_BIN="${DIST_DIR}/bee-gpu-burn-worker-linux-amd64"
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    echo ""
+    echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
+    sh "${BUILDER_DIR}/build-cublas.sh" \
+        "${CUBLAS_VERSION}" \
+        "${CUDA_USERSPACE_VERSION}" \
+        "${NCCL_CUDA_VERSION}" \
+        "${DIST_DIR}"

-CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"
+    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"

-GPU_STRESS_NEED_BUILD=1
-if [ -f "$GPU_STRESS_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_STRESS_BIN" ]; then
-    GPU_STRESS_NEED_BUILD=0
+    GPU_STRESS_NEED_BUILD=1
+    if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then
+        GPU_STRESS_NEED_BUILD=0
+    fi
+
+    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
+        echo "=== building bee-gpu-burn worker ==="
+        gcc -O2 -s -Wall -Wextra \
+            -I"${CUBLAS_CACHE}/include" \
+            -o "$GPU_BURN_WORKER_BIN" \
+            "${BUILDER_DIR}/bee-gpu-stress.c" \
+            -ldl -lm
+        echo "binary: $GPU_BURN_WORKER_BIN"
+    else
+        echo "=== bee-gpu-burn worker up to date, skipping build ==="
+    fi
 fi

-if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
-    echo "=== building bee-gpu-stress ==="
-    gcc -O2 -s -Wall -Wextra \
-        -I"${CUBLAS_CACHE}/include" \
-        -o "$GPU_STRESS_BIN" \
-        "${BUILDER_DIR}/bee-gpu-stress.c" \
-        -ldl -lm
-    echo "binary: $GPU_STRESS_BIN"
-else
-    echo "=== bee-gpu-stress up to date, skipping build ==="
-fi
-
-echo "=== preparing staged overlay ==="
-# Sync builder config into work dir, preserving lb cache (chroot + packages).
-# We do NOT rm -rf BUILD_WORK_DIR so lb can reuse its chroot on repeat builds.
+echo "=== preparing staged overlay (${BEE_GPU_VENDOR}) ==="
 mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
+
+# Sync builder config into variant work dir, preserving lb cache.
 rsync -a --delete \
    --exclude='cache/' \
    --exclude='chroot/' \
@@ -212,7 +449,10 @@ rsync -a --delete \
    --exclude='*.contents' \
    --exclude='*.files' \
    "${BUILDER_DIR}/" "${BUILD_WORK_DIR}/"
-# Also persist package cache to CACHE_ROOT so it survives a manual wipe of BUILD_WORK_DIR.
+
+# Share deb package cache across variants.
+# Restore: populate work dir cache from shared cache before build.
+# Persist: sync back after build (done after lb build below).
 LB_PKG_CACHE="${CACHE_ROOT}/lb-packages"
 mkdir -p "${LB_PKG_CACHE}"
 if [ -d "${BUILD_WORK_DIR}/cache/packages.chroot" ]; then
@@ -221,15 +461,26 @@ elif [ -d "${LB_PKG_CACHE}" ] && [ "$(ls -A "${LB_PKG_CACHE}" 2>/dev/null)" ]; t
    mkdir -p "${BUILD_WORK_DIR}/cache/packages.chroot"
    rsync -a "${LB_PKG_CACHE}/" "${BUILD_WORK_DIR}/cache/packages.chroot/"
 fi
+
 rsync -a "${OVERLAY_DIR}/" "${OVERLAY_STAGE_DIR}/"
 rm -f \
    "${OVERLAY_STAGE_DIR}/etc/bee-ssh-password-fallback" \
    "${OVERLAY_STAGE_DIR}/etc/bee-release" \
    "${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
-    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/john" \
+    "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \
    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest" \
    "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+rm -rf \
+    "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
+
+# Remove NVIDIA-specific overlay files for non-nvidia variants
+if [ "$BEE_GPU_VENDOR" != "nvidia" ]; then
+    rm -f "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nvidia-load"
+    rm -f "${OVERLAY_STAGE_DIR}/etc/systemd/system/bee-nvidia.service"
+fi

 # --- inject authorized_keys for SSH access ---
 AUTHORIZED_KEYS_FILE="${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys"
@@ -268,8 +519,15 @@ fi
 mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin"
 cp "${DIST_DIR}/bee-linux-amd64" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
 chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
-cp "${GPU_STRESS_BIN}" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
+
+if [ "$BEE_GPU_VENDOR" = "nvidia" ] && [ -f "$GPU_BURN_WORKER_BIN" ]; then
+    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee" "${OVERLAY_STAGE_DIR}/usr/local/bin"
+    cp "${GPU_BURN_WORKER_BIN}" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn" 2>/dev/null || true
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress" 2>/dev/null || true
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" 2>/dev/null || true
+fi

 # --- inject smoketest into overlay so it runs directly on the live CD ---
 cp "${BUILDER_DIR}/smoketest.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
@@ -286,100 +544,170 @@ for tool in storcli64 sas2ircu sas3ircu arcconf ssacli; do
    fi
 done

-# --- build NVIDIA kernel modules ---
-echo ""
-echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
-sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"
+# --- NVIDIA kernel modules and userspace libs ---
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    echo ""
+    echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
+    sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"

-KVER="${DEBIAN_KERNEL_ABI}-amd64"
-NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
+    KVER="${DEBIAN_KERNEL_ABI}-amd64"
+    NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"

-# Inject .ko files into overlay at /usr/local/lib/nvidia/
-OVERLAY_KMOD_DIR="${OVERLAY_DIR}/usr/local/lib/nvidia"
-OVERLAY_KMOD_DIR="${OVERLAY_STAGE_DIR}/usr/local/lib/nvidia"
-mkdir -p "${OVERLAY_KMOD_DIR}"
-cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/"
+    # Inject .ko files into overlay at /usr/local/lib/nvidia/
+    OVERLAY_KMOD_DIR="${OVERLAY_STAGE_DIR}/usr/local/lib/nvidia"
+    mkdir -p "${OVERLAY_KMOD_DIR}"
+    cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/"

-# Inject nvidia-smi and libnvidia-ml
-mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin" "${OVERLAY_STAGE_DIR}/usr/lib"
-cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_STAGE_DIR}/usr/local/bin/"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-smi"
-cp "${NVIDIA_CACHE}/bin/nvidia-bug-report.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/" 2>/dev/null || true
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-bug-report.sh" 2>/dev/null || true
-cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    # Inject nvidia-smi and libnvidia-ml
+    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin" "${OVERLAY_STAGE_DIR}/usr/lib"
+    cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_STAGE_DIR}/usr/local/bin/"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-smi"
+    cp "${NVIDIA_CACHE}/bin/nvidia-bug-report.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/" 2>/dev/null || true
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-bug-report.sh" 2>/dev/null || true
+    cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    mkdir -p "${OVERLAY_STAGE_DIR}/etc/OpenCL/vendors"
+    printf 'libnvidia-opencl.so.1\n' > "${OVERLAY_STAGE_DIR}/etc/OpenCL/vendors/nvidia.icd"

-# Inject GSP firmware into /lib/firmware/nvidia/<version>/
-if [ -d "${NVIDIA_CACHE}/firmware" ] && [ "$(ls -A "${NVIDIA_CACHE}/firmware" 2>/dev/null)" ]; then
-    mkdir -p "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}"
-    cp "${NVIDIA_CACHE}/firmware/"* "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/"
-    echo "=== firmware: $(ls "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/" | wc -l) files injected ==="
+    # Inject GSP firmware into /lib/firmware/nvidia/<version>/
+    if [ -d "${NVIDIA_CACHE}/firmware" ] && [ "$(ls -A "${NVIDIA_CACHE}/firmware" 2>/dev/null)" ]; then
+        mkdir -p "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}"
+        cp "${NVIDIA_CACHE}/firmware/"* "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/"
+        echo "=== firmware: $(ls "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/" | wc -l) files injected ==="
+    fi
+
+    # --- build / download NCCL ---
+    echo ""
+    echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
+    sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
+
+    NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
+
+    # Inject libnccl.so.* into overlay alongside other NVIDIA userspace libs
+    cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
+    echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
+
+    # Inject cuBLAS/cuBLASLt/cudart runtime libs used by the bee-gpu-burn worker tensor-core GEMM path
+    cp "${CUBLAS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
+    echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
+
+    # --- build nccl-tests ---
+    echo ""
+    echo "=== building nccl-tests ${NCCL_TESTS_VERSION} ==="
+    sh "${BUILDER_DIR}/build-nccl-tests.sh" \
+        "${NCCL_TESTS_VERSION}" \
+        "${NCCL_VERSION}" \
+        "${NCCL_CUDA_VERSION}" \
+        "${DIST_DIR}" \
+        "${NVCC_VERSION}" \
+        "${DEBIAN_VERSION}"
+
+    NCCL_TESTS_CACHE="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
+    cp "${NCCL_TESTS_CACHE}/bin/all_reduce_perf" "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+    cp "${NCCL_TESTS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    echo "=== all_reduce_perf injected ==="
+
+    echo ""
+    echo "=== building john jumbo ${JOHN_JUMBO_COMMIT} ==="
+    sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
+    JOHN_CACHE="${DIST_DIR}/john-${JOHN_JUMBO_COMMIT}"
+    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
+    rsync -a --delete "${JOHN_CACHE}/run/" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/"
+    ln -sfn ../lib/bee/john/run/john "${OVERLAY_STAGE_DIR}/usr/local/bin/john"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/john"
+    echo "=== john injected ==="
 fi

-# --- build / download NCCL ---
-echo ""
-echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
-sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
-
-NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
-
-# Inject libnccl.so.* into overlay alongside other NVIDIA userspace libs
-cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
-echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
-
-# Inject cuBLAS/cuBLASLt/cudart runtime libs used by bee-gpu-stress tensor-core GEMM path
-cp "${CUBLAS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
-echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
-
-# --- build nccl-tests ---
-echo ""
-echo "=== building nccl-tests ${NCCL_TESTS_VERSION} ==="
-sh "${BUILDER_DIR}/build-nccl-tests.sh" \
-    "${NCCL_TESTS_VERSION}" \
-    "${NCCL_VERSION}" \
-    "${NCCL_CUDA_VERSION}" \
-    "${DIST_DIR}" \
-    "${NVCC_VERSION}" \
-    "${DEBIAN_VERSION}"
-
-NCCL_TESTS_CACHE="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
-cp "${NCCL_TESTS_CACHE}/bin/all_reduce_perf" "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
-echo "=== all_reduce_perf injected ==="
-
 # --- embed build metadata ---
 mkdir -p "${OVERLAY_STAGE_DIR}/etc"
 BUILD_DATE="$(date +%Y-%m-%d)"
 GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
-cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" <<EOF
-BEE_ISO_VERSION=${ISO_VERSION_EFFECTIVE}
-BEE_AUDIT_VERSION=${AUDIT_VERSION_EFFECTIVE}
-BUILD_DATE=${BUILD_DATE}
-GIT_COMMIT=${GIT_COMMIT}
-DEBIAN_VERSION=${DEBIAN_VERSION}
-DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
-NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
+
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    GPU_VERSION_LINE="NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
 NCCL_VERSION=${NCCL_VERSION}
 NCCL_CUDA_VERSION=${NCCL_CUDA_VERSION}
 CUBLAS_VERSION=${CUBLAS_VERSION}
 CUDA_USERSPACE_VERSION=${CUDA_USERSPACE_VERSION}
 NCCL_TESTS_VERSION=${NCCL_TESTS_VERSION}
+JOHN_JUMBO_COMMIT=${JOHN_JUMBO_COMMIT}"
+    GPU_BUILD_INFO="nvidia:${NVIDIA_DRIVER_VERSION}"
+elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
+    GPU_VERSION_LINE="ROCM_VERSION=${ROCM_VERSION}"
+    GPU_BUILD_INFO="rocm:${ROCM_VERSION}"
+else
+    GPU_VERSION_LINE=""
+    GPU_BUILD_INFO="nogpu"
+fi
+
+cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" <<EOF
+BEE_ISO_VERSION=${ISO_VERSION_EFFECTIVE}
+BEE_AUDIT_VERSION=${AUDIT_VERSION_EFFECTIVE}
+BEE_GPU_VENDOR=${BEE_GPU_VENDOR}
+BUILD_DATE=${BUILD_DATE}
+GIT_COMMIT=${GIT_COMMIT}
+DEBIAN_VERSION=${DEBIAN_VERSION}
+DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
+${GPU_VERSION_LINE}
 EOF

+# Write GPU vendor marker for hooks
+echo "${BEE_GPU_VENDOR}" > "${OVERLAY_STAGE_DIR}/etc/bee-gpu-vendor"
+
 # Patch motd with build info
-BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} debian:${DEBIAN_VERSION} nvidia:${NVIDIA_DRIVER_VERSION}"
+BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} debian:${DEBIAN_VERSION} ${GPU_BUILD_INFO}"
 if [ -f "${OVERLAY_STAGE_DIR}/etc/motd" ]; then
    sed "s/%%BUILD_INFO%%/${BEE_BUILD_INFO}/" "${OVERLAY_STAGE_DIR}/etc/motd" \
        > "${OVERLAY_STAGE_DIR}/etc/motd.patched"
    mv "${OVERLAY_STAGE_DIR}/etc/motd.patched" "${OVERLAY_STAGE_DIR}/etc/motd"
 fi

-# --- substitute version placeholders in package list ---
-sed -i \
-    -e "s/%%DCGM_VERSION%%/${DCGM_VERSION}/g" \
-    -e "s/%%ROCM_VERSION%%/${ROCM_VERSION}/g" \
-    -e "s/%%ROCM_SMI_VERSION%%/${ROCM_SMI_VERSION}/g" \
-    "${BUILD_WORK_DIR}/config/package-lists/bee.list.chroot" \
-    "${BUILD_WORK_DIR}/config/archives/rocm.list.chroot"
+# --- copy variant-specific package list, remove all other variant lists ---
+# live-build picks up ALL .list.chroot files — delete other variants to avoid conflicts.
+cp "${BUILD_WORK_DIR}/config/package-lists/bee-${BEE_GPU_VENDOR}.list.chroot" \
+   "${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
+rm -f "${BUILD_WORK_DIR}/config/package-lists/bee-nvidia.list.chroot" \
+      "${BUILD_WORK_DIR}/config/package-lists/bee-amd.list.chroot" \
+      "${BUILD_WORK_DIR}/config/package-lists/bee-nogpu.list.chroot"
+
+# --- remove archives for the other vendor(s) ---
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    rm -f "${BUILD_WORK_DIR}/config/archives/rocm.list.chroot" \
+          "${BUILD_WORK_DIR}/config/archives/rocm.key.chroot"
+elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
+    rm -f "${BUILD_WORK_DIR}/config/archives/nvidia-cuda.list.chroot" \
+          "${BUILD_WORK_DIR}/config/archives/nvidia-cuda.key.chroot"
+else
+    # nogpu: remove both
+    rm -f "${BUILD_WORK_DIR}/config/archives/rocm.list.chroot" \
+          "${BUILD_WORK_DIR}/config/archives/rocm.key.chroot" \
+          "${BUILD_WORK_DIR}/config/archives/nvidia-cuda.list.chroot" \
+          "${BUILD_WORK_DIR}/config/archives/nvidia-cuda.key.chroot"
+fi
+
+# --- substitute version placeholders in package list and archive ---
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    sed -i \
+        -e "s/%%DCGM_VERSION%%/${DCGM_VERSION}/g" \
+        "${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
+elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
+    sed -i \
+        -e "s/%%ROCM_VERSION%%/${ROCM_VERSION}/g" \
+        -e "s/%%ROCM_SMI_VERSION%%/${ROCM_SMI_VERSION}/g" \
+        -e "s/%%ROCM_BANDWIDTH_TEST_VERSION%%/${ROCM_BANDWIDTH_TEST_VERSION}/g" \
+        -e "s/%%ROCM_VALIDATION_SUITE_VERSION%%/${ROCM_VALIDATION_SUITE_VERSION}/g" \
+        -e "s/%%ROCBLAS_VERSION%%/${ROCBLAS_VERSION}/g" \
+        -e "s/%%ROCRAND_VERSION%%/${ROCRAND_VERSION}/g" \
+        -e "s/%%HIP_RUNTIME_AMD_VERSION%%/${HIP_RUNTIME_AMD_VERSION}/g" \
+        -e "s/%%HIPBLASLT_VERSION%%/${HIPBLASLT_VERSION}/g" \
+        -e "s/%%COMGR_VERSION%%/${COMGR_VERSION}/g" \
+        "${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
+    if [ -f "${BUILD_WORK_DIR}/config/archives/rocm.list.chroot" ]; then
+        sed -i \
+            -e "s/%%ROCM_VERSION%%/${ROCM_VERSION}/g" \
+            "${BUILD_WORK_DIR}/config/archives/rocm.list.chroot"
+    fi
+fi

 # --- sync overlay into live-build includes.chroot ---
 LB_DIR="${BUILD_WORK_DIR}"
@@ -395,20 +723,33 @@ fi

 # --- build ISO using live-build ---
 echo ""
-echo "=== building ISO (live-build) ==="
+echo "=== building ISO (live-build, variant: ${BEE_GPU_VENDOR}) ==="
+
+# Export for auto/config
+BEE_GPU_VENDOR_UPPER="$(echo "${BEE_GPU_VENDOR}" | tr 'a-z' 'A-Z')"
+export BEE_GPU_VENDOR_UPPER

 cd "${LB_DIR}"
 lb clean 2>&1 | tail -3
 lb config 2>&1 | tail -5
+dump_memtest_debug "pre-build" "${LB_DIR}"
 lb build 2>&1

+# --- persist deb package cache back to shared location ---
+# This allows the second variant to reuse all downloaded packages.
+if [ -d "${BUILD_WORK_DIR}/cache/packages.chroot" ]; then
+    rsync -a "${BUILD_WORK_DIR}/cache/packages.chroot/" "${LB_PKG_CACHE}/"
+    echo "=== package cache synced to ${LB_PKG_CACHE} ==="
+fi
+
 # live-build outputs live-image-amd64.hybrid.iso in LB_DIR
 ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
-ISO_OUT="${DIST_DIR}/bee-debian${DEBIAN_VERSION}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
 if [ -f "$ISO_RAW" ]; then
+    dump_memtest_debug "post-build" "${LB_DIR}" "$ISO_RAW"
+    validate_iso_memtest "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
-    echo "=== done ==="
+    echo "=== done (${BEE_GPU_VENDOR}) ==="
    echo "ISO: $ISO_OUT"
    if command -v stat >/dev/null 2>&1; then
        ISO_SIZE_BYTES="$(stat -c '%s' "$ISO_OUT" 2>/dev/null || stat -f '%z' "$ISO_OUT")"
--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -10,12 +10,12 @@ echo "  ╚══════╝╚═╝  ╚═╝╚══════╝
 echo ""

 menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

 menuentry "EASY-BEE (load to RAM)" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ toram bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -22,3 +22,7 @@ label live-@FLAVOUR@-failsafe
    linux @LINUX@
    initrd @INITRD@
    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
+
+label memtest
+    menu label ^Memory Test (memtest86+)
+    linux /boot/memtest86+x64.bin
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -5,25 +5,27 @@ set -e

 echo "=== bee chroot setup ==="

+GPU_VENDOR=$(cat /etc/bee-gpu-vendor 2>/dev/null || echo nvidia)
+echo "=== GPU vendor: ${GPU_VENDOR} ==="
+
 ensure_bee_console_user() {
    if id bee >/dev/null 2>&1; then
-        usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true
+        usermod -d /home/bee -s /bin/bash bee 2>/dev/null || true
    else
-        useradd -d /home/bee -m -s /bin/sh -U bee
+        useradd -d /home/bee -m -s /bin/bash -U bee
    fi

    mkdir -p /home/bee
    chown -R bee:bee /home/bee
    echo "bee:eeb" | chpasswd
-    usermod -aG sudo,video,input bee 2>/dev/null || true
+    groupadd -f ipmi 2>/dev/null || true
+    usermod -aG sudo,video,input,render,ipmi bee 2>/dev/null || true
 }

 ensure_bee_console_user

-# Enable bee services
-systemctl enable nvidia-dcgm.service 2>/dev/null || true
+# Enable common bee services
 systemctl enable bee-network.service
-systemctl enable bee-nvidia.service
 systemctl enable bee-preflight.service
 systemctl enable bee-audit.service
 systemctl enable bee-web.service
@@ -35,23 +37,37 @@ systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
 systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
 systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true

+# Enable GPU-vendor specific services
+if [ "$GPU_VENDOR" = "nvidia" ]; then
+    systemctl enable nvidia-dcgm.service 2>/dev/null || true
+    systemctl enable bee-nvidia.service
+elif [ "$GPU_VENDOR" = "amd" ]; then
+    # ROCm symlinks (packages install to /opt/rocm-*/bin/)
+    for tool in rocm-smi rocm-bandwidth-test rvs; do
+        if [ ! -e /usr/local/bin/${tool} ]; then
+            bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
+            [ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
+        fi
+    done
+fi
+# nogpu: no GPU services needed
+
 # Ensure scripts are executable
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
-chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
 chmod +x /usr/local/bin/bee            2>/dev/null || true
 chmod +x /usr/local/bin/bee-log-run    2>/dev/null || true
+if [ "$GPU_VENDOR" = "nvidia" ]; then
+    chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-john-gpu-stress 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-nccl-gpu-stress 2>/dev/null || true
+fi

 # Reload udev rules
 udevadm control --reload-rules 2>/dev/null || true

-# rocm-smi symlink (package installs to /opt/rocm-*/bin/rocm-smi)
-if [ ! -e /usr/local/bin/rocm-smi ]; then
-    smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
-    [ -n "${smi_path}" ] && ln -sf "${smi_path}" /usr/local/bin/rocm-smi
-fi
-
 # Create export directory
 mkdir -p /appdata/bee/export

@@ -59,4 +75,4 @@ if [ -f /etc/sudoers.d/bee ]; then
    chmod 0440 /etc/sudoers.d/bee
 fi

-echo "=== bee chroot setup complete ==="
+echo "=== bee chroot setup complete (${GPU_VENDOR}) ==="
--- a/iso/builder/config/package-lists/bee-amd.list.chroot
+++ b/iso/builder/config/package-lists/bee-amd.list.chroot
@@ -0,0 +1,9 @@
+# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
+rocm-smi-lib=%%ROCM_SMI_VERSION%%
+rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
+rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
+rocblas=%%ROCBLAS_VERSION%%
+rocrand=%%ROCRAND_VERSION%%
+hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
+hipblaslt=%%HIPBLASLT_VERSION%%
+comgr=%%COMGR_VERSION%%
--- a/iso/builder/config/package-lists/bee-nogpu.list.chroot
+++ b/iso/builder/config/package-lists/bee-nogpu.list.chroot
@@ -0,0 +1 @@
+# No GPU variant — no NVIDIA, no AMD/ROCm packages
--- a/iso/builder/config/package-lists/bee-nvidia.list.chroot
+++ b/iso/builder/config/package-lists/bee-nvidia.list.chroot
@@ -0,0 +1,8 @@
+# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing.
+# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with CUDA 13 userspace,
+# so install the CUDA 13 build plus proprietary diagnostic components explicitly.
+datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
+datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
+datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
+ocl-icd-libopencl1
+clinfo
--- a/iso/builder/config/package-lists/bee.list.chroot
+++ b/iso/builder/config/package-lists/bee.list.chroot
@@ -21,8 +21,15 @@ openssh-server
 # Disk installer
 squashfs-tools
 parted
+# Keep GRUB install tools without selecting a single active platform package.
+# grub-pc and grub-efi-amd64 conflict with each other, but grub2-common
+# provides grub-install/update-grub and the *-bin packages provide BIOS/UEFI modules.
+grub2-common
 grub-pc-bin
 grub-efi-amd64-bin
+grub-efi-amd64-signed
+shim-signed
+efibootmgr

 # Filesystem support for USB export targets
 exfatprogs
@@ -39,11 +46,11 @@ vim-tiny
 mc
 htop
 nvtop
+btop
 sudo
 zstd
 mstflint
 memtester
-memtest86+
 stress-ng
 stressapptest

@@ -72,11 +79,5 @@ firmware-bnx2x
 firmware-cavium
 firmware-qlogic

-# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
-datacenter-gpu-manager=1:%%DCGM_VERSION%%
-
-# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
-rocm-smi-lib=%%ROCM_SMI_VERSION%%
-
 # glibc compat helpers (for any external binaries that need it)
 libc6
--- a/iso/builder/smoketest.sh
+++ b/iso/builder/smoketest.sh
@@ -39,7 +39,7 @@ info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
 # --- PATH & binaries ---
 echo "-- PATH & binaries --"
 for tool in dmidecode smartctl nvme ipmitool lspci bee; do
-    if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
+    if p=$(PATH="/usr/local/bin:/usr/sbin:/sbin:$PATH" command -v "$tool" 2>/dev/null); then
        ok "$tool found: $p"
    else
        fail "$tool: NOT FOUND"
--- a/iso/overlay/etc/modules-load.d/bee-ipmi.conf
+++ b/iso/overlay/etc/modules-load.d/bee-ipmi.conf
@@ -0,0 +1,3 @@
+# Load IPMI modules for fan/sensor/power monitoring via ipmitool
+ipmi_si
+ipmi_devintf
--- a/iso/overlay/etc/profile.d/bee.sh
+++ b/iso/overlay/etc/profile.d/bee.sh
@@ -1,4 +1,4 @@
-export PATH="$PATH:/usr/local/bin:/opt/rocm/bin:/opt/rocm/sbin"
+export PATH="$PATH:/usr/local/bin:/usr/sbin:/sbin:/opt/rocm/bin:/opt/rocm/sbin"

 # Print web UI URLs on the local console at login.
 if [ -z "${SSH_CONNECTION:-}" ] \
--- a/iso/overlay/etc/systemd/system/bee-audit.service
+++ b/iso/overlay/etc/systemd/system/bee-audit.service
@@ -1,14 +1,9 @@
 [Unit]
-Description=Bee: run hardware audit
-After=bee-network.service bee-nvidia.service bee-preflight.service
-Before=bee-web.service
+Description=Bee: on-demand hardware audit (not started automatically)

 [Service]
 Type=oneshot
-ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
+RemainAfterExit=yes
+ExecStart=/bin/sh -c 'curl -sf -X POST http://localhost/api/audit/run >/dev/null'
 StandardOutput=journal
 StandardError=journal
-RemainAfterExit=yes
-
-[Install]
-WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/bee-web.service
+++ b/iso/overlay/etc/systemd/system/bee-web.service
@@ -1,7 +1,5 @@
 [Unit]
 Description=Bee: hardware audit web viewer
-After=bee-network.service bee-audit.service
-Wants=bee-audit.service

 [Service]
 Type=simple
@@ -11,6 +9,9 @@ RestartSec=2
 StandardOutput=journal
 StandardError=journal
 LimitMEMLOCK=infinity
+# Keep the web server responsive during GPU/CPU stress (children inherit nice+10
+# via Setpriority in runCmdJob, but the bee-web parent stays at 0).
+Nice=0

 [Install]
 WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/lightdm.service.d/bee-limits.conf
+++ b/iso/overlay/etc/systemd/system/lightdm.service.d/bee-limits.conf
@@ -4,3 +4,6 @@
 RestartSec=10
 StartLimitIntervalSec=60
 StartLimitBurst=3
+# Raise scheduling priority of the X server so the graphical console (KVM/IPMI)
+# stays responsive during GPU/CPU stress tests running at nice+10.
+Nice=-5
--- a/iso/overlay/etc/udev/rules.d/99-ipmi.rules
+++ b/iso/overlay/etc/udev/rules.d/99-ipmi.rules
@@ -0,0 +1,2 @@
+# Allow ipmi group to access IPMI device without root
+KERNEL=="ipmi[0-9]*", GROUP="ipmi", MODE="0660"
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -0,0 +1,93 @@
+#!/bin/sh
+set -eu
+
+SECONDS=5
+SIZE_MB=64
+DEVICES=""
+EXCLUDE=""
+WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"
+
+usage() {
+    echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+# normalize_list CSV
+# Print a canonical form of a comma-separated list: whitespace is stripped,
+# empty fields are dropped, entries are sorted numerically, adjacent identical
+# entries are collapsed, and the result is re-joined with commas.
+# Note that the caller's ordering is not preserved ("1,0" becomes "0,1").
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+        --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+[ -x "${WORKER}" ] || { echo "bee-gpu-burn worker not found: ${WORKER}" >&2; exit 1; }
+
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+echo "loader=bee-gpu-burn"
+echo "selected_gpus=${FINAL}"
+
+TMP_DIR=$(mktemp -d)
+WORKERS=""
+RUNNING=""
+
+# abort_run EXIT_CODE
+# Signal every worker that has not been reaped yet, then exit with EXIT_CODE.
+# A trap that only removes TMP_DIR lets the script resume after the signal and
+# keep waiting, so the burn could not be cancelled and the logs vanished
+# mid-run. The EXIT trap below still removes TMP_DIR on the way out.
+abort_run() {
+    trap - INT TERM
+    for running_pid in ${RUNNING}; do
+        kill "${running_pid}" 2>/dev/null || true
+    done
+    exit "$1"
+}
+
+trap 'rm -rf "${TMP_DIR}"' EXIT
+trap 'abort_run 130' INT
+trap 'abort_run 143' TERM
+
+# Launch one worker per selected GPU in parallel, each writing to its own log.
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    log="${TMP_DIR}/gpu-${id}.log"
+    echo "starting gpu ${id}"
+    "${WORKER}" --device "${id}" --seconds "${SECONDS}" --size-mb "${SIZE_MB}" >"${log}" 2>&1 &
+    pid=$!
+    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    RUNNING="${RUNNING} ${pid}"
+done
+
+# Reap workers in launch order; each entry is "pid:gpu-id:logfile".
+# Any failing worker makes the whole run exit 1, but all logs are still printed.
+status=0
+for spec in ${WORKERS}; do
+    pid=${spec%%:*}
+    rest=${spec#*:}
+    id=${rest%%:*}
+    log=${rest#*:}
+    if wait "${pid}"; then
+        echo "gpu ${id} finished: OK"
+    else
+        rc=$?
+        echo "gpu ${id} finished: FAILED rc=${rc}"
+        status=1
+    fi
+    # Forget the reaped PID so abort_run never signals a recycled one.
+    still_running=""
+    for running_pid in ${RUNNING}; do
+        [ "${running_pid}" = "${pid}" ] || still_running="${still_running} ${running_pid}"
+    done
+    RUNNING="${still_running}"
+    # Replay the worker's output with a per-GPU prefix.
+    sed "s/^/[gpu ${id}] /" "${log}" || true
+done
+
+exit "${status}"
--- a/iso/overlay/usr/local/bin/bee-install
+++ b/iso/overlay/usr/local/bin/bee-install
@@ -12,17 +12,55 @@

 set -euo pipefail

+usage() {
+    cat >&2 <<'EOF'
+Usage: bee-install <device> [logfile]
+
+  Installs the live system to a local disk (WIPES the target).
+
+  device   Target block device, e.g. /dev/sda or /dev/nvme0n1
+           Must be a hard disk or NVMe — NOT a CD-ROM (/dev/sr*)
+  logfile  Optional path for progress log (default: /tmp/bee-install.log)
+
+Examples:
+  bee-install /dev/sda
+  bee-install /dev/nvme0n1
+  bee-install /dev/sdb /tmp/my-install.log
+
+WARNING: ALL DATA ON <device> WILL BE ERASED.
+
+Layout (UEFI):  GPT — partition 1: EFI 512MB vfat, partition 2: root ext4
+Layout (BIOS):  MBR — partition 1: root ext4
+EOF
+    exit 1
+}
+
 DEVICE="${1:-}"
 LOGFILE="${2:-/tmp/bee-install.log}"

-if [ -z "$DEVICE" ]; then
-    echo "Usage: bee-install <device> [logfile]" >&2
-    exit 1
+if [ -z "$DEVICE" ] || [ "$DEVICE" = "--help" ] || [ "$DEVICE" = "-h" ]; then
+    usage
 fi
 if [ ! -b "$DEVICE" ]; then
    echo "ERROR: $DEVICE is not a block device" >&2
+    echo "Run 'lsblk' to list available disks." >&2
    exit 1
 fi
+# Block CD-ROM devices
+case "$DEVICE" in
+    /dev/sr*|/dev/scd*)
+        echo "ERROR: $DEVICE is a CD-ROM/optical device — cannot install to it." >&2
+        echo "Run 'lsblk' to find the target disk (e.g. /dev/sda, /dev/nvme0n1)." >&2
+        exit 1
+        ;;
+esac
+# Check required tools
+for tool in parted mkfs.vfat mkfs.ext4 unsquashfs grub-install update-grub; do
+    if ! command -v "$tool" >/dev/null 2>&1; then
+        echo "ERROR: required tool not found: $tool" >&2
+        exit 1
+    fi
+done

 SQUASHFS="/run/live/medium/live/filesystem.squashfs"
 if [ ! -f "$SQUASHFS" ]; then
@@ -158,20 +196,56 @@ mount --bind /sys  "${MOUNT_ROOT}/sys"

 # ------------------------------------------------------------------
 log "--- Step 7/7: Installing GRUB bootloader ---"
+
+# Helper: run a chroot command, log all output, return its exit code.
+# Needed because "cmd | while" pipelines hide the exit code of cmd.
+chroot_log() {
+    local rc=0
+    local out
+    out=$(chroot "$MOUNT_ROOT" "$@" 2>&1) || rc=$?
+    echo "$out" | while IFS= read -r line; do log "  $line"; done
+    return $rc
+}
+
 if [ "$UEFI" = "1" ]; then
-    chroot "$MOUNT_ROOT" grub-install \
-        --target=x86_64-efi \
-        --efi-directory=/boot/efi \
-        --bootloader-id=bee \
-        --recheck 2>&1 | while read -r line; do log "  $line"; done || true
+    # Primary attempt: write EFI NVRAM entry (requires writable efivars)
+    if ! chroot_log grub-install \
+            --target=x86_64-efi \
+            --efi-directory=/boot/efi \
+            --bootloader-id=bee \
+            --recheck; then
+        log "  WARNING: grub-install (with NVRAM) failed — retrying with --no-nvram"
+        # --no-nvram: write grubx64.efi but skip EFI variable update.
+        # Needed on headless servers where efivars is read-only or unavailable.
+        chroot_log grub-install \
+            --target=x86_64-efi \
+            --efi-directory=/boot/efi \
+            --bootloader-id=bee \
+            --no-nvram \
+            --recheck || log "  WARNING: grub-install --no-nvram also failed — check logs"
+    fi
+
+    # Always install the UEFI fallback path EFI/BOOT/BOOTX64.EFI.
+    # Many UEFI implementations (especially server BMCs and some firmware)
+    # ignore the NVRAM boot entry and only look for this path.
+    GRUB_EFI="${MOUNT_ROOT}/boot/efi/EFI/bee/grubx64.efi"
+    FALLBACK_DIR="${MOUNT_ROOT}/boot/efi/EFI/BOOT"
+    if [ -f "$GRUB_EFI" ]; then
+        mkdir -p "$FALLBACK_DIR"
+        cp "$GRUB_EFI" "${FALLBACK_DIR}/BOOTX64.EFI"
+        log "  Fallback EFI binary installed: EFI/BOOT/BOOTX64.EFI"
+    else
+        log "  WARNING: grubx64.efi not found at $GRUB_EFI — UEFI fallback path not set"
+    fi
 else
-    chroot "$MOUNT_ROOT" grub-install \
+    chroot_log grub-install \
        --target=i386-pc \
        --recheck \
-        "$DEVICE" 2>&1 | while read -r line; do log "  $line"; done || true
+        "$DEVICE" || log "  WARNING: grub-install (BIOS) failed — check logs"
 fi
-chroot "$MOUNT_ROOT" update-grub 2>&1 | while read -r line; do log "  $line"; done || true
-log "  GRUB installed."
+
+chroot_log update-grub || log "  WARNING: update-grub failed — check logs"
+log "  GRUB step complete."

 # ------------------------------------------------------------------
 # Cleanup
--- a/iso/overlay/usr/local/bin/bee-john-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-john-gpu-stress
@@ -0,0 +1,193 @@
+#!/bin/sh
+set -eu
+
+SECONDS=300
+DEVICES=""
+EXCLUDE=""
+FORMAT=""
+JOHN_DIR="/usr/local/lib/bee/john/run"
+JOHN_BIN="${JOHN_DIR}/john"
+export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
+export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+
+usage() {
+    echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
+    exit 2
+}
+
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+show_opencl_diagnostics() {
+    echo "-- OpenCL ICD vendors --" >&2
+    if [ -d /etc/OpenCL/vendors ]; then
+        ls -l /etc/OpenCL/vendors >&2 || true
+        for icd in /etc/OpenCL/vendors/*.icd; do
+            [ -f "${icd}" ] || continue
+            echo "  file: ${icd}" >&2
+            sed 's/^/    /' "${icd}" >&2 || true
+        done
+    else
+        echo "  /etc/OpenCL/vendors is missing" >&2
+    fi
+    echo "-- NVIDIA device nodes --" >&2
+    ls -l /dev/nvidia* >&2 || true
+    echo "-- ldconfig OpenCL/NVIDIA --" >&2
+    ldconfig -p 2>/dev/null | grep 'libOpenCL\|libcuda\|libnvidia-opencl' >&2 || true
+    if command -v clinfo >/dev/null 2>&1; then
+        echo "-- clinfo -l --" >&2
+        clinfo -l >&2 || true
+    fi
+    echo "-- john --list=opencl-devices --" >&2
+    ./john --list=opencl-devices >&2 || true
+}
+
+refresh_nvidia_runtime() {
+    if [ "$(id -u)" != "0" ]; then
+        return 1
+    fi
+    if command -v bee-nvidia-load >/dev/null 2>&1; then
+        bee-nvidia-load >/dev/null 2>&1 || true
+    fi
+    ldconfig >/dev/null 2>&1 || true
+    return 0
+}
+
+ensure_nvidia_uvm() {
+    if lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
+        return 0
+    fi
+    if [ "$(id -u)" != "0" ]; then
+        return 1
+    fi
+
+    ko="/usr/local/lib/nvidia/nvidia-uvm.ko"
+    [ -f "${ko}" ] || return 1
+
+    if ! insmod "${ko}" >/dev/null 2>&1; then
+        return 1
+    fi
+
+    uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices | awk '{print $1}')
+    if [ -n "${uvm_major}" ]; then
+        mknod -m 666 /dev/nvidia-uvm c "${uvm_major}" 0 2>/dev/null || true
+        mknod -m 666 /dev/nvidia-uvm-tools c "${uvm_major}" 1 2>/dev/null || true
+    fi
+    return 0
+}
+
+ensure_opencl_ready() {
+    out=$(./john --list=opencl-devices 2>&1 || true)
+    if echo "${out}" | grep -q "Device #"; then
+        return 0
+    fi
+
+    if refresh_nvidia_runtime; then
+        out=$(./john --list=opencl-devices 2>&1 || true)
+        if echo "${out}" | grep -q "Device #"; then
+            return 0
+        fi
+    fi
+
+    if ensure_nvidia_uvm; then
+        out=$(./john --list=opencl-devices 2>&1 || true)
+        if echo "${out}" | grep -q "Device #"; then
+            return 0
+        fi
+    fi
+
+    echo "OpenCL devices are not available for John." >&2
+    if ! lsmod 2>/dev/null | grep -q '^nvidia_uvm '; then
+        echo "nvidia_uvm is not loaded." >&2
+    fi
+    if [ ! -e /dev/nvidia-uvm ]; then
+        echo "/dev/nvidia-uvm is missing." >&2
+    fi
+    show_opencl_diagnostics
+    return 1
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        --format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+[ -x "${JOHN_BIN}" ] || { echo "john binary not found: ${JOHN_BIN}" >&2; exit 1; }
+
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+JOHN_DEVICES=""
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    opencl_id=$((id + 1))
+    if [ -z "${JOHN_DEVICES}" ]; then
+        JOHN_DEVICES="${opencl_id}"
+    else
+        JOHN_DEVICES="${JOHN_DEVICES},${opencl_id}"
+    fi
+done
+
+echo "loader=john"
+echo "selected_gpus=${FINAL}"
+echo "john_devices=${JOHN_DEVICES}"
+
+cd "${JOHN_DIR}"
+
+ensure_opencl_ready || exit 1
+
+# choose_format
+# Print the john format to run. An explicit --format value wins; otherwise the
+# candidates are tried in order with a `--test=1` run on the selected devices
+# and the first one that succeeds is printed. Returns 1, printing nothing,
+# when no candidate works.
+choose_format() {
+    if [ -z "${FORMAT}" ]; then
+        for candidate in sha512crypt-opencl pbkdf2-hmac-sha512-opencl 7z-opencl sha256crypt-opencl md5crypt-opencl; do
+            ./john --test=1 --format="${candidate}" --devices="${JOHN_DEVICES}" >/dev/null 2>&1 || continue
+            echo "${candidate}"
+            return 0
+        done
+        return 1
+    fi
+    echo "${FORMAT}"
+    return 0
+}
+
+CHOSEN_FORMAT=$(choose_format) || {
+    echo "no suitable john OpenCL format found" >&2
+    ./john --list=opencl-devices >&2 || true
+    exit 1
+}
+
+echo "format=${CHOSEN_FORMAT}"
+exec ./john --test="${SECONDS}" --format="${CHOSEN_FORMAT}" --devices="${JOHN_DEVICES}"
--- a/iso/overlay/usr/local/bin/bee-log-run
+++ b/iso/overlay/usr/local/bin/bee-log-run
@@ -17,7 +17,7 @@ mkdir -p "$(dirname "$log_file")"
 serial_sink() {
    local tty="$1"
    if [ -w "$tty" ]; then
-        cat > "$tty"
+        cat > "$tty" 2>/dev/null || true
    else
        cat > /dev/null
    fi
--- a/iso/overlay/usr/local/bin/bee-nccl-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-nccl-gpu-stress
@@ -0,0 +1,91 @@
+#!/bin/sh
+set -eu
+
+SECONDS=300
+DEVICES=""
+EXCLUDE=""
+MIN_BYTES="512M"
+MAX_BYTES="4G"
+FACTOR="2"
+ITERS="20"
+ALL_REDUCE_BIN="/usr/local/bin/all_reduce_perf"
+
+usage() {
+    echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+[ -x "${ALL_REDUCE_BIN}" ] || { echo "all_reduce_perf not found: ${ALL_REDUCE_BIN}" >&2; exit 1; }
+
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | awk '{print $1}')
+[ "${GPU_COUNT}" -gt 0 ] || { echo "selected GPU count is zero" >&2; exit 1; }
+
+echo "loader=nccl"
+echo "selected_gpus=${FINAL}"
+echo "gpu_count=${GPU_COUNT}"
+echo "range=${MIN_BYTES}..${MAX_BYTES}"
+echo "iters=${ITERS}"
+
+# Run all_reduce_perf back to back until the requested duration has elapsed.
+# The deadline is only checked between rounds, so a round that is already
+# running when the deadline passes is allowed to finish. Because of `set -e`,
+# a failing round terminates the script with that failure.
+deadline=$(( $(date +%s) + SECONDS ))
+round=0
+now=$(date +%s)
+
+while [ "${now}" -lt "${deadline}" ]; do
+    round=$((round + 1))
+    remaining=$((deadline - now))
+    echo "round=${round} remaining_sec=${remaining}"
+    # Only the selected GPUs are exposed to the benchmark; -g is their count.
+    CUDA_VISIBLE_DEVICES="${FINAL}" \
+        "${ALL_REDUCE_BIN}" \
+        -b "${MIN_BYTES}" \
+        -e "${MAX_BYTES}" \
+        -f "${FACTOR}" \
+        -g "${GPU_COUNT}" \
+        --iters "${ITERS}"
+    now=$(date +%s)
+done
--- a/iso/overlay/usr/local/bin/bee-nvidia-load
+++ b/iso/overlay/usr/local/bin/bee-nvidia-load
@@ -59,15 +59,28 @@ load_module() {
    return 1
 }

+load_host_module() {
+    mod="$1"
+    if modprobe "$mod" >/dev/null 2>&1; then
+        log "host module loaded: $mod"
+        return 0
+    fi
+    return 1
+}
+
 case "$nvidia_mode" in
    normal|full)
        if ! load_module nvidia; then
            exit 1
        fi
+        # nvidia-modeset on some server kernels needs ACPI video helper symbols
+        # exported by the generic "video" module. Best-effort only; compute paths
+        # remain functional even if display-related modules stay absent.
+        load_host_module video || true
        load_module nvidia-modeset || true
        load_module nvidia-uvm || true
        ;;
-    gsp-off|safe|*)
+    gsp-off|safe)
        # NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
        # be disabled via NVreg_EnableGpuFirmware=0. Safe mode keeps the live ISO on the
        # conservative path for platforms where full boot-time GSP init is unstable.
@@ -76,6 +89,15 @@ case "$nvidia_mode" in
        fi
        log "GSP-off mode: skipping nvidia-modeset and nvidia-uvm during boot"
        ;;
+    nomsi|*)
+        # nomsi: disable MSI-X/MSI interrupts — use when RmInitAdapter fails with
+        # "Failed to enable MSI-X" on one or more GPUs (IOMMU group interrupt limits).
+        # NVreg_EnableMSI=0 forces legacy INTx interrupts for all GPUs.
+        if ! load_module nvidia NVreg_EnableGpuFirmware=0 NVreg_EnableMSI=0; then
+            exit 1
+        fi
+        log "nomsi mode: MSI-X disabled (NVreg_EnableMSI=0), skipping nvidia-modeset and nvidia-uvm"
+        ;;
 esac

 # Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
@@ -105,4 +127,19 @@ fi
 ldconfig 2>/dev/null || true
 log "ldconfig refreshed"

+# Start DCGM host engine so dcgmi can discover GPUs.
+# nv-hostengine must run before any dcgmi command — without it, dcgmi reports
+# "group is empty" even when GPUs and modules are present.
+# Skip if already running (e.g. started by a dcgm systemd service or prior boot).
+# Report "started" only when nv-hostengine actually exits successfully.
+if ! command -v nv-hostengine >/dev/null 2>&1; then
+    log "WARN: nv-hostengine not found — dcgmi diagnostics will not work"
+elif pgrep -x nv-hostengine >/dev/null 2>&1; then
+    log "nv-hostengine already running — skipping"
+elif nv-hostengine; then
+    log "nv-hostengine started"
+else
+    log "WARN: nv-hostengine failed to start (rc=$?) — dcgmi diagnostics will not work"
+fi
+
 log "done"
--- a/iso/overlay/usr/local/bin/bee-openbox-session
+++ b/iso/overlay/usr/local/bin/bee-openbox-session
@@ -2,22 +2,22 @@
 # openbox session: launch tint2 taskbar + chromium, then openbox as WM.
 # This file is used as an xinitrc by bee-desktop.

-# Wait for bee-web to be accepting connections (up to 15 seconds)
-i=0
-while [ $i -lt 15 ]; do
-    if curl -sf http://localhost/healthz >/dev/null 2>&1; then
-        break
-    fi
-    sleep 1
-    i=$((i+1))
-done
-
 # Disable screensaver and DPMS
 xset s off
 xset -dpms
 xset s noblank

 tint2 &
+
+# Wait for bee-web to bind (up to 120 probes, 1s apart; audit is deferred, so
+# this normally takes seconds). --max-time keeps a stalled probe from hanging.
+i=0
+while [ "$i" -lt 120 ]; do
+    if curl -sf --max-time 2 http://localhost/healthz >/dev/null 2>&1; then break; fi
+    sleep 1
+    i=$((i+1))
+done
+
 chromium \
    --disable-infobars \
    --disable-translate \
Author	SHA1	Message	Date
Mikhail Chusavitin	b5b34983f1	fix(webui): repair audit actions and CPU burn flow - v3.15	2026-04-01 08:19:11 +03:00
Michael Chus	45221d1e9a	fix(stress): label loaders and improve john opencl diagnostics	2026-04-01 07:31:52 +03:00
Michael Chus	3869788bac	fix(iso): validate memtest with xorriso fallback	2026-04-01 07:24:05 +03:00
Michael Chus	3dbc2184ef	fix(iso): archive build logs and memtest diagnostics	2026-04-01 07:14:53 +03:00
Michael Chus	60cb8f889a	fix(iso): restore memtest menu entries and validate ISO	2026-04-01 07:04:48 +03:00
Michael Chus	c9ee078622	fix(stress): keep platform burn responsive under load	2026-03-31 22:28:26 +03:00
Michael Chus	ea660500c9	chore: commit pending repo changes	2026-03-31 22:17:36 +03:00
Michael Chus	d43a9aeec7	fix(iso): restore live-build memtest integration	2026-03-31 22:10:28 +03:00
Mikhail Chusavitin	f5622e351e	Fix staged John cleanup for repeated ISO builds	2026-03-31 11:40:52 +03:00
Mikhail Chusavitin	a20806afc8	Fix ISO grub package conflict	2026-03-31 11:38:30 +03:00
Mikhail Chusavitin	4f9b6b3bcd	Harden NVIDIA boot logging on live ISO	2026-03-31 11:37:21 +03:00
Mikhail Chusavitin	c850b39b01	feat: v3.10 GPU stress and NCCL burn updates	2026-03-31 11:22:27 +03:00
Mikhail Chusavitin	6dee8f3509	Add NVIDIA stress loader selection and DCGM 4 support	2026-03-31 11:15:15 +03:00
Mikhail Chusavitin	20f834aa96	feat: v3.4 — boot reliability, log readability, USB export, screen resolution, GRUB UEFI fix, memtest, KVM console stability Web UI / logs: - Strip ANSI escape codes and handle \r (progress bars) in task log output - Add USB export API + UI card on Export page (list removable devices, write audit JSON or support bundle) - Add Display Resolution card in Tools (xrandr-based, per-output mode selector) - Dashboard: audit status banner with auto-reload when audit task completes Boot & install: - bee-web starts immediately with no dependencies (was blocked by audit + network) - bee-audit.service redesigned: waits for bee-web healthz, sleeps 60s, enqueues audit via /api/audit/run (task system) - bee-install: fix GRUB UEFI — grub-install exit code was silently ignored (\|\| true); add --no-nvram fallback; always copy EFI/BOOT/BOOTX64.EFI fallback path - Add grub-efi-amd64, grub-pc, grub-efi-amd64-signed, shim-signed to package list (grub-install requires these, not just -bin variants) - memtest hook: fix binary/boot/ not created before cp; handle both Debian (no extension) and upstream (x64.efi) naming - bee-openbox-session: increase healthz wait from 30s to 120s KVM console stability: - runCmdJob: syscall.Setpriority(PRIO_PROCESS, pid, 10) on all stress subprocesses - lightdm.service.d: Nice=-5 so X server preempts stress processes Packages: add btop Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-31 10:16:15 +03:00
Michael Chus	105d92df8b	fix(iso): use underscore in volume label to comply with ISO 9660 ISO 9660 volume labels allow only A-Z, 0-9, and underscore. Dashes cause xorriso WARNING on every build. EASY-BEE-NVIDIA → EASY_BEE_NVIDIA (iso-application keeps dashes, it's UDF). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 23:38:02 +03:00
Michael Chus	f96b149875	fix(memtest): extract EFI binary from .deb cache if chroot/boot/ is empty memtest86+ postinst does not place files in /boot in a live-build chroot without grub triggers. Added fallback: extract directly from the cached .deb via dpkg-deb -x, with verbose logging throughout. Also remove "NVIDIA no MSI-X" from boot menu (premature — root cause unknown). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 23:30:52 +03:00
Michael Chus	5ee120158e	fix(build): remove unused variant package lists before lb build live-build picks up ALL .list.chroot files in config/package-lists/. After rsync, bee-nvidia.list.chroot, bee-amd.list.chroot, and bee-nogpu.list.chroot all end up in BUILD_WORK_DIR — causing lb to try installing packages from every variant (and leaving version placeholders unsubstituted in the unused lists). Fix: after copying bee-${BEE_GPU_VENDOR}.list.chroot → bee-gpu.list.chroot, delete all other bee-{nvidia,amd,nogpu}.list.chroot from BUILD_WORK_DIR. Also includes nomsi boot mode changes (bee-nvidia-load + grub.cfg). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 23:03:42 +03:00
Michael Chus	09fe0e2e9e	feat(iso): add nogpu variant (no NVIDIA, no AMD/ROCm) - build.sh: accept --variant nogpu; skips all GPU build steps, removes both nvidia-cuda and rocm archives, strips bee-nvidia-load and bee-nvidia.service from overlay - build-in-container.sh: add nogpu to --variant flag; all variant includes nogpu; --clean-build wipes live-build-work-nogpu - 9000-bee-setup hook: nogpu path enables no GPU services - bee-nogpu.list.chroot: empty GPU package list Output: easy-bee-nogpu-vX.iso Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 22:49:25 +03:00
Michael Chus	ace1a9dba6	feat(iso): split into nvidia and amd variants, fix KVM graphics and PATH - build.sh: add --variant nvidia\|amd; separate work dirs per variant (live-build-work-nvidia / live-build-work-amd); GPU-specific steps (modules, NCCL, cuBLAS, nccl-tests) run only for nvidia; deb package cache synced back to shared location after each lb build so second variant reuses downloaded packages; ISO output named easy-bee-{variant}-v{ver}-amd64.iso - build-in-container.sh: add --variant nvidia\|amd\|all (default: all); runs build.sh twice in one container for 'all'; --clean-build wipes both variant work dirs - package-lists: remove GPU packages from bee.list.chroot; add bee-nvidia.list.chroot (DCGM) and bee-amd.list.chroot (ROCm) - 9000-bee-setup hook: read /etc/bee-gpu-vendor; enable bee-nvidia.service and DCGM only for nvidia; set up ROCm symlinks only for amd - auto/config: --iso-volume uses BEE_GPU_VENDOR_UPPER env var - grub.cfg: add nomodeset to EASY-BEE and EASY-BEE (load to RAM) entries — fixes X/lightdm on BMC KVM (ASPEED AST chip requires nomodeset for fbdev to work; NVIDIA H100 compute does not need KMS) - bee.sh / smoketest.sh: add /usr/sbin to PATH so dmidecode, smartctl, nvme are found - 9100-memtest hook: add diagnostic listing of chroot/boot/memtest* files Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 22:24:37 +03:00
Michael Chus	905c581ece	fix(iso): substitute all ROCm package version placeholders in build.sh ROCM_BANDWIDTH_TEST_VERSION, ROCM_VALIDATION_SUITE_VERSION, ROCBLAS, ROCRAND, HIP_RUNTIME_AMD, HIPBLASLT, COMGR were defined in VERSIONS and in bee.list.chroot but the sed substitution block only covered 3 of them. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 22:00:05 +03:00
Michael Chus	7c2a0135d2	feat(audit): add platform thermal cycling stress test Runs CPU (stressapptest) + GPU stress simultaneously across multiple load/idle cycles with varying idle durations (120s/60s/30s) to detect cooling systems that fail to recover under repeated load. Presets: smoke (~5 min), acceptance (~25 min), overnight (~100 min). Outputs metrics.csv + summary.txt with per-cycle throttle and fan spindown analysis, packed as tar.gz. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 21:57:33 +03:00
Michael Chus	407c1cd1c4	fix(charts): unify timeline labels across graphs	2026-03-29 21:24:06 +03:00
Michael Chus	e15bcc91c5	feat(metrics): persist history in sqlite and add AMD memory validate tests	2026-03-29 12:28:06 +03:00
Michael Chus	98f0cf0d52	fix(amd-stress): include VRAM load in GST burn	2026-03-29 12:03:50 +03:00
Michael Chus	4db89e9773	fix(metrics): correct chart padding order — right=80 not top=80 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:38:45 +03:00
Michael Chus	3fda18f708	feat(metrics): SQLite persistence + chart fixes (no dots, peak label, min/avg/max in title) - Add modernc.org/sqlite dependency; write every sample to /appdata/bee/metrics.db (WAL mode, prune to 24h on startup) - Pre-fill ring buffers from last 120 DB rows on startup so charts survive service restarts - Ticker changed 3s→1s; chart JS refresh will be set to 2s (lag ≤3s) - Add GET /api/metrics/export.csv for full history download - Chart rendering: SymbolNone (no dots), right padding=80px so peak mark line label is not clipped, min/avg/max appended to chart title Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:37:59 +03:00
Michael Chus	ea518abf30	feat(metrics): add global peak mark line to all live metric charts Finds the series with the highest value across all datasets and adds a SeriesMarkTypeMax dashed mark line to it. Since all series share the same Y axis this effectively shows a single "global peak" line for the whole chart with a label on the right. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:24:50 +03:00
Michael Chus	744de588bb	fix(burn): resolve rvs binary via /opt/rocm-*/bin glob like rocm-smi; add terminal copy button rvs was not in PATH so the stress job exited immediately (UNSUPPORTED). Now resolveRVSCommand searches /opt/rocm-*/bin/rvs before failing. Also add a Copy button overlay on all .terminal elements and set user-select:text so logs can be copied from the web UI. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:20:46 +03:00
Michael Chus	a3ed9473a3	fix(metrics): strip units from GPU legend names; fix fan SDR parsing for new IPMI format Legend names were "GPU 0 %" — remove unit suffix since chart title already conveys it. Fan parsing now handles the 5-field IPMI SDR format where the value+unit ("4340 RPM") are combined in the last column rather than split across separate fields. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:14:27 +03:00
Michael Chus	a714c45f10	fix(metrics): parse rocm-smi CSV by header keywords, not column position MI250X outputs 7 temperature columns before power/use%; positional parsing read junction temp (~40°C) as GPU utilisation. Switch to header-based colIdx() lookup so the correct fields are read regardless of column order or rocm-smi version. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:10:13 +03:00
Michael Chus	349e026cfa	fix(webui): restore chart legend, remove GPU numeric table Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:04:51 +03:00
Michael Chus	889fe1dc2f	fix: IPMI access for bee user + remove chart legend - Add udev rule: /dev/ipmi0 readable by 'ipmi' group (no sudo needed) - Add 'ipmi' group creation and bee user membership in chroot hook - Remove legend from all charts (data shown in GPU table below) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 11:03:35 +03:00
Michael Chus	befdbf3768	fix(iso): autoload ipmi_si/ipmi_devintf for fan/sensor monitoring Without these modules /dev/ipmi0 doesn't exist and ipmitool can't read fan RPM, PSU fans, or IPMI temperature sensors. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:59:15 +03:00
Michael Chus	ec6a0b292d	fix(webui): fix sensor grouping and fan card visibility - Tccd1-8 (AMD CCD die temps) now classified as 'cpu' group, appear on CPU Temperature chart instead of ambient - Fan RPM card hidden when no fans detected - Remove CPU Load/Mem Load/Power from fan table (have dedicated charts) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:58:01 +03:00
Michael Chus	a03312c286	feat: AMD GPU compute stress via rocm-validation-suite GST (GEMM) - Add rocm-validation-suite, rocblas, rocrand, hip-runtime-amd, hipblaslt, comgr to ISO (~700MB, needed for HIP compute) - RunAMDStressPack: run RVS GST (SGEMM ~31 TFLOPS/GPU) + bandwidth test - Add rvs symlink in chroot setup hook - Pin all new package versions in VERSIONS Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:56:32 +03:00
Michael Chus	e69e9109da	fix(iso): set bash as default shell for bee user Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:49:18 +03:00
Michael Chus	413869809d	feat(iso): add rocm-bandwidth-test for AMD GPU burn-in - Add rocm-bandwidth-test package to ISO - Add bee user to 'render' group (/dev/kfd, /dev/dri/renderD* access) - Add rocm-bandwidth-test symlink alongside rocm-smi Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:48:29 +03:00
Michael Chus	f9bd38572a	fix(network): strip linkdown/dead/onlink flags when restoring routes ip route show includes state flags like 'linkdown' that ip route add does not accept, causing restore to fail. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:39:16 +03:00
Michael Chus	662e3d2cdd	feat(webui): combined GPU charts (load/memload/power/temp all GPUs per chart) Replace per-GPU cards with 4 combined charts showing all GPUs as separate series. Add gpu-all-load/memload/power/temp endpoints. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:37:33 +03:00
Michael Chus	126af96780	fix(webui): slow metrics chart refresh to 3s interval Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:32:35 +03:00
Michael Chus	ada15ac777	fix: loading screen via Go handler instead of file:// HTML - bee-web.service: remove After=bee-audit so Go starts immediately - Go serves loading page from / when audit JSON not yet present; JS polls /api/ready (503 until file exists, 200 when ready) then redirects to dashboard - bee-openbox-session: wait for /healthz (Go binds fast <2s), open http://localhost/ directly — no file:// cross-origin issues - Remove loading.html static file Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 10:31:46 +03:00
Michael Chus	dfb94f9ca6	feat(iso): loading screen while bee-web starts Replace 15s blocking wait with instant Chromium launch showing a dark loading page that polls /healthz every 500ms and auto-redirects to the app when ready. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 09:33:04 +03:00
Michael Chus	5857805518	fix(iso): copy memtest86+ to ISO root via binary hook memtest files live in chroot /boot (inside squashfs) but GRUB needs them on the ISO filesystem. Binary hook copies them out at build time. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 09:02:40 +03:00
				`@@ -0,0 +1 @@`
				`# No GPU variant — no NVIDIA, no AMD/ROCm packages`