fix(iso): restore memtest menu entries and validate ISO

fix(stress): keep platform burn responsive under load
chore: commit pending repo changes
2026-04-01 07:04:48 +03:00 · 2026-03-31 22:28:26 +03:00 · 2026-03-31 22:17:36 +03:00 · 2026-03-31 22:10:28 +03:00 · 2026-03-31 11:40:52 +03:00 · 2026-03-31 11:38:30 +03:00
109 changed files with 10297 additions and 4449 deletions
--- a/PLAN.md
+++ b/PLAN.md
@@ -343,9 +343,9 @@ Planned code shape:
 - `bee tui` can rerun the audit manually
 - `bee tui` can export the latest audit JSON to removable media
 - `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-stress`
+- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-burn`
 - SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
- Memory/GPU SAT runtime defaults can be overridden via `BEE_MEMTESTER_*` and `BEE_GPU_STRESS_*`
+- Memory SAT runtime defaults can be overridden via `BEE_MEMTESTER_*`
 - removable export requires explicit target selection, mount, confirmation, copy, and cleanup

 ### 2.6 — Vendor utilities and optional assets
--- a/audit/Makefile
+++ b/audit/Makefile
@@ -0,0 +1,18 @@
+# Address passed to `bee web --listen`; override with `make run LISTEN=...`.
+LISTEN ?= :8080
+# Optional audit JSON path; when left empty no --audit-path flag is passed.
+AUDIT_PATH ?=
+
+# Arguments for `make run`; --audit-path is appended only when AUDIT_PATH is set.
+RUN_ARGS := web --listen $(LISTEN)
+ifneq ($(AUDIT_PATH),)
+RUN_ARGS += --audit-path $(AUDIT_PATH)
+endif
+
+.PHONY: run build test
+
+# Run the bee web command straight from source.
+run:
+	go run ./cmd/bee $(RUN_ARGS)
+
+# Build the bee binary into the current directory.
+build:
+	go build -o bee ./cmd/bee
+
+# Run the tests of every package in this module.
+test:
+	go test ./...
--- a/audit/bee
+++ b/audit/bee
--- a/audit/cmd/bee/main.go
+++ b/audit/cmd/bee/main.go
@@ -1,22 +1,54 @@
 package main

 import (
+	"context"
 	"flag"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
+	"runtime/debug"
 	"strings"

 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/runtimeenv"
-	"bee/audit/internal/tui"
 	"bee/audit/internal/webui"
 )

 var Version = "dev"

+// buildLabel returns the version label for this binary: the trimmed Version
+// (or "dev" when it is empty), followed by " (<revision>)" when the embedded
+// build info carries a VCS revision. The revision is cut to 12 characters and
+// gets a trailing "+" when the build info marks the tree as modified.
+func buildLabel() string {
+	base := strings.TrimSpace(Version)
+	if base == "" {
+		base = "dev"
+	}
+
+	info, ok := debug.ReadBuildInfo()
+	if !ok {
+		return base
+	}
+
+	rev, dirty := "", false
+	for _, s := range info.Settings {
+		if s.Key == "vcs.revision" {
+			rev = s.Value
+		} else if s.Key == "vcs.modified" {
+			dirty = s.Value == "true"
+		}
+	}
+	// Without a revision there is nothing to append, even if "modified" is set.
+	if rev == "" {
+		return base
+	}
+	if len(rev) > 12 {
+		rev = rev[:12]
+	}
+
+	closing := ")"
+	if dirty {
+		closing = "+)"
+	}
+	return base + " (" + rev + closing
+}
+
 func main() {
 	os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
 }
@@ -40,8 +72,6 @@ func run(args []string, stdout, stderr io.Writer) int {
 		return 0
 	case "audit":
 		return runAudit(args[1:], stdout, stderr)
-	case "tui":
-		return runTUI(args[1:], stdout, stderr)
 	case "export":
 		return runExport(args[1:], stdout, stderr)
 	case "preflight":
@@ -66,7 +96,6 @@ func printRootUsage(w io.Writer) {
 	fmt.Fprintln(w, `bee commands:
  bee audit   --runtime auto|local|livecd --output stdout|file:<path>
  bee preflight --output stdout|file:<path>
-  bee tui     --runtime auto|local|livecd
  bee export  --target <device>
  bee support-bundle --output stdout|file:<path>
  bee web     --listen :80 --audit-path `+app.DefaultAuditJSONPath+`
@@ -79,8 +108,6 @@ func runHelp(args []string, stdout, stderr io.Writer) int {
 	switch args[0] {
 	case "audit":
 		return runAudit([]string{"--help"}, stdout, stdout)
-	case "tui":
-		return runTUI([]string{"--help"}, stdout, stdout)
 	case "export":
 		return runExport([]string{"--help"}, stdout, stdout)
 	case "preflight":
@@ -145,43 +172,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
 	return 0
 }

-func runTUI(args []string, stdout, stderr io.Writer) int {
-	fs := flag.NewFlagSet("tui", flag.ContinueOnError)
-	fs.SetOutput(stderr)
-	runtimeFlag := fs.String("runtime", "auto", "runtime environment: auto, local, livecd")
-	fs.Usage = func() {
-		fmt.Fprintln(stderr, "usage: bee tui [--runtime auto|local|livecd]")
-		fs.PrintDefaults()
-	}
-	if err := fs.Parse(args); err != nil {
-		if err == flag.ErrHelp {
-			return 0
-		}
-		return 2
-	}
-	if fs.NArg() != 0 {
-		fs.Usage()
-		return 2
-	}
-
-	runtimeInfo, err := runtimeenv.Detect(*runtimeFlag)
-	if err != nil {
-		slog.Error("resolve runtime", "err", err)
-		return 1
-	}
-
-	slog.SetDefault(slog.New(slog.NewTextHandler(io.Discard, &slog.HandlerOptions{
-		Level: slog.LevelInfo,
-	})))
-
-	application := app.New(platform.New())
-	if err := tui.Run(application, runtimeInfo.Mode); err != nil {
-		slog.Error("run tui", "err", err)
-		return 1
-	}
-	return 0
-}
-
 func runExport(args []string, stdout, stderr io.Writer) int {
 	fs := flag.NewFlagSet("export", flag.ContinueOnError)
 	fs.SetOutput(stderr)
@@ -333,10 +323,19 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
 	}

 	slog.Info("starting bee web", "listen", *listenAddr, "audit_path", *auditPath)
+
+	runtimeInfo, err := runtimeenv.Detect("auto")
+	if err != nil {
+		slog.Warn("resolve runtime for web", "err", err)
+	}
+
 	if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
-		Title:     *title,
-		AuditPath: *auditPath,
-		ExportDir: *exportDir,
+		Title:       *title,
+		BuildLabel:  buildLabel(),
+		AuditPath:   *auditPath,
+		ExportDir:   *exportDir,
+		App:         app.New(platform.New()),
+		RuntimeMode: runtimeInfo.Mode,
 	}); err != nil {
 		slog.Error("run web", "err", err)
 		return 1
@@ -357,6 +356,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 	fs := flag.NewFlagSet("sat", flag.ContinueOnError)
 	fs.SetOutput(stderr)
 	duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
+	diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
 	if err := fs.Parse(args[1:]); err != nil {
 		if err == flag.ErrHelp {
 			return 0
@@ -371,7 +371,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 	target := args[0]
 	if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
 		fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
-		fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>]")
+		fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]")
 		return 2
 	}

@@ -380,19 +380,25 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
 		archive string
 		err     error
 	)
+	logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
 	switch target {
 	case "nvidia":
-		archive, err = application.RunNvidiaAcceptancePack("")
+		level := *diagLevel
+		if level > 0 {
+			_, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine)
+		} else {
+			archive, err = application.RunNvidiaAcceptancePack("", logLine)
+		}
 	case "memory":
-		archive, err = application.RunMemoryAcceptancePack("")
+		archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
 	case "storage":
-		archive, err = application.RunStorageAcceptancePack("")
+		archive, err = application.RunStorageAcceptancePackCtx(context.Background(), "", logLine)
 	case "cpu":
 		dur := *duration
 		if dur <= 0 {
 			dur = 60
 		}
-		archive, err = application.RunCPUAcceptancePack("", dur)
+		archive, err = application.RunCPUAcceptancePackCtx(context.Background(), "", dur, logLine)
 	}
 	if err != nil {
 		slog.Error("run sat", "target", target, "err", err)
--- a/audit/go.mod
+++ b/audit/go.mod
@@ -1,28 +1,26 @@
 module bee/audit

-go 1.24.0
+go 1.25.0

 replace reanimator/chart => ../internal/chart

-require github.com/charmbracelet/bubbletea v1.3.4
-require github.com/charmbracelet/lipgloss v1.0.0
-require reanimator/chart v0.0.0
+require (
+	github.com/go-analyze/charts v0.5.26
+	reanimator/chart v0.0.0-00010101000000-000000000000
+)

 require (
-	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
-	github.com/charmbracelet/lipgloss v1.0.0 // promoted to direct — used for TUI colors
-	github.com/charmbracelet/x/ansi v0.8.0 // indirect
-	github.com/charmbracelet/x/term v0.2.1 // indirect
-	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
-	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/go-analyze/bulk v0.1.3 // indirect
+	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
+	github.com/google/uuid v1.6.0 // indirect
 	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/mattn/go-localereader v0.0.1 // indirect
-	github.com/mattn/go-runewidth v0.0.16 // indirect
-	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
-	github.com/muesli/cancelreader v0.2.2 // indirect
-	github.com/muesli/termenv v0.15.2 // indirect
-	github.com/rivo/uniseg v0.4.7 // indirect
-	golang.org/x/sync v0.11.0 // indirect
-	golang.org/x/sys v0.30.0 // indirect
-	golang.org/x/text v0.3.8 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
+	golang.org/x/image v0.24.0 // indirect
+	golang.org/x/sys v0.42.0 // indirect
+	modernc.org/libc v1.70.0 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
+	modernc.org/sqlite v1.48.0 // indirect
 )
--- a/audit/go.sum
+++ b/audit/go.sum
@@ -1,37 +1,37 @@
-github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
-github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
-github.com/charmbracelet/bubbletea v1.3.4 h1:kCg7B+jSCFPLYRA52SDZjr51kG/fMUEoPoZrkaDHyoI=
-github.com/charmbracelet/bubbletea v1.3.4/go.mod h1:dtcUCyCGEX3g9tosuYiut3MXgY/Jsv9nKVdibKKRRXo=
-github.com/charmbracelet/lipgloss v1.0.0 h1:O7VkGDvqEdGi93X+DeqsQ7PKHDgtQfF8j8/O2qFMQNg=
-github.com/charmbracelet/lipgloss v1.0.0/go.mod h1:U5fy9Z+C38obMs+T+tJqst9VGzlOYGj4ri9reL3qUlo=
-github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2llXn7xE=
-github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q=
-github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
-github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
-github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
-github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
-github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
-github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/go-analyze/bulk v0.1.3 h1:pzRdBqzHDAT9PyROt0SlWE0YqPtdmTcEpIJY0C3vF0c=
+github.com/go-analyze/bulk v0.1.3/go.mod h1:afon/KtFJYnekIyN20H/+XUvcLFjE8sKR1CfpqfClgM=
+github.com/go-analyze/charts v0.5.26 h1:rSwZikLQuFX6cJzwI8OAgaWZneG1kDYxD857ms00ZxY=
+github.com/go-analyze/charts v0.5.26/go.mod h1:s1YvQhjiSwtLx1f2dOKfiV9x2TT49nVSL6v2rlRpTbY=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
-github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
-github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
-github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
-github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
-github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
-github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
-github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
-github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
-github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
-github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
-github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
-golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+golang.org/x/image v0.24.0 h1:AN7zRgVsbvmTfNyqIbbOraYL8mSwcKncEj8ofjgzcMQ=
+golang.org/x/image v0.24.0/go.mod h1:4b/ITuLfqYq1hqZcjofwctIhi7sZh2WaCjvsBNjjya8=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
-golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
-golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+modernc.org/libc v1.70.0 h1:U58NawXqXbgpZ/dcdS9kMshu08aiA6b7gusEusqzNkw=
+modernc.org/libc v1.70.0/go.mod h1:OVmxFGP1CI/Z4L3E0Q3Mf1PDE0BucwMkcXjjLntvHJo=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
+modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -33,12 +33,13 @@ var (
 )

 type App struct {
-	network  networkManager
-	services serviceManager
-	exports  exportManager
-	tools    toolManager
-	sat      satRunner
-	runtime  runtimeChecker
+	network   networkManager
+	services  serviceManager
+	exports   exportManager
+	tools     toolManager
+	sat       satRunner
+	runtime   runtimeChecker
+	installer installer
 }

 type ActionResult struct {
@@ -52,10 +53,15 @@ type networkManager interface {
 	DHCPOne(iface string) (string, error)
 	DHCPAll() (string, error)
 	SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error)
+	SetInterfaceState(iface string, up bool) error
+	GetInterfaceState(iface string) (bool, error)
+	CaptureNetworkSnapshot() (platform.NetworkSnapshot, error)
+	RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error
 }

 type serviceManager interface {
 	ListBeeServices() ([]string, error)
+	ServiceState(name string) string
 	ServiceStatus(name string) (string, error)
 	ServiceDo(name string, action platform.ServiceAction) (string, error)
 }
@@ -70,17 +76,53 @@ type toolManager interface {
 	CheckTools(names []string) []platform.ToolStatus
 }

+// installer is the set of installation operations App forwards to its
+// backend; New wires it to the platform system.
+type installer interface {
+	// ListInstallDisks returns the backend's list of install disks.
+	ListInstallDisks() ([]platform.InstallDisk, error)
+	// InstallToDisk runs an installation onto device under ctx.
+	// NOTE(review): logFile presumably names the file receiving the install log — confirm.
+	InstallToDisk(ctx context.Context, device string, logFile string) error
+	// IsLiveMediaInRAM reports the backend's answer on whether live media is in RAM.
+	IsLiveMediaInRAM() bool
+	// RunInstallToRAM runs the install-to-RAM operation under ctx, handing
+	// logFunc to the backend.
+	RunInstallToRAM(ctx context.Context, logFunc func(string)) error
+}
+
+// GPUPresenceResult reports which GPU vendor was matched by DetectGPUPresence.
+type GPUPresenceResult struct {
+	Nvidia bool // set when the detected vendor string is "nvidia"
+	AMD    bool // set when the detected vendor string is "amd"
+}
+
+// DetectGPUPresence asks the SAT runner for the GPU vendor once and maps the
+// answer onto the result flags. Any vendor string other than "nvidia" or
+// "amd" leaves both flags false.
+func (a *App) DetectGPUPresence() GPUPresenceResult {
+	var res GPUPresenceResult
+	switch a.sat.DetectGPUVendor() {
+	case "nvidia":
+		res.Nvidia = true
+	case "amd":
+		res.AMD = true
+	}
+	return res
+}
+
+// IsLiveMediaInRAM forwards to the installer backend and returns its answer.
+func (a *App) IsLiveMediaInRAM() bool {
+	return a.installer.IsLiveMediaInRAM()
+}
+
+// RunInstallToRAM forwards ctx and logFunc to the installer backend and
+// returns its error unchanged.
+func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+	return a.installer.RunInstallToRAM(ctx, logFunc)
+}
+
 type satRunner interface {
-	RunNvidiaAcceptancePack(baseDir string) (string, error)
-	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (string, error)
-	RunMemoryAcceptancePack(baseDir string) (string, error)
-	RunStorageAcceptancePack(baseDir string) (string, error)
-	RunCPUAcceptancePack(baseDir string, durationSec int) (string, error)
+	RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
+	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
+	RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
 	DetectGPUVendor() string
 	ListAMDGPUs() ([]platform.AMDGPUInfo, error)
-	RunAMDAcceptancePack(baseDir string) (string, error)
+	RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
+	RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
+	RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
+	RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error)
 	RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error)
+	RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error)
+	RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 }

 type runtimeChecker interface {
@@ -90,15 +132,27 @@ type runtimeChecker interface {

 func New(platform *platform.System) *App {
 	return &App{
-		network:  platform,
-		services: platform,
-		exports:  platform,
-		tools:    platform,
-		sat:      platform,
-		runtime:  platform,
+		network:   platform,
+		services:  platform,
+		exports:   platform,
+		tools:     platform,
+		sat:       platform,
+		runtime:   platform,
+		installer: platform,
 	}
 }

+// ApplySATOverlay decodes a raw audit JSON document, overlays the latest SAT
+// statuses from DefaultSATBaseDir onto its hardware section, and re-encodes
+// the result as JSON indented by two spaces. The web UI uses it to serve
+// always-fresh status. A decode error is returned as-is with a nil payload.
+func ApplySATOverlay(auditJSON []byte) ([]byte, error) {
+	req := new(schema.HardwareIngestRequest)
+	if err := json.Unmarshal(auditJSON, req); err != nil {
+		return nil, err
+	}
+	applyLatestSATStatuses(&req.Hardware, DefaultSATBaseDir)
+	// Marshal the value, not the pointer, to match the original encoding path.
+	return json.MarshalIndent(*req, "", "  ")
+}
+
 func (a *App) RunAudit(runtimeMode runtimeenv.Mode, output string) (string, error) {
 	if runtimeMode == runtimeenv.ModeLiveCD {
 		if err := a.runtime.CaptureTechnicalDump(DefaultTechDumpDir); err != nil {
@@ -292,6 +346,22 @@ func (a *App) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error) {
 	return a.network.SetStaticIPv4(cfg)
 }

+// SetInterfaceState forwards the requested state for iface to the network
+// backend and returns its error unchanged.
+func (a *App) SetInterfaceState(iface string, up bool) error {
+	return a.network.SetInterfaceState(iface, up)
+}
+
+// GetInterfaceState returns the network backend's state flag for iface.
+// NOTE(review): true presumably means "up", mirroring SetInterfaceState — confirm.
+func (a *App) GetInterfaceState(iface string) (bool, error) {
+	return a.network.GetInterfaceState(iface)
+}
+
+// CaptureNetworkSnapshot returns the snapshot produced by the network backend.
+func (a *App) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
+	return a.network.CaptureNetworkSnapshot()
+}
+
+// RestoreNetworkSnapshot hands snapshot to the network backend for restoring
+// and returns its error unchanged.
+func (a *App) RestoreNetworkSnapshot(snapshot platform.NetworkSnapshot) error {
+	return a.network.RestoreNetworkSnapshot(snapshot)
+}
+
 func (a *App) SetStaticIPv4Result(cfg platform.StaticIPv4Config) (ActionResult, error) {
 	body, err := a.network.SetStaticIPv4(cfg)
 	return ActionResult{Title: "Static IPv4: " + cfg.Interface, Body: bodyOr(body, "Static IPv4 updated.")}, err
@@ -348,6 +418,10 @@ func (a *App) ListBeeServices() ([]string, error) {
 	return a.services.ListBeeServices()
 }

+// ServiceState returns the service backend's state string for name. Unlike
+// ServiceStatus it has no error return.
+func (a *App) ServiceState(name string) string {
+	return a.services.ServiceState(name)
+}
+
 func (a *App) ServiceStatus(name string) (string, error) {
 	return a.services.ServiceStatus(name)
 }
@@ -403,15 +477,15 @@ func (a *App) AuditLogTailResult() ActionResult {
 	return ActionResult{Title: "Audit log tail", Body: body}
 }

-func (a *App) RunNvidiaAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunNvidiaAcceptancePack(baseDir)
+	return a.sat.RunNvidiaAcceptancePack(baseDir, logFunc)
 }

 func (a *App) RunNvidiaAcceptancePackResult(baseDir string) (ActionResult, error) {
-	path, err := a.RunNvidiaAcceptancePack(baseDir)
+	path, err := a.RunNvidiaAcceptancePack(baseDir, nil)
 	body := "Archive written."
 	if path != "" {
 		body = "Archive written to " + path
@@ -423,58 +497,74 @@ func (a *App) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
 	return a.sat.ListNvidiaGPUs()
 }

-func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (ActionResult, error) {
+func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (ActionResult, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, durationSec, sizeMB, gpuIndices)
+	path, err := a.sat.RunNvidiaAcceptancePackWithOptions(ctx, baseDir, diagLevel, gpuIndices, logFunc)
 	body := "Archive written."
 	if path != "" {
 		body = "Archive written to " + path
 	}
-	// Include terminal chart if available (runDir = archive path without .tar.gz).
-	if path != "" {
-		termPath := filepath.Join(strings.TrimSuffix(path, ".tar.gz"), "gpu-metrics-term.txt")
-		if chart, readErr := os.ReadFile(termPath); readErr == nil && len(chart) > 0 {
-			body += "\n\n" + string(chart)
-		}
-	}
-	return ActionResult{Title: "NVIDIA SAT", Body: body}, err
+	return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
 }

-func (a *App) RunMemoryAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
+	return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
+}
+
+func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunMemoryAcceptancePack(baseDir)
+	return a.sat.RunNvidiaStressPack(ctx, baseDir, opts, logFunc)
+}
+
+func (a *App) RunMemoryAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
+	return a.RunMemoryAcceptancePackCtx(context.Background(), baseDir, logFunc)
+}
+
+func (a *App) RunMemoryAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunMemoryAcceptancePack(ctx, baseDir, logFunc)
 }

 func (a *App) RunMemoryAcceptancePackResult(baseDir string) (ActionResult, error) {
-	path, err := a.RunMemoryAcceptancePack(baseDir)
+	path, err := a.RunMemoryAcceptancePack(baseDir, nil)
 	return ActionResult{Title: "Memory SAT", Body: satResultBody(path)}, err
 }

-func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
+func (a *App) RunCPUAcceptancePack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	return a.RunCPUAcceptancePackCtx(context.Background(), baseDir, durationSec, logFunc)
+}
+
+func (a *App) RunCPUAcceptancePackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunCPUAcceptancePack(baseDir, durationSec)
+	return a.sat.RunCPUAcceptancePack(ctx, baseDir, durationSec, logFunc)
 }

 func (a *App) RunCPUAcceptancePackResult(baseDir string, durationSec int) (ActionResult, error) {
-	path, err := a.RunCPUAcceptancePack(baseDir, durationSec)
+	path, err := a.RunCPUAcceptancePack(baseDir, durationSec, nil)
 	return ActionResult{Title: "CPU SAT", Body: satResultBody(path)}, err
 }

-func (a *App) RunStorageAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunStorageAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
+	return a.RunStorageAcceptancePackCtx(context.Background(), baseDir, logFunc)
+}
+
+func (a *App) RunStorageAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunStorageAcceptancePack(baseDir)
+	return a.sat.RunStorageAcceptancePack(ctx, baseDir, logFunc)
 }

 func (a *App) RunStorageAcceptancePackResult(baseDir string) (ActionResult, error) {
-	path, err := a.RunStorageAcceptancePack(baseDir)
+	path, err := a.RunStorageAcceptancePack(baseDir, nil)
 	return ActionResult{Title: "Storage SAT", Body: satResultBody(path)}, err
 }

@@ -486,18 +576,63 @@ func (a *App) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
 	return a.sat.ListAMDGPUs()
 }

-func (a *App) RunAMDAcceptancePack(baseDir string) (string, error) {
+func (a *App) RunAMDAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
+	return a.RunAMDAcceptancePackCtx(context.Background(), baseDir, logFunc)
+}
+
+func (a *App) RunAMDAcceptancePackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
 	}
-	return a.sat.RunAMDAcceptancePack(baseDir)
+	return a.sat.RunAMDAcceptancePack(ctx, baseDir, logFunc)
 }

 func (a *App) RunAMDAcceptancePackResult(baseDir string) (ActionResult, error) {
-	path, err := a.RunAMDAcceptancePack(baseDir)
+	path, err := a.RunAMDAcceptancePack(baseDir, nil)
 	return ActionResult{Title: "AMD GPU SAT", Body: satResultBody(path)}, err
 }

+// RunAMDMemIntegrityPackCtx runs the AMD memory-integrity pack via the SAT
+// runner, substituting DefaultSATBaseDir when baseDir is blank.
+func (a *App) RunAMDMemIntegrityPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) != "" {
+		return a.sat.RunAMDMemIntegrityPack(ctx, baseDir, logFunc)
+	}
+	return a.sat.RunAMDMemIntegrityPack(ctx, DefaultSATBaseDir, logFunc)
+}
+
+// RunAMDMemBandwidthPackCtx runs the AMD memory-bandwidth pack via the SAT
+// runner, substituting DefaultSATBaseDir when baseDir is blank.
+func (a *App) RunAMDMemBandwidthPackCtx(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) != "" {
+		return a.sat.RunAMDMemBandwidthPack(ctx, baseDir, logFunc)
+	}
+	return a.sat.RunAMDMemBandwidthPack(ctx, DefaultSATBaseDir, logFunc)
+}
+
+// RunMemoryStressPack is RunMemoryStressPackCtx with a background
+// (non-cancellable) context.
+func (a *App) RunMemoryStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	return a.RunMemoryStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
+}
+
+// RunSATStressPack is RunSATStressPackCtx with a background
+// (non-cancellable) context.
+func (a *App) RunSATStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	return a.RunSATStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
+}
+
+// RunAMDStressPack is RunAMDStressPackCtx with a background
+// (non-cancellable) context.
+func (a *App) RunAMDStressPack(baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	return a.RunAMDStressPackCtx(context.Background(), baseDir, durationSec, logFunc)
+}
+
+// RunMemoryStressPackCtx runs the memory stress pack via the SAT runner for
+// durationSec seconds under ctx, passing logFunc through. A blank baseDir
+// falls back to DefaultSATBaseDir, matching the other *Ctx pack wrappers, so
+// callers passing "" do not hand an empty directory to the runner.
+func (a *App) RunMemoryStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunMemoryStressPack(ctx, baseDir, durationSec, logFunc)
+}
+
+// RunSATStressPackCtx runs the SAT stress pack via the SAT runner for
+// durationSec seconds under ctx, passing logFunc through. A blank baseDir
+// falls back to DefaultSATBaseDir, matching the other *Ctx pack wrappers, so
+// callers passing "" do not hand an empty directory to the runner.
+func (a *App) RunSATStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunSATStressPack(ctx, baseDir, durationSec, logFunc)
+}
+
+// RunAMDStressPackCtx runs the AMD stress pack via the SAT runner for
+// durationSec seconds, substituting DefaultSATBaseDir when baseDir is blank.
+func (a *App) RunAMDStressPackCtx(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) != "" {
+		return a.sat.RunAMDStressPack(ctx, baseDir, durationSec, logFunc)
+	}
+	return a.sat.RunAMDStressPack(ctx, DefaultSATBaseDir, durationSec, logFunc)
+}
+
 func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platform.FanStressOptions) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
@@ -505,6 +640,22 @@ func (a *App) RunFanStressTest(ctx context.Context, baseDir string, opts platfor
 	return a.sat.RunFanStressTest(ctx, baseDir, opts)
 }

+// RunPlatformStress runs the platform stress test via the SAT runner with the
+// given options, substituting DefaultSATBaseDir when baseDir is blank.
+func (a *App) RunPlatformStress(ctx context.Context, baseDir string, opts platform.PlatformStressOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) != "" {
+		return a.sat.RunPlatformStress(ctx, baseDir, opts, logFunc)
+	}
+	return a.sat.RunPlatformStress(ctx, DefaultSATBaseDir, opts, logFunc)
+}
+
+// RunNCCLTestsResult runs the NCCL tests under DefaultSATBaseDir without a log
+// callback and wraps the outcome in an ActionResult titled
+// "NCCL bandwidth test". The body names the results path when the runner
+// returned one; an empty path yields "No results written." instead of a
+// dangling "Results: " label. Any error other than context.Canceled is
+// appended to the body, and the error itself is always returned to the caller.
+func (a *App) RunNCCLTestsResult(ctx context.Context) (ActionResult, error) {
+	path, err := a.sat.RunNCCLTests(ctx, DefaultSATBaseDir, nil)
+	body := "No results written."
+	if path != "" {
+		body = "Results: " + path
+	}
+	// NOTE(review): a wrapped cancellation would not match this direct
+	// comparison; consider errors.Is once "errors" is confirmed imported.
+	if err != nil && err != context.Canceled {
+		body += "\nERROR: " + err.Error()
+	}
+	return ActionResult{Title: "NCCL bandwidth test", Body: body}, err
+}
+
 func (a *App) RunFanStressTestResult(ctx context.Context, opts platform.FanStressOptions) (ActionResult, error) {
 	path, err := a.RunFanStressTest(ctx, "", opts)
 	body := formatFanStressResult(path)
@@ -1000,3 +1151,70 @@ func firstNonEmpty(values ...string) string {
 	}
 	return ""
 }
+
+// ListInstallDisks returns the installer backend's list of candidate install
+// disks, together with any error it reports.
+func (a *App) ListInstallDisks() ([]platform.InstallDisk, error) {
+	disks, err := a.installer.ListInstallDisks()
+	return disks, err
+}
+
+// InstallToDisk asks the installer backend to install onto device, passing
+// ctx and logFile through unchanged, and returns the backend's error.
+func (a *App) InstallToDisk(ctx context.Context, device string, logFile string) error {
+	err := a.installer.InstallToDisk(ctx, device, logFile)
+	return err
+}
+
+// formatSATDetail converts raw summary.txt key=value content into a
+// human-readable per-step report: an optional "Run:" header, one
+// PASS/FAIL/SKIP/? line per step in the order the steps first appear in raw,
+// and an optional "Overall:" footer with the job_ok / job_failed values.
+func formatSATDetail(raw string) string {
+	values := parseKeyValueSummary(raw)
+	var out strings.Builder
+
+	if runAt, found := values["run_at_utc"]; found {
+		fmt.Fprintf(&out, "Run: %s\n\n", runAt)
+	}
+
+	// Collect step names in file order; everything before "_status=" on a line
+	// is the step key. "overall" is reported separately in the footer.
+	visited := make(map[string]bool)
+	var steps []string
+	for _, entry := range strings.Split(raw, "\n") {
+		pos := strings.Index(entry, "_status=")
+		if pos < 0 {
+			continue
+		}
+		step := entry[:pos]
+		if step == "overall" || visited[step] {
+			continue
+		}
+		visited[step] = true
+		steps = append(steps, step)
+	}
+
+	// Fixed-width verdict prefixes keep the step names aligned in a column.
+	prefixes := map[string]string{
+		"OK":          "PASS  ",
+		"FAILED":      "FAIL  ",
+		"UNSUPPORTED": "SKIP  ",
+	}
+	for _, step := range steps {
+		prefix, known := prefixes[values[step+"_status"]]
+		if !known {
+			prefix = "?     "
+		}
+		out.WriteString(prefix + cleanSummaryKey(step) + "\n")
+	}
+
+	if overall, found := values["overall_status"]; found {
+		fmt.Fprintf(&out, "\nOverall: %s  (ok=%s  failed=%s)", overall, values["job_ok"], values["job_failed"])
+	}
+
+	return strings.TrimSpace(out.String())
+}
+
+// cleanSummaryKey strips a leading numeric ordinal and its dash from a SAT
+// step key: "1-lscpu" → "lscpu", "3-stress-ng" → "stress-ng". Keys with no
+// dash, with a leading dash, or whose first segment contains anything other
+// than ASCII digits are returned unchanged.
+func cleanSummaryKey(key string) string {
+	ordinal, rest, found := strings.Cut(key, "-")
+	if !found || ordinal == "" {
+		return key
+	}
+	// Anything left after trimming digits means the prefix was not a pure number.
+	if strings.TrimLeft(ordinal, "0123456789") != "" {
+		return key
+	}
+	return rest
+}
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -43,6 +43,13 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
 	return f.setStaticIPv4Fn(cfg)
 }

+// The remaining fakeNetwork methods are inert stubs: SetInterfaceState and
+// RestoreNetworkSnapshot always succeed, GetInterfaceState always returns
+// true, and CaptureNetworkSnapshot returns a zero-value snapshot.
+func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error {
+	return nil
+}
+
+func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) {
+	return true, nil
+}
+
+func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
+	var empty platform.NetworkSnapshot
+	return empty, nil
+}
+
+func (f fakeNetwork) RestoreNetworkSnapshot(platform.NetworkSnapshot) error {
+	return nil
+}
+
 type fakeServices struct {
 	serviceStatusFn func(string) (string, error)
 	serviceDoFn     func(string, platform.ServiceAction) (string, error)
@@ -52,6 +59,10 @@ func (f fakeServices) ListBeeServices() ([]string, error) {
 	return nil, nil
 }

+// ServiceState is a stub that reports "active" for every service name.
+func (f fakeServices) ServiceState(name string) string {
+	const state = "active"
+	return state
+}
+
 func (f fakeServices) ServiceStatus(name string) (string, error) {
 	return f.serviceStatusFn(name)
 }
@@ -109,21 +120,29 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 }

 type fakeSAT struct {
-	runNvidiaFn      func(string) (string, error)
-	runMemoryFn      func(string) (string, error)
-	runStorageFn     func(string) (string, error)
-	runCPUFn         func(string, int) (string, error)
-	detectVendorFn   func() string
-	listAMDGPUsFn    func() ([]platform.AMDGPUInfo, error)
-	runAMDPackFn     func(string) (string, error)
-	listNvidiaGPUsFn func() ([]platform.NvidiaGPU, error)
+	runNvidiaFn       func(string) (string, error)
+	runNvidiaStressFn func(string, platform.NvidiaStressOptions) (string, error)
+	runMemoryFn       func(string) (string, error)
+	runStorageFn      func(string) (string, error)
+	runCPUFn          func(string, int) (string, error)
+	detectVendorFn    func() string
+	listAMDGPUsFn     func() ([]platform.AMDGPUInfo, error)
+	runAMDPackFn      func(string) (string, error)
+	listNvidiaGPUsFn  func() ([]platform.NvidiaGPU, error)
 }

-func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
+func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
 	return f.runNvidiaFn(baseDir)
 }

-func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir string, _ int, _ int, _ []int) (string, error) {
+func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir string, _ int, _ []int, _ func(string)) (string, error) {
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
+	if f.runNvidiaStressFn != nil {
+		return f.runNvidiaStressFn(baseDir, opts)
+	}
 	return f.runNvidiaFn(baseDir)
 }

@@ -134,15 +153,15 @@ func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
 	return nil, nil
 }

-func (f fakeSAT) RunMemoryAcceptancePack(baseDir string) (string, error) {
+func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
 	return f.runMemoryFn(baseDir)
 }

-func (f fakeSAT) RunStorageAcceptancePack(baseDir string) (string, error) {
+func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
 	return f.runStorageFn(baseDir)
 }

-func (f fakeSAT) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
+func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
 	if f.runCPUFn != nil {
 		return f.runCPUFn(baseDir, durationSec)
 	}
@@ -163,17 +182,43 @@ func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
 	return nil, nil
 }

-func (f fakeSAT) RunAMDAcceptancePack(baseDir string) (string, error) {
+func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
 	if f.runAMDPackFn != nil {
 		return f.runAMDPackFn(baseDir)
 	}
 	return "", nil
 }

+// The AMD memory, AMD stress, memory stress and SAT stress packs are no-op
+// stubs in the fake: each ignores its arguments and returns an empty string
+// with a nil error.
+
+func (f fakeSAT) RunAMDMemIntegrityPack(context.Context, string, func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDMemBandwidthPack(context.Context, string, func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDStressPack(context.Context, string, int, func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunMemoryStressPack(context.Context, string, int, func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunSATStressPack(context.Context, string, int, func(string)) (string, error) {
+	return "", nil
+}
+
 func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
 	return "", nil
 }

+// RunPlatformStress is a no-op stub: it ignores its arguments and returns an
+// empty string with a nil error.
+func (f fakeSAT) RunPlatformStress(context.Context, string, platform.PlatformStressOptions, func(string)) (string, error) {
+	return "", nil
+}
+
+// RunNCCLTests is a no-op stub: it ignores its arguments and returns an empty
+// string with a nil error.
+func (f fakeSAT) RunNCCLTests(context.Context, string, func(string)) (string, error) {
+	return "", nil
+}
+
 func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 	t.Parallel()

@@ -566,13 +611,13 @@ func TestRunSATDefaultsToExportDir(t *testing.T) {
 		},
 	}

-	if _, err := a.RunNvidiaAcceptancePack(""); err != nil {
+	if _, err := a.RunNvidiaAcceptancePack("", nil); err != nil {
 		t.Fatal(err)
 	}
-	if _, err := a.RunMemoryAcceptancePack(""); err != nil {
+	if _, err := a.RunMemoryAcceptancePack("", nil); err != nil {
 		t.Fatal(err)
 	}
-	if _, err := a.RunStorageAcceptancePack(""); err != nil {
+	if _, err := a.RunStorageAcceptancePack("", nil); err != nil {
 		t.Fatal(err)
 	}
 }
--- a/audit/internal/app/panel.go
+++ b/audit/internal/app/panel.go
@@ -1,387 +0,0 @@
-package app
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"sort"
-	"strings"
-
-	"bee/audit/internal/schema"
-)
-
-// ComponentRow is one line in the hardware panel.
-type ComponentRow struct {
-	Key    string // "CPU", "MEM", "GPU", "DISK", "PSU"
-	Status string // "PASS", "FAIL", "CANCEL", "N/A"
-	Detail string // compact one-liner
-}
-
-// HardwarePanelData holds everything the TUI right panel needs.
-type HardwarePanelData struct {
-	Header []string
-	Rows   []ComponentRow
-}
-
-// LoadHardwarePanel reads the latest audit JSON and SAT summaries.
-// Returns empty panel if no audit data exists yet.
-func (a *App) LoadHardwarePanel() HardwarePanelData {
-	raw, err := os.ReadFile(DefaultAuditJSONPath)
-	if err != nil {
-		return HardwarePanelData{Header: []string{"No audit data — run audit first."}}
-	}
-	var snap schema.HardwareIngestRequest
-	if err := json.Unmarshal(raw, &snap); err != nil {
-		return HardwarePanelData{Header: []string{"Audit data unreadable."}}
-	}
-
-	statuses := satStatuses()
-
-	var header []string
-	if sys := formatSystemLine(snap.Hardware.Board); sys != "" {
-		header = append(header, sys)
-	}
-	for _, fw := range snap.Hardware.Firmware {
-		if fw.DeviceName == "BIOS" && fw.Version != "" {
-			header = append(header, "BIOS: "+fw.Version)
-		}
-		if fw.DeviceName == "BMC" && fw.Version != "" {
-			header = append(header, "BMC:  "+fw.Version)
-		}
-	}
-	if ip := formatIPLine(a.network.ListInterfaces); ip != "" {
-		header = append(header, ip)
-	}
-
-	var rows []ComponentRow
-
-	if cpu := formatCPULine(snap.Hardware.CPUs); cpu != "" {
-		rows = append(rows, ComponentRow{
-			Key:    "CPU",
-			Status: statuses["cpu"],
-			Detail: strings.TrimPrefix(cpu, "CPU: "),
-		})
-	}
-	if mem := formatMemoryLine(snap.Hardware.Memory); mem != "" {
-		rows = append(rows, ComponentRow{
-			Key:    "MEM",
-			Status: statuses["memory"],
-			Detail: strings.TrimPrefix(mem, "Memory: "),
-		})
-	}
-	if gpu := formatGPULine(snap.Hardware.PCIeDevices); gpu != "" {
-		rows = append(rows, ComponentRow{
-			Key:    "GPU",
-			Status: statuses["gpu"],
-			Detail: strings.TrimPrefix(gpu, "GPU: "),
-		})
-	}
-	if disk := formatStorageLine(snap.Hardware.Storage); disk != "" {
-		rows = append(rows, ComponentRow{
-			Key:    "DISK",
-			Status: statuses["storage"],
-			Detail: strings.TrimPrefix(disk, "Storage: "),
-		})
-	}
-	if psu := formatPSULine(snap.Hardware.PowerSupplies); psu != "" {
-		rows = append(rows, ComponentRow{
-			Key:    "PSU",
-			Status: "N/A",
-			Detail: psu,
-		})
-	}
-
-	return HardwarePanelData{Header: header, Rows: rows}
-}
-
-// ComponentDetailResult returns detail text for a component shown in the panel.
-func (a *App) ComponentDetailResult(key string) ActionResult {
-	switch key {
-	case "CPU":
-		return a.cpuDetailResult(false)
-	case "MEM":
-		return a.satDetailResult("memory", "memory-", "MEM detail")
-	case "GPU":
-		// Prefer whichever GPU SAT was run most recently.
-		nv, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-nvidia-*/summary.txt"))
-		am, _ := filepath.Glob(filepath.Join(DefaultSATBaseDir, "gpu-amd-*/summary.txt"))
-		sort.Strings(nv)
-		sort.Strings(am)
-		latestNV := ""
-		if len(nv) > 0 {
-			latestNV = nv[len(nv)-1]
-		}
-		latestAM := ""
-		if len(am) > 0 {
-			latestAM = am[len(am)-1]
-		}
-		if latestAM > latestNV {
-			return a.satDetailResult("gpu", "gpu-amd-", "GPU detail")
-		}
-		return a.satDetailResult("gpu", "gpu-nvidia-", "GPU detail")
-	case "DISK":
-		return a.satDetailResult("storage", "storage-", "DISK detail")
-	case "PSU":
-		return a.psuDetailResult()
-	default:
-		return ActionResult{Title: key, Body: "No detail available."}
-	}
-}
-
-func (a *App) cpuDetailResult(satOnly bool) ActionResult {
-	var b strings.Builder
-
-	// Show latest SAT summary if available.
-	satResult := a.satDetailResult("cpu", "cpu-", "CPU SAT")
-	if satResult.Body != "No test results found. Run a test first." {
-		fmt.Fprintln(&b, "=== Last SAT ===")
-		fmt.Fprintln(&b, satResult.Body)
-		fmt.Fprintln(&b)
-	}
-
-	if satOnly {
-		body := strings.TrimSpace(b.String())
-		if body == "" {
-			body = "No CPU SAT results found. Run a test first."
-		}
-		return ActionResult{Title: "CPU SAT", Body: body}
-	}
-
-	raw, err := os.ReadFile(DefaultAuditJSONPath)
-	if err != nil {
-		return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
-	}
-	var snap schema.HardwareIngestRequest
-	if err := json.Unmarshal(raw, &snap); err != nil {
-		return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
-	}
-	if len(snap.Hardware.CPUs) == 0 {
-		return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
-	}
-	fmt.Fprintln(&b, "=== Audit ===")
-	for i, cpu := range snap.Hardware.CPUs {
-		fmt.Fprintf(&b, "CPU %d\n", i)
-		if cpu.Model != nil {
-			fmt.Fprintf(&b, "  Model:    %s\n", *cpu.Model)
-		}
-		if cpu.Manufacturer != nil {
-			fmt.Fprintf(&b, "  Vendor:   %s\n", *cpu.Manufacturer)
-		}
-		if cpu.Cores != nil {
-			fmt.Fprintf(&b, "  Cores:    %d\n", *cpu.Cores)
-		}
-		if cpu.Threads != nil {
-			fmt.Fprintf(&b, "  Threads:  %d\n", *cpu.Threads)
-		}
-		if cpu.MaxFrequencyMHz != nil {
-			fmt.Fprintf(&b, "  Max freq: %d MHz\n", *cpu.MaxFrequencyMHz)
-		}
-		if cpu.TemperatureC != nil {
-			fmt.Fprintf(&b, "  Temp:     %.1f°C\n", *cpu.TemperatureC)
-		}
-		if cpu.Throttled != nil {
-			fmt.Fprintf(&b, "  Throttled: %v\n", *cpu.Throttled)
-		}
-		if cpu.CorrectableErrorCount != nil && *cpu.CorrectableErrorCount > 0 {
-			fmt.Fprintf(&b, "  ECC correctable:   %d\n", *cpu.CorrectableErrorCount)
-		}
-		if cpu.UncorrectableErrorCount != nil && *cpu.UncorrectableErrorCount > 0 {
-			fmt.Fprintf(&b, "  ECC uncorrectable: %d\n", *cpu.UncorrectableErrorCount)
-		}
-		if i < len(snap.Hardware.CPUs)-1 {
-			fmt.Fprintln(&b)
-		}
-	}
-	return ActionResult{Title: "CPU", Body: strings.TrimSpace(b.String())}
-}
-
-func (a *App) satDetailResult(statusKey, prefix, title string) ActionResult {
-	matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, prefix+"*/summary.txt"))
-	if err != nil || len(matches) == 0 {
-		return ActionResult{Title: title, Body: "No test results found. Run a test first."}
-	}
-	sort.Strings(matches)
-	raw, err := os.ReadFile(matches[len(matches)-1])
-	if err != nil {
-		return ActionResult{Title: title, Body: "Could not read test results."}
-	}
-	return ActionResult{Title: title, Body: formatSATDetail(strings.TrimSpace(string(raw)))}
-}
-
-// formatSATDetail converts raw summary.txt key=value content to a human-readable per-step display.
-func formatSATDetail(raw string) string {
-	var b strings.Builder
-	kv := parseKeyValueSummary(raw)
-
-	if t, ok := kv["run_at_utc"]; ok {
-		fmt.Fprintf(&b, "Run: %s\n\n", t)
-	}
-
-	// Collect step names in order they appear in the file
-	lines := strings.Split(raw, "\n")
-	var stepKeys []string
-	seenStep := map[string]bool{}
-	for _, line := range lines {
-		if idx := strings.Index(line, "_status="); idx >= 0 {
-			key := line[:idx]
-			if !seenStep[key] && key != "overall" {
-				seenStep[key] = true
-				stepKeys = append(stepKeys, key)
-			}
-		}
-	}
-
-	for _, key := range stepKeys {
-		status := kv[key+"_status"]
-		display := cleanSummaryKey(key)
-		switch status {
-		case "OK":
-			fmt.Fprintf(&b, "PASS  %s\n", display)
-		case "FAILED":
-			fmt.Fprintf(&b, "FAIL  %s\n", display)
-		case "UNSUPPORTED":
-			fmt.Fprintf(&b, "SKIP  %s\n", display)
-		default:
-			fmt.Fprintf(&b, "?     %s\n", display)
-		}
-	}
-
-	if overall, ok := kv["overall_status"]; ok {
-		ok2 := kv["job_ok"]
-		failed := kv["job_failed"]
-		fmt.Fprintf(&b, "\nOverall: %s  (ok=%s  failed=%s)", overall, ok2, failed)
-	}
-
-	return strings.TrimSpace(b.String())
-}
-
-// cleanSummaryKey strips the leading numeric prefix from a SAT step key.
-// "1-lscpu" → "lscpu", "3-stress-ng" → "stress-ng"
-func cleanSummaryKey(key string) string {
-	idx := strings.Index(key, "-")
-	if idx <= 0 {
-		return key
-	}
-	prefix := key[:idx]
-	for _, c := range prefix {
-		if c < '0' || c > '9' {
-			return key
-		}
-	}
-	return key[idx+1:]
-}
-
-func (a *App) psuDetailResult() ActionResult {
-	raw, err := os.ReadFile(DefaultAuditJSONPath)
-	if err != nil {
-		return ActionResult{Title: "PSU", Body: "No audit data."}
-	}
-	var snap schema.HardwareIngestRequest
-	if err := json.Unmarshal(raw, &snap); err != nil {
-		return ActionResult{Title: "PSU", Body: "Audit data unreadable."}
-	}
-	if len(snap.Hardware.PowerSupplies) == 0 {
-		return ActionResult{Title: "PSU", Body: "No PSU data in last audit."}
-	}
-	var b strings.Builder
-	for i, psu := range snap.Hardware.PowerSupplies {
-		fmt.Fprintf(&b, "PSU %d\n", i)
-		if psu.Model != nil {
-			fmt.Fprintf(&b, "  Model:   %s\n", *psu.Model)
-		}
-		if psu.Vendor != nil {
-			fmt.Fprintf(&b, "  Vendor:  %s\n", *psu.Vendor)
-		}
-		if psu.WattageW != nil {
-			fmt.Fprintf(&b, "  Rated:   %d W\n", *psu.WattageW)
-		}
-		if psu.InputPowerW != nil {
-			fmt.Fprintf(&b, "  Input:   %.1f W\n", *psu.InputPowerW)
-		}
-		if psu.OutputPowerW != nil {
-			fmt.Fprintf(&b, "  Output:  %.1f W\n", *psu.OutputPowerW)
-		}
-		if psu.TemperatureC != nil {
-			fmt.Fprintf(&b, "  Temp:    %.1f°C\n", *psu.TemperatureC)
-		}
-		if i < len(snap.Hardware.PowerSupplies)-1 {
-			fmt.Fprintln(&b)
-		}
-	}
-	return ActionResult{Title: "PSU", Body: strings.TrimSpace(b.String())}
-}
-
-// satStatuses reads the latest summary.txt for each SAT type and returns
-// a map of component key ("gpu","memory","storage") → status ("PASS","FAIL","CANCEL","N/A").
-func satStatuses() map[string]string {
-	result := map[string]string{
-		"gpu":     "N/A",
-		"memory":  "N/A",
-		"storage": "N/A",
-		"cpu":     "N/A",
-	}
-	patterns := []struct {
-		key    string
-		prefix string
-	}{
-		{"gpu", "gpu-nvidia-"},
-		{"gpu", "gpu-amd-"},
-		{"memory", "memory-"},
-		{"storage", "storage-"},
-		{"cpu", "cpu-"},
-	}
-	for _, item := range patterns {
-		matches, err := filepath.Glob(filepath.Join(DefaultSATBaseDir, item.prefix+"*/summary.txt"))
-		if err != nil || len(matches) == 0 {
-			continue
-		}
-		sort.Strings(matches)
-		raw, err := os.ReadFile(matches[len(matches)-1])
-		if err != nil {
-			continue
-		}
-		values := parseKeyValueSummary(string(raw))
-		switch strings.ToUpper(strings.TrimSpace(values["overall_status"])) {
-		case "OK":
-			result[item.key] = "PASS"
-		case "FAILED":
-			result[item.key] = "FAIL"
-		case "CANCELED", "CANCELLED":
-			result[item.key] = "CANCEL"
-		}
-	}
-	return result
-}
-
-func formatPSULine(psus []schema.HardwarePowerSupply) string {
-	var present []schema.HardwarePowerSupply
-	for _, psu := range psus {
-		if psu.Present != nil && !*psu.Present {
-			continue
-		}
-		present = append(present, psu)
-	}
-	if len(present) == 0 {
-		return ""
-	}
-	firstW := 0
-	if present[0].WattageW != nil {
-		firstW = *present[0].WattageW
-	}
-	allSame := firstW > 0
-	for _, p := range present[1:] {
-		w := 0
-		if p.WattageW != nil {
-			w = *p.WattageW
-		}
-		if w != firstW {
-			allSame = false
-			break
-		}
-	}
-	if allSame && firstW > 0 {
-		return fmt.Sprintf("%dx %dW", len(present), firstW)
-	}
-	return fmt.Sprintf("%d PSU", len(present))
-}
--- a/audit/internal/app/sat_overlay.go
+++ b/audit/internal/app/sat_overlay.go
@@ -141,9 +141,11 @@ func satSummaryStatus(summary satSummary, label string) (string, string, bool) {
 func satKeyStatus(rawStatus, label string) (string, string, bool) {
 	switch strings.ToUpper(strings.TrimSpace(rawStatus)) {
 	case "OK":
-		return "OK", label + " passed", true
+		// No error description on success — error_description is for problems only.
+		return "OK", "", true
 	case "PARTIAL", "UNSUPPORTED", "CANCELED", "CANCELLED":
-		return "Warning", label + " incomplete", true
+		// Tool couldn't run or test was incomplete — we can't assert hardware health.
+		return "Unknown", "", true
 	case "FAILED":
 		return "Critical", label + " failed", true
 	default:
@@ -180,6 +182,8 @@ func statusSeverity(status string) int {
 		return 2
 	case "OK":
 		return 1
+	case "Unknown":
+		return 1 // same as OK — does not override OK from another source
 	default:
 		return 0
 	}
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,18 +13,19 @@ import (

 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
-	ElapsedSec float64
-	GPUIndex   int
-	TempC      float64
-	UsagePct   float64
-	PowerW     float64
-	ClockMHz   float64
+	ElapsedSec  float64 `json:"elapsed_sec"`
+	GPUIndex    int     `json:"index"`
+	TempC       float64 `json:"temp_c"`
+	UsagePct    float64 `json:"usage_pct"`
+	MemUsagePct float64 `json:"mem_usage_pct"`
+	PowerW      float64 `json:"power_w"`
+	ClockMHz    float64 `json:"clock_mhz"`
 }

 // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
 func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
 	args := []string{
-		"--query-gpu=index,temperature.gpu,utilization.gpu,power.draw,clocks.current.graphics",
+		"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics",
 		"--format=csv,noheader,nounits",
 	}
 	if len(gpuIndices) > 0 {
@@ -45,16 +46,17 @@ func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
 			continue
 		}
 		parts := strings.Split(line, ", ")
-		if len(parts) < 5 {
+		if len(parts) < 6 {
 			continue
 		}
 		idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
 		rows = append(rows, GPUMetricRow{
-			GPUIndex: idx,
-			TempC:    parseGPUFloat(parts[1]),
-			UsagePct: parseGPUFloat(parts[2]),
-			PowerW:   parseGPUFloat(parts[3]),
-			ClockMHz: parseGPUFloat(parts[4]),
+			GPUIndex:    idx,
+			TempC:       parseGPUFloat(parts[1]),
+			UsagePct:    parseGPUFloat(parts[2]),
+			MemUsagePct: parseGPUFloat(parts[3]),
+			PowerW:      parseGPUFloat(parts[4]),
+			ClockMHz:    parseGPUFloat(parts[5]),
 		})
 	}
 	return rows, nil
@@ -74,6 +76,66 @@ func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
 	return sampleGPUMetrics(gpuIndices)
 }

+// sampleAMDGPUMetrics queries rocm-smi once (CSV output) and returns one
+// GPUMetricRow per non-empty data row.
+//
+// Columns are located by matching header names, so extra or reordered columns
+// are tolerated. Only TempC, UsagePct, MemUsagePct and PowerW are filled in;
+// GPUIndex is the row's position in the output, not a parsed device id. A
+// column that is missing, short, or reported as "n/a" reads as 0.
+//
+// Errors: the runROCmSMI error is returned unchanged; output without a header
+// plus at least one further line, or with no data rows, is also an error.
+func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
+	out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
+	if err != nil {
+		return nil, err
+	}
+	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
+	if len(lines) < 2 {
+		return nil, fmt.Errorf("rocm-smi: insufficient output")
+	}
+
+	// Normalise the header once so every lookup is a plain substring test.
+	headers := strings.Split(lines[0], ",")
+	for i, h := range headers {
+		headers[i] = strings.ToLower(strings.TrimSpace(h))
+	}
+	// colIdx returns the column matching the highest-priority keyword, or -1.
+	// Keywords are tried in the order given, so a specific name such as
+	// "sensor edge" wins over a generic fallback such as "temp" regardless of
+	// the order in which rocm-smi prints its columns.
+	colIdx := func(keywords ...string) int {
+		for _, kw := range keywords {
+			for i, h := range headers {
+				if strings.Contains(h, kw) {
+					return i
+				}
+			}
+		}
+		return -1
+	}
+	idxTemp := colIdx("sensor edge", "temperature (c)", "temp")
+	idxUse := colIdx("gpu use (%)")
+	idxMem := colIdx("vram%", "memory allocated")
+	idxPow := colIdx("average graphics package power", "power (w)")
+
+	var rows []GPUMetricRow
+	for _, line := range lines[1:] {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		parts := strings.Split(line, ",")
+		// get reads column i as a float; absent columns and "n/a" yield 0.
+		get := func(i int) float64 {
+			if i < 0 || i >= len(parts) {
+				return 0
+			}
+			v := strings.TrimSpace(parts[i])
+			if strings.EqualFold(v, "n/a") {
+				return 0
+			}
+			return parseGPUFloat(v)
+		}
+		rows = append(rows, GPUMetricRow{
+			GPUIndex:    len(rows),
+			TempC:       get(idxTemp),
+			UsagePct:    get(idxUse),
+			MemUsagePct: get(idxMem),
+			PowerW:      get(idxPow),
+		})
+	}
+	if len(rows) == 0 {
+		return nil, fmt.Errorf("rocm-smi: no GPU rows parsed")
+	}
+	return rows, nil
+}
+
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
@@ -332,7 +394,7 @@ const (
 )

 // RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
-// Suitable for display in the TUI screenOutput.
+// Used in SAT stress-test logs.
 func RenderGPUTerminalChart(rows []GPUMetricRow) string {
 	seen := make(map[int]bool)
 	var order []int
@@ -375,162 +437,6 @@ func RenderGPUTerminalChart(rows []GPUMetricRow) string {
 	return strings.TrimRight(b.String(), "\n")
 }

-// RenderGPULiveChart renders all GPU metrics on a single combined chart per GPU.
-// Each series is normalised to its own min–max and drawn in a different colour.
-// chartWidth controls the width of the plot area (Y-axis label uses 5 extra chars).
-func RenderGPULiveChart(rows []GPUMetricRow, chartWidth int) string {
-	if chartWidth < 20 {
-		chartWidth = 70
-	}
-	const chartHeight = 14
-
-	seen := make(map[int]bool)
-	var order []int
-	gpuMap := make(map[int][]GPUMetricRow)
-	for _, r := range rows {
-		if !seen[r.GPUIndex] {
-			seen[r.GPUIndex] = true
-			order = append(order, r.GPUIndex)
-		}
-		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
-	}
-
-	type seriesDef struct {
-		label string
-		color string
-		unit  string
-		fn    func(GPUMetricRow) float64
-	}
-	defs := []seriesDef{
-		{"Usage", ansiBlue, "%", func(r GPUMetricRow) float64 { return r.UsagePct }},
-		{"Temp", ansiRed, "°C", func(r GPUMetricRow) float64 { return r.TempC }},
-		{"Power", ansiGreen, "W", func(r GPUMetricRow) float64 { return r.PowerW }},
-	}
-
-	var b strings.Builder
-	for _, gpuIdx := range order {
-		gr := gpuMap[gpuIdx]
-		if len(gr) == 0 {
-			continue
-		}
-		elapsed := gr[len(gr)-1].ElapsedSec
-
-		// Build value slices for each series.
-		type seriesData struct {
-			seriesDef
-			vals []float64
-			mn   float64
-			mx   float64
-		}
-		var series []seriesData
-		for _, d := range defs {
-			vals := extractGPUField(gr, d.fn)
-			mn, mx := gpuMinMax(vals)
-			if mn == mx {
-				mx = mn + 1
-			}
-			series = append(series, seriesData{d, vals, mn, mx})
-		}
-
-		// Shared character grid: row 0 = top (max), row chartHeight = bottom (min).
-		type cell struct {
-			ch    rune
-			color string
-		}
-		grid := make([][]cell, chartHeight+1)
-		for r := range grid {
-			grid[r] = make([]cell, chartWidth)
-			for c := range grid[r] {
-				grid[r][c] = cell{' ', ""}
-			}
-		}
-
-		// Plot each series onto the shared grid.
-		for _, s := range series {
-			w := chartWidth
-			if len(s.vals) < w {
-				w = len(s.vals)
-			}
-			data := gpuDownsample(s.vals, w)
-			prevRow := -1
-			for x, v := range data {
-				row := chartHeight - int(math.Round((v-s.mn)/(s.mx-s.mn)*float64(chartHeight)))
-				if row < 0 {
-					row = 0
-				}
-				if row > chartHeight {
-					row = chartHeight
-				}
-				if prevRow < 0 || prevRow == row {
-					grid[row][x] = cell{'─', s.color}
-				} else {
-					lo, hi := prevRow, row
-					if lo > hi {
-						lo, hi = hi, lo
-					}
-					for y := lo + 1; y < hi; y++ {
-						grid[y][x] = cell{'│', s.color}
-					}
-					if prevRow < row {
-						grid[prevRow][x] = cell{'╮', s.color}
-						grid[row][x] = cell{'╰', s.color}
-					} else {
-						grid[prevRow][x] = cell{'╯', s.color}
-						grid[row][x] = cell{'╭', s.color}
-					}
-				}
-				prevRow = row
-			}
-		}
-
-		// Render: Y axis + data rows.
-		fmt.Fprintf(&b, "GPU %d  (%.0fs)  each series normalised to its range\n", gpuIdx, elapsed)
-		for r := 0; r <= chartHeight; r++ {
-			// Y axis label: 100% at top, 50% in middle, 0% at bottom.
-			switch r {
-			case 0:
-				fmt.Fprintf(&b, "%4s┤", "100%")
-			case chartHeight / 2:
-				fmt.Fprintf(&b, "%4s┤", "50%")
-			case chartHeight:
-				fmt.Fprintf(&b, "%4s┤", "0%")
-			default:
-				fmt.Fprintf(&b, "%4s│", "")
-			}
-			for c := 0; c < chartWidth; c++ {
-				cl := grid[r][c]
-				if cl.color != "" {
-					b.WriteString(cl.color)
-					b.WriteRune(cl.ch)
-					b.WriteString(ansiReset)
-				} else {
-					b.WriteRune(' ')
-				}
-			}
-			b.WriteRune('\n')
-		}
-		// Bottom axis.
-		b.WriteString("     └")
-		b.WriteString(strings.Repeat("─", chartWidth))
-		b.WriteRune('\n')
-
-		// Legend with current (last) values.
-		b.WriteString("     ")
-		for i, s := range series {
-			last := s.vals[len(s.vals)-1]
-			b.WriteString(s.color)
-			fmt.Fprintf(&b, "▐ %s: %.0f%s", s.label, last, s.unit)
-			b.WriteString(ansiReset)
-			if i < len(series)-1 {
-				b.WriteString("   ")
-			}
-		}
-		b.WriteRune('\n')
-	}
-
-	return strings.TrimRight(b.String(), "\n")
-}
-
 // renderLineChart draws a single time-series line chart using box-drawing characters.
 // Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
 func renderLineChart(vals []float64, color, caption string, height, width int) string {
--- a/audit/internal/platform/install.go
+++ b/audit/internal/platform/install.go
@@ -0,0 +1,214 @@
+package platform
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+)
+
+// InstallDisk describes a candidate disk for installation.
+type InstallDisk struct {
+	Device       string   // e.g. /dev/sda
+	Model        string   // model string from lsblk; may be empty
+	Size         string   // human-readable, e.g. "500G"
+	SizeBytes    int64    // raw byte count from lsblk; 0 when it could not be read
+	MountedParts []string // "<part> at <mountpoint>" for each mounted partition
+}
+
+// squashfsPath is the squashfs image on the mounted live medium. Its size is
+// the basis for the minimum-disk-size and low-RAM checks in this file.
+const squashfsPath = "/run/live/medium/live/filesystem.squashfs"
+
+// ListInstallDisks returns block devices suitable for installation.
+// Excludes the current live boot medium but includes USB drives.
+//
+// Only top-level devices whose lsblk TYPE is "disk" are returned. Each entry
+// also carries the byte size and the currently mounted partitions, gathered
+// through further lsblk calls. An error is returned only when the initial
+// lsblk listing fails.
+func (s *System) ListInstallDisks() ([]InstallDisk, error) {
+	// MODEL may be empty or contain spaces, while SIZE and TYPE are always
+	// present, so the always-present columns are requested last and each row is
+	// parsed from the right. The transport column is deliberately not requested:
+	// it was never used, and a blank trailing column shifts the right-to-left
+	// parsing and makes the disk disappear from the list.
+	out, err := exec.Command("lsblk", "-dn", "-o", "NAME,MODEL,SIZE,TYPE").Output()
+	if err != nil {
+		return nil, fmt.Errorf("lsblk: %w", err)
+	}
+
+	bootDev := findLiveBootDevice()
+
+	var disks []InstallDisk
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		fields := strings.Fields(line)
+		// Minimum row: NAME SIZE TYPE (empty model).
+		if len(fields) < 3 {
+			continue
+		}
+		name := fields[0]
+		typ := fields[len(fields)-1]
+		size := fields[len(fields)-2]
+		model := strings.Join(fields[1:len(fields)-2], " ")
+
+		if typ != "disk" {
+			continue
+		}
+
+		device := "/dev/" + name
+		if device == bootDev {
+			continue
+		}
+
+		disks = append(disks, InstallDisk{
+			Device:       device,
+			Model:        model,
+			Size:         size,
+			SizeBytes:    diskSizeBytes(device),
+			MountedParts: mountedParts(device),
+		})
+	}
+	return disks, nil
+}
+
+// diskSizeBytes asks lsblk for the size of device in bytes. It returns 0 when
+// lsblk fails; a parse failure is not reported separately.
+func diskSizeBytes(device string) int64 {
+	raw, err := exec.Command("lsblk", "-bdn", "-o", "SIZE", device).Output()
+	if err != nil {
+		return 0
+	}
+	text := strings.TrimSpace(string(raw))
+	// The parse error is deliberately ignored: size is best-effort information.
+	size, _ := strconv.ParseInt(text, 10, 64)
+	return size
+}
+
+// mountedParts returns a list of "<part> at <mountpoint>" strings for any
+// mounted partitions on the given device. Swap entries ("[SWAP]") are left
+// out. It returns nil when lsblk fails or nothing is mounted.
+func mountedParts(device string) []string {
+	// -l prints a flat list instead of a tree, so names never carry
+	// tree-drawing prefixes (which differ between UTF-8 and ASCII locales and
+	// gain extra columns for nested devices). -p prints full device paths.
+	out, err := exec.Command("lsblk", "-lnp", "-o", "NAME,MOUNTPOINT", device).Output()
+	if err != nil {
+		return nil
+	}
+	var result []string
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		fields := strings.Fields(line)
+		// A row with only a name has no mount point.
+		if len(fields) < 2 {
+			continue
+		}
+		// Rejoin so a mount point containing a space is not truncated.
+		mp := strings.Join(fields[1:], " ")
+		if mp == "[SWAP]" {
+			continue
+		}
+		result = append(result, fields[0]+" at "+mp)
+	}
+	return result
+}
+
+// findLiveBootDevice returns the whole-disk device backing /run/live/medium,
+// or "" when findmnt fails or reports no source. When lsblk cannot name a
+// parent device, the mount source itself is returned.
+func findLiveBootDevice() string {
+	mnt, err := exec.Command("findmnt", "-n", "-o", "SOURCE", "/run/live/medium").Output()
+	source := strings.TrimSpace(string(mnt))
+	if err != nil || source == "" {
+		return ""
+	}
+	// PKNAME is the parent kernel device name, which strips the partition
+	// suffix: /dev/sdb1 → sdb, /dev/nvme0n1p1 → nvme0n1.
+	parent, err := exec.Command("lsblk", "-no", "PKNAME", source).Output()
+	if err == nil {
+		if name := strings.TrimSpace(string(parent)); name != "" {
+			return "/dev/" + name
+		}
+	}
+	return source
+}
+
+// MinInstallBytes returns the minimum recommended disk size for installation,
+// computed as 1.5 × the size of the squashfs image (integer arithmetic) to
+// leave room for the extracted filesystem and bootloader.
+// It returns 0 when the squashfs cannot be stat'ed (non-live environment).
+func MinInstallBytes() int64 {
+	info, err := os.Stat(squashfsPath)
+	if err != nil {
+		return 0
+	}
+	imageBytes := info.Size()
+	return imageBytes * 3 / 2
+}
+
+// toramActive reports whether the kernel command line carries the live-boot
+// "toram" parameter, either bare ("toram") or with a value ("toram=...").
+// Parameters are matched as whole tokens so that an unrelated parameter that
+// merely contains the substring "toram" is not mistaken for it.
+// An unreadable /proc/cmdline reports false.
+func toramActive() bool {
+	data, err := os.ReadFile("/proc/cmdline")
+	if err != nil {
+		return false
+	}
+	for _, param := range strings.Fields(string(data)) {
+		if param == "toram" || strings.HasPrefix(param, "toram=") {
+			return true
+		}
+	}
+	return false
+}
+
+// freeMemBytes returns the MemAvailable value from /proc/meminfo converted
+// from kB to bytes. It returns 0 when the file cannot be read or no usable
+// MemAvailable line is found.
+func freeMemBytes() int64 {
+	data, err := os.ReadFile("/proc/meminfo")
+	if err != nil {
+		return 0
+	}
+	for _, line := range strings.Split(string(data), "\n") {
+		if !strings.HasPrefix(line, "MemAvailable:") {
+			continue
+		}
+		fields := strings.Fields(line)
+		if len(fields) < 2 {
+			continue
+		}
+		// A malformed number is treated as 0 rather than reported.
+		kb, _ := strconv.ParseInt(fields[1], 10, 64)
+		return kb * 1024 // kB → bytes
+	}
+	return 0
+}
+
+// DiskWarnings returns advisory warning strings for a disk candidate:
+//   - partitions of the disk are currently mounted;
+//   - the disk is smaller than MinInstallBytes (only when both sizes are known);
+//   - the system was booted with toram and MemAvailable is below twice the
+//     squashfs size.
+//
+// The result is nil when none of the conditions apply.
+func DiskWarnings(d InstallDisk) []string {
+	var warnings []string
+	if len(d.MountedParts) > 0 {
+		warnings = append(warnings, "has mounted partitions: "+strings.Join(d.MountedParts, ", "))
+	}
+
+	if need := MinInstallBytes(); need > 0 && d.SizeBytes > 0 && d.SizeBytes < need {
+		msg := fmt.Sprintf("disk may be too small (need ≥ %s, have %s)",
+			humanBytes(need), humanBytes(d.SizeBytes))
+		warnings = append(warnings, msg)
+	}
+
+	if !toramActive() {
+		return warnings
+	}
+	image, err := os.Stat(squashfsPath)
+	if err != nil {
+		return warnings
+	}
+	if free := freeMemBytes(); free > 0 && free < image.Size()*2 {
+		warnings = append(warnings, "toram mode — low RAM, extraction may be slow or fail")
+	}
+	return warnings
+}
+
+// humanBytes formats a byte count with binary (1024-based) scaling and one
+// decimal place, e.g. 1536 → "1.5 KB". Values below 1024 are printed as
+// whole bytes ("512 B").
+func humanBytes(b int64) string {
+	const unit = 1024
+	if b < unit {
+		return fmt.Sprintf("%d B", b)
+	}
+	const prefixes = "KMGTPE"
+	// Grow the divisor until the scaled value drops below one further unit.
+	scale, idx := int64(unit), 0
+	for b/scale >= unit {
+		scale *= unit
+		idx++
+	}
+	return fmt.Sprintf("%.1f %cB", float64(b)/float64(scale), prefixes[idx])
+}
+
+// InstallToDisk runs `bee-install <device> <logfile>` and waits for it to exit.
+// logFile is only passed to the command as its second argument; this function
+// does not capture the command's stdout or stderr itself.
+// NOTE(review): presumably bee-install writes its own output to logFile — confirm.
+// Cancelling ctx terminates the command.
+func (s *System) InstallToDisk(ctx context.Context, device string, logFile string) error {
+	installer := exec.CommandContext(ctx, "bee-install", device, logFile)
+	err := installer.Run()
+	return err
+}
+
+// InstallLogPath returns the default install log path for a given device.
+// Slashes and spaces in the device name become underscores, so "/dev/sda"
+// maps to "/tmp/bee-install_dev_sda.log".
+func InstallLogPath(device string) string {
+	safe := strings.ReplaceAll(device, "/", "_")
+	safe = strings.ReplaceAll(safe, " ", "_")
+	return "/tmp/bee-install" + safe + ".log"
+}
+
+// Label returns a display label for a disk in the form
+// "<device>  <size>  <model>", using "Unknown" when the model is empty.
+func (d InstallDisk) Label() string {
+	if d.Model != "" {
+		return fmt.Sprintf("%s  %s  %s", d.Device, d.Size, d.Model)
+	}
+	return fmt.Sprintf("%s  %s  %s", d.Device, d.Size, "Unknown")
+}
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -0,0 +1,191 @@
+package platform
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// IsLiveMediaInRAM reports whether /run/live/medium is mounted as tmpfs
+// according to findmnt. If findmnt fails, the result of toramActive is
+// returned instead.
+func (s *System) IsLiveMediaInRAM() bool {
+	fsType, err := exec.Command("findmnt", "-n", "-o", "FSTYPE", "/run/live/medium").Output()
+	if err == nil {
+		return strings.TrimSpace(string(fsType)) == "tmpfs"
+	}
+	return toramActive()
+}
+
+// RunInstallToRAM copies the live medium into /dev/shm/bee-live so the
+// running system no longer depends on the boot media.
+//
+// Steps, in order:
+//  1. return early if the medium is already RAM-backed;
+//  2. check that free memory covers all squashfs images plus 256 MiB headroom;
+//  3. copy each squashfs image and try to re-point its loop device at the copy;
+//  4. copy the remaining medium files and bind-mount the copy over
+//     /run/live/medium.
+//
+// Progress and warnings are reported through logFunc (which may be nil).
+// Only hard failures (no images, stat/copy errors, insufficient RAM,
+// cancellation) are returned as errors.
+//
+// NOTE(review): loop re-association and bind-mount failures are only logged
+// as warnings, yet the final "safely disconnected" message is still emitted —
+// confirm that this is intended.
+func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+	// log is a nil-safe wrapper around logFunc.
+	log := func(msg string) {
+		if logFunc != nil {
+			logFunc(msg)
+		}
+	}
+
+	if s.IsLiveMediaInRAM() {
+		log("Already running from RAM — installation media can be safely disconnected.")
+		return nil
+	}
+
+	squashfsFiles, err := filepath.Glob("/run/live/medium/live/*.squashfs")
+	if err != nil || len(squashfsFiles) == 0 {
+		return fmt.Errorf("no squashfs files found in /run/live/medium/live/")
+	}
+
+	// Sum the image sizes up front so we can refuse before copying anything.
+	free := freeMemBytes()
+	var needed int64
+	for _, sf := range squashfsFiles {
+		fi, err2 := os.Stat(sf)
+		if err2 != nil {
+			return fmt.Errorf("stat %s: %v", sf, err2)
+		}
+		needed += fi.Size()
+	}
+	const headroom = 256 * 1024 * 1024
+	// free == 0 means the amount could not be determined; the check is skipped.
+	if free > 0 && needed+headroom > free {
+		return fmt.Errorf("insufficient RAM: need %s, available %s",
+			humanBytes(needed+headroom), humanBytes(free))
+	}
+
+	dstDir := "/dev/shm/bee-live"
+	if err := os.MkdirAll(dstDir, 0755); err != nil {
+		return fmt.Errorf("create tmpfs dir: %v", err)
+	}
+
+	for _, sf := range squashfsFiles {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		base := filepath.Base(sf)
+		dst := filepath.Join(dstDir, base)
+		log(fmt.Sprintf("Copying %s to RAM...", base))
+		if err := copyFileLarge(ctx, sf, dst, log); err != nil {
+			return fmt.Errorf("copy %s: %v", base, err)
+		}
+		log(fmt.Sprintf("Copied %s.", base))
+
+		// Re-association is best effort: a missing loop device or a failed
+		// swap is logged and the loop moves on to the next image.
+		loopDev, err := findLoopForFile(sf)
+		if err != nil {
+			log(fmt.Sprintf("Loop device for %s not found (%v) — skipping re-association.", base, err))
+			continue
+		}
+		if err := reassociateLoopDevice(loopDev, dst); err != nil {
+			log(fmt.Sprintf("Warning: could not re-associate %s → %s: %v", loopDev, dst, err))
+		} else {
+			log(fmt.Sprintf("Loop device %s now backed by RAM copy.", loopDev))
+		}
+	}
+
+	log("Copying remaining medium files...")
+	if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
+		log(fmt.Sprintf("Warning: partial copy: %v", err))
+	}
+	// cpDir errors are downgraded to warnings above, so cancellation has to
+	// be re-checked explicitly before touching the mount table.
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+	if err := exec.Command("mount", "--bind", dstDir, "/run/live/medium").Run(); err != nil {
+		log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
+	}
+
+	log("Done. Installation media can be safely disconnected.")
+	return nil
+}
+
+// copyFileLarge copies src to dst in 4 MiB chunks, checking ctx before every
+// chunk so that large copies can be cancelled promptly. When logFunc is
+// non-nil, a progress line is emitted after each chunk written.
+//
+// The destination is synced and closed before success is reported. If the
+// copy fails or is cancelled, the partially written dst is removed: cpDir
+// skips any target that already exists, so a truncated leftover would
+// otherwise be mistaken for a finished copy on the next run.
+func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+	fi, err := in.Stat()
+	if err != nil {
+		return err
+	}
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+
+	total := fi.Size()
+	copyErr := func() error {
+		var copied int64
+		buf := make([]byte, 4*1024*1024)
+		for {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			n, rerr := in.Read(buf)
+			if n > 0 {
+				if _, werr := out.Write(buf[:n]); werr != nil {
+					return werr
+				}
+				copied += int64(n)
+				if logFunc != nil && total > 0 {
+					pct := int(float64(copied) / float64(total) * 100)
+					logFunc(fmt.Sprintf("  %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
+				}
+			}
+			if rerr == io.EOF {
+				return out.Sync()
+			}
+			if rerr != nil {
+				return rerr
+			}
+		}
+	}()
+
+	// Always close; surface the close error only if nothing failed earlier.
+	if closeErr := out.Close(); copyErr == nil {
+		copyErr = closeErr
+	}
+	if copyErr != nil {
+		// Best-effort cleanup of the incomplete file; the copy error is what matters.
+		_ = os.Remove(dst)
+		return copyErr
+	}
+	return nil
+}
+
+// cpDir mirrors the tree under src into dst. Directories are created with
+// the source mode, *.squashfs files are skipped, and targets that already
+// exist are left untouched. Entries the walk could not read are ignored;
+// cancellation of ctx aborts the walk. logFunc is accepted but not used.
+func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
+	visit := func(path string, info os.FileInfo, walkErr error) error {
+		if cancelled := ctx.Err(); cancelled != nil {
+			return cancelled
+		}
+		if walkErr != nil {
+			return nil
+		}
+		rel, _ := filepath.Rel(src, path)
+		target := filepath.Join(dst, rel)
+		switch {
+		case info.IsDir():
+			return os.MkdirAll(target, info.Mode())
+		case strings.HasSuffix(path, ".squashfs"):
+			return nil
+		}
+		if _, statErr := os.Stat(target); statErr == nil {
+			return nil
+		}
+		return copyFileLarge(ctx, path, target, nil)
+	}
+	return filepath.Walk(src, visit)
+}
+
+// findLoopForFile returns the name of the loop device whose backing file
+// equals backingFile, as listed by `losetup --list --json`. An error is
+// returned when losetup fails, its output cannot be decoded, or no device
+// matches.
+func findLoopForFile(backingFile string) (string, error) {
+	raw, err := exec.Command("losetup", "--list", "--json").Output()
+	if err != nil {
+		return "", err
+	}
+	type loopEntry struct {
+		Name     string `json:"name"`
+		BackFile string `json:"back-file"`
+	}
+	var listing struct {
+		Loopdevices []loopEntry `json:"loopdevices"`
+	}
+	if err := json.Unmarshal(raw, &listing); err != nil {
+		return "", err
+	}
+	for i := range listing.Loopdevices {
+		if entry := listing.Loopdevices[i]; entry.BackFile == backingFile {
+			return entry.Name, nil
+		}
+	}
+	return "", fmt.Errorf("no loop device found for %s", backingFile)
+}
+
+// reassociateLoopDevice points loopDev at newFile. It first tries
+// `losetup --replace`; if that command fails for any reason it falls back
+// to loopChangeFD.
+func reassociateLoopDevice(loopDev, newFile string) error {
+	replaceErr := exec.Command("losetup", "--replace", loopDev, newFile).Run()
+	if replaceErr != nil {
+		return loopChangeFD(loopDev, newFile)
+	}
+	return nil
+}
--- a/audit/internal/platform/install_to_ram_linux.go
+++ b/audit/internal/platform/install_to_ram_linux.go
@@ -0,0 +1,28 @@
+//go:build linux
+
+package platform
+
+import (
+	"os"
+	"syscall"
+)
+
+// ioctlLoopChangeFD is the LOOP_CHANGE_FD request number from <linux/loop.h>.
+// The previous value 0x4C08 is LOOP_SET_DIRECT_IO, which toggles direct I/O
+// instead of swapping the backing file.
+const ioctlLoopChangeFD = 0x4C06
+
+// loopChangeFD switches the backing file of loopDev to newFile by issuing
+// the LOOP_CHANGE_FD ioctl on the loop device, passing the descriptor of the
+// new file as the argument. It returns the open error or the ioctl errno on
+// failure.
+func loopChangeFD(loopDev, newFile string) error {
+	lf, err := os.OpenFile(loopDev, os.O_RDWR, 0)
+	if err != nil {
+		return err
+	}
+	defer lf.Close()
+	nf, err := os.OpenFile(newFile, os.O_RDONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer nf.Close()
+	_, _, errno := syscall.Syscall(syscall.SYS_IOCTL, lf.Fd(), ioctlLoopChangeFD, nf.Fd())
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
--- a/audit/internal/platform/install_to_ram_other.go
+++ b/audit/internal/platform/install_to_ram_other.go
@@ -0,0 +1,9 @@
+//go:build !linux
+
+package platform
+
+import "errors"
+
+// loopChangeFD is the stub used on non-Linux builds; it always fails because
+// the loop-device ioctl is not available there.
+func loopChangeFD(loopDev, newFile string) error {
+	return errors.New("LOOP_CHANGE_FD not available on this platform")
+}
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -0,0 +1,326 @@
+package platform
+
+import (
+	"bufio"
+	"encoding/json"
+	"os"
+	"os/exec"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// LiveMetricSample is a single point-in-time snapshot of server metrics
+// collected for the web UI metrics page. SampleLiveMetrics fills it in:
+// Timestamp is taken in UTC, PowerW is 0 when system power is unavailable,
+// and CPULoadPct / MemLoadPct are percentages derived from /proc/stat and
+// /proc/meminfo.
+type LiveMetricSample struct {
+	Timestamp  time.Time      `json:"ts"`
+	Fans       []FanReading   `json:"fans"`
+	Temps      []TempReading  `json:"temps"`
+	PowerW     float64        `json:"power_w"`
+	CPULoadPct float64        `json:"cpu_load_pct"`
+	MemLoadPct float64        `json:"mem_load_pct"`
+	GPUs       []GPUMetricRow `json:"gpus"`
+}
+
+// TempReading is a named temperature sensor value. Group is the sensor
+// category assigned by classifyLiveTempGroup (for example "cpu" or
+// "ambient") and is omitted from JSON when empty.
+type TempReading struct {
+	Name    string  `json:"name"`
+	Group   string  `json:"group,omitempty"`
+	Celsius float64 `json:"celsius"`
+}
+
+// SampleLiveMetrics gathers one snapshot from every available source: GPU
+// metrics (NVIDIA first, AMD as fallback), fan speeds, temperatures, system
+// power, CPU load and memory load. Sources that fail or return nothing are
+// skipped without reporting an error.
+func SampleLiveMetrics() LiveMetricSample {
+	sample := LiveMetricSample{Timestamp: time.Now().UTC()}
+
+	// Prefer NVIDIA data; only ask for AMD metrics when NVIDIA gave nothing.
+	nvidia, nvErr := SampleGPUMetrics(nil)
+	if nvErr == nil && len(nvidia) > 0 {
+		sample.GPUs = nvidia
+	} else {
+		amd, amdErr := sampleAMDGPUMetrics()
+		if amdErr == nil && len(amd) > 0 {
+			sample.GPUs = amd
+		}
+	}
+
+	// A failing fan source simply leaves the list as returned.
+	sample.Fans, _ = sampleFanSpeeds()
+
+	sample.Temps = append(sample.Temps, sampleLiveTemperatureReadings()...)
+	// Add a synthetic CPU entry when no sensor was classified as "cpu".
+	if !hasTempGroup(sample.Temps, "cpu") {
+		if hottest := sampleCPUMaxTemp(); hottest > 0 {
+			sample.Temps = append(sample.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: hottest})
+		}
+	}
+
+	sample.PowerW = sampleSystemPower()   // 0 if unavailable
+	sample.CPULoadPct = sampleCPULoadPct() // from /proc/stat
+	sample.MemLoadPct = sampleMemLoadPct() // from /proc/meminfo
+
+	return sample
+}
+
+// cpuStatPrev holds the /proc/stat counters seen by the previous call to
+// sampleCPULoadPct, as [total, idle]. It is read and written without any
+// locking.
+var cpuStatPrev [2]uint64 // [total, idle]
+
+// sampleCPULoadPct returns the overall CPU utilisation percentage over the
+// interval since the previous call. It reads one /proc/stat snapshot per
+// call and compares it with the counters saved in cpuStatPrev; it does not
+// sleep. The first call (no previous snapshot) and any unreadable snapshot
+// yield 0. The result is clamped to [0, 100].
+func sampleCPULoadPct() float64 {
+	total, idle := readCPUStat()
+	if total == 0 {
+		return 0
+	}
+	prevTotal, prevIdle := cpuStatPrev[0], cpuStatPrev[1]
+	cpuStatPrev = [2]uint64{total, idle}
+	if prevTotal == 0 {
+		// No baseline yet; the snapshot stored above becomes the baseline.
+		return 0
+	}
+	dt := float64(total - prevTotal)
+	di := float64(idle - prevIdle)
+	if dt <= 0 {
+		return 0
+	}
+	pct := (1 - di/dt) * 100
+	if pct < 0 {
+		return 0
+	}
+	if pct > 100 {
+		return 100
+	}
+	return pct
+}
+
+// readCPUStat parses the aggregate "cpu " line of /proc/stat. total is the
+// sum of up to the first ten counters on that line; idle is the sum of the
+// fourth and fifth counters (idle + iowait). Both are 0 when the file cannot
+// be opened or the line is absent.
+func readCPUStat() (total, idle uint64) {
+	file, err := os.Open("/proc/stat")
+	if err != nil {
+		return 0, 0
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		text := scanner.Text()
+		if !strings.HasPrefix(text, "cpu ") {
+			continue
+		}
+		var counters [10]uint64
+		// Drop the leading "cpu" label; unparsable fields count as zero.
+		for i, field := range strings.Fields(text)[1:] {
+			if i >= len(counters) {
+				break
+			}
+			counters[i], _ = strconv.ParseUint(field, 10, 64)
+		}
+		for _, c := range counters {
+			total += c
+		}
+		return total, counters[3] + counters[4]
+	}
+	return 0, 0
+}
+
+// sampleMemLoadPct returns used memory as a percentage of MemTotal, where
+// used is MemTotal minus MemAvailable from /proc/meminfo. It returns 0 when
+// the file cannot be opened or MemTotal is missing or zero.
+func sampleMemLoadPct() float64 {
+	file, err := os.Open("/proc/meminfo")
+	if err != nil {
+		return 0
+	}
+	defer file.Close()
+
+	meminfo := make(map[string]uint64)
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		cols := strings.Fields(scanner.Text())
+		if len(cols) < 2 {
+			continue
+		}
+		amount, _ := strconv.ParseUint(cols[1], 10, 64)
+		meminfo[strings.TrimSuffix(cols[0], ":")] = amount
+	}
+
+	total, avail := meminfo["MemTotal"], meminfo["MemAvailable"]
+	if total == 0 {
+		return 0
+	}
+	return float64(total-avail) / float64(total) * 100
+}
+
+// hasTempGroup reports whether any reading in temps belongs to group.
+func hasTempGroup(temps []TempReading, group string) bool {
+	for i := range temps {
+		if temps[i].Group == group {
+			return true
+		}
+	}
+	return false
+}
+
+// sampleLiveTemperatureReadings returns temperatures from `sensors -j` when
+// that yields at least one reading, and from IPMI otherwise.
+func sampleLiveTemperatureReadings() []TempReading {
+	readings := sampleLiveTempsViaSensorsJSON()
+	if len(readings) == 0 {
+		return sampleLiveTempsViaIPMI()
+	}
+	return readings
+}
+
+// sampleLiveTempsViaSensorsJSON runs `sensors -j` and converts its output
+// into temperature readings. Chips and features are visited in sorted order
+// so the result is deterministic. A feature is kept only when it has a
+// usable temp*_input value in (0, 150], is not classified as "gpu", and its
+// group+label pair has not been emitted already. Returns nil when the
+// command fails or its output cannot be decoded.
+func sampleLiveTempsViaSensorsJSON() []TempReading {
+	out, err := exec.Command("sensors", "-j").Output()
+	if err != nil || len(out) == 0 {
+		return nil
+	}
+
+	// Decoded as chip name → feature name → feature payload.
+	var doc map[string]map[string]any
+	if err := json.Unmarshal(out, &doc); err != nil {
+		return nil
+	}
+
+	// Map iteration order is random; sort for stable output.
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips)
+
+	temps := make([]TempReading, 0, len(chips))
+	seen := map[string]struct{}{}
+	for _, chip := range chips {
+		features := doc[chip]
+		featureNames := make([]string, 0, len(features))
+		for name := range features {
+			featureNames = append(featureNames, name)
+		}
+		sort.Strings(featureNames)
+		for _, name := range featureNames {
+			// "Adapter" is skipped by name; entries that are not JSON
+			// objects are rejected by the type assertion below.
+			if strings.EqualFold(name, "Adapter") {
+				continue
+			}
+			feature, ok := features[name].(map[string]any)
+			if !ok {
+				continue
+			}
+			value, ok := firstTempInputValue(feature)
+			// Discard missing values and readings outside (0, 150].
+			if !ok || value <= 0 || value > 150 {
+				continue
+			}
+			group := classifyLiveTempGroup(chip, name)
+			// Sensors classified as "gpu" are left out of this list.
+			if group == "gpu" {
+				continue
+			}
+			label := strings.TrimSpace(name)
+			if label == "" {
+				continue
+			}
+			if group == "ambient" {
+				// Ambient labels get the chip name added by compactAmbientTempName.
+				label = compactAmbientTempName(chip, label)
+			}
+			// Deduplicate on group+label; NUL cannot occur in either part.
+			key := group + "\x00" + label
+			if _, ok := seen[key]; ok {
+				continue
+			}
+			seen[key] = struct{}{}
+			temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
+		}
+	}
+	return temps
+}
+
+// sampleLiveTempsViaIPMI collects temperatures from
+// `ipmitool sdr type Temperature`. Readings outside (0, 150], sensors
+// classified as "gpu", and duplicate group+label pairs are dropped. Returns
+// nil when ipmitool fails or prints nothing.
+func sampleLiveTempsViaIPMI() []TempReading {
+	out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
+	if err != nil || len(out) == 0 {
+		return nil
+	}
+	var temps []TempReading
+	seen := map[string]struct{}{}
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		name, value, ok := parseLiveIPMITempLine(line)
+		if !ok || value <= 0 || value > 150 {
+			continue
+		}
+		group := classifyLiveTempGroup("", name)
+		if group == "gpu" {
+			continue
+		}
+		label := name
+		if group == "ambient" {
+			label = compactAmbientTempName("", label)
+		}
+		// Deduplicate on group+label; NUL cannot occur in either part.
+		key := group + "\x00" + label
+		if _, dup := seen[key]; dup {
+			continue
+		}
+		seen[key] = struct{}{}
+		temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
+	}
+	return temps
+}
+
+// parseLiveIPMITempLine extracts the sensor name and Celsius value from one
+// pipe-separated ipmitool line. Two layouts are understood:
+//
+//	name | id | status | entity | 45 degrees C    (ipmitool sdr type ...)
+//	name | 45.000 | degrees C | ok | ...          (ipmitool sensor)
+//
+// The original parser only handled the second layout, so output of the
+// `sdr type Temperature` command it actually runs never produced a reading.
+// ok is false for lines without a name, without a "degrees" column, or
+// without a numeric value (e.g. "na", "No Reading").
+func parseLiveIPMITempLine(line string) (name string, celsius float64, ok bool) {
+	parts := strings.Split(line, "|")
+	if len(parts) < 3 {
+		return "", 0, false
+	}
+	name = strings.TrimSpace(parts[0])
+	if name == "" {
+		return "", 0, false
+	}
+	for i := 1; i < len(parts); i++ {
+		field := strings.TrimSpace(parts[i])
+		if !strings.Contains(strings.ToLower(field), "degrees") {
+			continue
+		}
+		raw := ""
+		if tokens := strings.Fields(field); len(tokens) > 1 {
+			// Value and unit share the column: "45 degrees C".
+			raw = tokens[0]
+		}
+		if _, err := strconv.ParseFloat(raw, 64); err != nil && i > 1 {
+			// Unit-only column: the value sits in the preceding column.
+			raw = strings.TrimSpace(parts[i-1])
+		}
+		value, err := strconv.ParseFloat(raw, 64)
+		if err != nil {
+			return "", 0, false
+		}
+		return name, value, true
+	}
+	return "", 0, false
+}
+
+// firstTempInputValue picks the reading from a sensors feature object: the
+// first key, in sorted order, that contains "temp" and ends in "_input"
+// (case-insensitive) and holds either a number or a numeric string. The
+// boolean is false when no such key yields a value.
+func firstTempInputValue(feature map[string]any) (float64, bool) {
+	candidates := make([]string, 0, len(feature))
+	for name := range feature {
+		lowered := strings.ToLower(name)
+		if strings.Contains(lowered, "temp") && strings.HasSuffix(lowered, "_input") {
+			candidates = append(candidates, name)
+		}
+	}
+	sort.Strings(candidates)
+
+	for _, name := range candidates {
+		if number, isFloat := feature[name].(float64); isFloat {
+			return number, true
+		}
+		if text, isString := feature[name].(string); isString {
+			// Unparsable strings fall through to the next candidate.
+			if parsed, err := strconv.ParseFloat(text, 64); err == nil {
+				return parsed, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// classifyLiveTempGroup sorts a sensor into "gpu", "cpu" or "ambient" by
+// substring-matching the lower-cased "chip name" text. GPU markers are
+// checked before CPU markers; anything unmatched is "ambient".
+func classifyLiveTempGroup(chip, name string) string {
+	haystack := strings.ToLower(strings.TrimSpace(chip + " " + name))
+	containsAny := func(markers ...string) bool {
+		for _, marker := range markers {
+			if strings.Contains(haystack, marker) {
+				return true
+			}
+		}
+		return false
+	}
+
+	if containsAny("gpu", "amdgpu", "nvidia", "adeon") {
+		return "gpu"
+	}
+	if containsAny(
+		"coretemp",
+		"k10temp",
+		"zenpower",
+		"package id",
+		"x86_pkg_temp",
+		"tctl",
+		"tdie",
+		"tccd",
+		"cpu",
+		"peci",
+	) {
+		return "cpu"
+	}
+	return "ambient"
+}
+
+// compactAmbientTempName builds the display label for an ambient sensor.
+// The trimmed name is returned alone when the chip is empty, equals the name
+// (ignoring case), or is already contained in the name; otherwise the result
+// is "chip / name".
+func compactAmbientTempName(chip, name string) string {
+	chipLabel := strings.TrimSpace(chip)
+	sensorLabel := strings.TrimSpace(name)
+	switch {
+	case chipLabel == "", strings.EqualFold(chipLabel, sensorLabel):
+		return sensorLabel
+	case strings.Contains(strings.ToLower(sensorLabel), strings.ToLower(chipLabel)):
+		return sensorLabel
+	}
+	return chipLabel + " / " + sensorLabel
+}
--- a/audit/internal/platform/live_metrics_test.go
+++ b/audit/internal/platform/live_metrics_test.go
@@ -0,0 +1,44 @@
+package platform
+
+import "testing"
+
+// TestFirstTempInputValue checks that the *_input reading is chosen over
+// the sibling *_max value of the same feature.
+func TestFirstTempInputValue(t *testing.T) {
+	const want = 61.5
+	got, ok := firstTempInputValue(map[string]any{
+		"temp1_input": want,
+		"temp1_max":   80.0,
+	})
+	if !ok {
+		t.Fatal("expected value")
+	}
+	if got != want {
+		t.Fatalf("got %v want 61.5", got)
+	}
+}
+
+// TestClassifyLiveTempGroup covers one representative chip/feature pair per
+// group, plus two chips that must fall through to "ambient".
+func TestClassifyLiveTempGroup(t *testing.T) {
+	type sample struct {
+		chip string
+		name string
+		want string
+	}
+	samples := []sample{
+		{chip: "coretemp-isa-0000", name: "Package id 0", want: "cpu"},
+		{chip: "amdgpu-pci-4300", name: "edge", want: "gpu"},
+		{chip: "nvme-pci-0100", name: "Composite", want: "ambient"},
+		{chip: "acpitz-acpi-0", name: "temp1", want: "ambient"},
+	}
+	for i := range samples {
+		tc := samples[i]
+		got := classifyLiveTempGroup(tc.chip, tc.name)
+		if got != tc.want {
+			t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", tc.chip, tc.name, got, tc.want)
+		}
+	}
+}
+
+// TestCompactAmbientTempName checks that the chip is prefixed when present
+// and that the name is returned unchanged when the chip is empty.
+func TestCompactAmbientTempName(t *testing.T) {
+	withChip := compactAmbientTempName("nvme-pci-0100", "Composite")
+	if withChip != "nvme-pci-0100 / Composite" {
+		t.Fatalf("got %q", withChip)
+	}
+	withoutChip := compactAmbientTempName("", "Inlet Temp")
+	if withoutChip != "Inlet Temp" {
+		t.Fatalf("got %q", withoutChip)
+	}
+}
--- a/audit/internal/platform/network.go
+++ b/audit/internal/platform/network.go
@@ -2,6 +2,7 @@ package platform

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"os"
 	"os/exec"
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
 	out := make([]InterfaceInfo, 0, len(names))
 	for _, name := range names {
 		state := "unknown"
-		if raw, err := exec.Command("ip", "-o", "link", "show", name).Output(); err == nil {
-			fields := strings.Fields(string(raw))
-			if len(fields) >= 9 {
-				state = fields[8]
+		if up, err := interfaceAdminState(name); err == nil {
+			if up {
+				state = "up"
+			} else {
+				state = "down"
 			}
 		}

-		var ipv4 []string
-		if raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", name).Output(); err == nil {
-			for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
-				fields := strings.Fields(line)
-				if len(fields) >= 4 {
-					ipv4 = append(ipv4, fields[3])
-				}
-			}
+		ipv4, err := interfaceIPv4Addrs(name)
+		if err != nil {
+			ipv4 = nil
 		}

 		out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
@@ -55,6 +52,119 @@ func (s *System) DefaultRoute() string {
 	return ""
 }

+// CaptureNetworkSnapshot records the current network configuration: the
+// admin state and IPv4 addresses of every interface, the default routes as
+// printed by `ip route show default`, and the contents of /etc/resolv.conf.
+// Failing to list or inspect an interface aborts with an error; unreadable
+// routes or resolv.conf are simply left empty.
+func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
+	names, err := listInterfaceNames()
+	if err != nil {
+		return NetworkSnapshot{}, err
+	}
+
+	result := NetworkSnapshot{
+		Interfaces: make([]NetworkInterfaceSnapshot, 0, len(names)),
+	}
+	for _, ifName := range names {
+		isUp, stateErr := interfaceAdminState(ifName)
+		if stateErr != nil {
+			return NetworkSnapshot{}, stateErr
+		}
+		addrs, addrErr := interfaceIPv4Addrs(ifName)
+		if addrErr != nil {
+			return NetworkSnapshot{}, addrErr
+		}
+		entry := NetworkInterfaceSnapshot{Name: ifName, Up: isUp, IPv4: addrs}
+		result.Interfaces = append(result.Interfaces, entry)
+	}
+
+	routesRaw, routeErr := exec.Command("ip", "route", "show", "default").Output()
+	if routeErr == nil {
+		for _, routeLine := range strings.Split(strings.TrimSpace(string(routesRaw)), "\n") {
+			if trimmed := strings.TrimSpace(routeLine); trimmed != "" {
+				result.DefaultRoutes = append(result.DefaultRoutes, trimmed)
+			}
+		}
+	}
+
+	if resolv, readErr := os.ReadFile("/etc/resolv.conf"); readErr == nil {
+		result.ResolvConf = string(resolv)
+	}
+
+	return result, nil
+}
+
+// RestoreNetworkSnapshot re-applies a snapshot taken by
+// CaptureNetworkSnapshot. For every interface it brings the link up, flushes
+// the IPv4 addresses, re-adds the recorded ones and restores the admin
+// state; it then replaces the default route(s) and rewrites
+// /etc/resolv.conf.
+//
+// Only IPv4 addresses are flushed: the snapshot records IPv4 only, so
+// flushing every family (as before) removed IPv6 addresses that were never
+// restored.
+//
+// Restoration is best effort. Each failure is collected and the remaining
+// steps still run; the returned error joins all messages with "; ".
+func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
+	var errs []string
+
+	for _, iface := range snapshot.Interfaces {
+		// The link is raised first so that addresses can be changed.
+		if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
+			continue
+		}
+		if err := exec.Command("ip", "-4", "addr", "flush", "dev", iface.Name).Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
+		}
+		for _, cidr := range iface.IPv4 {
+			if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
+				detail := strings.TrimSpace(string(raw))
+				if detail != "" {
+					errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
+				} else {
+					errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
+				}
+			}
+		}
+		state := "down"
+		if iface.Up {
+			state = "up"
+		}
+		if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
+		}
+	}
+
+	// A non-zero exit (typically "no such route") is expected and ignored;
+	// only failures to run the command at all are reported.
+	if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
+		var exitErr *exec.ExitError
+		if !errors.As(err, &exitErr) {
+			errs = append(errs, fmt.Sprintf("clear default route: %v", err))
+		}
+	}
+	for _, route := range snapshot.DefaultRoutes {
+		fields := strings.Fields(route)
+		if len(fields) == 0 {
+			continue
+		}
+		// Strip state flags that ip-route(8) does not accept as add arguments.
+		// NOTE(review): "onlink" and "pervasive" appear as settable nexthop
+		// flags in ip-route(8); confirm that dropping them is intended.
+		filtered := fields[:0]
+		for _, f := range fields {
+			switch f {
+			case "linkdown", "dead", "onlink", "pervasive":
+				// skip
+			default:
+				filtered = append(filtered, f)
+			}
+		}
+		args := append([]string{"route", "add"}, filtered...)
+		if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
+			detail := strings.TrimSpace(string(raw))
+			if detail != "" {
+				errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
+			} else {
+				errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
+			}
+		}
+	}
+
+	if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
+		errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
+	}
+
+	if len(errs) > 0 {
+		return errors.New(strings.Join(errs, "; "))
+	}
+	return nil
+}
+
 func (s *System) DHCPOne(iface string) (string, error) {
 	var out bytes.Buffer
 	if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
@@ -131,6 +241,65 @@ func (s *System) SetStaticIPv4(cfg StaticIPv4Config) (string, error) {
 	return out.String(), nil
 }

+// SetInterfaceState runs `ip link set dev <iface> up|down` according to up.
+func (s *System) SetInterfaceState(iface string, up bool) error {
+	args := []string{"link", "set", "dev", iface, "down"}
+	if up {
+		args[len(args)-1] = "up"
+	}
+	return exec.Command("ip", args...).Run()
+}
+
+// GetInterfaceState returns true if the interface is UP. It delegates to
+// interfaceAdminState and returns that function's result and error unchanged.
+func (s *System) GetInterfaceState(iface string) (bool, error) {
+	return interfaceAdminState(iface)
+}
+
+// interfaceAdminState runs `ip -o link show dev <iface>` and reports whether
+// the interface carries the UP flag.
+func interfaceAdminState(iface string) (bool, error) {
+	out, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
+	if err == nil {
+		return parseInterfaceAdminState(string(out))
+	}
+	return false, err
+}
+
+// parseInterfaceAdminState inspects the first <FLAG,FLAG,...> group of an
+// `ip link` line and reports whether it contains the UP flag. An error is
+// returned when the opening or closing angle bracket is missing.
+func parseInterfaceAdminState(raw string) (bool, error) {
+	_, afterOpen, opened := strings.Cut(raw, "<")
+	if !opened {
+		return false, fmt.Errorf("ip link output missing flags")
+	}
+	flagList, _, closed := strings.Cut(afterOpen, ">")
+	if !closed {
+		return false, fmt.Errorf("ip link output missing flag terminator")
+	}
+	for _, candidate := range strings.Split(flagList, ",") {
+		if strings.TrimSpace(candidate) == "UP" {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+// interfaceIPv4Addrs lists the IPv4 addresses of iface, taken from the
+// fourth column of `ip -o -4 addr show dev <iface>`. A non-zero exit of the
+// ip command is treated as "no addresses"; only a failure to run the command
+// at all is returned as an error.
+func interfaceIPv4Addrs(iface string) ([]string, error) {
+	out, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
+	if err != nil {
+		var exitErr *exec.ExitError
+		if !errors.As(err, &exitErr) {
+			return nil, err
+		}
+		return nil, nil
+	}
+
+	var addrs []string
+	for _, row := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		cols := strings.Fields(row)
+		if len(cols) < 4 {
+			continue
+		}
+		addrs = append(addrs, cols[3])
+	}
+	return addrs, nil
+}
+
 func listInterfaceNames() ([]string, error) {
 	raw, err := exec.Command("ip", "-o", "link", "show").Output()
 	if err != nil {
--- a/audit/internal/platform/network_test.go
+++ b/audit/internal/platform/network_test.go
@@ -0,0 +1,46 @@
+package platform
+
+import "testing"
+
+// TestParseInterfaceAdminState verifies that the admin state comes from the
+// flag list (not the "state" column) and that malformed output is rejected.
+func TestParseInterfaceAdminState(t *testing.T) {
+	type scenario struct {
+		name    string
+		raw     string
+		want    bool
+		wantErr bool
+	}
+	scenarios := []scenario{
+		{
+			name: "admin up with no carrier",
+			raw:  "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
+			want: true,
+		},
+		{
+			name: "admin down",
+			raw:  "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
+			want: false,
+		},
+		{
+			name:    "malformed output",
+			raw:     "2: enp1s0: mtu 1500 state DOWN\n",
+			wantErr: true,
+		},
+	}
+
+	for _, sc := range scenarios {
+		sc := sc
+		t.Run(sc.name, func(t *testing.T) {
+			got, err := parseInterfaceAdminState(sc.raw)
+			switch {
+			case sc.wantErr && err == nil:
+				t.Fatal("expected error")
+			case sc.wantErr:
+				return
+			case err != nil:
+				t.Fatalf("unexpected error: %v", err)
+			case got != sc.want:
+				t.Fatalf("got %v want %v", got, sc.want)
+			}
+		})
+	}
+}
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -0,0 +1,194 @@
+package platform
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// RunNvidiaStressPack runs the "gpu-nvidia-stress" acceptance pack: two
+// nvidia-smi inventory steps, the stress job selected by opts, and a final
+// nvidia-smi query. Options are normalized first; an error from building the
+// stress job is returned before anything runs.
+func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts NvidiaStressOptions, logFunc func(string)) (string, error) {
+	normalizeNvidiaStressOptions(&opts)
+
+	stressJob, err := buildNvidiaStressJob(opts)
+	if err != nil {
+		return "", err
+	}
+
+	afterQuery := []string{
+		"nvidia-smi",
+		"--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total",
+		"--format=csv,noheader,nounits",
+	}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
+		stressJob,
+		{name: "04-nvidia-smi-after.log", cmd: afterQuery},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-stress", jobs, logFunc)
+}
+
+// buildNvidiaStressJob turns the options into the stress step of the NVIDIA
+// pack. The GPU selection is resolved first; the loader name (trimmed,
+// case-insensitive, empty meaning builtin) then picks the binary and log
+// file. Every loader gets --seconds, only the builtin loader gets --size-mb,
+// and --devices is added when the selection is non-empty. Unknown loaders
+// are rejected.
+func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
+	selected, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
+	if err != nil {
+		return satJob{}, err
+	}
+
+	var (
+		binary   string
+		logName  string
+		withSize bool
+	)
+	switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
+	case "", NvidiaStressLoaderBuiltin:
+		binary, logName, withSize = "bee-gpu-burn", "03-bee-gpu-burn.log", true
+	case NvidiaStressLoaderJohn:
+		binary, logName = "bee-john-gpu-stress", "03-john-gpu-stress.log"
+	case NvidiaStressLoaderNCCL:
+		binary, logName = "bee-nccl-gpu-stress", "03-bee-nccl-gpu-stress.log"
+	default:
+		return satJob{}, fmt.Errorf("unknown NVIDIA stress loader %q", opts.Loader)
+	}
+
+	argv := []string{binary, "--seconds", strconv.Itoa(opts.DurationSec)}
+	if withSize {
+		argv = append(argv, "--size-mb", strconv.Itoa(opts.SizeMB))
+	}
+	if len(selected) > 0 {
+		argv = append(argv, "--devices", joinIndexList(selected))
+	}
+	return satJob{
+		name:       logName,
+		cmd:        argv,
+		collectGPU: true,
+		gpuIndices: selected,
+	}, nil
+}
+
+// normalizeNvidiaStressOptions fills in defaults in place: 300 seconds and
+// 64 MB when the values are not positive, a canonical loader name (anything
+// unrecognized becomes the builtin loader), and de-duplicated, sorted GPU
+// index lists.
+func normalizeNvidiaStressOptions(opts *NvidiaStressOptions) {
+	if opts.DurationSec <= 0 {
+		opts.DurationSec = 300
+	}
+	if opts.SizeMB <= 0 {
+		opts.SizeMB = 64
+	}
+	switch loader := strings.TrimSpace(strings.ToLower(opts.Loader)); loader {
+	case NvidiaStressLoaderJohn, NvidiaStressLoaderNCCL:
+		opts.Loader = loader
+	default:
+		// Covers "", the builtin name itself, and every unknown value.
+		opts.Loader = NvidiaStressLoaderBuiltin
+	}
+	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
+	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
+}
+
+// resolveNvidiaGPUSelection returns the sorted GPU indices to stress. It
+// starts from all indices reported by nvidia-smi, keeps only those in
+// include (when include is non-empty), then removes those in exclude. It
+// fails when no GPUs are detected or the filters leave nothing.
+func resolveNvidiaGPUSelection(include, exclude []int) ([]int, error) {
+	available, err := listNvidiaGPUIndices()
+	if err != nil {
+		return nil, err
+	}
+	if len(available) == 0 {
+		return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
+	}
+
+	toSet := func(values []int) map[int]struct{} {
+		set := make(map[int]struct{}, len(values))
+		for _, v := range values {
+			set[v] = struct{}{}
+		}
+		return set
+	}
+
+	chosen := available
+	if len(include) > 0 {
+		wanted := toSet(include)
+		kept := make([]int, 0, len(chosen))
+		for _, idx := range chosen {
+			if _, ok := wanted[idx]; ok {
+				kept = append(kept, idx)
+			}
+		}
+		chosen = kept
+	}
+	if len(exclude) > 0 {
+		skipped := toSet(exclude)
+		kept := make([]int, 0, len(chosen))
+		for _, idx := range chosen {
+			if _, ok := skipped[idx]; !ok {
+				kept = append(kept, idx)
+			}
+		}
+		chosen = kept
+	}
+	if len(chosen) == 0 {
+		return nil, fmt.Errorf("no NVIDIA GPUs selected after applying filters")
+	}
+
+	result := make([]int, len(chosen))
+	copy(result, chosen)
+	sort.Ints(result)
+	return result, nil
+}
+
+// listNvidiaGPUIndices asks nvidia-smi for the GPU indices and returns them
+// de-duplicated and sorted. Blank and non-numeric output lines are ignored;
+// a failing nvidia-smi is reported as a wrapped error.
+func listNvidiaGPUIndices() ([]int, error) {
+	raw, err := satExecCommand("nvidia-smi", "--query-gpu=index", "--format=csv,noheader,nounits").Output()
+	if err != nil {
+		return nil, fmt.Errorf("nvidia-smi: %w", err)
+	}
+	var found []int
+	for _, row := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
+		text := strings.TrimSpace(row)
+		if text == "" {
+			continue
+		}
+		if n, convErr := strconv.Atoi(text); convErr == nil {
+			found = append(found, n)
+		}
+	}
+	return dedupeSortedIndices(found), nil
+}
+
+// dedupeSortedIndices returns the distinct non-negative values in ascending
+// order. Empty input yields nil; input with only negative values yields an
+// empty, non-nil slice.
+func dedupeSortedIndices(values []int) []int {
+	if len(values) == 0 {
+		return nil
+	}
+	unique := make([]int, 0, len(values))
+	known := make(map[int]struct{}, len(values))
+	for _, v := range values {
+		if _, dup := known[v]; dup || v < 0 {
+			continue
+		}
+		known[v] = struct{}{}
+		unique = append(unique, v)
+	}
+	sort.Ints(unique)
+	return unique
+}
+
+// joinIndexList renders the indices as a comma-separated decimal list,
+// e.g. [0 2 5] → "0,2,5". An empty slice gives the empty string.
+func joinIndexList(values []int) string {
+	var b strings.Builder
+	for i, v := range values {
+		if i > 0 {
+			b.WriteByte(',')
+		}
+		b.WriteString(strconv.Itoa(v))
+	}
+	return b.String()
+}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -0,0 +1,528 @@
+package platform
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"encoding/csv"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// PlatformStressCycle describes one thermal cycle: a load phase, then an idle phase.
+type PlatformStressCycle struct {
+	LoadSec int // seconds of simultaneous CPU+GPU stress
+	IdleSec int // seconds of idle monitoring after load cut
+}
+
+// PlatformStressOptions controls the thermal cycling test.
+type PlatformStressOptions struct {
+	Cycles []PlatformStressCycle // run in order; RunPlatformStress rejects an empty list
+}
+
+// platformStressRow is one telemetry sample (collectPhase takes one per second).
+type platformStressRow struct {
+	ElapsedSec   float64 // seconds since the start of the whole test
+	Cycle        int     // 1-based cycle number
+	Phase        string  // "load" | "idle"
+	CPULoadPct   float64
+	MaxCPUTempC  float64
+	MaxGPUTempC  float64
+	SysPowerW    float64
+	FanMinRPM    float64
+	FanMaxRPM    float64
+	GPUThrottled bool // NOTE(review): sampleToPlatformRow never sets this — confirm where throttling is detected
+}
+
+// RunPlatformStress runs the load+idle thermal cycles in opts, sampling metrics
+// once per second, then writes metrics.csv and summary.txt, packs them into a
+// tar.gz under baseDir and returns its path. Cancelling ctx ends cycling early.
+func (s *System) RunPlatformStress(
+	ctx context.Context,
+	baseDir string,
+	opts PlatformStressOptions,
+	logFunc func(string),
+) (string, error) {
+	if logFunc == nil {
+		logFunc = func(string) {}
+	}
+	if len(opts.Cycles) == 0 {
+		return "", fmt.Errorf("no cycles defined")
+	}
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
+	}
+
+	stamp := time.Now().UTC().Format("20060102-150405")
+	runDir := filepath.Join(baseDir, "platform-stress-"+stamp)
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir run dir: %w", err)
+	}
+
+	vendor := s.DetectGPUVendor()
+	logFunc(fmt.Sprintf("Platform Thermal Cycling — %d cycle(s), GPU vendor: %s", len(opts.Cycles), vendor))
+
+	var rows []platformStressRow
+	start := time.Now()
+
+	var analyses []cycleAnalysis
+
+	for i, cycle := range opts.Cycles {
+		if ctx.Err() != nil {
+			break
+		}
+		cycleNum := i + 1
+		logFunc(fmt.Sprintf("--- Cycle %d/%d: load=%ds, idle=%ds ---", cycleNum, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec))
+
+		// ── LOAD PHASE ───────────────────────────────────────────────────────
+		loadCtx, loadCancel := context.WithTimeout(ctx, time.Duration(cycle.LoadSec)*time.Second)
+		var wg sync.WaitGroup
+
+		// CPU stress (skipped with a log line if stressapptest cannot be started)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			cpuCmd, err := buildCPUStressCmd(loadCtx)
+			if err != nil {
+				logFunc("CPU stress: " + err.Error())
+				return
+			}
+			_ = cpuCmd.Wait() // exits when loadCtx times out (SIGKILL)
+		}()
+
+		// GPU stress (silently skipped when buildGPUStressCmd returns nil)
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			gpuCmd := buildGPUStressCmd(loadCtx, vendor)
+			if gpuCmd == nil {
+				return
+			}
+			_ = gpuCmd.Wait()
+		}()
+
+		// Sample metrics for the load phase; this call blocks until loadCtx is done.
+		loadRows := collectPhase(loadCtx, cycleNum, "load", start)
+		for _, r := range loadRows { // rows are logged in one burst once the phase has ended
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, loadRows...)
+		loadCancel()
+		wg.Wait()
+
+		if len(loadRows) > 0 {
+			logFunc(fmt.Sprintf("Cycle %d load ended (%.0fs)", cycleNum, loadRows[len(loadRows)-1].ElapsedSec))
+		}
+
+		// ── IDLE PHASE ───────────────────────────────────────────────────────
+		idleCtx, idleCancel := context.WithTimeout(ctx, time.Duration(cycle.IdleSec)*time.Second)
+		idleRows := collectPhase(idleCtx, cycleNum, "idle", start)
+		for _, r := range idleRows {
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, idleRows...)
+		idleCancel()
+
+		// Per-cycle analysis
+		an := analyzePlatformCycle(loadRows, idleRows)
+		analyses = append(analyses, an)
+		logFunc(fmt.Sprintf("Cycle %d: maxCPU=%.1f°C maxGPU=%.1f°C power=%.0fW throttled=%v fanDrop=%.0f%%",
+			cycleNum, an.maxCPUTemp, an.maxGPUTemp, an.maxPower, an.throttled, an.fanDropPct))
+	}
+
+	// Write CSV (best effort: a write failure is ignored)
+	csvData := writePlatformCSV(rows)
+	_ = os.WriteFile(filepath.Join(runDir, "metrics.csv"), csvData, 0644)
+
+	// Write summary (echoed to logFunc line by line; the file write is best effort)
+	summary := writePlatformSummary(opts, analyses)
+	logFunc("--- Summary ---")
+	for _, line := range strings.Split(summary, "\n") {
+		if line != "" {
+			logFunc(line)
+		}
+	}
+	_ = os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary), 0644)
+
+	// Pack tar.gz next to the run directory, then drop the directory
+	archivePath := filepath.Join(baseDir, "platform-stress-"+stamp+".tar.gz")
+	if err := packPlatformDir(runDir, archivePath); err != nil {
+		return "", fmt.Errorf("pack archive: %w", err)
+	}
+	_ = os.RemoveAll(runDir) // only the archive is kept
+	return archivePath, nil
+}
+
+// collectPhase takes one SampleLiveMetrics reading per second until ctx is done.
+func collectPhase(ctx context.Context, cycle int, phase string, testStart time.Time) []platformStressRow {
+	tick := time.NewTicker(time.Second)
+	defer tick.Stop()
+	var collected []platformStressRow
+	for {
+		select {
+		case <-tick.C:
+			row := sampleToPlatformRow(SampleLiveMetrics(), cycle, phase, testStart)
+			collected = append(collected, row)
+		case <-ctx.Done():
+			return collected
+		}
+	}
+}
+
+// sampleToPlatformRow flattens one live sample into a telemetry row stamped with
+// the time elapsed since testStart. MaxCPUTempC/MaxGPUTempC are the highest
+// readings in the "cpu"/"gpu" temperature groups (the GPU value also considers
+// each entry of s.GPUs); FanMinRPM/FanMaxRPM span all reported fans.
+// GPUThrottled is left at its zero value.
+func sampleToPlatformRow(s LiveMetricSample, cycle int, phase string, testStart time.Time) platformStressRow {
+	row := platformStressRow{
+		ElapsedSec: time.Since(testStart).Seconds(),
+		Cycle:      cycle,
+		Phase:      phase,
+		CPULoadPct: s.CPULoadPct,
+		SysPowerW:  s.PowerW,
+	}
+	// Sensor temperatures: track the hottest reading per group.
+	for _, temp := range s.Temps {
+		if temp.Group == "cpu" && temp.Celsius > row.MaxCPUTempC {
+			row.MaxCPUTempC = temp.Celsius
+		}
+		if temp.Group == "gpu" && temp.Celsius > row.MaxGPUTempC {
+			row.MaxGPUTempC = temp.Celsius
+		}
+	}
+	// Per-GPU temperatures can raise the GPU maximum further.
+	for _, gpu := range s.GPUs {
+		if gpu.TempC > row.MaxGPUTempC {
+			row.MaxGPUTempC = gpu.TempC
+		}
+	}
+	// Seed min/max from the first fan so the zero value never wins a comparison.
+	for i, fan := range s.Fans {
+		if i == 0 || fan.RPM < row.FanMinRPM {
+			row.FanMinRPM = fan.RPM
+		}
+		if i == 0 || fan.RPM > row.FanMaxRPM {
+			row.FanMaxRPM = fan.RPM
+		}
+	}
+	return row
+}
+
+// formatPlatformRow renders a row as one log line; fans are shown only when FanMinRPM > 0.
+func formatPlatformRow(r platformStressRow) string {
+	var fanPart, throttlePart string
+	if r.FanMinRPM > 0 {
+		fanPart = fmt.Sprintf(" fans=%.0f-%.0fRPM", r.FanMinRPM, r.FanMaxRPM)
+	}
+	if r.GPUThrottled {
+		throttlePart = " THROTTLE"
+	}
+	return fmt.Sprintf("[%5.0fs] cycle=%d phase=%-4s cpu=%.0f%% cpuT=%.1f°C gpuT=%.1f°C pwr=%.0fW%s%s",
+		r.ElapsedSec, r.Cycle, r.Phase, r.CPULoadPct, r.MaxCPUTempC, r.MaxGPUTempC, r.SysPowerW, fanPart, throttlePart)
+}
+
+// analyzePlatformCycle reduces one cycle's rows to peak temperatures and power,
+// a throttle flag, and a fan-spindown measure: the mean fan RPM over the last
+// (up to) five load rows versus the lowest mean fan RPM seen within 15s of the
+// last load sample. fanDropPct stays 0 when no fan RPM was seen at the cut.
+func analyzePlatformCycle(loadRows, idleRows []platformStressRow) cycleAnalysis {
+	var an cycleAnalysis
+	for _, r := range loadRows {
+		if r.MaxCPUTempC > an.maxCPUTemp {
+			an.maxCPUTemp = r.MaxCPUTempC
+		}
+		if r.MaxGPUTempC > an.maxGPUTemp {
+			an.maxGPUTemp = r.MaxGPUTempC
+		}
+		if r.SysPowerW > an.maxPower {
+			an.maxPower = r.SysPowerW
+		}
+		an.throttled = an.throttled || r.GPUThrottled
+	}
+	// Fan RPM at cut: average over the last five load rows that report fans.
+	tail := loadRows
+	if len(tail) > 5 {
+		tail = tail[len(tail)-5:]
+	}
+	var rpmSum float64
+	var rpmCount int
+	for _, r := range tail {
+		if r.FanMinRPM > 0 {
+			rpmSum += (r.FanMinRPM + r.FanMaxRPM) / 2
+			rpmCount++
+		}
+	}
+	if rpmCount > 0 {
+		an.fanAtCutAvg = rpmSum / float64(rpmCount)
+	}
+	// Lowest fan RPM within 15s of the last load sample; starts at the cut average.
+	an.fanMin15s = an.fanAtCutAvg
+	var cutElapsed float64
+	if n := len(loadRows); n > 0 {
+		cutElapsed = loadRows[n-1].ElapsedSec
+	}
+	for _, r := range idleRows {
+		if r.ElapsedSec > cutElapsed+15 {
+			break
+		}
+		mid := (r.FanMinRPM + r.FanMaxRPM) / 2
+		if mid > 0 && (an.fanMin15s == 0 || mid < an.fanMin15s) {
+			an.fanMin15s = mid
+		}
+	}
+	if an.fanAtCutAvg > 0 {
+		an.fanDropPct = (an.fanAtCutAvg - an.fanMin15s) / an.fanAtCutAvg * 100
+	}
+	return an
+}
+
+type cycleAnalysis struct { // per-cycle results computed by analyzePlatformCycle
+	maxCPUTemp  float64 // peak MaxCPUTempC over the load rows
+	maxGPUTemp  float64 // peak MaxGPUTempC over the load rows
+	maxPower    float64 // peak SysPowerW over the load rows
+	throttled   bool    // true if any load row had GPUThrottled set
+	fanAtCutAvg float64 // mean fan RPM over the last (up to 5) load rows
+	fanMin15s   float64 // lowest mean fan RPM within 15s after the last load row
+	fanDropPct  float64 // percentage drop from fanAtCutAvg to fanMin15s
+}
+
+// writePlatformSummary renders the per-cycle results plus an overall verdict:
+// FAIL if any cycle throttled, else WARN if any fan dropped >20% in 15s, else PASS.
+func writePlatformSummary(opts PlatformStressOptions, analyses []cycleAnalysis) string {
+	var b strings.Builder
+	fmt.Fprintf(&b, "Platform Thermal Cycling — %d cycle(s)\n", len(opts.Cycles))
+	fmt.Fprintf(&b, "%s\n\n", strings.Repeat("=", 48))
+	var throttledCycles, fanWarnCycles int
+	for idx, an := range analyses {
+		spec := opts.Cycles[idx]
+		fmt.Fprintf(&b, "Cycle %d/%d (load=%ds, idle=%ds)\n", idx+1, len(opts.Cycles), spec.LoadSec, spec.IdleSec)
+		fmt.Fprintf(&b, "  Max CPU temp: %.1f°C\n", an.maxCPUTemp)
+		fmt.Fprintf(&b, "  Max GPU temp: %.1f°C\n", an.maxGPUTemp)
+		fmt.Fprintf(&b, "  Max sys power: %.0f W\n", an.maxPower)
+		if !an.throttled {
+			fmt.Fprintf(&b, "  Throttle: none\n")
+		} else {
+			fmt.Fprintf(&b, "  Throttle: DETECTED\n")
+			throttledCycles++
+		}
+		if an.fanAtCutAvg > 0 {
+			fmt.Fprintf(&b, "  Fan at load cut: %.0f RPM avg\n", an.fanAtCutAvg)
+			fmt.Fprintf(&b, "  Fan min (first 15s idle): %.0f RPM (drop %.0f%%)\n", an.fanMin15s, an.fanDropPct)
+			if an.fanDropPct > 20 {
+				fmt.Fprintf(&b, "  Fan response: WARN — fast spindown (>20%% drop in 15s)\n")
+				fanWarnCycles++
+			} else {
+				fmt.Fprintf(&b, "  Fan response: OK\n")
+			}
+		}
+		b.WriteString("\n")
+	}
+	fmt.Fprintf(&b, "%s\n", strings.Repeat("=", 48))
+	switch {
+	case throttledCycles > 0:
+		fmt.Fprintf(&b, "Overall: FAIL — throttle detected in %d/%d cycles\n", throttledCycles, len(analyses))
+	case fanWarnCycles > 0:
+		fmt.Fprintf(&b, "Overall: WARN — fast fan spindown in %d/%d cycles (cooling recovery risk)\n", fanWarnCycles, len(analyses))
+	default:
+		fmt.Fprintf(&b, "Overall: PASS\n")
+	}
+	return b.String()
+}
+
+// writePlatformCSV encodes rows as CSV preceded by a header line. Floats are
+// written with one decimal, fan RPM with none, and gpu_throttled as "1" or "0".
+func writePlatformCSV(rows []platformStressRow) []byte {
+	var buf bytes.Buffer
+	w := csv.NewWriter(&buf)
+	// Formatting helpers for the two float precisions used below.
+	fix1 := func(v float64) string { return strconv.FormatFloat(v, 'f', 1, 64) }
+	fix0 := func(v float64) string { return strconv.FormatFloat(v, 'f', 0, 64) }
+	// Write errors are discarded: the csv.Writer targets an in-memory buffer.
+	_ = w.Write([]string{
+		"elapsed_sec", "cycle", "phase",
+		"cpu_load_pct", "max_cpu_temp_c", "max_gpu_temp_c",
+		"sys_power_w", "fan_min_rpm", "fan_max_rpm", "gpu_throttled",
+	})
+	for _, r := range rows {
+		throttled := "0"
+		if r.GPUThrottled {
+			throttled = "1"
+		}
+		_ = w.Write([]string{
+			fix1(r.ElapsedSec), strconv.Itoa(r.Cycle), r.Phase,
+			fix1(r.CPULoadPct), fix1(r.MaxCPUTempC), fix1(r.MaxGPUTempC),
+			fix1(r.SysPowerW), fix0(r.FanMinRPM), fix0(r.FanMaxRPM), throttled,
+		})
+	}
+	// Flush pushes the writer's buffered records into buf.
+	w.Flush()
+	return buf.Bytes()
+}
+
+// buildCPUStressCmd starts stressapptest at nice 15 and returns the running
+// command; it is killed when ctx is cancelled. The caller must Wait on it.
+func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
+	bin, err := satLookPath("stressapptest")
+	if err != nil {
+		return nil, fmt.Errorf("stressapptest not found: %w", err)
+	}
+	// Use a very long duration; the context timeout will kill it at the right time.
+	args := []string{"-s", "86400", "-W", "--cc_test"}
+	if n := platformStressCPUThreads(); n > 0 {
+		args = append(args, "-m", strconv.Itoa(n))
+	}
+	if mb := platformStressMemoryMB(); mb > 0 {
+		args = append(args, "-M", strconv.Itoa(mb))
+	}
+	cmd := exec.CommandContext(ctx, bin, args...)
+	// Stdout/Stderr are left nil, so the child's output goes to the null device.
+	if err := startLowPriorityCmd(cmd, 15); err != nil {
+		return nil, fmt.Errorf("stressapptest start: %w", err)
+	}
+	return cmd, nil
+}
+
+// buildGPUStressCmd starts the vendor's GPU stress tool; nil means none is available.
+func buildGPUStressCmd(ctx context.Context, vendor string) *exec.Cmd {
+	v := strings.ToLower(vendor)
+	if v == "amd" {
+		return buildAMDGPUStressCmd(ctx)
+	}
+	if v == "nvidia" {
+		return buildNvidiaGPUStressCmd(ctx)
+	}
+	return nil
+}
+
+// buildAMDGPUStressCmd writes an RVS gst config and starts rvs on it at nice 10.
+// It returns nil if rvs is not found, the config write fails, or rvs cannot start.
+func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd {
+	rvsArgs, err := resolveRVSCommand()
+	if err != nil {
+		return nil
+	}
+	cfg := `actions:
+- name: gst_platform
+  device: all
+  module: gst
+  parallel: true
+  duration: 86400000
+  copy_matrix: false
+  target_stress: 90
+  matrix_size_a: 8640
+  matrix_size_b: 8640
+  matrix_size_c: 8640
+`
+	cfgFile := "/tmp/bee-platform-gst.conf"
+	cmd := exec.CommandContext(ctx, rvsArgs[0], "-c", cfgFile)
+	if os.WriteFile(cfgFile, []byte(cfg), 0644) != nil || startLowPriorityCmd(cmd, 10) != nil {
+		return nil // never hand back a command that was not started
+	}
+	return cmd
+}
+
+// buildNvidiaGPUStressCmd starts bee-gpu-burn (or bee-gpu-stress) at nice 10; nil if unavailable.
+func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd {
+	path, err := satLookPath("bee-gpu-burn")
+	if err != nil {
+		if path, err = satLookPath("bee-gpu-stress"); err != nil {
+			return nil
+		}
+	}
+	cmd := exec.CommandContext(ctx, path, "--seconds", "86400", "--size-mb", "64")
+	if err := startLowPriorityCmd(cmd, 10); err != nil {
+		return nil // could not start: never hand back a command that is not running
+	}
+	return cmd
+}
+
+// startLowPriorityCmd starts cmd and then renices the new process. A failed
+// Setpriority is ignored (best effort); a failed Start is returned.
+func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
+	err := cmd.Start()
+	if err == nil && cmd.Process != nil {
+		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
+	}
+	return err
+}
+
+// platformStressCPUThreads returns the env override, else NumCPU less 1-2 CPUs of headroom (min 1).
+func platformStressCPUThreads() int {
+	if override := envInt("BEE_PLATFORM_STRESS_THREADS", 0); override > 0 {
+		return override
+	}
+	total := runtime.NumCPU()
+	if total <= 2 {
+		return 1
+	}
+	if total <= 8 {
+		return total - 1
+	}
+	return total - 2
+}
+
+// platformStressMemoryMB returns the env override, else 60% of freeMemBytes in MiB (floor 1024; 0 if none).
+func platformStressMemoryMB() int {
+	if override := envInt("BEE_PLATFORM_STRESS_MB", 0); override > 0 {
+		return override
+	}
+	avail := freeMemBytes()
+	if avail <= 0 {
+		return 0
+	}
+	if share := int((avail * 60) / 100 / (1024 * 1024)); share >= 1024 {
+		return share
+	}
+	return 1024
+}
+
+// packPlatformDir writes the non-directory entries of dir (no recursion) into a
+// gzip-compressed tar at dest, stored as "<base(dir)>/<name>"; unreadable files
+// are skipped. Close errors are returned: the tar/gzip trailers are written on Close.
+func packPlatformDir(dir, dest string) (err error) {
+	f, err := os.Create(dest)
+	if err != nil {
+		return err
+	}
+	gz := gzip.NewWriter(f)
+	tw := tar.NewWriter(gz)
+	defer func() {
+		for _, closeFn := range []func() error{tw.Close, gz.Close, f.Close} {
+			if cerr := closeFn(); err == nil {
+				err = cerr
+			}
+		}
+	}()
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return err
+	}
+	base := filepath.Base(dir)
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+		data, err := os.ReadFile(filepath.Join(dir, e.Name()))
+		if err != nil {
+			continue
+		}
+		hdr := &tar.Header{Name: filepath.Join(base, e.Name()), Size: int64(len(data)), Mode: 0644, ModTime: time.Now()}
+		if err := tw.WriteHeader(hdr); err != nil {
+			return err
+		}
+		if _, err := tw.Write(data); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/audit/internal/platform/platform_stress_test.go
+++ b/audit/internal/platform/platform_stress_test.go
@@ -0,0 +1,34 @@
+package platform
+
+import (
+	"runtime"
+	"testing"
+)
+
+func TestPlatformStressCPUThreadsOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7") // a positive override must be returned as-is
+	if got := platformStressCPUThreads(); got != 7 {
+		t.Fatalf("platformStressCPUThreads=%d want 7", got)
+	}
+}
+
+func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "") // presumably an empty value makes envInt fall back to its default — confirm
+	got := platformStressCPUThreads()
+	if got < 1 {
+		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
+	}
+	if got > runtime.NumCPU() {
+		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, runtime.NumCPU())
+	}
+	if runtime.NumCPU() > 2 && got >= runtime.NumCPU() { // with more than 2 CPUs at least one must stay free
+		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, runtime.NumCPU())
+	}
+}
+
+func TestPlatformStressMemoryMBOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192") // a positive override must be returned as-is
+	if got := platformStressMemoryMB(); got != 8192 {
+		t.Fatalf("platformStressMemoryMB=%d want 8192", got)
+	}
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -136,7 +136,10 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
 		tools = append(tools, s.CheckTools([]string{
 			"nvidia-smi",
 			"nvidia-bug-report.sh",
-			"bee-gpu-stress",
+			"bee-gpu-burn",
+			"bee-john-gpu-stress",
+			"bee-nccl-gpu-stress",
+			"all_reduce_perf",
 		})...)
 	case "amd":
 		tool := ToolStatus{Name: "rocm-smi"}
@@ -176,8 +179,8 @@ func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHe
 			health.DriverReady = true
 		}

-		if lookErr := exec.Command("sh", "-c", "command -v bee-gpu-stress >/dev/null 2>&1").Run(); lookErr == nil {
-			out, err := exec.Command("bee-gpu-stress", "--seconds", "1", "--size-mb", "1").CombinedOutput()
+		if _, lookErr := exec.LookPath("bee-gpu-burn"); lookErr == nil {
+			out, err := exec.Command("bee-gpu-burn", "--seconds", "1", "--size-mb", "1").CombinedOutput()
 			if err == nil {
 				health.CUDAReady = true
 			} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -2,6 +2,8 @@ package platform

 import (
 	"archive/tar"
+	"bufio"
+	"bytes"
 	"compress/gzip"
 	"context"
 	"errors"
@@ -13,6 +15,7 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 )

@@ -30,8 +33,46 @@ var (
 		"/opt/rocm/libexec/rocm_smi/rocm_smi.py",
 		"/opt/rocm-*/libexec/rocm_smi/rocm_smi.py",
 	}
+	rvsExecutableGlobs = []string{
+		"/opt/rocm/bin/rvs",
+		"/opt/rocm-*/bin/rvs",
+	}
 )

+// streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
+// Returns combined stdout+stderr as a byte slice plus the Start or Wait error.
+func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {
+	pr, pw := io.Pipe()
+	cmd.Stdout = pw
+	cmd.Stderr = pw
+	var buf bytes.Buffer
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		scanner := bufio.NewScanner(pr)
+		for scanner.Scan() {
+			line := scanner.Text()
+			buf.WriteString(line + "\n")
+			if logFunc != nil {
+				logFunc(line)
+			}
+		}
+		// Scan stops early on an over-long line (bufio.ErrTooLong). Keep draining
+		// so writes into the pipe cannot block and leave cmd.Wait hanging forever.
+		_, _ = io.Copy(&buf, pr)
+	}()
+	if err := cmd.Start(); err != nil {
+		_ = pw.Close()
+		wg.Wait()
+		return nil, err
+	}
+	waitErr := cmd.Wait()
+	_ = pw.Close()
+	wg.Wait()
+	return buf.Bytes(), waitErr
+}
+
 // NvidiaGPU holds basic GPU info from nvidia-smi.
 type NvidiaGPU struct {
 	Index    int
@@ -53,6 +94,12 @@ func (s *System) DetectGPUVendor() string {
 	if _, err := os.Stat("/dev/kfd"); err == nil {
 		return "amd"
 	}
+	if raw, err := exec.Command("lspci", "-nn").Output(); err == nil {
+		text := strings.ToLower(string(raw))
+		if strings.Contains(text, "advanced micro devices") || strings.Contains(text, "amd/ati") {
+			return "amd"
+		}
+	}
 	return ""
 }

@@ -80,13 +127,103 @@ func (s *System) ListAMDGPUs() ([]AMDGPUInfo, error) {
 }

 // RunAMDAcceptancePack runs an AMD GPU diagnostic pack using rocm-smi.
-func (s *System) RunAMDAcceptancePack(baseDir string) (string, error) {
-	return runAcceptancePack(baseDir, "gpu-amd", []satJob{
+func (s *System) RunAMDAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd", []satJob{
 		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
 		{name: "02-rocm-smi-showallinfo.log", cmd: []string{"rocm-smi", "--showallinfo"}},
 		{name: "03-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
 		{name: "04-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
-	})
+	}, logFunc)
+}
+
+// RunAMDMemIntegrityPack writes an RVS "mem" module config to /tmp and runs it between two rocm-smi snapshots.
+func (s *System) RunAMDMemIntegrityPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-mem.conf"
+	cfg := `actions:
+- name: mem_integrity
+  device: all
+  module: mem
+  parallel: true
+  duration: 60000
+  copy_matrix: false
+  target_stress: 90
+  matrix_size: 8640
+`
+	_ = os.WriteFile(cfgFile, []byte(cfg), 0644) // NOTE(review): write error ignored; rvs would then read a missing or stale file
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-mem", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rvs-mem.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "03-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
+// RunAMDMemBandwidthPack runs rocm-bandwidth-test and an RVS "babel" module config between two rocm-smi snapshots.
+func (s *System) RunAMDMemBandwidthPack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	cfgFile := "/tmp/bee-amd-babel.conf"
+	cfg := `actions:
+- name: babel_mem_bw
+  device: all
+  module: babel
+  parallel: true
+  copy_matrix: true
+  target_stress: 90
+  matrix_size: 134217728
+`
+	_ = os.WriteFile(cfgFile, []byte(cfg), 0644) // NOTE(review): write error ignored; rvs would then read a missing or stale file
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-bandwidth", []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
+		{name: "03-rvs-babel.log", cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--showmemuse", "--csv"}},
+	}, logFunc)
+}
+
+// RunAMDStressPack runs an AMD GPU burn-in pack; durationSec <= 0 falls back to BEE_AMD_STRESS_SECONDS (default 300).
+// Missing tools are reported as UNSUPPORTED, consistent with the existing SAT pattern.
+func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	seconds := durationSec
+	if seconds <= 0 {
+		seconds = envInt("BEE_AMD_STRESS_SECONDS", 300)
+	}
+	if err := ensureAMDRuntimeReady(); err != nil {
+		return "", err
+	}
+	// NOTE(review): the config from amdStressRVSConfig sets copy_matrix: false, not true — confirm which is intended.
+	rvsCfg := amdStressRVSConfig(seconds)
+	cfgFile := "/tmp/bee-amd-gst.conf"
+	_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
+
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", amdStressJobs(seconds, cfgFile), logFunc)
+}
+
+func amdStressRVSConfig(seconds int) string { // renders the RVS gst action used by RunAMDStressPack
+	return fmt.Sprintf(`actions:
+- name: gst_stress
+  device: all
+  module: gst
+  parallel: true
+  duration: %d
+  copy_matrix: false
+  target_stress: 90
+  matrix_size_a: 8640
+  matrix_size_b: 8640
+  matrix_size_c: 8640
+`, seconds*1000) // seconds scaled by 1000 — presumably RVS takes the duration in milliseconds; confirm
+}
+
+func amdStressJobs(seconds int, cfgFile string) []satJob { // job list for RunAMDStressPack, in run order
+	return []satJob{
+		{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
+		{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
+		{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
+		{name: "04-rocm-smi-after.log", cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
+	}
 }

 // ListNvidiaGPUs returns GPUs visible to nvidia-smi.
@@ -121,39 +258,98 @@ func (s *System) ListNvidiaGPUs() ([]NvidiaGPU, error) {
 	return gpus, nil
 }

-func (s *System) RunNvidiaAcceptancePack(baseDir string) (string, error) {
-	return runAcceptancePack(baseDir, "gpu-nvidia", nvidiaSATJobs())
+// RunNCCLTests runs nccl-tests all_reduce_perf across all NVIDIA GPUs.
+// Measures collective communication bandwidth over NVLink/PCIe.
+func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+	// Count GPUs; fall back to 1 when nvidia-smi fails or prints no indices.
+	gpuCount := 1
+	out, err := exec.Command("nvidia-smi", "--query-gpu=index", "--format=csv,noheader").Output()
+	if n := len(strings.Fields(string(out))); err == nil && n > 0 {
+		gpuCount = n
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "nccl-tests", []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-all-reduce-perf.log", cmd: []string{
+			"all_reduce_perf", "-b", "512M", "-e", "4G", "-f", "2",
+			"-g", strconv.Itoa(gpuCount), "--iters", "20",
+		}},
+	}, logFunc)
 }

-// RunNvidiaAcceptancePackWithOptions runs the NVIDIA SAT with explicit duration,
-// GPU memory size, and GPU index selection. ctx cancellation kills the running job.
-func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, durationSec int, sizeMB int, gpuIndices []int) (string, error) {
-	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaSATJobsWithOptions(durationSec, sizeMB, gpuIndices))
+func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
+	return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
 }

-func (s *System) RunMemoryAcceptancePack(baseDir string) (string, error) {
+// RunNvidiaAcceptancePackWithOptions runs the NVIDIA diagnostics via DCGM.
+// diagLevel: 1=quick, 2=medium, 3=targeted stress, 4=extended stress.
+// gpuIndices: specific GPU indices to test (empty = all GPUs).
+// ctx cancellation kills the running job.
+func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error) {
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, gpuIndices), logFunc)
+}
+
+func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 	sizeMB := envInt("BEE_MEMTESTER_SIZE_MB", 128)
 	passes := envInt("BEE_MEMTESTER_PASSES", 1)
-	return runAcceptancePack(baseDir, "memory", []satJob{
+	return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
 		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
 		{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
 		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
-	})
+	}, logFunc)
 }

-func (s *System) RunCPUAcceptancePack(baseDir string, durationSec int) (string, error) {
+// RunMemoryStressPack runs stress-ng --vm between two `free -h` snapshots; durationSec <= 0 uses BEE_VM_STRESS_SECONDS (default 300).
+func (s *System) RunMemoryStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	if durationSec <= 0 {
+		durationSec = envInt("BEE_VM_STRESS_SECONDS", 300)
+	}
+	// Use 80% of RAM by default; override with BEE_VM_STRESS_SIZE_MB.
+	vmBytes := "80%"
+	if overrideMB := envInt("BEE_VM_STRESS_SIZE_MB", 0); overrideMB > 0 {
+		vmBytes = strconv.Itoa(overrideMB) + "M"
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "memory-stress", []satJob{
+		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
+		{name: "02-stress-ng-vm.log", cmd: []string{
+			"stress-ng", "--vm", "1",
+			"--vm-bytes", vmBytes,
+			"--vm-method", "all",
+			"--timeout", strconv.Itoa(durationSec),
+			"--metrics-brief",
+		}},
+		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
+	}, logFunc)
+}
+
+// RunSATStressPack runs stressapptest between two `free -h` snapshots; durationSec <= 0 uses BEE_SAT_STRESS_SECONDS (default 300).
+func (s *System) RunSATStressPack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+	if durationSec <= 0 {
+		durationSec = envInt("BEE_SAT_STRESS_SECONDS", 300)
+	}
+	argv := []string{"stressapptest", "-s", strconv.Itoa(durationSec), "-W", "--cc_test"}
+	if overrideMB := envInt("BEE_SAT_STRESS_MB", 0); overrideMB > 0 {
+		argv = append(argv, "-M", strconv.Itoa(overrideMB))
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "sat-stress", []satJob{
+		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
+		{name: "02-stressapptest.log", cmd: argv},
+		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
+	}, logFunc)
+}
+
+func (s *System) RunCPUAcceptancePack(ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
 	if durationSec <= 0 {
 		durationSec = 60
 	}
-	return runAcceptancePack(baseDir, "cpu", []satJob{
+	return runAcceptancePackCtx(ctx, baseDir, "cpu", []satJob{
 		{name: "01-lscpu.log", cmd: []string{"lscpu"}},
 		{name: "02-sensors-before.log", cmd: []string{"sensors"}},
 		{name: "03-stress-ng.log", cmd: []string{"stress-ng", "--cpu", "0", "--cpu-method", "all", "--timeout", fmt.Sprintf("%d", durationSec)}},
 		{name: "04-sensors-after.log", cmd: []string{"sensors"}},
-	})
+	}, logFunc)
 }

-func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
+func (s *System) RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
 	if baseDir == "" {
 		baseDir = "/var/log/bee-sat"
 	}
@@ -181,11 +377,17 @@ func (s *System) RunStorageAcceptancePack(baseDir string) (string, error) {
 	}

 	for index, devPath := range devices {
+		if ctx.Err() != nil {
+			break
+		}
 		prefix := fmt.Sprintf("%02d-%s", index+1, filepath.Base(devPath))
 		commands := storageSATCommands(devPath)
 		for cmdIndex, job := range commands {
+			if ctx.Err() != nil {
+				break
+			}
 			name := fmt.Sprintf("%s-%02d-%s.log", prefix, cmdIndex+1, job.name)
-			out, err := runSATCommand(verboseLog, job.name, job.cmd)
+			out, err := runSATCommandCtx(ctx, verboseLog, job.name, job.cmd, nil, logFunc)
 			if writeErr := os.WriteFile(filepath.Join(runDir, name), out, 0644); writeErr != nil {
 				return "", writeErr
 			}
@@ -223,83 +425,39 @@ type satStats struct {
 }

 func nvidiaSATJobs() []satJob {
-	seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
-	sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
 	return []satJob{
 		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
 		{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
 		{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
-		{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
+		{name: "05-bee-gpu-burn.log", cmd: []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}},
 	}
 }

-func runAcceptancePack(baseDir, prefix string, jobs []satJob) (string, error) {
-	if baseDir == "" {
-		baseDir = "/var/log/bee-sat"
+func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
+	if diagLevel < 1 || diagLevel > 4 {
+		diagLevel = 3
 	}
-	ts := time.Now().UTC().Format("20060102-150405")
-	runDir := filepath.Join(baseDir, prefix+"-"+ts)
-	if err := os.MkdirAll(runDir, 0755); err != nil {
-		return "", err
-	}
-	verboseLog := filepath.Join(runDir, "verbose.log")
-
-	var summary strings.Builder
-	stats := satStats{}
-	fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
-	for _, job := range jobs {
-		cmd := make([]string, 0, len(job.cmd))
-		for _, arg := range job.cmd {
-			cmd = append(cmd, strings.ReplaceAll(arg, "{{run_dir}}", runDir))
-		}
-		out, err := runSATCommand(verboseLog, job.name, cmd)
-		if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
-			return "", writeErr
-		}
-		status, rc := classifySATResult(job.name, out, err)
-		stats.Add(status)
-		key := strings.TrimSuffix(strings.TrimPrefix(job.name, "0"), ".log")
-		fmt.Fprintf(&summary, "%s_rc=%d\n", key, rc)
-		fmt.Fprintf(&summary, "%s_status=%s\n", key, status)
-	}
-	writeSATStats(&summary, stats)
-	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
-		return "", err
-	}
-
-	archive := filepath.Join(baseDir, prefix+"-"+ts+".tar.gz")
-	if err := createTarGz(archive, runDir); err != nil {
-		return "", err
-	}
-	return archive, nil
-}
-
-func nvidiaSATJobsWithOptions(durationSec, sizeMB int, gpuIndices []int) []satJob {
-	var env []string
+	diagArgs := []string{"dcgmi", "diag", "-r", strconv.Itoa(diagLevel)}
 	if len(gpuIndices) > 0 {
 		ids := make([]string, len(gpuIndices))
 		for i, idx := range gpuIndices {
 			ids[i] = strconv.Itoa(idx)
 		}
-		env = []string{"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ",")}
+		diagArgs = append(diagArgs, "-i", strings.Join(ids, ","))
 	}
 	return []satJob{
 		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
 		{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
-		{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
-		{
-			name:       "05-bee-gpu-stress.log",
-			cmd:        []string{"bee-gpu-stress", "--seconds", strconv.Itoa(durationSec), "--size-mb", strconv.Itoa(sizeMB)},
-			env:        env,
-			collectGPU: true,
-			gpuIndices: gpuIndices,
-		},
+		{name: "04-dcgmi-diag.log", cmd: diagArgs},
 	}
 }

-func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob) (string, error) {
+func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
+	if ctx == nil {
+		ctx = context.Background()
+	}
 	if baseDir == "" {
 		baseDir = "/var/log/bee-sat"
 	}
@@ -326,9 +484,9 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 		var err error

 		if job.collectGPU {
-			out, err = runSATCommandWithMetrics(ctx, verboseLog, job.name, cmd, job.env, job.gpuIndices, runDir)
+			out, err = runSATCommandWithMetrics(ctx, verboseLog, job.name, cmd, job.env, job.gpuIndices, runDir, logFunc)
 		} else {
-			out, err = runSATCommandCtx(ctx, verboseLog, job.name, cmd, job.env)
+			out, err = runSATCommandCtx(ctx, verboseLog, job.name, cmd, job.env, logFunc)
 		}

 		if writeErr := os.WriteFile(filepath.Join(runDir, job.name), out, 0644); writeErr != nil {
@@ -352,13 +510,16 @@ func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []sa
 	return archive, nil
 }

-func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string, env []string) ([]byte, error) {
+func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string, env []string, logFunc func(string)) ([]byte, error) {
 	start := time.Now().UTC()
 	resolvedCmd, err := resolveSATCommand(cmd)
 	appendSATVerboseLog(verboseLog,
 		fmt.Sprintf("[%s] start %s", start.Format(time.RFC3339), name),
 		"cmd: "+strings.Join(resolvedCmd, " "),
 	)
+	if logFunc != nil {
+		logFunc(fmt.Sprintf("=== %s ===", name))
+	}
 	if err != nil {
 		appendSATVerboseLog(verboseLog,
 			fmt.Sprintf("[%s] finish %s", time.Now().UTC().Format(time.RFC3339), name),
@@ -373,7 +534,7 @@ func runSATCommandCtx(ctx context.Context, verboseLog, name string, cmd []string
 	if len(env) > 0 {
 		c.Env = append(os.Environ(), env...)
 	}
-	out, err := c.CombinedOutput()
+	out, err := streamExecOutput(c, logFunc)

 	rc := 0
 	if err != nil {
@@ -448,6 +609,11 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
 	}

 	text := strings.ToLower(string(out))
+	// No output at all means the tool failed to start (mlock limit, binary missing,
+	// etc.) — we cannot say anything about hardware health → UNSUPPORTED.
+	if len(strings.TrimSpace(text)) == 0 {
+		return "UNSUPPORTED", rc
+	}
 	if strings.Contains(text, "unsupported") ||
 		strings.Contains(text, "not supported") ||
 		strings.Contains(text, "invalid opcode") ||
@@ -456,19 +622,25 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
 		strings.Contains(text, "not available") ||
 		strings.Contains(text, "cuda_error_system_not_ready") ||
 		strings.Contains(text, "no such device") ||
+		// nvidia-smi on a machine with no NVIDIA GPU
+		strings.Contains(text, "couldn't communicate with the nvidia driver") ||
+		strings.Contains(text, "no nvidia gpu") ||
 		(strings.Contains(name, "self-test") && strings.Contains(text, "aborted")) {
 		return "UNSUPPORTED", rc
 	}
 	return "FAILED", rc
 }

-func runSATCommand(verboseLog, name string, cmd []string) ([]byte, error) {
+func runSATCommand(verboseLog, name string, cmd []string, logFunc func(string)) ([]byte, error) {
 	start := time.Now().UTC()
 	resolvedCmd, err := resolveSATCommand(cmd)
 	appendSATVerboseLog(verboseLog,
 		fmt.Sprintf("[%s] start %s", start.Format(time.RFC3339), name),
 		"cmd: "+strings.Join(resolvedCmd, " "),
 	)
+	if logFunc != nil {
+		logFunc(fmt.Sprintf("=== %s ===", name))
+	}
 	if err != nil {
 		appendSATVerboseLog(verboseLog,
 			fmt.Sprintf("[%s] finish %s", time.Now().UTC().Format(time.RFC3339), name),
@@ -479,7 +651,7 @@ func runSATCommand(verboseLog, name string, cmd []string) ([]byte, error) {
 		return []byte(err.Error() + "\n"), err
 	}

-	out, err := satExecCommand(resolvedCmd[0], resolvedCmd[1:]...).CombinedOutput()
+	out, err := streamExecOutput(satExecCommand(resolvedCmd[0], resolvedCmd[1:]...), logFunc)

 	rc := 0
 	if err != nil {
@@ -506,10 +678,23 @@ func resolveSATCommand(cmd []string) ([]string, error) {
 	if len(cmd) == 0 {
 		return nil, errors.New("empty SAT command")
 	}
-	if cmd[0] != "rocm-smi" {
-		return cmd, nil
+	switch cmd[0] {
+	case "rocm-smi":
+		return resolveROCmSMICommand(cmd[1:]...)
+	case "rvs":
+		return resolveRVSCommand(cmd[1:]...)
 	}
-	return resolveROCmSMICommand(cmd[1:]...)
+	return cmd, nil
+}
+
+func resolveRVSCommand(args ...string) ([]string, error) {
+	if path, err := satLookPath("rvs"); err == nil { // prefer the rvs binary found via satLookPath
+		return append([]string{path}, args...), nil
+	}
+	if paths := expandExistingPaths(rvsExecutableGlobs); len(paths) > 0 { // otherwise use the first existing glob match
+		return append([]string{paths[0]}, args...), nil
+	}
+	return nil, errors.New("rvs not found in PATH or under /opt/rocm")
+}

 func resolveROCmSMICommand(args ...string) ([]string, error) {
@@ -533,6 +718,20 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
 	return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
 }

+func ensureAMDRuntimeReady() error {
+	if _, err := os.Stat("/dev/kfd"); err == nil { // /dev/kfd exists: report the runtime as ready
+		return nil
+	}
+	if raw, err := os.ReadFile("/sys/module/amdgpu/initstate"); err == nil { // no /dev/kfd: inspect the amdgpu module state instead
+		state := strings.TrimSpace(string(raw))
+		if strings.EqualFold(state, "live") { // module reports "live": accept it even without /dev/kfd
+			return nil
+		}
+		return fmt.Errorf("AMD driver is present but not initialized: amdgpu initstate=%q", state)
+	}
+	return errors.New("AMD GPUs are present but the runtime is not initialized: /dev/kfd is missing and amdgpu is not loaded") // initstate unreadable as well
+}
+
 func rocmSMIExecutableCandidates() []string {
 	return expandExistingPaths(rocmSMIExecutableGlobs)
 }
@@ -581,7 +780,7 @@ func parseStorageDevices(raw string) []string {

 // runSATCommandWithMetrics runs a command while collecting GPU metrics in the background.
 // On completion it writes gpu-metrics.csv and gpu-metrics.html into runDir.
-func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd []string, env []string, gpuIndices []int, runDir string) ([]byte, error) {
+func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd []string, env []string, gpuIndices []int, runDir string, logFunc func(string)) ([]byte, error) {
 	stopCh := make(chan struct{})
 	doneCh := make(chan struct{})
 	var metricRows []GPUMetricRow
@@ -609,7 +808,7 @@ func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd
 		}
 	}()

-	out, err := runSATCommandCtx(ctx, verboseLog, name, cmd, env)
+	out, err := runSATCommandCtx(ctx, verboseLog, name, cmd, env, logFunc)

 	close(stopCh)
 	<-doneCh
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -2,10 +2,12 @@ package platform

 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
+	"sort"
 	"strconv"
 	"strings"
 	"sync"
@@ -128,26 +130,21 @@ func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanS
 		stats.OK++
 	}

-	// loadPhase runs bee-gpu-stress for durSec; sampler stamps phaseName on each row.
+	// loadPhase runs bee-gpu-burn for durSec; sampler stamps phaseName on each row.
 	loadPhase := func(phaseName, stepName string, durSec int) {
 		if ctx.Err() != nil {
 			return
 		}
 		setPhase(phaseName)
-		var env []string
-		if len(opts.GPUIndices) > 0 {
-			ids := make([]string, len(opts.GPUIndices))
-			for i, idx := range opts.GPUIndices {
-				ids[i] = strconv.Itoa(idx)
-			}
-			env = []string{"CUDA_VISIBLE_DEVICES=" + strings.Join(ids, ",")}
-		}
 		cmd := []string{
-			"bee-gpu-stress",
+			"bee-gpu-burn",
 			"--seconds", strconv.Itoa(durSec),
 			"--size-mb", strconv.Itoa(opts.SizeMB),
 		}
-		out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, env)
+		if len(opts.GPUIndices) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(dedupeSortedIndices(opts.GPUIndices)))
+		}
+		out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, nil, nil)
 		_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
 		if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
 			fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
@@ -304,41 +301,148 @@ func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
 // sampleFanSpeeds reads fan RPM values from ipmitool sdr.
 func sampleFanSpeeds() ([]FanReading, error) {
 	out, err := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
+	if err == nil { // ipmitool ran: use its readings when it reported at least one fan
+		if fans := parseFanSpeeds(string(out)); len(fans) > 0 {
+			return fans, nil
+		}
+	}
+	fans, sensorsErr := sampleFanSpeedsViaSensorsJSON() // ipmitool failed or listed no fans: fall back to "sensors -j"
+	if len(fans) > 0 {
+		return fans, nil
+	}
 	if err != nil {
 		return nil, err
 	}
-	return parseFanSpeeds(string(out)), nil
+	return nil, sensorsErr // ipmitool succeeded without fans: surface the sensors error (may be nil)
 }

 // parseFanSpeeds parses "ipmitool sdr type Fan" output.
-// Line format: "FAN1             | 2400.000   | RPM        | ok"
+// Handles two formats:
+//
+//	Old: "FAN1 | 2400.000 | RPM | ok"           (value in col[1], unit in col[2])
+//	New: "FAN1 | 41h | ok | 29.1 | 4340 RPM"   (value+unit combined in last col)
 func parseFanSpeeds(raw string) []FanReading {
 	var fans []FanReading
 	for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
 		parts := strings.Split(line, "|")
-		if len(parts) < 3 {
+		if len(parts) < 2 {
 			continue
 		}
-		unit := strings.TrimSpace(parts[2])
-		if !strings.EqualFold(unit, "RPM") {
+		name := strings.TrimSpace(parts[0])
+		// Take the first field holding an inline reading such as "4340 RPM"; a bare "RPM" unit column is skipped here.
+		rpmVal := 0.0
+		found := false
+		for _, p := range parts[1:] {
+			p = strings.TrimSpace(p)
+			if !strings.Contains(strings.ToUpper(p), "RPM") {
+				continue
+			}
+			if strings.EqualFold(p, "RPM") {
+				continue // unit-only column in old format; value is in previous field
+			}
+			val, err := parseFanRPMValue(p)
+			if err == nil { // fields that mention RPM but do not start with a number are skipped; keep scanning
+				rpmVal = val
+				found = true
+				break
+			}
+		}
+		// Old format: unit "RPM" is in col[2], value is in col[1]
+		if !found && len(parts) >= 3 && strings.EqualFold(strings.TrimSpace(parts[2]), "RPM") {
+			valStr := strings.TrimSpace(parts[1])
+			if !strings.EqualFold(valStr, "na") && !strings.EqualFold(valStr, "disabled") && valStr != "" {
+				if val, err := parseFanRPMValue(valStr); err == nil {
+					rpmVal = val
+					found = true
+				}
+			}
+		}
+		if !found {
 			continue
 		}
-		valStr := strings.TrimSpace(parts[1])
-		if strings.EqualFold(valStr, "na") || strings.EqualFold(valStr, "disabled") || valStr == "" {
-			continue
-		}
-		val, err := strconv.ParseFloat(valStr, 64)
-		if err != nil {
-			continue
-		}
-		fans = append(fans, FanReading{
-			Name: strings.TrimSpace(parts[0]),
-			RPM:  val,
-		})
+		fans = append(fans, FanReading{Name: name, RPM: rpmVal})
 	}
 	return fans
 }

+func parseFanRPMValue(raw string) (float64, error) {
+	tokens := strings.Fields(strings.ReplaceAll(raw, ",", "")) // drop commas; Fields already discards surrounding whitespace
+	if len(tokens) > 0 {
+		return strconv.ParseFloat(tokens[0], 64) // the leading token is the number; anything after it (e.g. "RPM") is ignored
+	}
+	return 0, strconv.ErrSyntax
+}
+
+func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
+	out, err := exec.Command("sensors", "-j").Output()
+	if err != nil || len(out) == 0 { // empty output with a nil error yields (nil, nil)
+		return nil, err
+	}
+	var doc map[string]map[string]any // chip name -> feature name -> decoded feature value
+	if err := json.Unmarshal(out, &doc); err != nil {
+		return nil, err
+	}
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips) // map iteration order is random; sort for a stable result order
+	var fans []FanReading
+	seen := map[string]struct{}{}
+	for _, chip := range chips {
+		features := doc[chip]
+		names := make([]string, 0, len(features))
+		for name := range features {
+			names = append(names, name)
+		}
+		sort.Strings(names)
+		for _, name := range names {
+			feature, ok := features[name].(map[string]any)
+			if !ok { // entries that are not JSON objects carry no readings
+				continue
+			}
+			rpm, ok := firstFanInputValue(feature)
+			if !ok || rpm <= 0 { // skip features without a positive fan reading
+				continue
+			}
+			label := strings.TrimSpace(name)
+			if chip != "" && !strings.Contains(strings.ToLower(label), strings.ToLower(chip)) { // prefix the chip unless the label already contains it
+				label = chip + " / " + label
+			}
+			if _, ok := seen[label]; ok { // keep only the first reading per label
+				continue
+			}
+			seen[label] = struct{}{}
+			fans = append(fans, FanReading{Name: label, RPM: rpm})
+		}
+	}
+	return fans, nil
+}
+
+func firstFanInputValue(feature map[string]any) (float64, bool) {
+	keys := make([]string, 0, len(feature))
+	for key := range feature {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys) // deterministic pick when several keys qualify
+	for _, key := range keys {
+		lower := strings.ToLower(key)
+		if !strings.Contains(lower, "fan") || !strings.HasSuffix(lower, "_input") { // only keys such as "fan1_input" qualify
+			continue
+		}
+		switch value := feature[key].(type) {
+		case float64:
+			return value, true
+		case string: // numeric strings are accepted too; unparsable ones fall through to the next key
+			f, err := strconv.ParseFloat(value, 64)
+			if err == nil {
+				return f, true
+			}
+		}
+	}
+	return 0, false // no qualifying key held a usable number
+}
+
 // sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
 func sampleCPUMaxTemp() float64 {
 	out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -0,0 +1,27 @@
+package platform
+
+import "testing"
+
+func TestParseFanSpeeds(t *testing.T) {
+	raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n" // separate unit column, inline "1800 RPM", and an "na" reading
+	got := parseFanSpeeds(raw)
+	if len(got) != 2 { // FAN3 has no usable value and must be dropped
+		t.Fatalf("fans=%d want 2 (%v)", len(got), got)
+	}
+	if got[0].Name != "FAN1" || got[0].RPM != 2400 {
+		t.Fatalf("fan0=%+v", got[0])
+	}
+	if got[1].Name != "FAN2" || got[1].RPM != 1800 {
+		t.Fatalf("fan1=%+v", got[1])
+	}
+}
+
+func TestFirstFanInputValue(t *testing.T) {
+	feature := map[string]any{
+		"fan1_input": 9200.0, // float64, the type encoding/json yields for JSON numbers decoded into any
+	}
+	got, ok := firstFanInputValue(feature)
+	if !ok || got != 9200 {
+		t.Fatalf("got=%v ok=%v", got, ok)
+	}
+}
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"testing"
 )

@@ -30,21 +31,59 @@ func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
 	if len(jobs) != 5 {
 		t.Fatalf("jobs=%d want 5", len(jobs))
 	}
-	if got := jobs[4].cmd[0]; got != "bee-gpu-stress" {
-		t.Fatalf("gpu stress command=%q want bee-gpu-stress", got)
+	if got := jobs[4].cmd[0]; got != "bee-gpu-burn" {
+		t.Fatalf("gpu stress command=%q want bee-gpu-burn", got)
 	}
 	if got := jobs[3].cmd[1]; got != "--output-file" {
 		t.Fatalf("bug report flag=%q want --output-file", got)
 	}
 }

-func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
-	t.Setenv("BEE_GPU_STRESS_SECONDS", "9")
-	t.Setenv("BEE_GPU_STRESS_SIZE_MB", "96")
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
+	t.Parallel()
+
+	cfg := amdStressRVSConfig(123)
+	if !strings.Contains(cfg, "module: gst") {
+		t.Fatalf("config missing gst module:\n%s", cfg)
+	}
+	if strings.Contains(cfg, "module: mem") {
+		t.Fatalf("config should not include mem module:\n%s", cfg)
+	}
+	if !strings.Contains(cfg, "copy_matrix: false") {
+		t.Fatalf("config should use copy_matrix=false:\n%s", cfg)
+	}
+	if strings.Count(cfg, "duration: 123000") != 1 { // 123 in, "123000" out: presumably seconds rendered as milliseconds — TODO confirm
+		t.Fatalf("config should apply duration once:\n%s", cfg)
+	}
+	for _, field := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
+		if !strings.Contains(cfg, field) {
+			t.Fatalf("config missing %s:\n%s", field, cfg)
+		}
+	}
+}
+
+func TestAMDStressJobsIncludeBandwidthAndGST(t *testing.T) {
+	t.Parallel()
+
+	jobs := amdStressJobs(300, "/tmp/test-amd-gst.conf")
+	if len(jobs) != 4 {
+		t.Fatalf("jobs=%d want 4", len(jobs))
+	}
+	if got := jobs[1].cmd[0]; got != "rocm-bandwidth-test" {
+		t.Fatalf("jobs[1]=%q want rocm-bandwidth-test", got)
+	}
+	if got := jobs[2].cmd[0]; got != "rvs" {
+		t.Fatalf("jobs[2]=%q want rvs", got)
+	}
+	if got := jobs[2].cmd[2]; got != "/tmp/test-amd-gst.conf" { // cmd[2] must be the config path handed to amdStressJobs
+		t.Fatalf("jobs[2] cfg=%q want /tmp/test-amd-gst.conf", got)
+	}
+}
+
+func TestNvidiaSATJobsUseBuiltinBurnDefaults(t *testing.T) {
 	jobs := nvidiaSATJobs()
 	got := jobs[4].cmd
-	want := []string{"bee-gpu-stress", "--seconds", "9", "--size-mb", "96"}
+	want := []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}
 	if len(got) != len(want) {
 		t.Fatalf("cmd len=%d want %d", len(got), len(want))
 	}
@@ -55,6 +94,74 @@ func TestNvidiaSATJobsUseEnvOverrides(t *testing.T) {
 	}
 }

+func TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices(t *testing.T) {
+	// Not parallel: this test swaps the package-level satExecCommand hook, which parallel tests would race on.
+
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec:       600,
+		Loader:            NvidiaStressLoaderJohn,
+		ExcludeGPUIndices: []int{1},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-john-gpu-stress", "--seconds", "600", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
+func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
+	// Not parallel: this test swaps the package-level satExecCommand hook, which parallel tests would race on.
+
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec: 120,
+		Loader:      NvidiaStressLoaderNCCL,
+		GPUIndices:  []int{2, 0},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-nccl-gpu-stress", "--seconds", "120", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
 func TestEnvIntFallback(t *testing.T) {
 	os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
 	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
@@ -80,8 +187,8 @@ func TestClassifySATResult(t *testing.T) {
 	}{
 		{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
 		{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
-		{name: "failed", job: "bee-gpu-stress", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
-		{name: "cuda not ready", job: "bee-gpu-stress", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
+		{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
 	}

 	for _, tt := range tests {
--- a/audit/internal/platform/services.go
+++ b/audit/internal/platform/services.go
@@ -17,6 +17,10 @@ func (s *System) ListBeeServices() ([]string, error) {
 		}
 		for _, match := range matches {
 			name := strings.TrimSuffix(filepath.Base(match), ".service")
+			// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
+			if strings.HasSuffix(name, "@") {
+				continue
+			}
 			if !seen[name] {
 				seen[name] = true
 				out = append(out, name)
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -8,6 +8,18 @@ type InterfaceInfo struct {
 	IPv4  []string
 }

+type NetworkInterfaceSnapshot struct {
+	Name string
+	Up   bool
+	IPv4 []string
+}
+
+type NetworkSnapshot struct {
+	Interfaces    []NetworkInterfaceSnapshot
+	DefaultRoutes []string
+	ResolvConf    string
+}
+
 type ServiceAction string

 const (
@@ -39,6 +51,20 @@ type ToolStatus struct {
 	OK   bool
 }

+const (
+	NvidiaStressLoaderBuiltin = "builtin" // presumably the in-image bee-gpu-burn loader — TODO confirm
+	NvidiaStressLoaderJohn    = "john"    // the tests expect this to select bee-john-gpu-stress
+	NvidiaStressLoaderNCCL    = "nccl"    // the tests expect this to select bee-nccl-gpu-stress
+)
+
+type NvidiaStressOptions struct {
+	DurationSec       int    // the tests expect this as the loader's --seconds value
+	SizeMB            int    // presumably the builtin loader's --size-mb value — TODO confirm
+	Loader            string // the tests pass NvidiaStressLoader* constants here
+	GPUIndices        []int  // explicit GPU selection; the tests expect it sorted in --devices
+	ExcludeGPUIndices []int  // the tests expect these to be removed from the GPUs nvidia-smi lists
+}
+
 func New() *System {
 	return &System{}
 }
--- a/audit/internal/tui/forms.go
+++ b/audit/internal/tui/forms.go
@@ -1,206 +0,0 @@
-package tui
-
-import (
-	"time"
-
-	"bee/audit/internal/platform"
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) updateStaticForm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "esc":
-		m.screen = screenNetwork
-		m.formFields = nil
-		m.formIndex = 0
-		return m, nil
-	case "up", "shift+tab":
-		if m.formIndex > 0 {
-			m.formIndex--
-		}
-	case "down", "tab":
-		if m.formIndex < len(m.formFields)-1 {
-			m.formIndex++
-		}
-	case "enter":
-		if m.formIndex < len(m.formFields)-1 {
-			m.formIndex++
-			return m, nil
-		}
-		cfg := m.app.ParseStaticIPv4Config(m.selectedIface, []string{
-			m.formFields[0].Value,
-			m.formFields[1].Value,
-			m.formFields[2].Value,
-			m.formFields[3].Value,
-		})
-		m.busy = true
-		m.busyTitle = "Static IPv4: " + m.selectedIface
-		return m, func() tea.Msg {
-			result, err := m.app.SetStaticIPv4Result(cfg)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case "backspace":
-		field := &m.formFields[m.formIndex]
-		if len(field.Value) > 0 {
-			field.Value = field.Value[:len(field.Value)-1]
-		}
-	default:
-		if msg.Type == tea.KeyRunes && len(msg.Runes) > 0 {
-			m.formFields[m.formIndex].Value += string(msg.Runes)
-		}
-	}
-	return m, nil
-}
-
-func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "left", "up", "tab":
-		if m.cursor > 0 {
-			m.cursor--
-		}
-	case "right", "down":
-		if m.cursor < 1 {
-			m.cursor++
-		}
-	case "esc":
-		m.screen = m.confirmCancelTarget()
-		m.cursor = 0
-		m.pendingAction = actionNone
-		return m, nil
-	case "enter":
-		if m.cursor == 1 { // Cancel
-			m.screen = m.confirmCancelTarget()
-			m.cursor = 0
-			m.pendingAction = actionNone
-			return m, nil
-		}
-		m.busy = true
-		switch m.pendingAction {
-		case actionExportBundle:
-			m.busyTitle = "Export support bundle"
-			target := *m.selectedTarget
-			return m, func() tea.Msg {
-				result, err := m.app.ExportSupportBundleResult(target)
-				return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
-			}
-		case actionRunAll:
-			return m.executeRunAll()
-		case actionRunMemorySAT:
-			m.busyTitle = "Memory test"
-			m.progressPrefix = "memory"
-			m.progressSince = time.Now()
-			m.progressLines = nil
-			since := m.progressSince
-			return m, tea.Batch(
-				func() tea.Msg {
-					result, err := m.app.RunMemoryAcceptancePackResult("")
-					return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
-				},
-				pollSATProgress("memory", since),
-			)
-		case actionRunStorageSAT:
-			m.busyTitle = "Storage test"
-			m.progressPrefix = "storage"
-			m.progressSince = time.Now()
-			m.progressLines = nil
-			since := m.progressSince
-			return m, tea.Batch(
-				func() tea.Msg {
-					result, err := m.app.RunStorageAcceptancePackResult("")
-					return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
-				},
-				pollSATProgress("storage", since),
-			)
-		case actionRunCPUSAT:
-			m.busyTitle = "CPU test"
-			m.progressPrefix = "cpu"
-			m.progressSince = time.Now()
-			m.progressLines = nil
-			since := m.progressSince
-			durationSec := hcCPUDurations[m.hcMode]
-			return m, tea.Batch(
-				func() tea.Msg {
-					result, err := m.app.RunCPUAcceptancePackResult("", durationSec)
-					return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
-				},
-				pollSATProgress("cpu", since),
-			)
-		case actionRunAMDGPUSAT:
-			m.busyTitle = "AMD GPU test"
-			m.progressPrefix = "gpu-amd"
-			m.progressSince = time.Now()
-			m.progressLines = nil
-			since := m.progressSince
-			return m, tea.Batch(
-				func() tea.Msg {
-					result, err := m.app.RunAMDAcceptancePackResult("")
-					return resultMsg{title: result.Title, body: result.Body, err: err, back: screenHealthCheck}
-				},
-				pollSATProgress("gpu-amd", since),
-			)
-		case actionRunFanStress:
-			return m.startGPUStressTest()
-		}
-	case "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) confirmCancelTarget() screen {
-	switch m.pendingAction {
-	case actionExportBundle:
-		return screenExportTargets
-	case actionRunAll, actionRunMemorySAT, actionRunStorageSAT, actionRunCPUSAT, actionRunAMDGPUSAT:
-		return screenHealthCheck
-	case actionRunFanStress:
-		return screenBurnInTests
-	default:
-		return screenMain
-	}
-}
-
-// hcFanStressOpts builds FanStressOptions for the selected mode, auto-detecting all GPUs.
-func hcFanStressOpts(hcMode int, application interface {
-	ListNvidiaGPUs() ([]platform.NvidiaGPU, error)
-}) platform.FanStressOptions {
-	// Phase durations per mode: [baseline, load1, pause, load2]
-	type durations struct{ baseline, load1, pause, load2 int }
-	modes := [3]durations{
-		{30, 120, 30, 120},  // Quick:    ~5 min total
-		{60, 300, 60, 300},  // Standard: ~12 min total
-		{60, 600, 120, 600}, // Express:  ~24 min total
-	}
-	if hcMode < 0 || hcMode >= len(modes) {
-		hcMode = 0
-	}
-	d := modes[hcMode]
-
-	// Use all detected NVIDIA GPUs.
-	var indices []int
-	if gpus, err := application.ListNvidiaGPUs(); err == nil {
-		for _, g := range gpus {
-			indices = append(indices, g.Index)
-		}
-	}
-
-	// Use nearly full GPU memory on the smallest GPU (leave 512 MB for driver overhead).
-	sizeMB := 64
-	if gpus, err := application.ListNvidiaGPUs(); err == nil {
-		for _, g := range gpus {
-			free := g.MemoryMB - 512
-			if free > 0 && (sizeMB == 64 || free < sizeMB) {
-				sizeMB = free
-			}
-		}
-	}
-
-	return platform.FanStressOptions{
-		BaselineSec:  d.baseline,
-		Phase1DurSec: d.load1,
-		PauseSec:     d.pause,
-		Phase2DurSec: d.load2,
-		SizeMB:       sizeMB,
-		GPUIndices:   indices,
-	}
-}
--- a/audit/internal/tui/messages.go
+++ b/audit/internal/tui/messages.go
@@ -1,57 +0,0 @@
-package tui
-
-import (
-	"bee/audit/internal/app"
-	"bee/audit/internal/platform"
-)
-
-type resultMsg struct {
-	title string
-	body  string
-	err   error
-	back  screen
-}
-
-type servicesMsg struct {
-	services []string
-	err      error
-}
-
-type interfacesMsg struct {
-	ifaces []platform.InterfaceInfo
-	err    error
-}
-
-type exportTargetsMsg struct {
-	targets []platform.RemovableTarget
-	err     error
-}
-
-type snapshotMsg struct {
-	banner string
-	panel  app.HardwarePanelData
-}
-
-type nvidiaGPUsMsg struct {
-	gpus []platform.NvidiaGPU
-	err  error
-}
-
-type nvtopClosedMsg struct{}
-
-type nvidiaSATDoneMsg struct {
-	title string
-	body  string
-	err   error
-}
-
-type gpuStressDoneMsg struct {
-	title string
-	body  string
-	err   error
-}
-
-type gpuLiveTickMsg struct {
-	rows    []platform.GPUMetricRow
-	indices []int
-}
--- a/audit/internal/tui/sat_progress.go
+++ b/audit/internal/tui/sat_progress.go
@@ -1,131 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-	"sort"
-	"strconv"
-	"strings"
-	"time"
-
-	"bee/audit/internal/app"
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-type satProgressMsg struct {
-	lines []string
-}
-
-// pollSATProgress returns a Cmd that waits 300ms then reads the latest verbose.log
-// for the given SAT prefix and returns parsed step progress lines.
-func pollSATProgress(prefix string, since time.Time) tea.Cmd {
-	return tea.Tick(300*time.Millisecond, func(_ time.Time) tea.Msg {
-		return satProgressMsg{lines: readSATProgressLines(prefix, since)}
-	})
-}
-
-func readSATProgressLines(prefix string, since time.Time) []string {
-	pattern := filepath.Join(app.DefaultSATBaseDir, prefix+"-*/verbose.log")
-	matches, err := filepath.Glob(pattern)
-	if err != nil || len(matches) == 0 {
-		return nil
-	}
-	sort.Strings(matches)
-	// Find the latest file created at or after (since - 5s) to account for clock skew.
-	cutoff := since.Add(-5 * time.Second)
-	candidate := ""
-	for _, m := range matches {
-		info, statErr := os.Stat(m)
-		if statErr == nil && info.ModTime().After(cutoff) {
-			candidate = m
-		}
-	}
-	if candidate == "" {
-		return nil
-	}
-	raw, err := os.ReadFile(candidate)
-	if err != nil {
-		return nil
-	}
-	return parseSATVerboseProgress(string(raw))
-}
-
-// parseSATVerboseProgress parses verbose.log content and returns display lines like:
-//
-//	"PASS  lscpu (234ms)"
-//	"FAIL  stress-ng (60.0s)"
-//	"...   sensors-after"
-func parseSATVerboseProgress(content string) []string {
-	type step struct {
-		name       string
-		rc         int
-		durationMs int
-		done       bool
-	}
-
-	lines := strings.Split(content, "\n")
-	var steps []step
-	stepIdx := map[string]int{}
-
-	for i, line := range lines {
-		line = strings.TrimSpace(line)
-		if idx := strings.Index(line, "] start "); idx >= 0 {
-			name := strings.TrimSpace(line[idx+len("] start "):])
-			if _, exists := stepIdx[name]; !exists {
-				stepIdx[name] = len(steps)
-				steps = append(steps, step{name: name})
-			}
-		} else if idx := strings.Index(line, "] finish "); idx >= 0 {
-			name := strings.TrimSpace(line[idx+len("] finish "):])
-			si, exists := stepIdx[name]
-			if !exists {
-				continue
-			}
-			steps[si].done = true
-			for j := i + 1; j < len(lines) && j <= i+3; j++ {
-				l := strings.TrimSpace(lines[j])
-				if strings.HasPrefix(l, "rc: ") {
-					steps[si].rc, _ = strconv.Atoi(strings.TrimPrefix(l, "rc: "))
-				} else if strings.HasPrefix(l, "duration_ms: ") {
-					steps[si].durationMs, _ = strconv.Atoi(strings.TrimPrefix(l, "duration_ms: "))
-				}
-			}
-		}
-	}
-
-	var result []string
-	for _, s := range steps {
-		display := cleanSATStepName(s.name)
-		if s.done {
-			status := "PASS"
-			if s.rc != 0 {
-				status = "FAIL"
-			}
-			result = append(result, fmt.Sprintf("%-4s  %s (%s)", status, display, fmtDurMs(s.durationMs)))
-		} else {
-			result = append(result, fmt.Sprintf("...   %s", display))
-		}
-	}
-	return result
-}
-
-// cleanSATStepName strips leading digits and dash: "01-lscpu.log" → "lscpu".
-func cleanSATStepName(name string) string {
-	name = strings.TrimSuffix(name, ".log")
-	i := 0
-	for i < len(name) && name[i] >= '0' && name[i] <= '9' {
-		i++
-	}
-	if i < len(name) && name[i] == '-' {
-		name = name[i+1:]
-	}
-	return name
-}
-
-func fmtDurMs(ms int) string {
-	if ms < 1000 {
-		return fmt.Sprintf("%dms", ms)
-	}
-	return fmt.Sprintf("%.1fs", float64(ms)/1000)
-}
--- a/audit/internal/tui/screen_burn_in.go
+++ b/audit/internal/tui/screen_burn_in.go
@@ -1,117 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"strings"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-const (
-	burnCurGPUStress = 0
-	burnCurModeQuick = 1
-	burnCurModeStd   = 2
-	burnCurModeExpr  = 3
-	burnCurRun       = 4
-	burnCurTotal     = 5
-)
-
-func (m model) enterBurnInTests() (tea.Model, tea.Cmd) {
-	m.screen = screenBurnInTests
-	m.cursor = 0
-	if !m.burnInitialized {
-		m.burnMode = 0
-		m.burnCursor = 0
-		m.burnInitialized = true
-	}
-	return m, nil
-}
-
-func (m model) updateBurnInTests(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "up", "k":
-		if m.burnCursor > 0 {
-			m.burnCursor--
-		}
-	case "down", "j":
-		if m.burnCursor < burnCurTotal-1 {
-			m.burnCursor++
-		}
-	case " ":
-		switch m.burnCursor {
-		case burnCurModeQuick, burnCurModeStd, burnCurModeExpr:
-			m.burnMode = m.burnCursor - burnCurModeQuick
-		}
-	case "enter":
-		switch m.burnCursor {
-		case burnCurGPUStress, burnCurRun:
-			return m.burnRunSelected()
-		case burnCurModeQuick, burnCurModeStd, burnCurModeExpr:
-			m.burnMode = m.burnCursor - burnCurModeQuick
-		}
-	case "f", "F", "r", "R":
-		return m.burnRunSelected()
-	case "1":
-		m.burnMode = 0
-	case "2":
-		m.burnMode = 1
-	case "3":
-		m.burnMode = 2
-	case "esc":
-		m.screen = screenMain
-		m.cursor = 1
-	case "q", "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) burnRunSelected() (tea.Model, tea.Cmd) {
-	return m.hcRunFanStress()
-}
-
-func renderBurnInTests(m model) string {
-	var b strings.Builder
-
-	fmt.Fprintln(&b, "BURN-IN TESTS")
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "  Stress tests:")
-	fmt.Fprintln(&b)
-
-	pfx := "  "
-	if m.burnCursor == burnCurGPUStress {
-		pfx = "> "
-	}
-	fmt.Fprintf(&b, "%s[ GPU PLATFORM STRESS TEST [F] ]   (thermal cycling, fan lag, throttle check)\n", pfx)
-
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "  Mode:")
-	modes := []struct{ label, key string }{
-		{"Quick", "1"},
-		{"Standard", "2"},
-		{"Express", "3"},
-	}
-	for i, mode := range modes {
-		pfx := "  "
-		if m.burnCursor == burnCurModeQuick+i {
-			pfx = "> "
-		}
-		radio := "( )"
-		if m.burnMode == i {
-			radio = "(*)"
-		}
-		fmt.Fprintf(&b, "%s%s  %-10s  [%s]\n", pfx, radio, mode.label, mode.key)
-	}
-
-	fmt.Fprintln(&b)
-	pfx = "  "
-	if m.burnCursor == burnCurRun {
-		pfx = "> "
-	}
-	fmt.Fprintf(&b, "%s[ RUN SELECTED [R] ]\n", pfx)
-
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
-	fmt.Fprint(&b, "[↑↓] move  [space/enter] select  [1/2/3] mode  [R/F] run  [Esc] back")
-	return b.String()
-}
--- a/audit/internal/tui/screen_export.go
+++ b/audit/internal/tui/screen_export.go
@@ -1,19 +0,0 @@
-package tui
-
-import tea "github.com/charmbracelet/bubbletea"
-
-func (m model) handleExportTargetsMenu() (tea.Model, tea.Cmd) {
-	if len(m.targets) == 0 {
-		return m, resultCmd(
-			"Export support bundle",
-			"No writable removable filesystems found.\n\nRead-only or boot media are hidden from this list.",
-			nil,
-			screenMain,
-		)
-	}
-	target := m.targets[m.cursor]
-	m.selectedTarget = &target
-	m.pendingAction = actionExportBundle
-	m.screen = screenConfirm
-	return m, nil
-}
--- a/audit/internal/tui/screen_health_check.go
+++ b/audit/internal/tui/screen_health_check.go
@@ -1,383 +0,0 @@
-package tui
-
-import (
-	"context"
-	"fmt"
-	"strings"
-	"time"
-
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-// Component indices.
-const (
-	hcGPU     = 0
-	hcMemory  = 1
-	hcStorage = 2
-	hcCPU     = 3
-)
-
-// Cursor positions in Health Check screen.
-const (
-	hcCurGPU       = 0
-	hcCurMemory    = 1
-	hcCurStorage   = 2
-	hcCurCPU       = 3
-	hcCurSelectAll = 4
-	hcCurModeQuick = 5
-	hcCurModeStd   = 6
-	hcCurModeExpr  = 7
-	hcCurRunAll    = 8
-	hcCurTotal     = 9
-)
-
-// hcModeDurations maps mode index (0=Quick,1=Standard,2=Express) to GPU stress seconds.
-var hcModeDurations = [3]int{600, 3600, 28800}
-
-// hcCPUDurations maps mode index to CPU stress-ng seconds.
-var hcCPUDurations = [3]int{60, 300, 900}
-
-func (m model) enterHealthCheck() (tea.Model, tea.Cmd) {
-	m.screen = screenHealthCheck
-	if !m.hcInitialized {
-		m.hcSel = [4]bool{true, true, true, true}
-		m.hcMode = 0
-		m.hcCursor = 0
-		m.hcInitialized = true
-	}
-	return m, nil
-}
-
-func (m model) updateHealthCheck(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "up", "k":
-		if m.hcCursor > 0 {
-			m.hcCursor--
-		}
-	case "down", "j":
-		if m.hcCursor < hcCurTotal-1 {
-			m.hcCursor++
-		}
-	case " ":
-		switch m.hcCursor {
-		case hcCurGPU, hcCurMemory, hcCurStorage, hcCurCPU:
-			m.hcSel[m.hcCursor] = !m.hcSel[m.hcCursor]
-		case hcCurSelectAll:
-			allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
-			for i := range m.hcSel {
-				m.hcSel[i] = !allOn
-			}
-		case hcCurModeQuick, hcCurModeStd, hcCurModeExpr:
-			m.hcMode = m.hcCursor - hcCurModeQuick
-		}
-	case "enter":
-		switch m.hcCursor {
-		case hcCurGPU, hcCurMemory, hcCurStorage, hcCurCPU:
-			return m.hcRunSingle(m.hcCursor)
-		case hcCurSelectAll:
-			allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
-			for i := range m.hcSel {
-				m.hcSel[i] = !allOn
-			}
-		case hcCurModeQuick, hcCurModeStd, hcCurModeExpr:
-			m.hcMode = m.hcCursor - hcCurModeQuick
-		case hcCurRunAll:
-			return m.hcRunAll()
-		}
-	case "g", "G":
-		return m.hcRunSingle(hcGPU)
-	case "m", "M":
-		return m.hcRunSingle(hcMemory)
-	case "s", "S":
-		return m.hcRunSingle(hcStorage)
-	case "c", "C":
-		return m.hcRunSingle(hcCPU)
-	case "r", "R":
-		return m.hcRunAll()
-	case "a", "A":
-		allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
-		for i := range m.hcSel {
-			m.hcSel[i] = !allOn
-		}
-	case "1":
-		m.hcMode = 0
-	case "2":
-		m.hcMode = 1
-	case "3":
-		m.hcMode = 2
-	case "esc":
-		m.screen = screenMain
-		m.cursor = 0
-	case "q", "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) hcRunSingle(idx int) (tea.Model, tea.Cmd) {
-	switch idx {
-	case hcGPU:
-		if m.app.DetectGPUVendor() == "amd" {
-			m.pendingAction = actionRunAMDGPUSAT
-			m.screen = screenConfirm
-			m.cursor = 0
-			return m, nil
-		}
-		m.nvidiaDurIdx = m.hcMode
-		return m.enterNvidiaSATSetup()
-	case hcMemory:
-		m.pendingAction = actionRunMemorySAT
-		m.screen = screenConfirm
-		m.cursor = 0
-		return m, nil
-	case hcStorage:
-		m.pendingAction = actionRunStorageSAT
-		m.screen = screenConfirm
-		m.cursor = 0
-		return m, nil
-	case hcCPU:
-		m.pendingAction = actionRunCPUSAT
-		m.screen = screenConfirm
-		m.cursor = 0
-		return m, nil
-	}
-	return m, nil
-}
-
-func (m model) hcRunFanStress() (tea.Model, tea.Cmd) {
-	m.pendingAction = actionRunFanStress
-	m.screen = screenConfirm
-	m.cursor = 0
-	return m, nil
-}
-
-// startGPUStressTest launches the GPU Platform Stress Test with a live in-TUI chart.
-func (m model) startGPUStressTest() (tea.Model, tea.Cmd) {
-	opts := hcFanStressOpts(m.burnMode, m.app)
-
-	ctx, cancel := context.WithCancel(context.Background())
-	m.gpuStressCancel = cancel
-	m.gpuStressAborted = false
-	m.gpuLiveRows = nil
-	m.gpuLiveIndices = opts.GPUIndices
-	m.gpuLiveStart = time.Now()
-	m.screen = screenGPUStressRunning
-	m.nvidiaSATCursor = 0
-
-	stressCmd := func() tea.Msg {
-		result, err := m.app.RunFanStressTestResult(ctx, opts)
-		return gpuStressDoneMsg{title: result.Title, body: result.Body, err: err}
-	}
-
-	return m, tea.Batch(stressCmd, pollGPULive(opts.GPUIndices))
-}
-
-// pollGPULive samples nvidia-smi once after one second and returns a gpuLiveTickMsg.
-// The update handler reschedules it to achieve continuous 1s polling.
-func pollGPULive(indices []int) tea.Cmd {
-	return tea.Tick(time.Second, func(_ time.Time) tea.Msg {
-		rows, _ := platform.SampleGPUMetrics(indices)
-		return gpuLiveTickMsg{rows: rows, indices: indices}
-	})
-}
-
-// updateGPUStressRunning handles keys on the GPU stress running screen.
-func (m model) updateGPUStressRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "a", "A":
-		if m.gpuStressCancel != nil {
-			m.gpuStressCancel()
-			m.gpuStressCancel = nil
-		}
-		m.gpuStressAborted = true
-		m.screen = screenBurnInTests
-		m.burnCursor = burnCurGPUStress
-		m.cursor = 0
-	case "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func renderGPUStressRunning(m model) string {
-	var b strings.Builder
-	fmt.Fprintln(&b, "GPU PLATFORM STRESS TEST")
-	fmt.Fprintln(&b)
-	if len(m.gpuLiveRows) == 0 {
-		fmt.Fprintln(&b, "Collecting metrics...")
-	} else {
-		chartWidth := m.width - 8
-		if chartWidth < 40 {
-			chartWidth = 70
-		}
-		b.WriteString(platform.RenderGPULiveChart(m.gpuLiveRows, chartWidth))
-	}
-	fmt.Fprintln(&b)
-	b.WriteString("[a] Abort test  [ctrl+c] quit")
-	return b.String()
-}
-
-func (m model) hcRunAll() (tea.Model, tea.Cmd) {
-	for _, sel := range m.hcSel {
-		if sel {
-			m.pendingAction = actionRunAll
-			m.screen = screenConfirm
-			m.cursor = 0
-			return m, nil
-		}
-	}
-	return m, nil
-}
-
-func (m model) executeRunAll() (tea.Model, tea.Cmd) {
-	durationSec := hcModeDurations[m.hcMode]
-	durationIdx := m.hcMode
-	sel := m.hcSel
-	app := m.app
-	m.busy = true
-	m.busyTitle = "Health Check"
-	return m, func() tea.Msg {
-		var parts []string
-		if sel[hcGPU] {
-			vendor := app.DetectGPUVendor()
-			if vendor == "amd" {
-				r, err := app.RunAMDAcceptancePackResult("")
-				body := r.Body
-				if err != nil {
-					body += "\nERROR: " + err.Error()
-				}
-				parts = append(parts, "=== GPU (AMD) ===\n"+body)
-			} else {
-				gpus, err := app.ListNvidiaGPUs()
-				if err != nil || len(gpus) == 0 {
-					parts = append(parts, "=== GPU ===\nNo NVIDIA GPUs detected or driver not loaded.")
-				} else {
-					var indices []int
-					sizeMB := 0
-					for _, g := range gpus {
-						indices = append(indices, g.Index)
-						if sizeMB == 0 || g.MemoryMB < sizeMB {
-							sizeMB = g.MemoryMB
-						}
-					}
-					if sizeMB == 0 {
-						sizeMB = 64
-					}
-					r, err := app.RunNvidiaAcceptancePackWithOptions(context.Background(), "", durationSec, sizeMB, indices)
-					body := r.Body
-					if err != nil {
-						body += "\nERROR: " + err.Error()
-					}
-					parts = append(parts, "=== GPU ===\n"+body)
-				}
-			}
-		}
-		if sel[hcMemory] {
-			r, err := app.RunMemoryAcceptancePackResult("")
-			body := r.Body
-			if err != nil {
-				body += "\nERROR: " + err.Error()
-			}
-			parts = append(parts, "=== MEMORY ===\n"+body)
-		}
-		if sel[hcStorage] {
-			r, err := app.RunStorageAcceptancePackResult("")
-			body := r.Body
-			if err != nil {
-				body += "\nERROR: " + err.Error()
-			}
-			parts = append(parts, "=== STORAGE ===\n"+body)
-		}
-		if sel[hcCPU] {
-			cpuDur := hcCPUDurations[durationIdx]
-			r, err := app.RunCPUAcceptancePackResult("", cpuDur)
-			body := r.Body
-			if err != nil {
-				body += "\nERROR: " + err.Error()
-			}
-			parts = append(parts, "=== CPU ===\n"+body)
-		}
-		combined := strings.Join(parts, "\n\n")
-		if combined == "" {
-			combined = "No components selected."
-		}
-		return resultMsg{title: "Health Check", body: combined, back: screenHealthCheck}
-	}
-}
-
-func renderHealthCheck(m model) string {
-	var b strings.Builder
-
-	fmt.Fprintln(&b, "HEALTH CHECK")
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "  Diagnostics:")
-	fmt.Fprintln(&b)
-
-	type comp struct{ name, desc, key string }
-	comps := []comp{
-		{"GPU", "nvidia/amd auto-detect", "G"},
-		{"MEMORY", "memtester", "M"},
-		{"STORAGE", "smartctl + NVMe self-test", "S"},
-		{"CPU", "audit diagnostics", "C"},
-	}
-	for i, c := range comps {
-		pfx := "  "
-		if m.hcCursor == i {
-			pfx = "> "
-		}
-		ch := "[ ]"
-		if m.hcSel[i] {
-			ch = "[x]"
-		}
-		fmt.Fprintf(&b, "%s%s  %-8s  %-28s [%s]\n", pfx, ch, c.name, c.desc, c.key)
-	}
-
-	fmt.Fprintln(&b, "  ─────────────────────────────────────────────────")
-	{
-		pfx := "  "
-		if m.hcCursor == hcCurSelectAll {
-			pfx = "> "
-		}
-		allOn := m.hcSel[0] && m.hcSel[1] && m.hcSel[2] && m.hcSel[3]
-		ch := "[ ]"
-		if allOn {
-			ch = "[x]"
-		}
-		fmt.Fprintf(&b, "%s%s  Select / Deselect All                        [A]\n", pfx, ch)
-	}
-
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "  Mode:")
-	modes := []struct{ label, key string }{
-		{"Quick", "1"},
-		{"Standard", "2"},
-		{"Express", "3"},
-	}
-	for i, mode := range modes {
-		pfx := "  "
-		if m.hcCursor == hcCurModeQuick+i {
-			pfx = "> "
-		}
-		radio := "( )"
-		if m.hcMode == i {
-			radio = "(*)"
-		}
-		fmt.Fprintf(&b, "%s%s  %-10s  [%s]\n", pfx, radio, mode.label, mode.key)
-	}
-
-	fmt.Fprintln(&b)
-	{
-		pfx := "  "
-		if m.hcCursor == hcCurRunAll {
-			pfx = "> "
-		}
-		fmt.Fprintf(&b, "%s[ RUN ALL [R] ]\n", pfx)
-	}
-
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "─────────────────────────────────────────────────────────────────")
-	fmt.Fprint(&b, "[↑↓] move  [space/enter] toggle  [letter] single test  [R] run all  [Esc] back")
-	return b.String()
-}
--- a/audit/internal/tui/screen_main.go
+++ b/audit/internal/tui/screen_main.go
@@ -1,29 +0,0 @@
-package tui
-
-import (
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
-	switch m.cursor {
-	case 0: // Health Check
-		return m.enterHealthCheck()
-	case 1: // Burn-in tests
-		return m.enterBurnInTests()
-	case 2: // Export support bundle
-		m.pendingAction = actionExportBundle
-		m.busy = true
-		m.busyTitle = "Export support bundle"
-		return m, func() tea.Msg {
-			targets, err := m.app.ListRemovableTargets()
-			return exportTargetsMsg{targets: targets, err: err}
-		}
-	case 3: // Settings
-		m.screen = screenSettings
-		m.cursor = 0
-		return m, nil
-	case 4: // Exit
-		return m, tea.Quit
-	}
-	return m, nil
-}
--- a/audit/internal/tui/screen_network.go
+++ b/audit/internal/tui/screen_network.go
@@ -1,76 +0,0 @@
-package tui
-
-import (
-	"strings"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleNetworkMenu() (tea.Model, tea.Cmd) {
-	switch m.cursor {
-	case 0:
-		m.busy = true
-		m.busyTitle = "Network status"
-		return m, func() tea.Msg {
-			result, err := m.app.NetworkStatus()
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case 1:
-		m.busy = true
-		m.busyTitle = "DHCP all interfaces"
-		return m, func() tea.Msg {
-			result, err := m.app.DHCPAllResult()
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case 2:
-		m.pendingAction = actionDHCPOne
-		m.busy = true
-		m.busyTitle = "Interfaces"
-		return m, func() tea.Msg {
-			ifaces, err := m.app.ListInterfaces()
-			return interfacesMsg{ifaces: ifaces, err: err}
-		}
-	case 3:
-		m.pendingAction = actionStaticIPv4
-		m.busy = true
-		m.busyTitle = "Interfaces"
-		return m, func() tea.Msg {
-			ifaces, err := m.app.ListInterfaces()
-			return interfacesMsg{ifaces: ifaces, err: err}
-		}
-	case 4:
-		m.screen = screenSettings
-		m.cursor = 0
-		return m, nil
-	}
-	return m, nil
-}
-
-func (m model) handleInterfacePickMenu() (tea.Model, tea.Cmd) {
-	if len(m.interfaces) == 0 {
-		return m, resultCmd("interfaces", "No physical interfaces found", nil, screenNetwork)
-	}
-	m.selectedIface = m.interfaces[m.cursor].Name
-	switch m.pendingAction {
-	case actionDHCPOne:
-		m.busy = true
-		m.busyTitle = "DHCP on " + m.selectedIface
-		return m, func() tea.Msg {
-			result, err := m.app.DHCPOneResult(m.selectedIface)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case actionStaticIPv4:
-		defaults := m.app.DefaultStaticIPv4FormFields(m.selectedIface)
-		m.formFields = []formField{
-			{Label: "IPv4 address", Value: defaults[0]},
-			{Label: "Prefix", Value: defaults[1]},
-			{Label: "Gateway", Value: strings.TrimSpace(defaults[2])},
-			{Label: "DNS (space-separated)", Value: defaults[3]},
-		}
-		m.formIndex = 0
-		m.screen = screenStaticForm
-		return m, nil
-	default:
-		return m, nil
-	}
-}
--- a/audit/internal/tui/screen_nvidia_sat.go
+++ b/audit/internal/tui/screen_nvidia_sat.go
@@ -1,218 +0,0 @@
-package tui
-
-import (
-	"context"
-	"fmt"
-	"strings"
-
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-var nvidiaDurationOptions = []struct {
-	label   string
-	seconds int
-}{
-	{"10 minutes", 600},
-	{"1 hour", 3600},
-	{"8 hours", 28800},
-	{"24 hours", 86400},
-}
-
-// enterNvidiaSATSetup resets the setup screen and starts loading GPU list.
-func (m model) enterNvidiaSATSetup() (tea.Model, tea.Cmd) {
-	m.screen = screenNvidiaSATSetup
-	m.nvidiaGPUs = nil
-	m.nvidiaGPUSel = nil
-	m.nvidiaDurIdx = 0
-	m.nvidiaSATCursor = 0
-	m.busy = true
-	m.busyTitle = "NVIDIA SAT"
-	return m, func() tea.Msg {
-		gpus, err := m.app.ListNvidiaGPUs()
-		return nvidiaGPUsMsg{gpus: gpus, err: err}
-	}
-}
-
-// handleNvidiaGPUsMsg processes the GPU list response.
-func (m model) handleNvidiaGPUsMsg(msg nvidiaGPUsMsg) (tea.Model, tea.Cmd) {
-	m.busy = false
-	m.busyTitle = ""
-	if msg.err != nil {
-		m.title = "NVIDIA SAT"
-		m.body = fmt.Sprintf("Failed to list GPUs: %v", msg.err)
-		m.prevScreen = screenHealthCheck
-		m.screen = screenOutput
-		return m, nil
-	}
-	m.nvidiaGPUs = msg.gpus
-	m.nvidiaGPUSel = make([]bool, len(msg.gpus))
-	for i := range m.nvidiaGPUSel {
-		m.nvidiaGPUSel[i] = true // all selected by default
-	}
-	m.nvidiaSATCursor = 0
-	return m, nil
-}
-
-// updateNvidiaSATSetup handles keys on the setup screen.
-func (m model) updateNvidiaSATSetup(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	numDur := len(nvidiaDurationOptions)
-	numGPU := len(m.nvidiaGPUs)
-	totalItems := numDur + numGPU + 2 // +2: Start, Cancel
-	switch msg.String() {
-	case "up", "k":
-		if m.nvidiaSATCursor > 0 {
-			m.nvidiaSATCursor--
-		}
-	case "down", "j":
-		if m.nvidiaSATCursor < totalItems-1 {
-			m.nvidiaSATCursor++
-		}
-	case " ":
-		switch {
-		case m.nvidiaSATCursor < numDur:
-			m.nvidiaDurIdx = m.nvidiaSATCursor
-		case m.nvidiaSATCursor < numDur+numGPU:
-			i := m.nvidiaSATCursor - numDur
-			m.nvidiaGPUSel[i] = !m.nvidiaGPUSel[i]
-		}
-	case "enter":
-		startIdx := numDur + numGPU
-		cancelIdx := startIdx + 1
-		switch {
-		case m.nvidiaSATCursor < numDur:
-			m.nvidiaDurIdx = m.nvidiaSATCursor
-		case m.nvidiaSATCursor < startIdx:
-			i := m.nvidiaSATCursor - numDur
-			m.nvidiaGPUSel[i] = !m.nvidiaGPUSel[i]
-		case m.nvidiaSATCursor == startIdx:
-			return m.startNvidiaSAT()
-		case m.nvidiaSATCursor == cancelIdx:
-			m.screen = screenHealthCheck
-			m.cursor = 0
-		}
-	case "esc":
-		m.screen = screenHealthCheck
-		m.cursor = 0
-	case "ctrl+c", "q":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-// startNvidiaSAT launches the NVIDIA acceptance pack.
-func (m model) startNvidiaSAT() (tea.Model, tea.Cmd) {
-	var selectedGPUs []platform.NvidiaGPU
-	for i, sel := range m.nvidiaGPUSel {
-		if sel {
-			selectedGPUs = append(selectedGPUs, m.nvidiaGPUs[i])
-		}
-	}
-	if len(selectedGPUs) == 0 {
-		selectedGPUs = m.nvidiaGPUs // fallback: use all if none explicitly selected
-	}
-
-	sizeMB := 0
-	for _, g := range selectedGPUs {
-		if sizeMB == 0 || g.MemoryMB < sizeMB {
-			sizeMB = g.MemoryMB
-		}
-	}
-	if sizeMB == 0 {
-		sizeMB = 64
-	}
-
-	var gpuIndices []int
-	for _, g := range selectedGPUs {
-		gpuIndices = append(gpuIndices, g.Index)
-	}
-
-	durationSec := nvidiaDurationOptions[m.nvidiaDurIdx].seconds
-
-	ctx, cancel := context.WithCancel(context.Background())
-	m.nvidiaSATCancel = cancel
-	m.nvidiaSATAborted = false
-	m.screen = screenNvidiaSATRunning
-	m.nvidiaSATCursor = 0
-
-	satCmd := func() tea.Msg {
-		result, err := m.app.RunNvidiaAcceptancePackWithOptions(ctx, "", durationSec, sizeMB, gpuIndices)
-		return nvidiaSATDoneMsg{title: result.Title, body: result.Body, err: err}
-	}
-
-	return m, satCmd
-}
-
-// updateNvidiaSATRunning handles keys on the running screen.
-func (m model) updateNvidiaSATRunning(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "a", "A":
-		if m.nvidiaSATCancel != nil {
-			m.nvidiaSATCancel()
-			m.nvidiaSATCancel = nil
-		}
-		m.nvidiaSATAborted = true
-		m.screen = screenHealthCheck
-		m.cursor = 0
-	case "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-// renderNvidiaSATSetup renders the setup screen.
-func renderNvidiaSATSetup(m model) string {
-	var b strings.Builder
-	fmt.Fprintln(&b, "NVIDIA SAT")
-	fmt.Fprintln(&b)
-	fmt.Fprintln(&b, "Duration:")
-	for i, opt := range nvidiaDurationOptions {
-		radio := "( )"
-		if i == m.nvidiaDurIdx {
-			radio = "(*)"
-		}
-		prefix := "  "
-		if m.nvidiaSATCursor == i {
-			prefix = "> "
-		}
-		fmt.Fprintf(&b, "%s%s %s\n", prefix, radio, opt.label)
-	}
-	fmt.Fprintln(&b)
-	if len(m.nvidiaGPUs) == 0 {
-		fmt.Fprintln(&b, "GPUs: (none detected)")
-	} else {
-		fmt.Fprintln(&b, "GPUs:")
-		for i, gpu := range m.nvidiaGPUs {
-			check := "[ ]"
-			if m.nvidiaGPUSel[i] {
-				check = "[x]"
-			}
-			prefix := "  "
-			if m.nvidiaSATCursor == len(nvidiaDurationOptions)+i {
-				prefix = "> "
-			}
-			fmt.Fprintf(&b, "%s%s %d: %s (%d MB)\n", prefix, check, gpu.Index, gpu.Name, gpu.MemoryMB)
-		}
-	}
-	fmt.Fprintln(&b)
-	startIdx := len(nvidiaDurationOptions) + len(m.nvidiaGPUs)
-	startPfx := "  "
-	cancelPfx := "  "
-	if m.nvidiaSATCursor == startIdx {
-		startPfx = "> "
-	}
-	if m.nvidiaSATCursor == startIdx+1 {
-		cancelPfx = "> "
-	}
-	fmt.Fprintf(&b, "%sStart\n", startPfx)
-	fmt.Fprintf(&b, "%sCancel\n", cancelPfx)
-	fmt.Fprintln(&b)
-	b.WriteString("[↑/↓] move  [space] toggle  [enter] select  [esc] cancel\n")
-	return b.String()
-}
-
-// renderNvidiaSATRunning renders the running screen.
-func renderNvidiaSATRunning() string {
-	return "NVIDIA SAT\n\nTest is running...\n\n[a] Abort test  [ctrl+c] quit\n"
-}
--- a/audit/internal/tui/screen_services.go
+++ b/audit/internal/tui/screen_services.go
@@ -1,47 +0,0 @@
-package tui
-
-import (
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleServicesMenu() (tea.Model, tea.Cmd) {
-	if len(m.services) == 0 {
-		return m, resultCmd("Services", "No bee-* services found.", nil, screenSettings)
-	}
-	m.selectedService = m.services[m.cursor]
-	m.screen = screenServiceAction
-	m.cursor = 0
-	return m, nil
-}
-
-func (m model) handleServiceActionMenu() (tea.Model, tea.Cmd) {
-	action := m.serviceMenu[m.cursor]
-	if action == "back" {
-		m.screen = screenServices
-		m.cursor = 0
-		return m, nil
-	}
-
-	m.busy = true
-	m.busyTitle = "service: " + m.selectedService
-	return m, func() tea.Msg {
-		switch action {
-		case "Status":
-			result, err := m.app.ServiceStatusResult(m.selectedService)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "Restart":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceRestart)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "Start":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStart)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "Stop":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStop)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		default:
-			return resultMsg{title: "Service", body: "Unknown action.", back: screenServiceAction}
-		}
-	}
-}
--- a/audit/internal/tui/screen_settings.go
+++ b/audit/internal/tui/screen_settings.go
@@ -1,64 +0,0 @@
-package tui
-
-import tea "github.com/charmbracelet/bubbletea"
-
-func (m model) handleSettingsMenu() (tea.Model, tea.Cmd) {
-	switch m.cursor {
-	case 0: // Network
-		m.screen = screenNetwork
-		m.cursor = 0
-		return m, nil
-	case 1: // Services
-		m.busy = true
-		m.busyTitle = "Services"
-		return m, func() tea.Msg {
-			services, err := m.app.ListBeeServices()
-			return servicesMsg{services: services, err: err}
-		}
-	case 2: // Re-run audit
-		m.busy = true
-		m.busyTitle = "Re-run audit"
-		runtimeMode := m.runtimeMode
-		return m, func() tea.Msg {
-			result, err := m.app.RunAuditNow(runtimeMode)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenSettings}
-		}
-	case 3: // Run self-check
-		m.busy = true
-		m.busyTitle = "Self-check"
-		return m, func() tea.Msg {
-			result, err := m.app.RunRuntimePreflightResult()
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenSettings}
-		}
-	case 4: // Runtime issues
-		m.busy = true
-		m.busyTitle = "Runtime issues"
-		return m, func() tea.Msg {
-			result := m.app.RuntimeHealthResult()
-			return resultMsg{title: result.Title, body: result.Body, back: screenSettings}
-		}
-	case 5: // Audit logs
-		m.busy = true
-		m.busyTitle = "Audit logs"
-		return m, func() tea.Msg {
-			result := m.app.AuditLogTailResult()
-			return resultMsg{title: result.Title, body: result.Body, back: screenSettings}
-		}
-	case 6: // Check tools
-		m.busy = true
-		m.busyTitle = "Check tools"
-		return m, func() tea.Msg {
-			result := m.app.ToolCheckResult([]string{
-				"dmidecode", "smartctl", "nvme", "ipmitool", "lspci",
-				"ethtool", "bee", "nvidia-smi", "bee-gpu-stress",
-				"memtester", "dhclient", "lsblk", "mount",
-			})
-			return resultMsg{title: result.Title, body: result.Body, back: screenSettings}
-		}
-	case 7: // Back
-		m.screen = screenMain
-		m.cursor = 0
-		return m, nil
-	}
-	return m, nil
-}
--- a/audit/internal/tui/snapshot.go
+++ b/audit/internal/tui/snapshot.go
@@ -1,30 +0,0 @@
-package tui
-
-import (
-	"bee/audit/internal/app"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) refreshSnapshotCmd() tea.Cmd {
-	if m.app == nil {
-		return nil
-	}
-	return func() tea.Msg {
-		return snapshotMsg{
-			banner: m.app.MainBanner(),
-			panel:  m.app.LoadHardwarePanel(),
-		}
-	}
-}
-
-func shouldRefreshSnapshot(prev, next model) bool {
-	return prev.screen != next.screen || prev.busy != next.busy
-}
-
-func emptySnapshot() snapshotMsg {
-	return snapshotMsg{
-		banner: "",
-		panel:  app.HardwarePanelData{},
-	}
-}
--- a/audit/internal/tui/tui_test.go
+++ b/audit/internal/tui/tui_test.go
@@ -1,724 +0,0 @@
-package tui
-
-import (
-	"strings"
-	"testing"
-
-	"bee/audit/internal/app"
-	"bee/audit/internal/platform"
-	"bee/audit/internal/runtimeenv"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func newTestModel() model {
-	return newModel(app.New(platform.New()), runtimeenv.ModeLocal)
-}
-
-func sendKey(t *testing.T, m model, key tea.KeyType) model {
-	t.Helper()
-
-	next, _ := m.Update(tea.KeyMsg{Type: key})
-	return next.(model)
-}
-
-func TestUpdateMainMenuCursorNavigation(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	m = sendKey(t, m, tea.KeyDown)
-	if m.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after down", m.cursor)
-	}
-
-	m = sendKey(t, m, tea.KeyDown)
-	if m.cursor != 2 {
-		t.Fatalf("cursor=%d want 2 after second down", m.cursor)
-	}
-
-	m = sendKey(t, m, tea.KeyUp)
-	if m.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after up", m.cursor)
-	}
-}
-
-func TestUpdateMainMenuEnterActions(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		cursor     int
-		wantScreen screen
-		wantBusy   bool
-		wantCmd    bool
-	}{
-		{name: "health_check", cursor: 0, wantScreen: screenHealthCheck, wantCmd: true},
-		{name: "burn_in_tests", cursor: 1, wantScreen: screenBurnInTests, wantCmd: true},
-		{name: "export", cursor: 2, wantScreen: screenMain, wantBusy: true, wantCmd: true},
-		{name: "settings", cursor: 3, wantScreen: screenSettings, wantCmd: true},
-		{name: "exit", cursor: 4, wantScreen: screenMain, wantCmd: true},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.cursor = test.cursor
-
-			next, cmd := m.Update(tea.KeyMsg{Type: tea.KeyEnter})
-			got := next.(model)
-
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.busy != test.wantBusy {
-				t.Fatalf("busy=%v want %v", got.busy, test.wantBusy)
-			}
-			if (cmd != nil) != test.wantCmd {
-				t.Fatalf("cmd present=%v want %v", cmd != nil, test.wantCmd)
-			}
-		})
-	}
-}
-
-func TestUpdateConfirmCancelViaKeys(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenConfirm
-	m.pendingAction = actionRunMemorySAT
-
-	next, _ := m.Update(tea.KeyMsg{Type: tea.KeyRight})
-	got := next.(model)
-	if got.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after right", got.cursor)
-	}
-
-	next, _ = got.Update(tea.KeyMsg{Type: tea.KeyEnter})
-	got = next.(model)
-	if got.screen != screenHealthCheck {
-		t.Fatalf("screen=%q want %q", got.screen, screenHealthCheck)
-	}
-	if got.cursor != 0 {
-		t.Fatalf("cursor=%d want 0 after cancel", got.cursor)
-	}
-}
-
-func TestMainMenuSimpleTransitions(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		cursor     int
-		wantScreen screen
-	}{
-		{name: "health_check", cursor: 0, wantScreen: screenHealthCheck},
-		{name: "burn_in_tests", cursor: 1, wantScreen: screenBurnInTests},
-		{name: "settings", cursor: 3, wantScreen: screenSettings},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.cursor = test.cursor
-
-			next, cmd := m.handleMainMenu()
-			got := next.(model)
-
-			if cmd != nil {
-				t.Fatalf("expected nil cmd for %s", test.name)
-			}
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.cursor != 0 {
-				t.Fatalf("cursor=%d want 0", got.cursor)
-			}
-		})
-	}
-}
-
-func TestMainMenuExportSetsBusy(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.cursor = 2 // Export support bundle
-
-	next, cmd := m.handleMainMenu()
-	got := next.(model)
-
-	if !got.busy {
-		t.Fatal("busy=false for export")
-	}
-	if cmd == nil {
-		t.Fatal("expected async cmd for export")
-	}
-}
-
-func TestMainViewRendersTwoColumns(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.cursor = 2
-
-	view := m.View()
-	for _, want := range []string{
-		"bee",
-		"Health Check",
-		"Burn-in tests",
-		"> Export support bundle",
-		"Settings",
-		"Exit",
-		"│",
-		"[↑↓] move",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestEscapeNavigation(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		screen     screen
-		wantScreen screen
-	}{
-		{name: "network to settings", screen: screenNetwork, wantScreen: screenSettings},
-		{name: "services to settings", screen: screenServices, wantScreen: screenSettings},
-		{name: "settings to main", screen: screenSettings, wantScreen: screenMain},
-		{name: "service action to services", screen: screenServiceAction, wantScreen: screenServices},
-		{name: "export targets to main", screen: screenExportTargets, wantScreen: screenMain},
-		{name: "interface pick to network", screen: screenInterfacePick, wantScreen: screenNetwork},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.screen = test.screen
-			m.cursor = 3
-
-			next, _ := m.updateKey(tea.KeyMsg{Type: tea.KeyEsc})
-			got := next.(model)
-
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.cursor != 0 {
-				t.Fatalf("cursor=%d want 0", got.cursor)
-			}
-		})
-	}
-}
-
-func TestHealthCheckEscReturnsToMain(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenHealthCheck
-	m.hcCursor = 3
-
-	next, _ := m.updateHealthCheck(tea.KeyMsg{Type: tea.KeyEsc})
-	got := next.(model)
-
-	if got.screen != screenMain {
-		t.Fatalf("screen=%q want %q", got.screen, screenMain)
-	}
-	if got.cursor != 0 {
-		t.Fatalf("cursor=%d want 0", got.cursor)
-	}
-}
-
-func TestOutputScreenReturnsToPreviousScreen(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenOutput
-	m.prevScreen = screenNetwork
-	m.title = "title"
-	m.body = "body"
-
-	next, _ := m.updateKey(tea.KeyMsg{Type: tea.KeyEnter})
-	got := next.(model)
-
-	if got.screen != screenNetwork {
-		t.Fatalf("screen=%q want %q", got.screen, screenNetwork)
-	}
-	if got.title != "" || got.body != "" {
-		t.Fatalf("expected output state cleared, got title=%q body=%q", got.title, got.body)
-	}
-}
-
-func TestHealthCheckGPUOpensNvidiaSATSetup(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenHealthCheck
-	m.hcInitialized = true
-	m.hcSel = [4]bool{true, true, true, true}
-
-	next, cmd := m.hcRunSingle(hcGPU)
-	got := next.(model)
-
-	if cmd == nil {
-		t.Fatal("expected non-nil cmd (GPU list loader)")
-	}
-	if got.screen != screenNvidiaSATSetup {
-		t.Fatalf("screen=%q want %q", got.screen, screenNvidiaSATSetup)
-	}
-
-	// esc from setup returns to health check
-	next, _ = got.updateNvidiaSATSetup(tea.KeyMsg{Type: tea.KeyEsc})
-	got = next.(model)
-	if got.screen != screenHealthCheck {
-		t.Fatalf("screen after esc=%q want %q", got.screen, screenHealthCheck)
-	}
-}
-
-func TestHealthCheckRunSingleMapsActions(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		idx  int
-		want actionKind
-	}{
-		{idx: hcMemory, want: actionRunMemorySAT},
-		{idx: hcStorage, want: actionRunStorageSAT},
-	}
-
-	for _, test := range tests {
-		m := newTestModel()
-		m.screen = screenHealthCheck
-		m.hcInitialized = true
-
-		next, _ := m.hcRunSingle(test.idx)
-		got := next.(model)
-		if got.pendingAction != test.want {
-			t.Fatalf("idx=%d pendingAction=%q want %q", test.idx, got.pendingAction, test.want)
-		}
-		if got.screen != screenConfirm {
-			t.Fatalf("idx=%d screen=%q want %q", test.idx, got.screen, screenConfirm)
-		}
-	}
-}
-
-func TestExportTargetSelectionOpensConfirm(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenExportTargets
-	m.targets = []platform.RemovableTarget{{Device: "/dev/sdb1", FSType: "vfat", Size: "16G"}}
-
-	next, cmd := m.handleExportTargetsMenu()
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.screen != screenConfirm {
-		t.Fatalf("screen=%q want %q", got.screen, screenConfirm)
-	}
-	if got.pendingAction != actionExportBundle {
-		t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionExportBundle)
-	}
-	if got.selectedTarget == nil || got.selectedTarget.Device != "/dev/sdb1" {
-		t.Fatalf("selectedTarget=%+v want /dev/sdb1", got.selectedTarget)
-	}
-}
-
-func TestInterfacePickStaticIPv4OpensForm(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.pendingAction = actionStaticIPv4
-	m.interfaces = []platform.InterfaceInfo{{Name: "eth0"}}
-
-	next, cmd := m.handleInterfacePickMenu()
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.screen != screenStaticForm {
-		t.Fatalf("screen=%q want %q", got.screen, screenStaticForm)
-	}
-	if got.selectedIface != "eth0" {
-		t.Fatalf("selectedIface=%q want eth0", got.selectedIface)
-	}
-	if len(got.formFields) != 4 {
-		t.Fatalf("len(formFields)=%d want 4", len(got.formFields))
-	}
-}
-
-func TestResultMsgUsesExplicitBackScreen(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenConfirm
-
-	next, _ := m.Update(resultMsg{title: "done", body: "ok", back: screenNetwork})
-	got := next.(model)
-
-	if got.screen != screenOutput {
-		t.Fatalf("screen=%q want %q", got.screen, screenOutput)
-	}
-	if got.prevScreen != screenNetwork {
-		t.Fatalf("prevScreen=%q want %q", got.prevScreen, screenNetwork)
-	}
-}
-
-func TestConfirmCancelTarget(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	m.pendingAction = actionExportBundle
-	if got := m.confirmCancelTarget(); got != screenExportTargets {
-		t.Fatalf("export cancel target=%q want %q", got, screenExportTargets)
-	}
-
-	m.pendingAction = actionRunAll
-	if got := m.confirmCancelTarget(); got != screenHealthCheck {
-		t.Fatalf("run all cancel target=%q want %q", got, screenHealthCheck)
-	}
-
-	m.pendingAction = actionRunMemorySAT
-	if got := m.confirmCancelTarget(); got != screenHealthCheck {
-		t.Fatalf("memory sat cancel target=%q want %q", got, screenHealthCheck)
-	}
-
-	m.pendingAction = actionRunStorageSAT
-	if got := m.confirmCancelTarget(); got != screenHealthCheck {
-		t.Fatalf("storage sat cancel target=%q want %q", got, screenHealthCheck)
-	}
-
-	m.pendingAction = actionRunFanStress
-	if got := m.confirmCancelTarget(); got != screenBurnInTests {
-		t.Fatalf("fan stress cancel target=%q want %q", got, screenBurnInTests)
-	}
-
-	m.pendingAction = actionNone
-	if got := m.confirmCancelTarget(); got != screenMain {
-		t.Fatalf("default cancel target=%q want %q", got, screenMain)
-	}
-}
-
-func TestViewBusyStateIsMinimal(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.busy = true
-
-	view := m.View()
-	want := "bee\n\nWorking...\n\n[ctrl+c] quit\n"
-	if view != want {
-		t.Fatalf("busy view mismatch\nwant:\n%s\ngot:\n%s", want, view)
-	}
-}
-
-func TestViewBusyStateUsesBusyTitle(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.busy = true
-	m.busyTitle = "Export support bundle"
-
-	view := m.View()
-
-	for _, want := range []string{
-		"Export support bundle",
-		"Working...",
-		"[ctrl+c] quit",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestBurnInTestsEscReturnsToMain(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenBurnInTests
-	m.burnCursor = 3
-
-	next, _ := m.updateBurnInTests(tea.KeyMsg{Type: tea.KeyEsc})
-	got := next.(model)
-
-	if got.screen != screenMain {
-		t.Fatalf("screen=%q want %q", got.screen, screenMain)
-	}
-	if got.cursor != 1 {
-		t.Fatalf("cursor=%d want 1", got.cursor)
-	}
-}
-
-func TestBurnInTestsRunOpensConfirm(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenBurnInTests
-	m.burnInitialized = true
-	m.burnMode = 2
-
-	next, _ := m.burnRunSelected()
-	got := next.(model)
-
-	if got.screen != screenConfirm {
-		t.Fatalf("screen=%q want %q", got.screen, screenConfirm)
-	}
-	if got.pendingAction != actionRunFanStress {
-		t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionRunFanStress)
-	}
-	if got.cursor != 0 {
-		t.Fatalf("cursor=%d want 0", got.cursor)
-	}
-}
-
-func TestViewBurnInTestsRendersGPUStressEntry(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenBurnInTests
-
-	view := m.View()
-
-	for _, want := range []string{
-		"BURN-IN TESTS",
-		"GPU PLATFORM STRESS TEST",
-		"Quick",
-		"Standard",
-		"Express",
-		"[ RUN SELECTED [R] ]",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestViewOutputScreenRendersBodyAndBackHint(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenOutput
-	m.title = "Run audit"
-	m.body = "audit output: /appdata/bee/export/bee-audit.json\n"
-
-	view := m.View()
-
-	for _, want := range []string{
-		"Run audit",
-		"audit output: /appdata/bee/export/bee-audit.json",
-		"[enter/esc] back  [ctrl+c] quit",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestViewRendersBannerModuleAboveScreenBody(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.banner = "System: Demo Server\nIP: 10.0.0.10"
-	m.width = 60
-
-	view := m.View()
-
-	for _, want := range []string{
-		"┌ MOTD ",
-		"System: Demo Server",
-		"IP: 10.0.0.10",
-		"Health Check",
-		"Export support bundle",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestSnapshotMsgUpdatesBannerAndPanel(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	next, cmd := m.Update(snapshotMsg{
-		banner: "System: Demo",
-		panel: app.HardwarePanelData{
-			Header: []string{"Demo header"},
-			Rows: []app.ComponentRow{
-				{Key: "CPU", Status: "PASS", Detail: "ok"},
-			},
-		},
-	})
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.banner != "System: Demo" {
-		t.Fatalf("banner=%q want %q", got.banner, "System: Demo")
-	}
-	if len(got.panel.Rows) != 1 || got.panel.Rows[0].Key != "CPU" {
-		t.Fatalf("panel rows=%+v", got.panel.Rows)
-	}
-}
-
-func TestViewExportTargetsRendersDeviceMetadata(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenExportTargets
-	m.targets = []platform.RemovableTarget{
-		{
-			Device:     "/dev/sdb1",
-			FSType:     "vfat",
-			Size:       "29G",
-			Label:      "BEEUSB",
-			Mountpoint: "/media/bee",
-		},
-	}
-
-	view := m.View()
-
-	for _, want := range []string{
-		"Export support bundle",
-		"Select writable removable filesystem (read-only/boot media hidden)",
-		"> /dev/sdb1 [vfat 29G] label=BEEUSB mounted=/media/bee",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestExportTargetsMsgEmptyShowsHiddenBootMediaHint(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.busy = true
-	m.busyTitle = "Export support bundle"
-
-	next, _ := m.Update(exportTargetsMsg{})
-	got := next.(model)
-
-	if got.screen != screenOutput {
-		t.Fatalf("screen=%q want %q", got.screen, screenOutput)
-	}
-	if got.title != "Export support bundle" {
-		t.Fatalf("title=%q want %q", got.title, "Export support bundle")
-	}
-	for _, want := range []string{
-		"No writable removable filesystems found.",
-		"Read-only or boot media are hidden from this list.",
-	} {
-		if !strings.Contains(got.body, want) {
-			t.Fatalf("body missing %q\nbody:\n%s", want, got.body)
-		}
-	}
-}
-
-func TestViewStaticFormRendersFields(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenStaticForm
-	m.selectedIface = "enp1s0"
-	m.formFields = []formField{
-		{Label: "Address", Value: "192.0.2.10/24"},
-		{Label: "Gateway", Value: "192.0.2.1"},
-		{Label: "DNS", Value: "1.1.1.1"},
-	}
-	m.formIndex = 1
-
-	view := m.View()
-
-	for _, want := range []string{
-		"Static IPv4: enp1s0",
-		"  Address: 192.0.2.10/24",
-		"> Gateway: 192.0.2.1",
-		"  DNS: 1.1.1.1",
-		"[tab/↑/↓] move  [enter] next/submit  [backspace] delete  [esc] cancel",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestViewConfirmScreenMatchesPendingExport(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenConfirm
-	m.pendingAction = actionExportBundle
-	m.selectedTarget = &platform.RemovableTarget{Device: "/dev/sdb1"}
-
-	view := m.View()
-
-	for _, want := range []string{
-		"Export support bundle",
-		"Copy support bundle to /dev/sdb1?",
-		"> Confirm",
-		"  Cancel",
-	} {
-		if !strings.Contains(view, want) {
-			t.Fatalf("view missing %q\nview:\n%s", want, view)
-		}
-	}
-}
-
-func TestResultMsgClearsBusyAndPendingAction(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.busy = true
-	m.busyTitle = "Export support bundle"
-	m.pendingAction = actionExportBundle
-	m.screen = screenConfirm
-
-	next, _ := m.Update(resultMsg{title: "Export support bundle", body: "done", back: screenMain})
-	got := next.(model)
-
-	if got.busy {
-		t.Fatal("busy=true want false")
-	}
-	if got.busyTitle != "" {
-		t.Fatalf("busyTitle=%q want empty", got.busyTitle)
-	}
-	if got.pendingAction != actionNone {
-		t.Fatalf("pendingAction=%q want empty", got.pendingAction)
-	}
-}
-
-func TestResultMsgErrorWithoutBodyFormatsCleanly(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	next, _ := m.Update(resultMsg{title: "Export support bundle", err: assertErr("boom"), back: screenMain})
-	got := next.(model)
-
-	if got.body != "ERROR: boom" {
-		t.Fatalf("body=%q want %q", got.body, "ERROR: boom")
-	}
-}
-
-type assertErr string
-
-func (e assertErr) Error() string { return string(e) }
--- a/audit/internal/tui/types.go
+++ b/audit/internal/tui/types.go
@@ -1,215 +0,0 @@
-package tui
-
-import (
-	"strings"
-	"time"
-
-	"bee/audit/internal/app"
-	"bee/audit/internal/platform"
-	"bee/audit/internal/runtimeenv"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-type screen string
-
-const (
-	screenMain             screen = "main"
-	screenHealthCheck      screen = "health_check"
-	screenBurnInTests      screen = "burn_in_tests"
-	screenSettings         screen = "settings"
-	screenNetwork          screen = "network"
-	screenInterfacePick    screen = "interface_pick"
-	screenServices         screen = "services"
-	screenServiceAction    screen = "service_action"
-	screenExportTargets    screen = "export_targets"
-	screenOutput           screen = "output"
-	screenStaticForm       screen = "static_form"
-	screenConfirm          screen = "confirm"
-	screenNvidiaSATSetup   screen = "nvidia_sat_setup"
-	screenNvidiaSATRunning screen = "nvidia_sat_running"
-	screenGPUStressRunning screen = "gpu_stress_running"
-)
-
-type actionKind string
-
-const (
-	actionNone          actionKind = ""
-	actionDHCPOne       actionKind = "dhcp_one"
-	actionStaticIPv4    actionKind = "static_ipv4"
-	actionExportBundle  actionKind = "export_bundle"
-	actionRunAll        actionKind = "run_all"
-	actionRunMemorySAT  actionKind = "run_memory_sat"
-	actionRunStorageSAT actionKind = "run_storage_sat"
-	actionRunCPUSAT     actionKind = "run_cpu_sat"
-	actionRunAMDGPUSAT  actionKind = "run_amd_gpu_sat"
-	actionRunFanStress  actionKind = "run_fan_stress"
-)
-
-type model struct {
-	app         *app.App
-	runtimeMode runtimeenv.Mode
-
-	screen       screen
-	prevScreen   screen
-	cursor       int
-	busy         bool
-	busyTitle    string
-	title        string
-	body         string
-	mainMenu     []string
-	settingsMenu []string
-	networkMenu  []string
-	serviceMenu  []string
-
-	services        []string
-	interfaces      []platform.InterfaceInfo
-	targets         []platform.RemovableTarget
-	selectedService string
-	selectedIface   string
-	selectedTarget  *platform.RemovableTarget
-	pendingAction   actionKind
-
-	formFields []formField
-	formIndex  int
-
-	// Hardware panel (right column)
-	panel       app.HardwarePanelData
-	panelFocus  bool
-	panelCursor int
-	banner      string
-
-	// Health Check screen
-	hcSel         [4]bool
-	hcMode        int
-	hcCursor      int
-	hcInitialized bool
-
-	// Burn-in tests screen
-	burnMode        int
-	burnCursor      int
-	burnInitialized bool
-
-	// NVIDIA SAT setup
-	nvidiaGPUs      []platform.NvidiaGPU
-	nvidiaGPUSel    []bool
-	nvidiaDurIdx    int
-	nvidiaSATCursor int
-
-	// NVIDIA SAT running
-	nvidiaSATCancel  func()
-	nvidiaSATAborted bool
-
-	// GPU Platform Stress Test running
-	gpuStressCancel  func()
-	gpuStressAborted bool
-	gpuLiveRows      []platform.GPUMetricRow
-	gpuLiveIndices   []int
-	gpuLiveStart     time.Time
-
-	// SAT verbose progress (CPU / Memory / Storage / AMD GPU)
-	progressLines  []string
-	progressPrefix string
-	progressSince  time.Time
-
-	// Terminal size
-	width int
-}
-
-type formField struct {
-	Label string
-	Value string
-}
-
-func Run(application *app.App, runtimeMode runtimeenv.Mode) error {
-	options := []tea.ProgramOption{}
-	if runtimeMode != runtimeenv.ModeLiveCD {
-		options = append(options, tea.WithAltScreen())
-	}
-	program := tea.NewProgram(newModel(application, runtimeMode), options...)
-	_, err := program.Run()
-	return err
-}
-
-func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
-	return model{
-		app:         application,
-		runtimeMode: runtimeMode,
-		screen:      screenMain,
-		mainMenu: []string{
-			"Health Check",
-			"Burn-in tests",
-			"Export support bundle",
-			"Settings",
-			"Exit",
-		},
-		settingsMenu: []string{
-			"Network",
-			"Services",
-			"Re-run audit",
-			"Run self-check",
-			"Runtime issues",
-			"Audit logs",
-			"Check tools",
-			"Back",
-		},
-		networkMenu: []string{
-			"Show status",
-			"DHCP on all interfaces",
-			"DHCP on one interface",
-			"Set static IPv4",
-			"Back",
-		},
-		serviceMenu: []string{
-			"Status",
-			"Restart",
-			"Start",
-			"Stop",
-			"Back",
-		},
-	}
-}
-
-func (m model) Init() tea.Cmd {
-	return m.refreshSnapshotCmd()
-}
-
-func (m model) confirmBody() (string, string) {
-	switch m.pendingAction {
-	case actionExportBundle:
-		if m.selectedTarget == nil {
-			return "Export support bundle", "No target selected"
-		}
-		return "Export support bundle", "Copy support bundle to " + m.selectedTarget.Device + "?"
-	case actionRunAll:
-		modes := []string{"Quick", "Standard", "Express"}
-		mode := modes[m.hcMode]
-		var sel []string
-		names := []string{"GPU", "Memory", "Storage", "CPU"}
-		for i, on := range m.hcSel {
-			if on {
-				sel = append(sel, names[i])
-			}
-		}
-		if len(sel) == 0 {
-			return "Health Check", "No components selected."
-		}
-		return "Health Check", "Run: " + strings.Join(sel, " + ") + "\nMode: " + mode
-	case actionRunMemorySAT:
-		return "Memory test", "Run memtester?"
-	case actionRunStorageSAT:
-		return "Storage test", "Run storage diagnostic pack?"
-	case actionRunCPUSAT:
-		modes := []string{"Quick (60s)", "Standard (300s)", "Express (900s)"}
-		return "CPU test", "Run stress-ng? Mode: " + modes[m.hcMode]
-	case actionRunAMDGPUSAT:
-		return "AMD GPU test", "Run AMD GPU diagnostic pack (rocm-smi)?"
-	case actionRunFanStress:
-		modes := []string{"Quick (2×2min)", "Standard (2×5min)", "Express (2×10min)"}
-		return "GPU Platform Stress Test", "Two-phase GPU thermal cycling test.\n" +
-			"Monitors fans, temps, power — detects throttling.\n" +
-			"Mode: " + modes[m.burnMode] + "\n\nAll NVIDIA GPUs will be stressed."
-	default:
-		return "Confirm", "Proceed?"
-	}
-}
--- a/audit/internal/tui/update.go
+++ b/audit/internal/tui/update.go
@@ -1,310 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"strings"
-	"time"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
-	switch msg := msg.(type) {
-	case tea.WindowSizeMsg:
-		m.width = msg.Width
-		return m, nil
-	case tea.KeyMsg:
-		if m.busy {
-			if msg.String() == "ctrl+c" {
-				return m, tea.Quit
-			}
-			return m, nil
-		}
-		next, cmd := m.updateKey(msg)
-		nextModel := next.(model)
-		if shouldRefreshSnapshot(m, nextModel) {
-			return nextModel, tea.Batch(cmd, nextModel.refreshSnapshotCmd())
-		}
-		return nextModel, cmd
-	case satProgressMsg:
-		if m.busy && m.progressPrefix != "" {
-			if len(msg.lines) > 0 {
-				m.progressLines = msg.lines
-			}
-			return m, pollSATProgress(m.progressPrefix, m.progressSince)
-		}
-		return m, nil
-	case snapshotMsg:
-		m.banner = msg.banner
-		m.panel = msg.panel
-		return m, nil
-	case resultMsg:
-		m.busy = false
-		m.busyTitle = ""
-		m.progressLines = nil
-		m.progressPrefix = ""
-		m.title = msg.title
-		if msg.err != nil {
-			body := strings.TrimSpace(msg.body)
-			if body == "" {
-				m.body = fmt.Sprintf("ERROR: %v", msg.err)
-			} else {
-				m.body = fmt.Sprintf("%s\n\nERROR: %v", body, msg.err)
-			}
-		} else {
-			m.body = msg.body
-		}
-		m.pendingAction = actionNone
-		if msg.back != "" {
-			m.prevScreen = msg.back
-		} else {
-			m.prevScreen = m.screen
-		}
-		m.screen = screenOutput
-		m.cursor = 0
-		return m, m.refreshSnapshotCmd()
-	case servicesMsg:
-		m.busy = false
-		m.busyTitle = ""
-		if msg.err != nil {
-			m.title = "Services"
-			m.body = msg.err.Error()
-			m.prevScreen = screenSettings
-			m.screen = screenOutput
-			return m, m.refreshSnapshotCmd()
-		}
-		m.services = msg.services
-		m.screen = screenServices
-		m.cursor = 0
-		return m, m.refreshSnapshotCmd()
-	case interfacesMsg:
-		m.busy = false
-		m.busyTitle = ""
-		if msg.err != nil {
-			m.title = "interfaces"
-			m.body = msg.err.Error()
-			m.prevScreen = screenNetwork
-			m.screen = screenOutput
-			return m, m.refreshSnapshotCmd()
-		}
-		m.interfaces = msg.ifaces
-		m.screen = screenInterfacePick
-		m.cursor = 0
-		return m, m.refreshSnapshotCmd()
-	case exportTargetsMsg:
-		m.busy = false
-		m.busyTitle = ""
-		if msg.err != nil {
-			m.title = "export"
-			m.body = msg.err.Error()
-			m.prevScreen = screenMain
-			m.screen = screenOutput
-			return m, m.refreshSnapshotCmd()
-		}
-		if len(msg.targets) == 0 {
-			m.title = "Export support bundle"
-			m.body = "No writable removable filesystems found.\n\nRead-only or boot media are hidden from this list."
-			m.prevScreen = screenMain
-			m.screen = screenOutput
-			return m, m.refreshSnapshotCmd()
-		}
-		m.targets = msg.targets
-		m.screen = screenExportTargets
-		m.cursor = 0
-		return m, m.refreshSnapshotCmd()
-	case nvidiaGPUsMsg:
-		return m.handleNvidiaGPUsMsg(msg)
-	case nvtopClosedMsg:
-		return m, nil
-	case gpuStressDoneMsg:
-		if m.gpuStressAborted {
-			return m, nil
-		}
-		if m.gpuStressCancel != nil {
-			m.gpuStressCancel()
-			m.gpuStressCancel = nil
-		}
-		m.prevScreen = screenBurnInTests
-		m.screen = screenOutput
-		m.title = msg.title
-		if msg.err != nil {
-			body := strings.TrimSpace(msg.body)
-			if body == "" {
-				m.body = fmt.Sprintf("ERROR: %v", msg.err)
-			} else {
-				m.body = fmt.Sprintf("%s\n\nERROR: %v", body, msg.err)
-			}
-		} else {
-			m.body = msg.body
-		}
-		return m, m.refreshSnapshotCmd()
-	case gpuLiveTickMsg:
-		if m.screen == screenGPUStressRunning {
-			if len(msg.rows) > 0 {
-				elapsed := time.Since(m.gpuLiveStart).Seconds()
-				for i := range msg.rows {
-					msg.rows[i].ElapsedSec = elapsed
-				}
-				m.gpuLiveRows = append(m.gpuLiveRows, msg.rows...)
-				n := max(1, len(msg.indices))
-				if len(m.gpuLiveRows) > 60*n {
-					m.gpuLiveRows = m.gpuLiveRows[len(m.gpuLiveRows)-60*n:]
-				}
-			}
-			return m, pollGPULive(msg.indices)
-		}
-		return m, nil
-	case nvidiaSATDoneMsg:
-		if m.nvidiaSATAborted {
-			return m, nil
-		}
-		if m.nvidiaSATCancel != nil {
-			m.nvidiaSATCancel()
-			m.nvidiaSATCancel = nil
-		}
-		m.prevScreen = screenHealthCheck
-		m.screen = screenOutput
-		m.title = msg.title
-		if msg.err != nil {
-			body := strings.TrimSpace(msg.body)
-			if body == "" {
-				m.body = fmt.Sprintf("ERROR: %v", msg.err)
-			} else {
-				m.body = fmt.Sprintf("%s\n\nERROR: %v", body, msg.err)
-			}
-		} else {
-			m.body = msg.body
-		}
-		return m, m.refreshSnapshotCmd()
-	}
-	return m, nil
-}
-
-func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch m.screen {
-	case screenMain:
-		return m.updateMain(msg)
-	case screenHealthCheck:
-		return m.updateHealthCheck(msg)
-	case screenBurnInTests:
-		return m.updateBurnInTests(msg)
-	case screenSettings:
-		return m.updateMenu(msg, len(m.settingsMenu), m.handleSettingsMenu)
-	case screenNetwork:
-		return m.updateMenu(msg, len(m.networkMenu), m.handleNetworkMenu)
-	case screenServices:
-		return m.updateMenu(msg, len(m.services), m.handleServicesMenu)
-	case screenServiceAction:
-		return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu)
-	case screenNvidiaSATSetup:
-		return m.updateNvidiaSATSetup(msg)
-	case screenNvidiaSATRunning:
-		return m.updateNvidiaSATRunning(msg)
-	case screenGPUStressRunning:
-		return m.updateGPUStressRunning(msg)
-	case screenExportTargets:
-		return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu)
-	case screenInterfacePick:
-		return m.updateMenu(msg, len(m.interfaces), m.handleInterfacePickMenu)
-	case screenOutput:
-		switch msg.String() {
-		case "esc", "enter", "q":
-			m.screen = m.prevScreen
-			m.body = ""
-			m.title = ""
-			m.pendingAction = actionNone
-			return m, nil
-		case "ctrl+c":
-			return m, tea.Quit
-		}
-	case screenStaticForm:
-		return m.updateStaticForm(msg)
-	case screenConfirm:
-		return m.updateConfirm(msg)
-	}
-	if msg.String() == "ctrl+c" {
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-// updateMain handles keys on the main (two-column) screen.
-func (m model) updateMain(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	if m.panelFocus {
-		return m.updateMainPanel(msg)
-	}
-	// Switch focus to right panel.
-	if (msg.String() == "tab" || msg.String() == "right" || msg.String() == "l") && len(m.panel.Rows) > 0 {
-		m.panelFocus = true
-		return m, nil
-	}
-	return m.updateMenu(msg, len(m.mainMenu), m.handleMainMenu)
-}
-
-// updateMainPanel handles keys when right panel has focus.
-func (m model) updateMainPanel(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "up", "k":
-		if m.panelCursor > 0 {
-			m.panelCursor--
-		}
-	case "down", "j":
-		if m.panelCursor < len(m.panel.Rows)-1 {
-			m.panelCursor++
-		}
-	case "enter":
-		if m.panelCursor < len(m.panel.Rows) {
-			key := m.panel.Rows[m.panelCursor].Key
-			m.busy = true
-			m.busyTitle = key
-			return m, func() tea.Msg {
-				r := m.app.ComponentDetailResult(key)
-				return resultMsg{title: r.Title, body: r.Body, back: screenMain}
-			}
-		}
-	case "tab", "left", "h", "esc":
-		m.panelFocus = false
-	case "q", "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) updateMenu(msg tea.KeyMsg, size int, onEnter func() (tea.Model, tea.Cmd)) (tea.Model, tea.Cmd) {
-	if size == 0 {
-		size = 1
-	}
-	switch msg.String() {
-	case "up", "k":
-		if m.cursor > 0 {
-			m.cursor--
-		}
-	case "down", "j":
-		if m.cursor < size-1 {
-			m.cursor++
-		}
-	case "enter":
-		return onEnter()
-	case "esc":
-		switch m.screen {
-		case screenNetwork, screenServices:
-			m.screen = screenSettings
-			m.cursor = 0
-		case screenSettings:
-			m.screen = screenMain
-			m.cursor = 0
-		case screenServiceAction:
-			m.screen = screenServices
-			m.cursor = 0
-		case screenExportTargets:
-			m.screen = screenMain
-			m.cursor = 0
-		case screenInterfacePick:
-			m.screen = screenNetwork
-			m.cursor = 0
-		}
-	case "q", "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
--- a/audit/internal/tui/view.go
+++ b/audit/internal/tui/view.go
@@ -1,303 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"strings"
-
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-	"github.com/charmbracelet/lipgloss"
-)
-
-// Column widths for two-column main layout.
-const leftColWidth = 30
-
-var (
-	stylePass   = lipgloss.NewStyle().Foreground(lipgloss.Color("10")) // bright green
-	styleFail   = lipgloss.NewStyle().Foreground(lipgloss.Color("9"))  // bright red
-	styleCancel = lipgloss.NewStyle().Foreground(lipgloss.Color("11")) // bright yellow
-	styleNA     = lipgloss.NewStyle().Foreground(lipgloss.Color("8"))  // dark gray
-)
-
-func colorStatus(status string) string {
-	switch status {
-	case "PASS":
-		return stylePass.Render("PASS")
-	case "FAIL":
-		return styleFail.Render("FAIL")
-	case "CANCEL":
-		return styleCancel.Render("CANC")
-	default:
-		return styleNA.Render("N/A ")
-	}
-}
-
-func (m model) View() string {
-	var body string
-	if m.busy {
-		title := "bee"
-		if m.busyTitle != "" {
-			title = m.busyTitle
-		}
-		if len(m.progressLines) > 0 {
-			var b strings.Builder
-			fmt.Fprintf(&b, "%s\n\n", title)
-			for _, l := range m.progressLines {
-				fmt.Fprintf(&b, "  %s\n", l)
-			}
-			b.WriteString("\n[ctrl+c] quit\n")
-			body = b.String()
-		} else {
-			body = fmt.Sprintf("%s\n\nWorking...\n\n[ctrl+c] quit\n", title)
-		}
-	} else {
-		switch m.screen {
-		case screenMain:
-			body = renderTwoColumnMain(m)
-		case screenHealthCheck:
-			body = renderHealthCheck(m)
-		case screenBurnInTests:
-			body = renderBurnInTests(m)
-		case screenSettings:
-			body = renderMenu("Settings", "Select action", m.settingsMenu, m.cursor)
-		case screenNetwork:
-			body = renderMenu("Network", "Select action", m.networkMenu, m.cursor)
-		case screenServices:
-			body = renderMenu("Services", "Select service", m.services, m.cursor)
-		case screenServiceAction:
-			body = renderMenu("Service: "+m.selectedService, "Select action", m.serviceMenu, m.cursor)
-		case screenExportTargets:
-			body = renderMenu(
-				"Export support bundle",
-				"Select writable removable filesystem (read-only/boot media hidden)",
-				renderTargetItems(m.targets),
-				m.cursor,
-			)
-		case screenInterfacePick:
-			body = renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
-		case screenStaticForm:
-			body = renderForm("Static IPv4: "+m.selectedIface, m.formFields, m.formIndex)
-		case screenConfirm:
-			title, confirmBody := m.confirmBody()
-			body = renderConfirm(title, confirmBody, m.cursor)
-		case screenNvidiaSATSetup:
-			body = renderNvidiaSATSetup(m)
-		case screenNvidiaSATRunning:
-			body = renderNvidiaSATRunning()
-		case screenGPUStressRunning:
-			body = renderGPUStressRunning(m)
-		case screenOutput:
-			body = fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back  [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body))
-		default:
-			body = "bee\n"
-		}
-	}
-	return m.renderWithBanner(body)
-}
-
-// renderTwoColumnMain renders the main screen with menu on the left and hardware panel on the right.
-func renderTwoColumnMain(m model) string {
-	// Left column lines
-	leftLines := []string{"bee", ""}
-	for i, item := range m.mainMenu {
-		pfx := "  "
-		if !m.panelFocus && m.cursor == i {
-			pfx = "> "
-		}
-		leftLines = append(leftLines, pfx+item)
-	}
-
-	// Right column lines
-	rightLines := buildPanelLines(m)
-
-	// Render side by side
-	var b strings.Builder
-	maxRows := max(len(leftLines), len(rightLines))
-	for i := 0; i < maxRows; i++ {
-		l := ""
-		if i < len(leftLines) {
-			l = leftLines[i]
-		}
-		r := ""
-		if i < len(rightLines) {
-			r = rightLines[i]
-		}
-		w := lipgloss.Width(l)
-		if w < leftColWidth {
-			l += strings.Repeat(" ", leftColWidth-w)
-		}
-		b.WriteString(l + " │ " + r + "\n")
-	}
-
-	sep := strings.Repeat("─", leftColWidth) + "─┴─" + strings.Repeat("─", 46)
-	b.WriteString(sep + "\n")
-
-	if m.panelFocus {
-		b.WriteString("[↑↓] move  [enter] details  [tab/←] menu  [ctrl+c] quit\n")
-	} else {
-		b.WriteString("[↑↓] move  [enter] select  [tab/→] panel  [ctrl+c] quit\n")
-	}
-
-	return b.String()
-}
-
-func buildPanelLines(m model) []string {
-	p := m.panel
-	var lines []string
-
-	for _, h := range p.Header {
-		lines = append(lines, h)
-	}
-	if len(p.Header) > 0 && len(p.Rows) > 0 {
-		lines = append(lines, "")
-	}
-
-	for i, row := range p.Rows {
-		pfx := "  "
-		if m.panelFocus && m.panelCursor == i {
-			pfx = "> "
-		}
-		status := colorStatus(row.Status)
-		lines = append(lines, fmt.Sprintf("%s%s  %-4s  %s", pfx, status, row.Key, row.Detail))
-	}
-
-	return lines
-}
-
-func renderTargetItems(targets []platform.RemovableTarget) []string {
-	items := make([]string, 0, len(targets))
-	for _, target := range targets {
-		desc := fmt.Sprintf("%s [%s %s]", target.Device, target.FSType, target.Size)
-		if target.Label != "" {
-			desc += " label=" + target.Label
-		}
-		if target.Mountpoint != "" {
-			desc += " mounted=" + target.Mountpoint
-		}
-		items = append(items, desc)
-	}
-	return items
-}
-
-func renderInterfaceItems(interfaces []platform.InterfaceInfo) []string {
-	items := make([]string, 0, len(interfaces))
-	for _, iface := range interfaces {
-		label := iface.Name
-		if len(iface.IPv4) > 0 {
-			label += " [" + strings.Join(iface.IPv4, ", ") + "]"
-		}
-		items = append(items, label)
-	}
-	return items
-}
-
-func renderMenu(title, subtitle string, items []string, cursor int) string {
-	var body strings.Builder
-	fmt.Fprintf(&body, "%s\n\n%s\n\n", title, subtitle)
-	if len(items) == 0 {
-		body.WriteString("(no items)\n")
-	} else {
-		for i, item := range items {
-			prefix := "  "
-			if i == cursor {
-				prefix = "> "
-			}
-			fmt.Fprintf(&body, "%s%s\n", prefix, item)
-		}
-	}
-	body.WriteString("\n[↑/↓] move  [enter] select  [esc] back  [ctrl+c] quit\n")
-	return body.String()
-}
-
-func renderForm(title string, fields []formField, idx int) string {
-	var body strings.Builder
-	fmt.Fprintf(&body, "%s\n\n", title)
-	for i, field := range fields {
-		prefix := "  "
-		if i == idx {
-			prefix = "> "
-		}
-		fmt.Fprintf(&body, "%s%s: %s\n", prefix, field.Label, field.Value)
-	}
-	body.WriteString("\n[tab/↑/↓] move  [enter] next/submit  [backspace] delete  [esc] cancel\n")
-	return body.String()
-}
-
-func renderConfirm(title, body string, cursor int) string {
-	options := []string{"Confirm", "Cancel"}
-	var out strings.Builder
-	fmt.Fprintf(&out, "%s\n\n%s\n\n", title, body)
-	for i, option := range options {
-		prefix := "  "
-		if i == cursor {
-			prefix = "> "
-		}
-		fmt.Fprintf(&out, "%s%s\n", prefix, option)
-	}
-	out.WriteString("\n[←/→/↑/↓] move  [enter] select  [esc] cancel\n")
-	return out.String()
-}
-
-func resultCmd(title, body string, err error, back screen) tea.Cmd {
-	return func() tea.Msg {
-		return resultMsg{title: title, body: body, err: err, back: back}
-	}
-}
-
-func (m model) renderWithBanner(body string) string {
-	body = strings.TrimRight(body, "\n")
-	banner := renderBannerModule(m.banner, m.width)
-	if banner == "" {
-		if body == "" {
-			return ""
-		}
-		return body + "\n"
-	}
-	if body == "" {
-		return banner + "\n"
-	}
-	return banner + "\n\n" + body + "\n"
-}
-
-func renderBannerModule(banner string, width int) string {
-	banner = strings.TrimSpace(banner)
-	if banner == "" {
-		return ""
-	}
-
-	lines := strings.Split(banner, "\n")
-	contentWidth := 0
-	for _, line := range lines {
-		if w := lipgloss.Width(line); w > contentWidth {
-			contentWidth = w
-		}
-	}
-	if width > 0 && width-4 > contentWidth {
-		contentWidth = width - 4
-	}
-	if contentWidth < 20 {
-		contentWidth = 20
-	}
-
-	label := " MOTD "
-	topFill := contentWidth + 2 - lipgloss.Width(label)
-	if topFill < 0 {
-		topFill = 0
-	}
-
-	var b strings.Builder
-	b.WriteString("┌" + label + strings.Repeat("─", topFill) + "┐\n")
-	for _, line := range lines {
-		b.WriteString("│ " + padRight(line, contentWidth) + " │\n")
-	}
-	b.WriteString("└" + strings.Repeat("─", contentWidth+2) + "┘")
-	return b.String()
-}
-
-func padRight(value string, width int) string {
-	if gap := width - lipgloss.Width(value); gap > 0 {
-		return value + strings.Repeat(" ", gap)
-	}
-	return value
-}
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -0,0 +1,969 @@
+package webui
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync/atomic"
+	"syscall"
+	"time"
+
+	"bee/audit/internal/app"
+	"bee/audit/internal/platform"
+)
+
+// ansiEscapeRE matches the terminal escape sequences that are stripped from
+// job output before it is stored: CSI sequences (ESC [ params letter),
+// charset designations (ESC ( or ESC ) plus one character) and the bare
+// ESC D / A / B / C sequences.
+var ansiEscapeRE = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b[()][A-Z0-9]|\x1b[DABC]`)
+
+// ── Job ID counter ────────────────────────────────────────────────────────────
+
+// jobCounter is the process-wide sequence used to number jobs and tasks.
+var jobCounter atomic.Uint64
+
+// newJobID returns "<prefix>-<n>", where n is the next value of jobCounter.
+func newJobID(prefix string) string {
+	seq := jobCounter.Add(1)
+	return fmt.Sprintf("%s-%d", prefix, seq)
+}
+
+// ── SSE helpers ───────────────────────────────────────────────────────────────
+
+// sseWrite sends one Server-Sent Event on w and flushes it.
+//
+// event is the optional event name (omitted when empty). data may span
+// several lines: each line is emitted as its own "data:" field, because a raw
+// line break inside a single field would end the field and corrupt the stream
+// framing. For single-line data the bytes written are "data: <data>\n\n".
+//
+// It returns false when w does not support flushing or when a write fails
+// (for example because the client went away), so streaming loops can stop.
+func sseWrite(w http.ResponseWriter, event, data string) bool {
+	f, ok := w.(http.Flusher)
+	if !ok {
+		return false
+	}
+	if event != "" {
+		if _, err := fmt.Fprintf(w, "event: %s\n", event); err != nil {
+			return false
+		}
+	}
+	// CRLF, LF and bare CR are all line terminators in the SSE wire format.
+	normalized := strings.ReplaceAll(strings.ReplaceAll(data, "\r\n", "\n"), "\r", "\n")
+	for _, line := range strings.Split(normalized, "\n") {
+		if _, err := fmt.Fprintf(w, "data: %s\n", line); err != nil {
+			return false
+		}
+	}
+	if _, err := fmt.Fprint(w, "\n"); err != nil {
+		return false
+	}
+	f.Flush()
+	return true
+}
+
+// sseStart prepares w for a Server-Sent Events response. When w cannot flush
+// it replies with HTTP 500 and returns false; otherwise it sets the SSE
+// response headers and returns true.
+func sseStart(w http.ResponseWriter) bool {
+	if _, canFlush := w.(http.Flusher); !canFlush {
+		http.Error(w, "streaming not supported", http.StatusInternalServerError)
+		return false
+	}
+	hdr := w.Header()
+	hdr.Set("Content-Type", "text/event-stream")
+	hdr.Set("Cache-Control", "no-cache")
+	hdr.Set("Connection", "keep-alive")
+	hdr.Set("Access-Control-Allow-Origin", "*")
+	return true
+}
+
+// streamJob relays a job's output to the client as Server-Sent Events: first
+// the lines recorded so far, then new lines as they arrive, and finally a
+// "done" event carrying the job's error text. It returns early when the
+// request context is cancelled.
+func streamJob(w http.ResponseWriter, r *http.Request, j *jobState) {
+	if !sseStart(w) {
+		return
+	}
+	backlog, updates := j.subscribe()
+	for i := range backlog {
+		sseWrite(w, "", backlog[i])
+	}
+	// A nil channel from subscribe means the job had already finished.
+	if updates == nil {
+		sseWrite(w, "done", j.err)
+		return
+	}
+	for {
+		select {
+		case <-r.Context().Done():
+			return
+		case line, open := <-updates:
+			if open {
+				sseWrite(w, "", line)
+				continue
+			}
+			sseWrite(w, "done", j.err)
+			return
+		}
+	}
+}
+
+// runCmdJob runs cmd to completion as a background job, feeding its combined
+// stdout+stderr into j line by line and finishing j with the command's error
+// text ("" on success). It blocks until the command has exited, so callers
+// run it in its own goroutine.
+//
+// The job is finished only after the reader goroutine has consumed everything
+// the command wrote; otherwise trailing output could be appended after
+// finish has already closed the subscriber channels.
+func runCmdJob(j *jobState, cmd *exec.Cmd) {
+	pr, pw := io.Pipe()
+	cmd.Stdout = pw
+	cmd.Stderr = pw
+
+	if err := cmd.Start(); err != nil {
+		_ = pw.Close()
+		j.finish(err.Error())
+		return
+	}
+	// Lower the CPU scheduling priority of stress/audit subprocesses to nice+10
+	// so the X server and kernel interrupt handling remain responsive under load
+	// (prevents KVM/IPMI graphical console from freezing during GPU stress tests).
+	// Best effort: a failure to renice must not abort the job.
+	if cmd.Process != nil {
+		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, 10)
+	}
+
+	readerDone := make(chan struct{})
+	go func() {
+		defer close(readerDone)
+		scanner := bufio.NewScanner(pr)
+		// Progress-bar output can emit many \r updates before a \n, producing
+		// "lines" longer than the scanner's 64 KiB default; allow up to 1 MiB.
+		scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+		for scanner.Scan() {
+			// Split on \r to handle progress-bar style output (e.g. \r overwrites)
+			// and strip ANSI escape codes so logs are readable in the browser.
+			for _, part := range strings.Split(scanner.Text(), "\r") {
+				if line := ansiEscapeRE.ReplaceAllString(part, ""); line != "" {
+					j.append(line)
+				}
+			}
+		}
+		if err := scanner.Err(); err != nil {
+			j.append("[output reader stopped: " + err.Error() + "]")
+		}
+		// Keep draining after a scanner failure: if nobody reads the pipe the
+		// command's output writes block and cmd.Wait never returns.
+		_, _ = io.Copy(io.Discard, pr)
+	}()
+
+	err := cmd.Wait()
+	_ = pw.Close()
+	<-readerDone
+	if err != nil {
+		j.finish(err.Error())
+		return
+	}
+	j.finish("")
+}
+
+// ── Audit ─────────────────────────────────────────────────────────────────────
+
+// handleAPIAuditRun queues a new audit task and replies with its ID under both
+// the "task_id" and "job_id" keys. It answers 503 when no app is configured.
+func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	task := &Task{
+		ID:        newJobID("audit"),
+		Name:      "Audit",
+		Target:    "audit",
+		Status:    TaskPending,
+		CreatedAt: time.Now(),
+	}
+	globalQueue.enqueue(task)
+	reply := map[string]string{"task_id": task.ID, "job_id": task.ID}
+	writeJSON(w, reply)
+}
+
+// handleAPIAuditStream streams the output of the job named by the "job_id"
+// query parameter (falling back to "task_id"). The task queue is consulted
+// first, then the legacy job manager; unknown IDs get a 404.
+func (h *handler) handleAPIAuditStream(w http.ResponseWriter, r *http.Request) {
+	query := r.URL.Query()
+	id := query.Get("job_id")
+	if id == "" {
+		id = query.Get("task_id")
+	}
+	j, found := globalQueue.findJob(id)
+	if !found {
+		j, found = globalJobs.get(id)
+	}
+	if !found {
+		http.Error(w, "job not found", http.StatusNotFound)
+		return
+	}
+	streamJob(w, r, j)
+}
+
+// ── SAT ───────────────────────────────────────────────────────────────────────
+
+// handleAPISATRun returns a handler that queues an acceptance-test task for
+// target and replies with the new task's ID ("task_id" and "job_id").
+//
+// The request body is optional JSON with the run parameters. An absent or
+// empty body means "use defaults"; a body that is present but not valid JSON
+// is rejected with 400 instead of silently running with defaults.
+//
+// The task name is taskNames[target], replaced by burnNames[target] when a
+// profile is given, falling back to target itself; a non-blank display_name
+// overrides all of these.
+func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if h.opts.App == nil {
+			writeError(w, http.StatusServiceUnavailable, "app not configured")
+			return
+		}
+
+		var body struct {
+			Duration          int    `json:"duration"`
+			DiagLevel         int    `json:"diag_level"`
+			GPUIndices        []int  `json:"gpu_indices"`
+			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+			Loader            string `json:"loader"`
+			Profile           string `json:"profile"`
+			DisplayName       string `json:"display_name"`
+		}
+		// ContentLength is -1 for chunked bodies, so only skip decoding when
+		// the length is known to be zero. io.EOF means the body was empty.
+		if r.Body != nil && r.ContentLength != 0 {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && err != io.EOF {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
+		}
+
+		name := taskNames[target]
+		if body.Profile != "" {
+			if n, ok := burnNames[target]; ok {
+				name = n
+			}
+		}
+		if name == "" {
+			name = target
+		}
+		t := &Task{
+			ID:        newJobID("sat-" + target),
+			Name:      name,
+			Target:    target,
+			Status:    TaskPending,
+			CreatedAt: time.Now(),
+			params: taskParams{
+				Duration:          body.Duration,
+				DiagLevel:         body.DiagLevel,
+				GPUIndices:        body.GPUIndices,
+				ExcludeGPUIndices: body.ExcludeGPUIndices,
+				Loader:            body.Loader,
+				BurnProfile:       body.Profile,
+				DisplayName:       body.DisplayName,
+			},
+		}
+		if strings.TrimSpace(body.DisplayName) != "" {
+			t.Name = body.DisplayName
+		}
+		globalQueue.enqueue(t)
+		writeJSON(w, map[string]string{"task_id": t.ID, "job_id": t.ID})
+	}
+}
+
+// handleAPISATStream streams the output of an acceptance-test job. The ID is
+// read from "job_id" (or "task_id" when that is empty) and looked up in the
+// task queue, then in the legacy job manager; unknown IDs get a 404.
+func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
+	id := r.URL.Query().Get("job_id")
+	if id == "" {
+		id = r.URL.Query().Get("task_id")
+	}
+	if queued, found := globalQueue.findJob(id); found {
+		streamJob(w, r, queued)
+		return
+	}
+	legacy, found := globalJobs.get(id)
+	if !found {
+		http.Error(w, "job not found", http.StatusNotFound)
+		return
+	}
+	streamJob(w, r, legacy)
+}
+
+// handleAPISATAbort cancels the job or task named by "job_id" (or "task_id").
+//
+// Queue tasks that are pending or running are marked cancelled (a running
+// task's job is also aborted) and the reply is {"status":"aborted"}. Tasks in
+// any other state are left untouched and reported as "not_running", matching
+// what the legacy job branch already returns. Unknown IDs get a 404.
+func (h *handler) handleAPISATAbort(w http.ResponseWriter, r *http.Request) {
+	id := r.URL.Query().Get("job_id")
+	if id == "" {
+		id = r.URL.Query().Get("task_id")
+	}
+	if t, ok := globalQueue.findByID(id); ok {
+		status := "not_running"
+		globalQueue.mu.Lock()
+		switch t.Status {
+		case TaskPending, TaskRunning:
+			if t.Status == TaskRunning && t.job != nil {
+				t.job.abort()
+			}
+			t.Status = TaskCancelled
+			now := time.Now()
+			t.DoneAt = &now
+			status = "aborted"
+		}
+		globalQueue.mu.Unlock()
+		writeJSON(w, map[string]string{"status": status})
+		return
+	}
+	if j, ok := globalJobs.get(id); ok {
+		if j.abort() {
+			writeJSON(w, map[string]string{"status": "aborted"})
+		} else {
+			writeJSON(w, map[string]string{"status": "not_running"})
+		}
+		return
+	}
+	http.Error(w, "job not found", http.StatusNotFound)
+}
+
+// ── Services ──────────────────────────────────────────────────────────────────
+
+// handleAPIServicesList replies with a JSON array describing every service
+// returned by ListBeeServices: its name, state and status text. A status
+// lookup error is ignored and leaves the body empty for that service.
+func (h *handler) handleAPIServicesList(w http.ResponseWriter, r *http.Request) {
+	type serviceInfo struct {
+		Name  string `json:"name"`
+		State string `json:"state"`
+		Body  string `json:"body"`
+	}
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	names, err := h.opts.App.ListBeeServices()
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	services := make([]serviceInfo, len(names))
+	for i, svc := range names {
+		state := h.opts.App.ServiceState(svc)
+		status, _ := h.opts.App.ServiceStatus(svc)
+		services[i] = serviceInfo{Name: svc, State: state, Body: status}
+	}
+	writeJSON(w, services)
+}
+
+// handleAPIServicesAction applies a start, stop or restart action to the
+// service named in the JSON request body and replies with the action output.
+// Any other action, or an undecodable body, is answered with 400.
+func (h *handler) handleAPIServicesAction(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var req struct {
+		Name   string `json:"name"`
+		Action string `json:"action"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		writeError(w, http.StatusBadRequest, "invalid request body")
+		return
+	}
+	supported := map[string]platform.ServiceAction{
+		"start":   platform.ServiceStart,
+		"stop":    platform.ServiceStop,
+		"restart": platform.ServiceRestart,
+	}
+	action, known := supported[req.Action]
+	if !known {
+		writeError(w, http.StatusBadRequest, "action must be start|stop|restart")
+		return
+	}
+	outcome, err := h.opts.App.ServiceActionResult(req.Name, action)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	writeJSON(w, map[string]string{"status": "ok", "output": outcome.Body})
+}
+
+// ── Network ───────────────────────────────────────────────────────────────────
+
+// handleAPINetworkStatus replies with the interface list and the default
+// route as a JSON object with keys "interfaces" and "default_route".
+func (h *handler) handleAPINetworkStatus(w http.ResponseWriter, r *http.Request) {
+	application := h.opts.App
+	if application == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	interfaces, err := application.ListInterfaces()
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	payload := map[string]any{
+		"interfaces":    interfaces,
+		"default_route": application.DefaultRoute(),
+	}
+	writeJSON(w, payload)
+}
+
+// handleAPINetworkDHCP runs DHCP on one interface, or on all interfaces when
+// "interface" is empty or "all", as a pending network change, and reports the
+// seconds remaining before an automatic rollback in "rollback_in".
+//
+// An empty request body is accepted and means "all interfaces". A body that
+// is present but not valid JSON is rejected with 400 rather than being
+// treated as a request to reconfigure every interface.
+func (h *handler) handleAPINetworkDHCP(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var req struct {
+		Interface string `json:"interface"`
+	}
+	// io.EOF is what the decoder returns for an empty body.
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil && err != io.EOF {
+		writeError(w, http.StatusBadRequest, "invalid request body")
+		return
+	}
+
+	result, err := h.applyPendingNetworkChange(func() (app.ActionResult, error) {
+		if req.Interface == "" || req.Interface == "all" {
+			return h.opts.App.DHCPAllResult()
+		}
+		return h.opts.App.DHCPOneResult(req.Interface)
+	})
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	writeJSON(w, map[string]any{
+		"status":      "ok",
+		"output":      result.Body,
+		"rollback_in": int(netRollbackTimeout.Seconds()),
+	})
+}
+
+// handleAPINetworkStatic applies the static IPv4 configuration from the JSON
+// request body as a pending network change and reports the seconds remaining
+// before an automatic rollback in "rollback_in".
+func (h *handler) handleAPINetworkStatic(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var req struct {
+		Interface string   `json:"interface"`
+		Address   string   `json:"address"`
+		Prefix    string   `json:"prefix"`
+		Gateway   string   `json:"gateway"`
+		DNS       []string `json:"dns"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&req); decodeErr != nil {
+		writeError(w, http.StatusBadRequest, "invalid request body")
+		return
+	}
+	apply := func() (app.ActionResult, error) {
+		return h.opts.App.SetStaticIPv4Result(platform.StaticIPv4Config{
+			Interface: req.Interface,
+			Address:   req.Address,
+			Prefix:    req.Prefix,
+			Gateway:   req.Gateway,
+			DNS:       req.DNS,
+		})
+	}
+	outcome, err := h.applyPendingNetworkChange(apply)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	writeJSON(w, map[string]any{
+		"status":      "ok",
+		"output":      outcome.Body,
+		"rollback_in": int(netRollbackTimeout.Seconds()),
+	})
+}
+
+// ── Export ────────────────────────────────────────────────────────────────────
+
+// handleAPIExportList replies with the files found in the export directory.
+func (h *handler) handleAPIExportList(w http.ResponseWriter, r *http.Request) {
+	files, listErr := listExportFiles(h.opts.ExportDir)
+	if listErr != nil {
+		writeError(w, http.StatusInternalServerError, listErr.Error())
+		return
+	}
+	writeJSON(w, files)
+}
+
+// handleAPIExportBundle builds a support bundle from the export directory and
+// replies with the archive path and the fixed download URL.
+func (h *handler) handleAPIExportBundle(w http.ResponseWriter, r *http.Request) {
+	bundlePath, buildErr := app.BuildSupportBundle(h.opts.ExportDir)
+	if buildErr != nil {
+		writeError(w, http.StatusInternalServerError, buildErr.Error())
+		return
+	}
+	reply := map[string]string{
+		"status": "ok",
+		"path":   bundlePath,
+		"url":    "/export/support.tar.gz",
+	}
+	writeJSON(w, reply)
+}
+
+// handleAPIExportUSBTargets replies with the removable export targets. A nil
+// list is replaced by an empty slice before it is written.
+func (h *handler) handleAPIExportUSBTargets(w http.ResponseWriter, _ *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	found, err := h.opts.App.ListRemovableTargets()
+	switch {
+	case err != nil:
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	case found == nil:
+		found = []platform.RemovableTarget{}
+	}
+	writeJSON(w, found)
+}
+
+// handleAPIExportUSBAudit exports the latest audit to the removable target
+// described by the JSON request body. The target must name a device.
+func (h *handler) handleAPIExportUSBAudit(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var dest platform.RemovableTarget
+	decodeErr := json.NewDecoder(r.Body).Decode(&dest)
+	if decodeErr != nil || dest.Device == "" {
+		writeError(w, http.StatusBadRequest, "device is required")
+		return
+	}
+	outcome, err := h.opts.App.ExportLatestAuditResult(dest)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	writeJSON(w, map[string]string{"status": "ok", "message": outcome.Body})
+}
+
+// handleAPIExportUSBBundle exports a support bundle to the removable target
+// described by the JSON request body. The target must name a device.
+func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var dest platform.RemovableTarget
+	decodeErr := json.NewDecoder(r.Body).Decode(&dest)
+	if decodeErr != nil || dest.Device == "" {
+		writeError(w, http.StatusBadRequest, "device is required")
+		return
+	}
+	outcome, err := h.opts.App.ExportSupportBundleResult(dest)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	writeJSON(w, map[string]string{"status": "ok", "message": outcome.Body})
+}
+
+// ── GPU presence ──────────────────────────────────────────────────────────────
+
+// handleAPIGPUPresence replies with {"nvidia": bool, "amd": bool} taken from
+// the app's GPU presence detection.
+func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	presence := h.opts.App.DetectGPUPresence()
+	payload := map[string]bool{
+		"nvidia": presence.Nvidia,
+		"amd":    presence.AMD,
+	}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(payload)
+}
+
+// ── System ────────────────────────────────────────────────────────────────────
+
+// handleAPIRAMStatus replies with {"in_ram": bool} as reported by
+// IsLiveMediaInRAM.
+func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	payload := map[string]bool{"in_ram": h.opts.App.IsLiveMediaInRAM()}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(payload)
+}
+
+// handleAPIInstallToRAM queues an "install-to-ram" task with priority 10 and
+// replies with its task ID. It answers 409 while an install job started by
+// this handler set is still running.
+func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	h.installMu.Lock()
+	diskInstallBusy := h.installJob != nil && !h.installJob.isDone()
+	h.installMu.Unlock()
+	if diskInstallBusy {
+		writeError(w, http.StatusConflict, "install to disk is already running")
+		return
+	}
+	task := &Task{
+		ID:        newJobID("install-to-ram"),
+		Name:      "Install to RAM",
+		Target:    "install-to-ram",
+		Priority:  10,
+		Status:    TaskPending,
+		CreatedAt: time.Now(),
+	}
+	globalQueue.enqueue(task)
+	reply := map[string]string{"task_id": task.ID}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(reply)
+}
+
+// ── Tools ─────────────────────────────────────────────────────────────────────
+
+// standardTools lists the external commands whose availability
+// handleAPIToolsCheck reports.
+var standardTools = []string{
+	"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
+	"nvidia-smi", "memtester", "stress-ng", "nvtop",
+	"mstflint", "qrencode",
+}
+
+// handleAPIToolsCheck replies with the result of CheckTools for standardTools.
+func (h *handler) handleAPIToolsCheck(w http.ResponseWriter, r *http.Request) {
+	application := h.opts.App
+	if application == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	writeJSON(w, application.CheckTools(standardTools))
+}
+
+// ── Preflight ─────────────────────────────────────────────────────────────────
+
+// handleAPIPreflight serves runtime-health.json from the export directory
+// verbatim, uncached. Any load failure is reported as 404.
+func (h *handler) handleAPIPreflight(w http.ResponseWriter, r *http.Request) {
+	healthPath := filepath.Join(h.opts.ExportDir, "runtime-health.json")
+	raw, loadErr := loadSnapshot(healthPath)
+	if loadErr != nil {
+		writeError(w, http.StatusNotFound, "runtime health not found")
+		return
+	}
+	hdr := w.Header()
+	hdr.Set("Content-Type", "application/json; charset=utf-8")
+	hdr.Set("Cache-Control", "no-store")
+	_, _ = w.Write(raw)
+}
+
+// ── Install ───────────────────────────────────────────────────────────────────
+
+// handleAPIInstallDisks replies with the install candidate disks, each
+// annotated with the warnings computed by platform.DiskWarnings.
+func (h *handler) handleAPIInstallDisks(w http.ResponseWriter, r *http.Request) {
+	type diskJSON struct {
+		Device       string   `json:"device"`
+		Model        string   `json:"model"`
+		Size         string   `json:"size"`
+		SizeBytes    int64    `json:"size_bytes"`
+		MountedParts []string `json:"mounted_parts"`
+		Warnings     []string `json:"warnings"`
+	}
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	candidates, err := h.opts.App.ListInstallDisks()
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	rows := make([]diskJSON, len(candidates))
+	for i := range candidates {
+		disk := candidates[i]
+		rows[i] = diskJSON{
+			Device:       disk.Device,
+			Model:        disk.Model,
+			Size:         disk.Size,
+			SizeBytes:    disk.SizeBytes,
+			MountedParts: disk.MountedParts,
+			Warnings:     platform.DiskWarnings(disk),
+		}
+	}
+	writeJSON(w, rows)
+}
+
+// handleAPIInstallRun starts "bee-install <device> <logfile>" in the
+// background for the device named in the JSON request body and replies 204.
+//
+// The device must appear in ListInstallDisks. The request is refused with 409
+// while an install-to-RAM task is pending or running, or while a previous
+// install job has not finished.
+func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var req struct {
+		Device string `json:"device"`
+	}
+	decodeErr := json.NewDecoder(r.Body).Decode(&req)
+	if decodeErr != nil || req.Device == "" {
+		writeError(w, http.StatusBadRequest, "device is required")
+		return
+	}
+
+	// Whitelist: only allow devices that ListInstallDisks() returns.
+	candidates, err := h.opts.App.ListInstallDisks()
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+	known := false
+	for i := range candidates {
+		if candidates[i].Device == req.Device {
+			known = true
+			break
+		}
+	}
+	if !known {
+		writeError(w, http.StatusBadRequest, "device not in install candidate list")
+		return
+	}
+	if globalQueue.hasActiveTarget("install-to-ram") {
+		writeError(w, http.StatusConflict, "install to RAM task is already pending or running")
+		return
+	}
+
+	// Claim the single install slot under the lock.
+	h.installMu.Lock()
+	busy := h.installJob != nil && !h.installJob.isDone()
+	if busy {
+		h.installMu.Unlock()
+		writeError(w, http.StatusConflict, "install already running")
+		return
+	}
+	job := &jobState{}
+	h.installJob = job
+	h.installMu.Unlock()
+
+	installLog := platform.InstallLogPath(req.Device)
+	installCmd := exec.CommandContext(context.Background(), "bee-install", req.Device, installLog)
+	go runCmdJob(job, installCmd)
+
+	w.WriteHeader(http.StatusNoContent)
+}
+
+// handleAPIInstallStream streams the current install job's output. When no
+// install job has been started it sends a single empty "done" event.
+func (h *handler) handleAPIInstallStream(w http.ResponseWriter, r *http.Request) {
+	h.installMu.Lock()
+	current := h.installJob
+	h.installMu.Unlock()
+
+	if current != nil {
+		streamJob(w, r, current)
+		return
+	}
+	if sseStart(w) {
+		sseWrite(w, "done", "")
+	}
+}
+
+// ── Metrics SSE ───────────────────────────────────────────────────────────────
+
+// handleAPIMetricsLatest replies with the latest metric sample as JSON, or
+// with "{}" when no sample is available yet.
+func (h *handler) handleAPIMetricsLatest(w http.ResponseWriter, r *http.Request) {
+	payload := []byte("{}")
+	if sample, ok := h.latestMetric(); ok {
+		encoded, err := json.Marshal(sample)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		payload = encoded
+	}
+	w.Header().Set("Content-Type", "application/json")
+	_, _ = w.Write(payload)
+}
+
+// handleAPIMetricsStream pushes the latest metric sample as a "metrics"
+// Server-Sent Event once per second until the request is cancelled or a
+// write is reported as failed. Ticks with no sample, or with a sample that
+// cannot be marshalled, are skipped.
+func (h *handler) handleAPIMetricsStream(w http.ResponseWriter, r *http.Request) {
+	if !sseStart(w) {
+		return
+	}
+	// emit reports whether streaming should continue.
+	emit := func() bool {
+		sample, ok := h.latestMetric()
+		if !ok {
+			return true
+		}
+		encoded, err := json.Marshal(sample)
+		if err != nil {
+			return true
+		}
+		return sseWrite(w, "metrics", string(encoded))
+	}
+
+	tick := time.NewTicker(1 * time.Second)
+	defer tick.Stop()
+	for {
+		select {
+		case <-tick.C:
+			if !emit() {
+				return
+			}
+		case <-r.Context().Done():
+			return
+		}
+	}
+}
+
+// feedRings pushes one sample into all in-memory ring buffers.
+//
+// CPU and ambient temperatures go to per-name rings; power, CPU load and
+// memory load to their single rings; fans and GPUs to per-index rings that
+// are created on first sight (120 points each) while holding ringsMu.
+//
+// GPUs reporting a negative index are skipped: the index is used directly as
+// a slice position and would otherwise panic.
+//
+// NOTE(review): the temperature rings are updated outside ringsMu, unlike the
+// fan and GPU rings — confirm that readers of those slices do not rely on it.
+func (h *handler) feedRings(sample platform.LiveMetricSample) {
+	for _, t := range sample.Temps {
+		switch t.Group {
+		case "cpu":
+			h.pushNamedMetricRing(&h.cpuTempRings, t.Name, t.Celsius)
+		case "ambient":
+			h.pushNamedMetricRing(&h.ambientTempRings, t.Name, t.Celsius)
+		}
+	}
+	h.ringPower.push(sample.PowerW)
+	h.ringCPULoad.push(sample.CPULoadPct)
+	h.ringMemLoad.push(sample.MemLoadPct)
+
+	h.ringsMu.Lock()
+	defer h.ringsMu.Unlock()
+	for i, fan := range sample.Fans {
+		// Fans are addressed by position; the name is recorded once, when
+		// the ring for that position is created.
+		for len(h.ringFans) <= i {
+			h.ringFans = append(h.ringFans, newMetricsRing(120))
+			h.fanNames = append(h.fanNames, fan.Name)
+		}
+		h.ringFans[i].push(float64(fan.RPM))
+	}
+	for _, gpu := range sample.GPUs {
+		idx := gpu.GPUIndex
+		if idx < 0 {
+			continue
+		}
+		for len(h.gpuRings) <= idx {
+			h.gpuRings = append(h.gpuRings, &gpuRings{
+				Temp:    newMetricsRing(120),
+				Util:    newMetricsRing(120),
+				MemUtil: newMetricsRing(120),
+				Power:   newMetricsRing(120),
+			})
+		}
+		rings := h.gpuRings[idx]
+		rings.Temp.push(gpu.TempC)
+		rings.Util.push(gpu.UsagePct)
+		rings.MemUtil.push(gpu.MemUsagePct)
+		rings.Power.push(gpu.PowerW)
+	}
+}
+
+// pushNamedMetricRing appends value to the ring in *dst whose Name equals
+// name, creating a new 120-point ring at the end of *dst when none matches.
+// Samples with an empty name are dropped.
+func (h *handler) pushNamedMetricRing(dst *[]*namedMetricsRing, name string, value float64) {
+	if name == "" {
+		return
+	}
+	for _, existing := range *dst {
+		if existing == nil || existing.Ring == nil || existing.Name != name {
+			continue
+		}
+		existing.Ring.push(value)
+		return
+	}
+	created := &namedMetricsRing{
+		Name: name,
+		Ring: newMetricsRing(120),
+	}
+	*dst = append(*dst, created)
+	created.Ring.push(value)
+}
+
+// ── Network toggle ────────────────────────────────────────────────────────────
+
+// netRollbackTimeout is how long an applied network change stays pending
+// before it is rolled back automatically.
+const netRollbackTimeout = 60 * time.Second
+
+// handleAPINetworkToggle flips the up/down state of the interface named in
+// the JSON request body as a pending network change and replies with the new
+// state and the seconds remaining before an automatic rollback.
+func (h *handler) handleAPINetworkToggle(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var req struct {
+		Iface string `json:"iface"`
+	}
+	decodeErr := json.NewDecoder(r.Body).Decode(&req)
+	if decodeErr != nil || req.Iface == "" {
+		writeError(w, http.StatusBadRequest, "iface is required")
+		return
+	}
+
+	wasUp, err := h.opts.App.GetInterfaceState(req.Iface)
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+
+	flip := func() (app.ActionResult, error) {
+		return app.ActionResult{}, h.opts.App.SetInterfaceState(req.Iface, !wasUp)
+	}
+	if _, err := h.applyPendingNetworkChange(flip); err != nil {
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	}
+
+	newState := "down"
+	if !wasUp {
+		newState = "up"
+	}
+	writeJSON(w, map[string]any{
+		"iface":       req.Iface,
+		"new_state":   newState,
+		"rollback_in": int(netRollbackTimeout.Seconds()),
+	})
+}
+
+// applyPendingNetworkChange runs apply as a revertible network change.
+//
+// Any change that is still pending is rolled back first. A snapshot of the
+// network state is then captured, apply is run, and a timer is armed that
+// restores the snapshot after netRollbackTimeout unless the change has been
+// confirmed or rolled back in the meantime. If apply fails, its result and
+// error are returned and nothing is left pending.
+func (h *handler) applyPendingNetworkChange(apply func() (app.ActionResult, error)) (app.ActionResult, error) {
+	if h.opts.App == nil {
+		return app.ActionResult{}, fmt.Errorf("app not configured")
+	}
+
+	// "Nothing to roll back" is the normal case and not an error here.
+	if err := h.rollbackPendingNetworkChange(); err != nil {
+		if err.Error() != "no pending network change" {
+			return app.ActionResult{}, err
+		}
+	}
+
+	before, err := h.opts.App.CaptureNetworkSnapshot()
+	if err != nil {
+		return app.ActionResult{}, err
+	}
+
+	outcome, err := apply()
+	if err != nil {
+		return outcome, err
+	}
+
+	pending := &pendingNetChange{snapshot: before}
+	pending.timer = time.AfterFunc(netRollbackTimeout, func() {
+		_ = h.opts.App.RestoreNetworkSnapshot(before)
+		h.pendingNetMu.Lock()
+		defer h.pendingNetMu.Unlock()
+		// Only clear the slot if it still refers to this change.
+		if h.pendingNet == pending {
+			h.pendingNet = nil
+		}
+	})
+
+	h.pendingNetMu.Lock()
+	h.pendingNet = pending
+	h.pendingNetMu.Unlock()
+
+	return outcome, nil
+}
+
+// handleAPINetworkConfirm keeps the pending network change, if any, by
+// clearing it and stopping its rollback timer. It always replies
+// {"status":"confirmed"}.
+func (h *handler) handleAPINetworkConfirm(w http.ResponseWriter, _ *http.Request) {
+	h.pendingNetMu.Lock()
+	pending := h.pendingNet
+	h.pendingNet = nil
+	h.pendingNetMu.Unlock()
+
+	if pending != nil {
+		pending.mu.Lock()
+		pending.timer.Stop()
+		pending.mu.Unlock()
+	}
+	reply := map[string]string{"status": "confirmed"}
+	writeJSON(w, reply)
+}
+
+// handleAPINetworkRollback reverts the pending network change immediately.
+// It answers 409 when nothing is pending and 500 when the restore fails.
+func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Request) {
+	err := h.rollbackPendingNetworkChange()
+	if err == nil {
+		writeJSON(w, map[string]string{"status": "rolled back"})
+		return
+	}
+	code := http.StatusInternalServerError
+	if err.Error() == "no pending network change" {
+		code = http.StatusConflict
+	}
+	writeError(w, code, err.Error())
+}
+
+// rollbackPendingNetworkChange clears the pending network change, stops its
+// timer and restores the snapshot taken before it was applied. It returns a
+// "no pending network change" error when nothing is pending.
+func (h *handler) rollbackPendingNetworkChange() error {
+	h.pendingNetMu.Lock()
+	pending := h.pendingNet
+	h.pendingNet = nil
+	h.pendingNetMu.Unlock()
+
+	if pending == nil {
+		return fmt.Errorf("no pending network change")
+	}
+
+	pending.mu.Lock()
+	pending.timer.Stop()
+	pending.mu.Unlock()
+
+	if h.opts.App == nil {
+		return nil
+	}
+	return h.opts.App.RestoreNetworkSnapshot(pending.snapshot)
+}
+
+// ── Display / Screen Resolution ───────────────────────────────────────────────
+
+// displayMode is one resolution offered by a display output.
+type displayMode struct {
+	Output  string `json:"output"`  // name of the output the mode belongs to
+	Mode    string `json:"mode"`    // resolution as "<width>x<height>"
+	Current bool   `json:"current"` // true when xrandr marks the mode with "*"
+}
+
+// displayInfo describes one connected display output and its modes.
+type displayInfo struct {
+	Output  string        `json:"output"`
+	Modes   []displayMode `json:"modes"`
+	Current string        `json:"current"` // active mode, "" if none is marked
+}
+
+// Patterns for the text printed by xrandr: an output header line for a
+// connected output, an indented mode line, and the "*" current-mode marker.
+var xrandrOutputRE = regexp.MustCompile(`^(\S+)\s+connected`)
+var xrandrModeRE = regexp.MustCompile(`^\s{3}(\d+x\d+)\s`)
+var xrandrCurrentRE = regexp.MustCompile(`\*`)
+
+// parseXrandrOutput extracts the connected outputs and their modes from the
+// text printed by xrandr.
+//
+// Mode lines are attributed to the most recent connected output header. Any
+// other non-indented line (a "Screen N:" line or the header of a disconnected
+// output) ends the current output, so modes listed under a disconnected
+// output are not credited to the previous one.
+//
+// The result and every Modes slice are never nil, so they encode as JSON
+// arrays even when empty.
+func parseXrandrOutput(out string) []displayInfo {
+	infos := []displayInfo{}
+	var cur *displayInfo
+	flush := func() {
+		if cur != nil {
+			infos = append(infos, *cur)
+			cur = nil
+		}
+	}
+	for _, line := range strings.Split(out, "\n") {
+		if m := xrandrOutputRE.FindStringSubmatch(line); m != nil {
+			flush()
+			cur = &displayInfo{Output: m[1], Modes: []displayMode{}}
+			continue
+		}
+		if line != "" && line[0] != ' ' && line[0] != '\t' {
+			// Header of something that is not a connected output.
+			flush()
+			continue
+		}
+		if cur == nil {
+			continue
+		}
+		if m := xrandrModeRE.FindStringSubmatch(line); m != nil {
+			isCurrent := xrandrCurrentRE.MatchString(line)
+			cur.Modes = append(cur.Modes, displayMode{Output: cur.Output, Mode: m[1], Current: isCurrent})
+			if isCurrent {
+				cur.Current = m[1]
+			}
+		}
+	}
+	flush()
+	return infos
+}
+
+// handleAPIDisplayResolutions runs xrandr and replies with the parsed list of
+// connected outputs and their modes.
+func (h *handler) handleAPIDisplayResolutions(w http.ResponseWriter, _ *http.Request) {
+	raw, runErr := exec.Command("xrandr").Output()
+	if runErr != nil {
+		writeError(w, http.StatusInternalServerError, "xrandr: "+runErr.Error())
+		return
+	}
+	displays := parseXrandrOutput(string(raw))
+	writeJSON(w, displays)
+}
+
+// Validation patterns for handleAPIDisplaySet, compiled once at package
+// initialisation instead of on every request.
+var (
+	displaySetModeRE   = regexp.MustCompile(`^\d+x\d+$`)
+	displaySetOutputRE = regexp.MustCompile(`^[A-Za-z0-9_\-]+$`)
+)
+
+// handleAPIDisplaySet switches the output named in the JSON request body to
+// the requested mode by running "xrandr --output <output> --mode <mode>".
+//
+// Both values come from the client, so they are validated before being passed
+// on: mode must look like <width>x<height> and output may contain only
+// letters, digits, "_" and "-". Invalid input is answered with 400; an xrandr
+// failure with 500 and xrandr's combined output.
+func (h *handler) handleAPIDisplaySet(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		Output string `json:"output"`
+		Mode   string `json:"mode"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil || req.Output == "" || req.Mode == "" {
+		writeError(w, http.StatusBadRequest, "output and mode are required")
+		return
+	}
+	// Validate mode looks like WxH to prevent injection
+	if !displaySetModeRE.MatchString(req.Mode) {
+		writeError(w, http.StatusBadRequest, "invalid mode format")
+		return
+	}
+	// Validate output name (no special chars)
+	if !displaySetOutputRE.MatchString(req.Output) {
+		writeError(w, http.StatusBadRequest, "invalid output name")
+		return
+	}
+	if out, err := exec.Command("xrandr", "--output", req.Output, "--mode", req.Mode).CombinedOutput(); err != nil {
+		writeError(w, http.StatusInternalServerError, "xrandr: "+strings.TrimSpace(string(out)))
+		return
+	}
+	writeJSON(w, map[string]string{"status": "ok", "output": req.Output, "mode": req.Mode})
+}
--- a/audit/internal/webui/jobs.go
+++ b/audit/internal/webui/jobs.go
@@ -0,0 +1,137 @@
+package webui
+
+import (
+	"os"
+	"strings"
+	"sync"
+	"time"
+)
+
+// jobState holds the output lines and completion status of an async job.
+// All fields are accessed under mu by the methods of this type.
+type jobState struct {
+	lines   []string      // output lines recorded so far, in arrival order
+	done    bool          // set by finish; no further subscribers are created
+	err     string        // message passed to finish
+	mu      sync.Mutex    // guards the other fields
+	subs    []chan string // live subscriber channels; closed and cleared by finish
+	cancel  func() // optional cancel function; nil if job is not cancellable
+	logPath string        // when non-empty, append also writes each line to this file
+}
+
+// abort invokes the job's cancel function and reports true. It reports false
+// without doing anything when the job is already done or has no cancel
+// function.
+func (j *jobState) abort() bool {
+	j.mu.Lock()
+	defer j.mu.Unlock()
+	if cancellable := !j.done && j.cancel != nil; !cancellable {
+		return false
+	}
+	j.cancel()
+	return true
+}
+
+// append records line, writes it to the job's log file when one is set, and
+// offers it to every subscriber. The send never blocks: a subscriber whose
+// channel buffer is full does not receive the line.
+func (j *jobState) append(line string) {
+	j.mu.Lock()
+	defer j.mu.Unlock()
+
+	j.lines = append(j.lines, line)
+	if path := j.logPath; path != "" {
+		appendJobLog(path, line)
+	}
+	for i := range j.subs {
+		select {
+		case j.subs[i] <- line:
+		default:
+		}
+	}
+}
+
+// finish marks the job as done with errMsg as its error text and closes
+// every subscriber channel.
+func (j *jobState) finish(errMsg string) {
+	j.mu.Lock()
+	defer j.mu.Unlock()
+
+	j.done, j.err = true, errMsg
+	for i := range j.subs {
+		close(j.subs[i])
+	}
+	j.subs = nil
+}
+
+// subscribe returns a copy of the lines recorded so far together with a
+// channel (buffer 256) that receives lines appended afterwards. When the job
+// is already done the channel is nil.
+func (j *jobState) subscribe() ([]string, <-chan string) {
+	j.mu.Lock()
+	defer j.mu.Unlock()
+
+	snapshot := make([]string, len(j.lines))
+	copy(snapshot, j.lines)
+
+	if !j.done {
+		updates := make(chan string, 256)
+		j.subs = append(j.subs, updates)
+		return snapshot, updates
+	}
+	return snapshot, nil
+}
+
+// jobManager manages async jobs identified by string IDs.
+type jobManager struct {
+	mu   sync.Mutex           // guards jobs
+	jobs map[string]*jobState // registered jobs keyed by ID
+}
+
+// globalJobs is the package-wide job registry; the API handlers fall back to
+// it when an ID is not found in the task queue.
+var globalJobs = &jobManager{jobs: make(map[string]*jobState)}
+
+// create registers a fresh jobState under id and returns it. The entry is
+// removed from the manager again 30 minutes later.
+func (m *jobManager) create(id string) *jobState {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	job := &jobState{}
+	m.jobs[id] = job
+	// Schedule cleanup after 30 minutes
+	time.AfterFunc(30*time.Minute, func() {
+		m.mu.Lock()
+		defer m.mu.Unlock()
+		delete(m.jobs, id)
+	})
+	return job
+}
+
+// isDone reports whether finish has been called on the job, regardless of
+// whether it ended successfully or with an error.
+func (j *jobState) isDone() bool {
+	j.mu.Lock()
+	finished := j.done
+	j.mu.Unlock()
+	return finished
+}
+
+// get looks up the job registered under id; the boolean reports whether it
+// exists.
+func (m *jobManager) get(id string) (*jobState, bool) {
+	m.mu.Lock()
+	job, found := m.jobs[id]
+	m.mu.Unlock()
+	return job, found
+}
+
+// newTaskJobState returns a jobState that logs to logPath. When that file
+// already exists and is non-empty, its lines (CRLF normalised to LF, one
+// trailing empty line dropped) are preloaded as the job's existing output.
+func newTaskJobState(logPath string) *jobState {
+	job := &jobState{logPath: logPath}
+	if logPath == "" {
+		return job
+	}
+	raw, readErr := os.ReadFile(logPath)
+	if readErr != nil || len(raw) == 0 {
+		return job
+	}
+	text := strings.ReplaceAll(string(raw), "\r\n", "\n")
+	previous := strings.Split(text, "\n")
+	if n := len(previous); n > 0 && previous[n-1] == "" {
+		previous = previous[:n-1]
+	}
+	job.lines = append(job.lines, previous...)
+	return job
+}
+
+func appendJobLog(path, line string) {
+	if path == "" {
+		return
+	}
+	f, err := os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0644)
+	if err != nil {
+		return
+	}
+	defer f.Close()
+	_, _ = f.WriteString(line + "\n")
+}
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -0,0 +1,317 @@
+package webui
+
+import (
+	"database/sql"
+	"encoding/csv"
+	"io"
+	"strconv"
+	"time"
+
+	"bee/audit/internal/platform"
+	_ "modernc.org/sqlite"
+)
+
+// metricsDBPath is the filesystem location of the metrics SQLite database.
+// NOTE(review): presumably what callers pass to openMetricsDB — confirm at the call site.
+const metricsDBPath = "/appdata/bee/metrics.db"
+
+// MetricsDB persists live metric samples to SQLite.
+// Values are created by openMetricsDB and released with Close.
+type MetricsDB struct {
+	// db is the database/sql handle every query and transaction goes through.
+	db *sql.DB
+}
+
+// openMetricsDB opens (or creates) the metrics database at the given path and
+// makes sure the schema exists.
+//
+// Connection options are passed in the `_pragma=name(value)` form understood
+// by the modernc.org/sqlite driver this file registers. The mattn-style
+// `_journal` / `_busy_timeout` keys are not recognised by that driver, so
+// with them the database would run without WAL and without a busy timeout.
+//
+// The pool is capped at one connection. On schema failure the handle is
+// closed again and the error returned.
+func openMetricsDB(path string) (*MetricsDB, error) {
+	db, err := sql.Open("sqlite", path+"?_pragma=journal_mode(WAL)&_pragma=busy_timeout(5000)")
+	if err != nil {
+		return nil, err
+	}
+	db.SetMaxOpenConns(1)
+	if err := initMetricsSchema(db); err != nil {
+		_ = db.Close()
+		return nil, err
+	}
+	return &MetricsDB{db: db}, nil
+}
+
+func initMetricsSchema(db *sql.DB) error {
+	_, err := db.Exec(`
+CREATE TABLE IF NOT EXISTS sys_metrics (
+  ts           INTEGER NOT NULL,
+  cpu_load_pct REAL,
+  mem_load_pct REAL,
+  power_w      REAL,
+  PRIMARY KEY (ts)
+);
+CREATE TABLE IF NOT EXISTS gpu_metrics (
+  ts            INTEGER NOT NULL,
+  gpu_index     INTEGER NOT NULL,
+  temp_c        REAL,
+  usage_pct     REAL,
+  mem_usage_pct REAL,
+  power_w       REAL,
+  PRIMARY KEY (ts, gpu_index)
+);
+CREATE TABLE IF NOT EXISTS fan_metrics (
+  ts   INTEGER NOT NULL,
+  name TEXT NOT NULL,
+  rpm  REAL,
+  PRIMARY KEY (ts, name)
+);
+CREATE TABLE IF NOT EXISTS temp_metrics (
+  ts      INTEGER NOT NULL,
+  name    TEXT NOT NULL,
+  grp     TEXT NOT NULL,
+  celsius REAL,
+  PRIMARY KEY (ts, name)
+);
+`)
+	return err
+}
+
+// Write stores one sample: a row in sys_metrics plus one row per GPU, fan and
+// temperature reading, all stamped with the sample's Unix-second timestamp.
+// Everything runs in a single transaction that is rolled back on the first
+// failing statement. Rows that already exist for the same key are replaced.
+func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
+	tx, err := m.db.Begin()
+	if err != nil {
+		return err
+	}
+	// Covers every early return below; the rollback result is deliberately discarded.
+	defer func() { _ = tx.Rollback() }()
+
+	stamp := s.Timestamp.Unix()
+	// put runs one statement inside the transaction and reports only its error.
+	put := func(stmt string, vals ...any) error {
+		_, execErr := tx.Exec(stmt, vals...)
+		return execErr
+	}
+
+	if err := put(
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
+		stamp, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+	); err != nil {
+		return err
+	}
+	for _, gpu := range s.GPUs {
+		if err := put(
+			`INSERT OR REPLACE INTO gpu_metrics(ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w) VALUES(?,?,?,?,?,?)`,
+			stamp, gpu.GPUIndex, gpu.TempC, gpu.UsagePct, gpu.MemUsagePct, gpu.PowerW,
+		); err != nil {
+			return err
+		}
+	}
+	for _, fan := range s.Fans {
+		if err := put(
+			`INSERT OR REPLACE INTO fan_metrics(ts,name,rpm) VALUES(?,?,?)`,
+			stamp, fan.Name, fan.RPM,
+		); err != nil {
+			return err
+		}
+	}
+	for _, temp := range s.Temps {
+		if err := put(
+			`INSERT OR REPLACE INTO temp_metrics(ts,name,grp,celsius) VALUES(?,?,?,?)`,
+			stamp, temp.Name, temp.Group, temp.Celsius,
+		); err != nil {
+			return err
+		}
+	}
+	return tx.Commit()
+}
+
+// LoadRecent returns up to n samples in chronological order (oldest first).
+func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?`, n)
+}
+
+// LoadAll returns all persisted samples in chronological order (oldest first).
+//
+// Rows are requested newest first, exactly as LoadRecent does, because that
+// is the input order loadSamples is written for; it hands the rows back
+// oldest first. No bind arguments are passed: the statement has no
+// placeholders, and a stray nil would be forwarded to the driver as a value.
+func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC`)
+}
+
+// loadSamples reconstructs LiveMetricSample rows from the normalized tables.
+//
+// query must select (ts, cpu_load_pct, mem_load_pct, power_w) from
+// sys_metrics ordered by ts, ascending or descending; args are its bind
+// values. The result is always oldest first. It is nil when the query
+// yields no rows.
+//
+// GPU, fan and temperature readings are fetched for the covered timestamp
+// range and attached to the sample with the same timestamp. Within a sample
+// GPUs are ordered by index and fans/temperatures by name, so repeated calls
+// return identical ordering. Rows that fail to scan are skipped, and a
+// failing GPU/fan/temp query leaves those readings empty (best effort). An
+// iteration error on the main query is returned.
+func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetricSample, error) {
+	rows, err := m.db.Query(query, args...)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+
+	type sysRow struct {
+		ts            int64
+		cpu, mem, pwr float64
+	}
+	var sysRows []sysRow
+	for rows.Next() {
+		var r sysRow
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+			continue
+		}
+		sysRows = append(sysRows, r)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	if len(sysRows) == 0 {
+		return nil, nil
+	}
+	// Normalise to chronological order. Only reverse when the query delivered
+	// newest-first rows; reversing an ascending result would invert both the
+	// output order and the min/max range used below.
+	if sysRows[0].ts > sysRows[len(sysRows)-1].ts {
+		for i, j := 0, len(sysRows)-1; i < j; i, j = i+1, j-1 {
+			sysRows[i], sysRows[j] = sysRows[j], sysRows[i]
+		}
+	}
+
+	// Timestamp range covered by the selected rows.
+	minTS := sysRows[0].ts
+	maxTS := sysRows[len(sysRows)-1].ts
+
+	// Each detail query is fully drained and closed before the next one is
+	// issued. Readings are grouped per timestamp in the order the database
+	// returns them, which the ORDER BY clauses make deterministic.
+	gpusByTS := map[int64][]platform.GPUMetricRow{}
+	gRows, err := m.db.Query(
+		`SELECT ts,gpu_index,temp_c,usage_pct,mem_usage_pct,power_w FROM gpu_metrics WHERE ts>=? AND ts<=? ORDER BY ts,gpu_index`,
+		minTS, maxTS,
+	)
+	if err == nil {
+		for gRows.Next() {
+			var ts int64
+			var g platform.GPUMetricRow
+			if err := gRows.Scan(&ts, &g.GPUIndex, &g.TempC, &g.UsagePct, &g.MemUsagePct, &g.PowerW); err == nil {
+				gpusByTS[ts] = append(gpusByTS[ts], g)
+			}
+		}
+		_ = gRows.Close()
+	}
+
+	fansByTS := map[int64][]platform.FanReading{}
+	fRows, err := m.db.Query(
+		`SELECT ts,name,rpm FROM fan_metrics WHERE ts>=? AND ts<=? ORDER BY ts,name`, minTS, maxTS,
+	)
+	if err == nil {
+		for fRows.Next() {
+			var ts int64
+			var name string
+			var rpm float64
+			if err := fRows.Scan(&ts, &name, &rpm); err == nil {
+				fansByTS[ts] = append(fansByTS[ts], platform.FanReading{Name: name, RPM: rpm})
+			}
+		}
+		_ = fRows.Close()
+	}
+
+	tempsByTS := map[int64][]platform.TempReading{}
+	tRows, err := m.db.Query(
+		`SELECT ts,name,grp,celsius FROM temp_metrics WHERE ts>=? AND ts<=? ORDER BY ts,name`, minTS, maxTS,
+	)
+	if err == nil {
+		for tRows.Next() {
+			var ts int64
+			var t platform.TempReading
+			if err := tRows.Scan(&ts, &t.Name, &t.Group, &t.Celsius); err == nil {
+				tempsByTS[ts] = append(tempsByTS[ts], t)
+			}
+		}
+		_ = tRows.Close()
+	}
+
+	samples := make([]platform.LiveMetricSample, len(sysRows))
+	for i, r := range sysRows {
+		samples[i] = platform.LiveMetricSample{
+			Timestamp:  time.Unix(r.ts, 0).UTC(),
+			CPULoadPct: r.cpu,
+			MemLoadPct: r.mem,
+			PowerW:     r.pwr,
+			// A timestamp without readings yields a nil slice.
+			GPUs:  gpusByTS[r.ts],
+			Fans:  fansByTS[r.ts],
+			Temps: tempsByTS[r.ts],
+		}
+	}
+	return samples, nil
+}
+
+// ExportCSV writes all sys+gpu data as CSV to w.
+//
+// Output is a header line followed by one record per (sample, GPU) pair,
+// ordered by timestamp and GPU index. Samples without any GPU row get a
+// single record whose five GPU columns are empty. Rows that fail to scan are
+// skipped. The function returns the first CSV write/flush error, otherwise
+// any error that ended row iteration early, so a truncated export is not
+// reported as success.
+func (m *MetricsDB) ExportCSV(w io.Writer) error {
+	rows, err := m.db.Query(`
+		SELECT s.ts, s.cpu_load_pct, s.mem_load_pct, s.power_w,
+		       g.gpu_index, g.temp_c, g.usage_pct, g.mem_usage_pct, g.power_w
+		FROM sys_metrics s
+		LEFT JOIN gpu_metrics g ON g.ts = s.ts
+		ORDER BY s.ts, g.gpu_index
+	`)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+
+	cw := csv.NewWriter(w)
+	// Write errors are surfaced through cw.Error() after the final Flush.
+	_ = cw.Write([]string{"ts", "cpu_load_pct", "mem_load_pct", "sys_power_w", "gpu_index", "gpu_temp_c", "gpu_usage_pct", "gpu_mem_pct", "gpu_power_w"})
+	for rows.Next() {
+		var ts int64
+		var cpu, mem, pwr float64
+		// The LEFT JOIN leaves the GPU columns NULL for samples without GPUs.
+		var gpuIdx sql.NullInt64
+		var gpuTemp, gpuUse, gpuMem, gpuPow sql.NullFloat64
+		if err := rows.Scan(&ts, &cpu, &mem, &pwr, &gpuIdx, &gpuTemp, &gpuUse, &gpuMem, &gpuPow); err != nil {
+			continue
+		}
+		row := []string{
+			strconv.FormatInt(ts, 10),
+			strconv.FormatFloat(cpu, 'f', 2, 64),
+			strconv.FormatFloat(mem, 'f', 2, 64),
+			strconv.FormatFloat(pwr, 'f', 1, 64),
+		}
+		if gpuIdx.Valid {
+			row = append(row,
+				strconv.FormatInt(gpuIdx.Int64, 10),
+				strconv.FormatFloat(gpuTemp.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuUse.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuMem.Float64, 'f', 1, 64),
+				strconv.FormatFloat(gpuPow.Float64, 'f', 1, 64),
+			)
+		} else {
+			row = append(row, "", "", "", "", "")
+		}
+		_ = cw.Write(row)
+	}
+	cw.Flush()
+	if err := cw.Error(); err != nil {
+		return err
+	}
+	return rows.Err()
+}
+
+// Close releases the underlying database handle. The close error is
+// intentionally discarded.
+func (m *MetricsDB) Close() {
+	_ = m.db.Close()
+}
+
+func nullFloat(v float64) sql.NullFloat64 {
+	return sql.NullFloat64{Float64: v, Valid: true}
+}
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -7,9 +7,89 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
+
+	"bee/audit/internal/platform"
 )

-func TestRootRendersShellWithIframe(t *testing.T) {
+// TestChartLegendNumber pins the legend formatting: values below 1000 are
+// rounded to whole numbers, larger ones are shown in "k" units with a comma
+// as the decimal separator. The test stops at the first mismatch.
+func TestChartLegendNumber(t *testing.T) {
+	cases := []struct {
+		in   float64
+		want string
+	}{
+		{0.4, "0"},
+		{61.5, "62"},
+		{999.4, "999"},
+		{1200, "1,2k"},
+		{1250, "1,25k"},
+		{1310, "1,31k"},
+		{1500, "1,5k"},
+		{2600, "2,6k"},
+		{10200, "10k"},
+	}
+	for _, c := range cases {
+		got := chartLegendNumber(c.in)
+		if got == c.want {
+			continue
+		}
+		t.Fatalf("chartLegendNumber(%v)=%q want %q", c.in, got, c.want)
+	}
+}
+
+// TestChartDataFromSamplesUsesFullHistory feeds three samples, each with one
+// GPU 0 reading, into chartDataFromSamples for the "gpu-all-power" chart and
+// checks that every sample shows up: one series named "GPU 0", one label and
+// one data point per sample, with the first and last power values intact.
+func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
+	// sampleAt builds a sample taken minutesAgo minutes in the past that
+	// carries exactly one GPU row.
+	sampleAt := func(minutesAgo int, cpu, mem, sysPower float64, gpu platform.GPUMetricRow) platform.LiveMetricSample {
+		return platform.LiveMetricSample{
+			Timestamp:  time.Now().Add(-time.Duration(minutesAgo) * time.Minute),
+			CPULoadPct: cpu,
+			MemLoadPct: mem,
+			PowerW:     sysPower,
+			GPUs:       []platform.GPUMetricRow{gpu},
+		}
+	}
+	samples := []platform.LiveMetricSample{
+		sampleAt(3, 10, 20, 300, platform.GPUMetricRow{GPUIndex: 0, UsagePct: 90, MemUsagePct: 5, PowerW: 120, TempC: 50}),
+		sampleAt(2, 30, 40, 320, platform.GPUMetricRow{GPUIndex: 0, UsagePct: 95, MemUsagePct: 7, PowerW: 125, TempC: 51}),
+		sampleAt(1, 50, 60, 340, platform.GPUMetricRow{GPUIndex: 0, UsagePct: 97, MemUsagePct: 9, PowerW: 130, TempC: 52}),
+	}
+
+	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	switch {
+	case !ok:
+		t.Fatal("chartDataFromSamples returned ok=false")
+	case title != "GPU Power":
+		t.Fatalf("title=%q", title)
+	case len(names) != 1 || names[0] != "GPU 0":
+		t.Fatalf("names=%v", names)
+	case len(labels) != len(samples):
+		t.Fatalf("labels len=%d want %d", len(labels), len(samples))
+	case len(datasets) != 1 || len(datasets[0]) != len(samples):
+		t.Fatalf("datasets shape=%v", datasets)
+	}
+	series := datasets[0]
+	if got := series[0]; got != 120 {
+		t.Fatalf("datasets[0][0]=%v want 120", got)
+	}
+	if got := series[2]; got != 130 {
+		t.Fatalf("datasets[0][2]=%v want 130", got)
+	}
+}
+
+func TestRootRendersDashboard(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
 	exportDir := filepath.Join(dir, "export")
@@ -31,11 +111,12 @@ func TestRootRendersShellWithIframe(t *testing.T) {
 	if first.Code != http.StatusOK {
 		t.Fatalf("first status=%d", first.Code)
 	}
-	if !strings.Contains(first.Body.String(), `iframe`) || !strings.Contains(first.Body.String(), `src="/viewer"`) {
-		t.Fatalf("first body missing iframe viewer: %s", first.Body.String())
+	// Dashboard should contain the audit nav link and hardware summary
+	if !strings.Contains(first.Body.String(), `href="/audit"`) {
+		t.Fatalf("first body missing audit nav link: %s", first.Body.String())
 	}
-	if !strings.Contains(first.Body.String(), "/export/support.tar.gz") {
-		t.Fatalf("first body missing support bundle link: %s", first.Body.String())
+	if !strings.Contains(first.Body.String(), `/viewer`) {
+		t.Fatalf("first body missing viewer link: %s", first.Body.String())
 	}
 	if got := first.Header().Get("Cache-Control"); got != "no-store" {
 		t.Fatalf("first cache-control=%q", got)
@@ -50,8 +131,30 @@ func TestRootRendersShellWithIframe(t *testing.T) {
 	if second.Code != http.StatusOK {
 		t.Fatalf("second status=%d", second.Code)
 	}
-	if !strings.Contains(second.Body.String(), `src="/viewer"`) {
-		t.Fatalf("second body missing iframe viewer: %s", second.Body.String())
+	if !strings.Contains(second.Body.String(), `Hardware Summary`) {
+		t.Fatalf("second body missing hardware summary: %s", second.Body.String())
+	}
+}
+
+// TestAuditPageRendersViewerFrameAndActions serves GET /audit from a handler
+// backed by a minimal audit JSON file and checks for status 200, the viewer
+// iframe markup and the openAuditModal() trigger in the page body.
+func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "audit.json")
+	if err := os.WriteFile(path, []byte(`{"collected_at":"2026-03-15T00:00:00Z"}`), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	handler := NewHandler(HandlerOptions{AuditPath: path})
+	rec := httptest.NewRecorder()
+	handler.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/audit", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	body := rec.Body.String()
+	if !strings.Contains(body, `iframe class="viewer-frame" src="/viewer"`) {
+		t.Fatalf("audit page missing viewer frame: %s", body)
+	}
+	if !strings.Contains(body, `openAuditModal()`) {
+		t.Fatalf("audit page missing action modal trigger: %s", body)
 	}
 }

@@ -103,8 +206,8 @@ func TestAuditJSONServesLatestSnapshot(t *testing.T) {
 	if rec.Code != http.StatusOK {
 		t.Fatalf("status=%d", rec.Code)
 	}
-	if got := strings.TrimSpace(rec.Body.String()); got != body {
-		t.Fatalf("body=%q want %q", got, body)
+	if !strings.Contains(rec.Body.String(), "SERIAL-API") {
+		t.Fatalf("body missing expected serial: %s", rec.Body.String())
 	}
 	if got := rec.Header().Get("Content-Type"); !strings.Contains(got, "application/json") {
 		t.Fatalf("content-type=%q", got)
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -0,0 +1,706 @@
+package webui
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"sync"
+	"time"
+
+	"bee/audit/internal/app"
+	"bee/audit/internal/platform"
+)
+
+// Task statuses.
+// Pending and running tasks count as active; done, failed and cancelled
+// tasks are finished and subject to history pruning (see prune).
+const (
+	// TaskPending: queued and eligible to be picked by the worker.
+	TaskPending   = "pending"
+	// TaskRunning: currently being executed by the worker.
+	TaskRunning   = "running"
+	// TaskDone: finished without a recorded job error.
+	TaskDone      = "done"
+	// TaskFailed: finished with a job error (copied into Task.ErrMsg).
+	TaskFailed    = "failed"
+	// TaskCancelled: cancelled through the cancel handlers.
+	TaskCancelled = "cancelled"
+)
+
+// taskNames maps target → human-readable name for validate (SAT) runs.
+// NOTE(review): "install" has a name here but no case in runTask's switch in
+// this file — presumably handled by a different code path; confirm.
+var taskNames = map[string]string{
+	"nvidia":          "NVIDIA SAT",
+	"nvidia-stress":   "NVIDIA GPU Stress",
+	"memory":          "Memory SAT",
+	"storage":         "Storage SAT",
+	"cpu":             "CPU SAT",
+	"amd":             "AMD GPU SAT",
+	"amd-mem":         "AMD GPU MEM Integrity",
+	"amd-bandwidth":   "AMD GPU MEM Bandwidth",
+	"amd-stress":      "AMD GPU Burn-in",
+	"memory-stress":   "Memory Burn-in",
+	"sat-stress":      "SAT Stress (stressapptest)",
+	"platform-stress": "Platform Thermal Cycling",
+	"audit":           "Audit",
+	"install":         "Install to Disk",
+	"install-to-ram":  "Install to RAM",
+}
+
+// burnNames maps target → human-readable name when a burn profile is set.
+// Only a subset of the targets in taskNames has a burn-in name.
+var burnNames = map[string]string{
+	"nvidia": "NVIDIA Burn-in",
+	"memory": "Memory Burn-in",
+	"cpu":    "CPU Burn-in",
+	"amd":    "AMD GPU Burn-in",
+}
+
+// Task represents one unit of work in the queue.
+// The exported fields form the JSON shape returned by the task list API;
+// the unexported ones exist only in memory.
+type Task struct {
+	ID        string     `json:"id"`
+	Name      string     `json:"name"`
+	// Target selects what runTask executes (e.g. "nvidia", "memory", "audit").
+	Target    string     `json:"target"`
+	// Priority: higher values are picked first; ties go to the older task.
+	Priority  int        `json:"priority"`
+	// Status is one of the Task* status constants.
+	Status    string     `json:"status"`
+	CreatedAt time.Time  `json:"created_at"`
+	// StartedAt is set by the worker when it picks the task up.
+	StartedAt *time.Time `json:"started_at,omitempty"`
+	// DoneAt is set when the task finishes or is cancelled.
+	DoneAt    *time.Time `json:"done_at,omitempty"`
+	// ErrMsg carries the job error of a failed task.
+	ErrMsg    string     `json:"error,omitempty"`
+	// LogPath is the per-task log file, assigned by assignTaskLogPathLocked.
+	LogPath   string     `json:"log_path,omitempty"`
+
+	// runtime fields (not serialised)
+	// job is created by the worker when the task starts; nil before that.
+	job    *jobState
+	params taskParams
+}
+
+// taskParams holds optional parameters parsed from the run request.
+// Zero values mean "not specified"; runTask substitutes profile presets or
+// defaults where it has them.
+type taskParams struct {
+	// Duration is the run length in seconds for the duration-based targets.
+	Duration          int    `json:"duration,omitempty"`
+	// DiagLevel is the diagnostic level passed to the NVIDIA acceptance pack.
+	DiagLevel         int    `json:"diag_level,omitempty"`
+	// GPUIndices / ExcludeGPUIndices are forwarded to the NVIDIA packs.
+	GPUIndices        []int  `json:"gpu_indices,omitempty"`
+	ExcludeGPUIndices []int  `json:"exclude_gpu_indices,omitempty"`
+	// Loader is forwarded as NvidiaStressOptions.Loader.
+	Loader            string `json:"loader,omitempty"`
+	// BurnProfile names a preset ("overnight", "acceptance"; anything else
+	// resolves to the smallest preset).
+	BurnProfile       string `json:"burn_profile,omitempty"`
+	// DisplayName is not read anywhere in this part of the file.
+	// NOTE(review): presumably a caller-supplied task name — confirm.
+	DisplayName       string `json:"display_name,omitempty"`
+	Device            string `json:"device,omitempty"` // for install
+}
+
+// persistedTask is the on-disk form of a Task written by persistLocked and
+// read back by loadLocked. It mirrors Task's exported fields and additionally
+// serialises the run parameters; the in-memory job state is not stored.
+type persistedTask struct {
+	ID        string     `json:"id"`
+	Name      string     `json:"name"`
+	Target    string     `json:"target"`
+	Priority  int        `json:"priority"`
+	Status    string     `json:"status"`
+	CreatedAt time.Time  `json:"created_at"`
+	StartedAt *time.Time `json:"started_at,omitempty"`
+	DoneAt    *time.Time `json:"done_at,omitempty"`
+	ErrMsg    string     `json:"error,omitempty"`
+	LogPath   string     `json:"log_path,omitempty"`
+	Params    taskParams `json:"params,omitempty"`
+}
+
+// burnPreset is the pair of defaults a burn profile resolves to
+// (see resolveBurnPreset).
+type burnPreset struct {
+	// NvidiaDiag is the diagnostic level used for the "nvidia" target.
+	NvidiaDiag  int
+	// DurationSec is the run length, in seconds, for duration-based targets.
+	DurationSec int
+}
+
+// resolveBurnPreset translates a burn profile name into its preset:
+// "overnight" → diag level 4 for 8 hours, "acceptance" → diag level 3 for
+// 1 hour, and any other value (including "") → diag level 1 for 5 minutes.
+func resolveBurnPreset(profile string) burnPreset {
+	preset := burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}
+	if profile == "overnight" {
+		preset = burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}
+	} else if profile == "acceptance" {
+		preset = burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}
+	}
+	return preset
+}
+
+// resolvePlatformStressPreset maps a burn profile name to the load/idle
+// cycle list used for the "platform-stress" target:
+//   - "overnight":  eight cycles of 600 load with idle rotating 120/60/30
+//   - "acceptance": four cycles of 300 load with idle alternating 60/30
+//   - anything else (smoke): two cycles of 90 load with idle 60 then 30
+//
+// The field names suggest the numbers are seconds.
+func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
+	switch profile {
+	case "overnight":
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+			{LoadSec: 600, IdleSec: 30},
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+			{LoadSec: 600, IdleSec: 30},
+			{LoadSec: 600, IdleSec: 120},
+			{LoadSec: 600, IdleSec: 60},
+		}}
+	case "acceptance":
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 300, IdleSec: 60},
+			{LoadSec: 300, IdleSec: 30},
+			{LoadSec: 300, IdleSec: 60},
+			{LoadSec: 300, IdleSec: 30},
+		}}
+	default: // smoke
+		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
+			{LoadSec: 90, IdleSec: 60},
+			{LoadSec: 90, IdleSec: 30},
+		}}
+	}
+}
+
+// taskQueue manages a priority-ordered list of tasks and runs them one at a time.
+// Methods with a "Locked" suffix, as well as prune and nextPending, expect
+// the caller to hold mu.
+type taskQueue struct {
+	// mu guards every field below.
+	mu        sync.Mutex
+	// tasks holds active tasks plus a bounded history of finished ones.
+	tasks     []*Task
+	// trigger wakes the worker; sends are non-blocking.
+	trigger   chan struct{}
+	opts      *HandlerOptions // set by startWorker
+	// statePath is the JSON file the queue is persisted to ("" disables persistence).
+	statePath string
+	// logsDir is the directory per-task log files are placed in.
+	logsDir   string
+	// started records that the worker goroutine has been launched.
+	started   bool
+}
+
+// globalQueue is the package-level task queue used by the HTTP handlers.
+// The trigger channel has capacity 1, so at most one wake-up is buffered.
+var globalQueue = &taskQueue{trigger: make(chan struct{}, 1)}
+
+// maxTaskHistory caps how many finished (done/failed/cancelled) tasks prune keeps.
+const maxTaskHistory = 50
+
+// Package-level function variables wrapping the corresponding *app.App
+// methods one-to-one. runTask calls the packs through these variables rather
+// than the methods directly.
+// NOTE(review): presumably a seam that lets tests substitute fakes — confirm
+// against tasks_test.go.
+var (
+	runMemoryAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunMemoryAcceptancePackCtx(ctx, baseDir, logFunc)
+	}
+	runStorageAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunStorageAcceptancePackCtx(ctx, baseDir, logFunc)
+	}
+	runCPUAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+		return a.RunCPUAcceptancePackCtx(ctx, baseDir, durationSec, logFunc)
+	}
+	runAMDAcceptancePackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDAcceptancePackCtx(ctx, baseDir, logFunc)
+	}
+	runAMDMemIntegrityPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemIntegrityPackCtx(ctx, baseDir, logFunc)
+	}
+	runAMDMemBandwidthPackCtx = func(a *app.App, ctx context.Context, baseDir string, logFunc func(string)) (string, error) {
+		return a.RunAMDMemBandwidthPackCtx(ctx, baseDir, logFunc)
+	}
+	runNvidiaStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
+		return a.RunNvidiaStressPackCtx(ctx, baseDir, opts, logFunc)
+	}
+	runAMDStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+		return a.RunAMDStressPackCtx(ctx, baseDir, durationSec, logFunc)
+	}
+	runMemoryStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+		return a.RunMemoryStressPackCtx(ctx, baseDir, durationSec, logFunc)
+	}
+	runSATStressPackCtx = func(a *app.App, ctx context.Context, baseDir string, durationSec int, logFunc func(string)) (string, error) {
+		return a.RunSATStressPackCtx(ctx, baseDir, durationSec, logFunc)
+	}
+)
+
+// enqueue appends t to the queue, assigning its log path, trimming finished
+// history and persisting the new state while the queue lock is held. Once
+// the lock is released the worker is nudged.
+func (q *taskQueue) enqueue(t *Task) {
+	func() {
+		q.mu.Lock()
+		defer q.mu.Unlock()
+		q.assignTaskLogPathLocked(t)
+		q.tasks = append(q.tasks, t)
+		q.prune()
+		q.persistLocked()
+	}()
+
+	// Non-blocking wake-up: if a signal is already buffered the send is skipped.
+	select {
+	case q.trigger <- struct{}{}:
+	default:
+	}
+}
+
+// prune keeps every active task and at most maxTaskHistory finished ones
+// (done, failed or cancelled), discarding the finished tasks that sit
+// earliest in the list. Afterwards q.tasks holds the active tasks first,
+// followed by the retained finished ones. The caller must hold q.mu.
+func (q *taskQueue) prune() {
+	var finished, live []*Task
+	for _, task := range q.tasks {
+		if task.Status == TaskDone || task.Status == TaskFailed || task.Status == TaskCancelled {
+			finished = append(finished, task)
+			continue
+		}
+		live = append(live, task)
+	}
+	if overflow := len(finished) - maxTaskHistory; overflow > 0 {
+		finished = finished[overflow:]
+	}
+	q.tasks = append(live, finished...)
+}
+
+// nextPending picks the pending task that should run next: the one with the
+// highest Priority, and among equals the one created earliest. It returns
+// nil when nothing is pending. The caller must hold q.mu.
+func (q *taskQueue) nextPending() *Task {
+	var winner *Task
+	for _, cand := range q.tasks {
+		if cand.Status != TaskPending {
+			continue
+		}
+		// Cases are evaluated left to right, so the nil check protects the
+		// comparisons that dereference winner.
+		switch {
+		case winner == nil,
+			cand.Priority > winner.Priority,
+			cand.Priority == winner.Priority && cand.CreatedAt.Before(winner.CreatedAt):
+			winner = cand
+		}
+	}
+	return winner
+}
+
+// findByID returns the first task whose ID equals id, searching under the
+// queue lock. The boolean is false when no task matches.
+func (q *taskQueue) findByID(id string) (*Task, bool) {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+	for i := range q.tasks {
+		if task := q.tasks[i]; task.ID == id {
+			return task, true
+		}
+	}
+	return nil, false
+}
+
+// findJob resolves a task ID to the task's jobState (used for SSE streaming).
+// It reports false when the task does not exist or has no job attached.
+func (q *taskQueue) findJob(id string) (*jobState, bool) {
+	if task, found := q.findByID(id); found && task.job != nil {
+		return task.job, true
+	}
+	return nil, false
+}
+
+// hasActiveTarget reports whether any pending or running task in the queue
+// has the given target.
+func (q *taskQueue) hasActiveTarget(target string) bool {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+	for _, task := range q.tasks {
+		active := task.Status == TaskPending || task.Status == TaskRunning
+		if active && task.Target == target {
+			return true
+		}
+	}
+	return false
+}
+
+// snapshot returns value copies of all tasks, sorted for display: running
+// tasks first, then pending, then everything else. Inside each of those
+// groups higher priority comes first and ties are ordered by creation time,
+// oldest first. The sort is stable.
+func (q *taskQueue) snapshot() []Task {
+	q.mu.Lock()
+	defer q.mu.Unlock()
+
+	view := make([]Task, 0, len(q.tasks))
+	for _, task := range q.tasks {
+		view = append(view, *task)
+	}
+	sort.SliceStable(view, func(a, b int) bool {
+		x, y := &view[a], &view[b]
+		if rx, ry := statusOrder(x.Status), statusOrder(y.Status); rx != ry {
+			return rx < ry
+		}
+		if x.Priority != y.Priority {
+			return x.Priority > y.Priority
+		}
+		return x.CreatedAt.Before(y.CreatedAt)
+	})
+	return view
+}
+
+// statusOrder ranks a status for display sorting: running is 0, pending is
+// 1, and every other status is 2.
+func statusOrder(s string) int {
+	if s == TaskRunning {
+		return 0
+	}
+	if s == TaskPending {
+		return 1
+	}
+	return 2
+}
+
+// startWorker points the queue at opts, derives the state file and log
+// directory from opts.ExportDir (creating the directory, best effort), and
+// on the first call loads persisted state and launches the worker goroutine.
+// Later calls only refresh the configuration. If a pending task exists when
+// the call finishes, the worker is woken.
+func (q *taskQueue) startWorker(opts *HandlerOptions) {
+	q.mu.Lock()
+	exportDir := opts.ExportDir
+	q.opts = opts
+	q.statePath = filepath.Join(exportDir, "tasks-state.json")
+	q.logsDir = filepath.Join(exportDir, "tasks")
+	_ = os.MkdirAll(q.logsDir, 0755)
+	if !q.started {
+		q.loadLocked()
+		q.started = true
+		go q.worker()
+	}
+	pending := q.nextPending()
+	q.mu.Unlock()
+
+	if pending == nil {
+		return
+	}
+	// Non-blocking wake-up; a signal that is already buffered is enough.
+	select {
+	case q.trigger <- struct{}{}:
+	default:
+	}
+}
+
+// worker is the queue's run loop. It blocks until triggered, switches the
+// CPU governor to "performance", then drains the queue by repeatedly taking
+// the next pending task and running it to completion, one at a time. When
+// nothing is pending it switches the governor to "powersave" and waits for
+// the next trigger. It never returns.
+func (q *taskQueue) worker() {
+	for {
+		<-q.trigger
+		setCPUGovernor("performance")
+		for {
+			q.mu.Lock()
+			t := q.nextPending()
+			if t == nil {
+				q.mu.Unlock()
+				break
+			}
+			// Mark the task as running and attach a fresh job (seeded from
+			// any existing log file) before releasing the lock.
+			now := time.Now()
+			t.Status = TaskRunning
+			t.StartedAt = &now
+			t.DoneAt = nil
+			t.ErrMsg = ""
+			j := newTaskJobState(t.LogPath)
+			ctx, cancel := context.WithCancel(context.Background())
+			j.cancel = cancel
+			t.job = j
+			q.persistLocked()
+			q.mu.Unlock()
+
+			// The task runs without the queue lock held so handlers can
+			// list, cancel and stream in the meantime.
+			q.runTask(t, j, ctx)
+
+			q.mu.Lock()
+			now2 := time.Now()
+			t.DoneAt = &now2
+			// A cancel handler may have changed Status while the task ran;
+			// in that case its value is kept.
+			if t.Status == TaskRunning { // not cancelled externally
+				if j.err != "" {
+					t.Status = TaskFailed
+					t.ErrMsg = j.err
+				} else {
+					t.Status = TaskDone
+				}
+			}
+			q.prune()
+			q.persistLocked()
+			q.mu.Unlock()
+		}
+		setCPUGovernor("powersave")
+	}
+}
+
+// setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files.
+// Silently ignores errors (e.g. when cpufreq is not available).
+func setCPUGovernor(governor string) {
+	matches, err := filepath.Glob("/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor")
+	if err != nil || len(matches) == 0 {
+		return
+	}
+	for _, path := range matches {
+		_ = os.WriteFile(path, []byte(governor), 0644)
+	}
+}
+
+// runTask executes the work for a task, writing output to j.
+//
+// The task's Target selects which pack or action runs; optional values come
+// from t.params. ctx is passed to the actions that accept one, so they can
+// be interrupted on cancel. The job is always finished before returning:
+// with "" on success, "aborted" when an error coincides with a cancelled
+// ctx, and the error text otherwise. A produced archive path is logged as
+// the last line.
+func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
+	if q.opts == nil || q.opts.App == nil {
+		j.append("ERROR: app not configured")
+		j.finish("app not configured")
+		return
+	}
+	a := q.opts.App
+
+	// The job starts out holding whatever an earlier run left in the task's
+	// log file. Check for that history before logging anything, otherwise
+	// the "Starting" line itself would make every run look like a recovery.
+	recovered := len(j.lines) > 0
+	j.append(fmt.Sprintf("Starting %s...", t.Name))
+	if recovered {
+		j.append(fmt.Sprintf("Recovered after bee-web restart at %s", time.Now().UTC().Format(time.RFC3339)))
+	}
+
+	// burnDuration returns the requested duration in seconds, falling back
+	// to the burn profile's preset when a profile is set and no positive
+	// duration was given. Without either it returns the raw (<= 0) value.
+	burnDuration := func() int {
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		return dur
+	}
+
+	var (
+		archive string
+		err     error
+	)
+
+	switch t.Target {
+	case "nvidia":
+		diagLevel := t.params.DiagLevel
+		if t.params.BurnProfile != "" && diagLevel <= 0 {
+			diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
+		}
+		if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
+			result, e := a.RunNvidiaAcceptancePackWithOptions(
+				ctx, "", diagLevel, t.params.GPUIndices, j.append,
+			)
+			if e != nil {
+				err = e
+			} else {
+				archive = result.Body
+			}
+		} else {
+			// No options requested: use the plain pack, which takes no ctx.
+			archive, err = a.RunNvidiaAcceptancePack("", j.append)
+		}
+	case "nvidia-stress":
+		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
+			DurationSec:       burnDuration(),
+			Loader:            t.params.Loader,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+		}, j.append)
+	case "memory":
+		archive, err = runMemoryAcceptancePackCtx(a, ctx, "", j.append)
+	case "storage":
+		archive, err = runStorageAcceptancePackCtx(a, ctx, "", j.append)
+	case "cpu":
+		dur := burnDuration()
+		if dur <= 0 {
+			// The CPU pack is the only one given an explicit default here.
+			dur = 60
+		}
+		archive, err = runCPUAcceptancePackCtx(a, ctx, "", dur, j.append)
+	case "amd":
+		archive, err = runAMDAcceptancePackCtx(a, ctx, "", j.append)
+	case "amd-mem":
+		archive, err = runAMDMemIntegrityPackCtx(a, ctx, "", j.append)
+	case "amd-bandwidth":
+		archive, err = runAMDMemBandwidthPackCtx(a, ctx, "", j.append)
+	case "amd-stress":
+		archive, err = runAMDStressPackCtx(a, ctx, "", burnDuration(), j.append)
+	case "memory-stress":
+		archive, err = runMemoryStressPackCtx(a, ctx, "", burnDuration(), j.append)
+	case "sat-stress":
+		archive, err = runSATStressPackCtx(a, ctx, "", burnDuration(), j.append)
+	case "platform-stress":
+		opts := resolvePlatformStressPreset(t.params.BurnProfile)
+		archive, err = a.RunPlatformStress(ctx, "", opts, j.append)
+	case "audit":
+		result, e := a.RunAuditNow(q.opts.RuntimeMode)
+		if e != nil {
+			err = e
+		} else {
+			for _, line := range splitLines(result.Body) {
+				j.append(line)
+			}
+		}
+	case "install-to-ram":
+		err = a.RunInstallToRAM(ctx, j.append)
+	default:
+		j.append("ERROR: unknown target: " + t.Target)
+		j.finish("unknown target")
+		return
+	}
+
+	if err != nil {
+		// An error that coincides with a cancelled ctx is reported as an abort.
+		if ctx.Err() != nil {
+			j.append("Aborted.")
+			j.finish("aborted")
+		} else {
+			j.append("ERROR: " + err.Error())
+			j.finish(err.Error())
+		}
+		return
+	}
+	if archive != "" {
+		j.append("Archive: " + archive)
+	}
+	j.finish("")
+}
+
+// splitLines splits s on '\n' and returns only the non-empty pieces. The
+// result is nil when s contains no non-empty line.
+func splitLines(s string) []string {
+	var kept []string
+	for _, piece := range splitNL(s) {
+		if piece == "" {
+			continue
+		}
+		kept = append(kept, piece)
+	}
+	return kept
+}
+
+// splitNL cuts s at every '\n' and returns the pieces without the
+// separators. The text after the last newline is always included, so the
+// result has one more element than s has newlines: "" yields [""] and
+// "a\n" yields ["a", ""].
+func splitNL(s string) []string {
+	var parts []string
+	from := 0
+	// '\n' is a single-byte character, so scanning bytes finds the same
+	// positions as scanning runes.
+	for i := 0; i < len(s); i++ {
+		if s[i] != '\n' {
+			continue
+		}
+		parts = append(parts, s[from:i])
+		from = i + 1
+	}
+	return append(parts, s[from:])
+}
+
+// ── HTTP handlers ─────────────────────────────────────────────────────────────
+
+// handleAPITasksList responds with the display-sorted snapshot of the global
+// task queue as JSON.
+func (h *handler) handleAPITasksList(w http.ResponseWriter, _ *http.Request) {
+	writeJSON(w, globalQueue.snapshot())
+}
+
+// handleAPITasksCancel cancels the task named by the "id" path value.
+// Unknown IDs get 404. Pending and running tasks are marked cancelled with
+// the current time as DoneAt (a running task's job is aborted first), the
+// queue is persisted and {"status":"cancelled"} is returned. Tasks in any
+// other state get 409.
+func (h *handler) handleAPITasksCancel(w http.ResponseWriter, r *http.Request) {
+	t, found := globalQueue.findByID(r.PathValue("id"))
+	if !found {
+		writeError(w, http.StatusNotFound, "task not found")
+		return
+	}
+
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if t.Status != TaskPending && t.Status != TaskRunning {
+		writeError(w, http.StatusConflict, "task is not running or pending")
+		return
+	}
+	if t.Status == TaskRunning && t.job != nil {
+		t.job.abort()
+	}
+	t.Status = TaskCancelled
+	cancelledAt := time.Now()
+	t.DoneAt = &cancelledAt
+	globalQueue.persistLocked()
+	writeJSON(w, map[string]string{"status": "cancelled"})
+}
+
+// handleAPITasksPriority adjusts a pending task's priority by the "delta"
+// given in the JSON request body and returns the new priority. Unknown IDs
+// get 404, an undecodable body 400, and tasks that are not pending 409.
+func (h *handler) handleAPITasksPriority(w http.ResponseWriter, r *http.Request) {
+	t, found := globalQueue.findByID(r.PathValue("id"))
+	if !found {
+		writeError(w, http.StatusNotFound, "task not found")
+		return
+	}
+
+	var body struct {
+		Delta int `json:"delta"`
+	}
+	if decodeErr := json.NewDecoder(r.Body).Decode(&body); decodeErr != nil {
+		writeError(w, http.StatusBadRequest, "invalid body")
+		return
+	}
+
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if t.Status == TaskPending {
+		t.Priority += body.Delta
+		globalQueue.persistLocked()
+		writeJSON(w, map[string]int{"priority": t.Priority})
+		return
+	}
+	writeError(w, http.StatusConflict, "only pending tasks can be reprioritised")
+}
+
+// handleAPITasksCancelAll cancels every pending and running task in one
+// pass (aborting the job of running ones), persists the queue and returns
+// how many tasks were cancelled. All of them share the same DoneAt time.
+func (h *handler) handleAPITasksCancelAll(w http.ResponseWriter, _ *http.Request) {
+	globalQueue.mu.Lock()
+	now := time.Now()
+	cancelled := 0
+	for _, t := range globalQueue.tasks {
+		running := t.Status == TaskRunning
+		if !running && t.Status != TaskPending {
+			continue
+		}
+		if running && t.job != nil {
+			t.job.abort()
+		}
+		t.Status = TaskCancelled
+		t.DoneAt = &now
+		cancelled++
+	}
+	globalQueue.persistLocked()
+	globalQueue.mu.Unlock()
+	writeJSON(w, map[string]int{"cancelled": cancelled})
+}
+
+func (h *handler) handleAPITasksStream(w http.ResponseWriter, r *http.Request) {
+	id := r.PathValue("id")
+	// Wait up to 5s for the task to get a job (it may be pending)
+	deadline := time.Now().Add(5 * time.Second)
+	var j *jobState
+	for time.Now().Before(deadline) {
+		if jj, ok := globalQueue.findJob(id); ok {
+			j = jj
+			break
+		}
+		time.Sleep(200 * time.Millisecond)
+	}
+	if j == nil {
+		http.Error(w, "task not found or not yet started", http.StatusNotFound)
+		return
+	}
+	streamJob(w, r, j)
+}
+
+// assignTaskLogPathLocked gives t the log path "<logsDir>/<ID>.log" unless
+// it already has a log path, the queue has no logs directory, or the task
+// has no ID. The caller must hold q.mu.
+func (q *taskQueue) assignTaskLogPathLocked(t *Task) {
+	if t.LogPath == "" && q.logsDir != "" && t.ID != "" {
+		t.LogPath = filepath.Join(q.logsDir, t.ID+".log")
+	}
+}
+
+// loadLocked restores the queue from the state file. A missing, empty or
+// unparsable file leaves the queue untouched. Each restored task gets a log
+// path if it lacks one. Tasks that were pending or running when the state
+// was written are reset to pending (DoneAt and ErrMsg cleared) so the worker
+// runs them again. The list is then pruned and written back. The caller
+// must hold q.mu.
+func (q *taskQueue) loadLocked() {
+	if q.statePath == "" {
+		return
+	}
+	raw, err := os.ReadFile(q.statePath)
+	if err != nil || len(raw) == 0 {
+		return
+	}
+	var stored []persistedTask
+	if json.Unmarshal(raw, &stored) != nil {
+		return
+	}
+	for i := range stored {
+		rec := &stored[i]
+		task := &Task{
+			ID:        rec.ID,
+			Name:      rec.Name,
+			Target:    rec.Target,
+			Priority:  rec.Priority,
+			Status:    rec.Status,
+			CreatedAt: rec.CreatedAt,
+			StartedAt: rec.StartedAt,
+			DoneAt:    rec.DoneAt,
+			ErrMsg:    rec.ErrMsg,
+			LogPath:   rec.LogPath,
+			params:    rec.Params,
+		}
+		q.assignTaskLogPathLocked(task)
+		switch task.Status {
+		case TaskPending, TaskRunning:
+			// Interrupted or never-started work goes back into the queue.
+			task.Status = TaskPending
+			task.DoneAt = nil
+			task.ErrMsg = ""
+		}
+		q.tasks = append(q.tasks, task)
+	}
+	q.prune()
+	q.persistLocked()
+}
+
+// persistLocked writes the whole task list as indented JSON to the state
+// file. The data goes to "<statePath>.tmp" first and is then renamed over
+// the real file. It is best effort: with no state path configured it does
+// nothing, and marshal, write and rename errors are dropped. The caller
+// must hold q.mu.
+func (q *taskQueue) persistLocked() {
+	if q.statePath == "" {
+		return
+	}
+	records := make([]persistedTask, len(q.tasks))
+	for i, task := range q.tasks {
+		records[i] = persistedTask{
+			ID:        task.ID,
+			Name:      task.Name,
+			Target:    task.Target,
+			Priority:  task.Priority,
+			Status:    task.Status,
+			CreatedAt: task.CreatedAt,
+			StartedAt: task.StartedAt,
+			DoneAt:    task.DoneAt,
+			ErrMsg:    task.ErrMsg,
+			LogPath:   task.LogPath,
+			Params:    task.params,
+		}
+	}
+	encoded, err := json.MarshalIndent(records, "", "  ")
+	if err != nil {
+		return
+	}
+	scratch := q.statePath + ".tmp"
+	if os.WriteFile(scratch, encoded, 0644) != nil {
+		return
+	}
+	_ = os.Rename(scratch, q.statePath)
+}
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -0,0 +1,156 @@
+package webui
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"bee/audit/internal/app"
+)
+
+// TestTaskQueuePersistsAndRecoversPendingTasks persists a queue holding one
+// running task and checks that a fresh queue loading the same state file gets
+// the task back as pending, with its parameters and a log path.
+func TestTaskQueuePersistsAndRecoversPendingTasks(t *testing.T) {
+	root := t.TempDir()
+	statePath := filepath.Join(root, "tasks-state.json")
+	logsDir := filepath.Join(root, "tasks")
+	if err := os.MkdirAll(logsDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	// Save a queue containing a single task that is still running.
+	startedAt := time.Now().Add(-time.Minute)
+	running := &Task{
+		ID:        "task-1",
+		Name:      "Memory Burn-in",
+		Target:    "memory-stress",
+		Priority:  2,
+		Status:    TaskRunning,
+		CreatedAt: time.Now().Add(-2 * time.Minute),
+		StartedAt: &startedAt,
+		params: taskParams{
+			Duration:    300,
+			BurnProfile: "smoke",
+		},
+	}
+	saved := &taskQueue{
+		statePath: statePath,
+		logsDir:   logsDir,
+		trigger:   make(chan struct{}, 1),
+	}
+	saved.tasks = append(saved.tasks, running)
+	saved.assignTaskLogPathLocked(running)
+	saved.persistLocked()
+
+	// A fresh queue pointed at the same files must pick the task up again.
+	recovered := &taskQueue{
+		statePath: statePath,
+		logsDir:   logsDir,
+		trigger:   make(chan struct{}, 1),
+	}
+	recovered.loadLocked()
+
+	if n := len(recovered.tasks); n != 1 {
+		t.Fatalf("tasks=%d want 1", n)
+	}
+	got := recovered.tasks[0]
+	if got.Status != TaskPending {
+		t.Fatalf("status=%q want %q", got.Status, TaskPending)
+	}
+	if p := got.params; p.Duration != 300 || p.BurnProfile != "smoke" {
+		t.Fatalf("params=%+v", p)
+	}
+	if got.LogPath == "" {
+		t.Fatal("expected log path")
+	}
+}
+
+// TestNewTaskJobStateLoadsExistingLog checks that a job state created for an
+// existing log file hands the file's lines to a new subscriber together with
+// a non-nil channel.
+func TestNewTaskJobStateLoadsExistingLog(t *testing.T) {
+	logPath := filepath.Join(t.TempDir(), "task.log")
+	if err := os.WriteFile(logPath, []byte("line1\nline2\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	state := newTaskJobState(logPath)
+	lines, live := state.subscribe()
+	if live == nil {
+		t.Fatal("expected live subscription channel")
+	}
+	ok := len(lines) == 2 && lines[0] == "line1" && lines[1] == "line2"
+	if !ok {
+		t.Fatalf("existing=%v", lines)
+	}
+}
+
+// TestResolveBurnPreset checks the preset returned for each known burn
+// profile name. The empty profile name is expected to yield the same values
+// as "smoke".
+func TestResolveBurnPreset(t *testing.T) {
+	tests := []struct {
+		profile string
+		want    burnPreset
+	}{
+		{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
+		{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
+		{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
+		{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
+	}
+	for _, tc := range tests {
+		// Errorf rather than Fatalf: report every mismatching profile in one
+		// run instead of stopping at the first failing table entry.
+		if got := resolveBurnPreset(tc.profile); got != tc.want {
+			t.Errorf("resolveBurnPreset(%q)=%+v want %+v", tc.profile, got, tc.want)
+		}
+	}
+}
+
+// TestRunTaskHonorsCancel verifies that aborting a running CPU task cancels
+// the context handed to the CPU acceptance runner and that runTask returns
+// shortly afterwards.
+//
+// The test swaps the package-level runCPUAcceptancePackCtx hook, so it must
+// not call t.Parallel: any other parallel test reaching the hook would race
+// with the swap or run against the stub.
+func TestRunTaskHonorsCancel(t *testing.T) {
+	blocked := make(chan struct{})  // closed once the stub runner has started
+	released := make(chan struct{}) // closed when the stub runner returns
+
+	orig := runCPUAcceptancePackCtx
+	runCPUAcceptancePackCtx = func(_ *app.App, ctx context.Context, _ string, _ int, _ func(string)) (string, error) {
+		close(blocked)
+		defer close(released)
+		select {
+		case <-ctx.Done():
+			return "", ctx.Err()
+		case <-time.After(5 * time.Second):
+			// Cancellation never arrived; the test fails on its own
+			// shorter timeout before this fires.
+			return "unexpected", nil
+		}
+	}
+	t.Cleanup(func() { runCPUAcceptancePackCtx = orig })
+
+	q := &taskQueue{
+		opts: &HandlerOptions{App: &app.App{}},
+	}
+	tk := &Task{
+		ID:        "cpu-1",
+		Name:      "CPU SAT",
+		Target:    "cpu",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+		params:    taskParams{Duration: 60},
+	}
+	j := &jobState{}
+	ctx, cancel := context.WithCancel(context.Background())
+	j.cancel = cancel
+	tk.job = j
+
+	done := make(chan struct{})
+	go func() {
+		q.runTask(tk, j, ctx)
+		close(done)
+	}()
+
+	// Abort only after the runner is known to be blocked inside the stub.
+	<-blocked
+	j.abort()
+
+	select {
+	case <-released:
+	case <-time.After(2 * time.Second):
+		t.Fatal("task did not observe cancel")
+	}
+	select {
+	case <-done:
+	case <-time.After(2 * time.Second):
+		t.Fatal("runTask did not return after cancel")
+	}
+}
--- a/2
+++ b/2
--- a/bible-local/architecture/charting.md
+++ b/bible-local/architecture/charting.md
@@ -0,0 +1,38 @@
+# Charting architecture
+
+## Decision: one chart engine for all live metrics
+
+**Engine:** `github.com/go-analyze/charts` (pure Go, no CGO, SVG output)
+**Theme:** `grafana` (dark background, coloured lines)
+
+All live metrics charts in the web UI are server-side SVG images served by Go
+and polled by the browser every 2 seconds via `<img src="...?t=now">`.
+There is no client-side canvas or JS chart library.
+
+### Why go-analyze/charts
+
+- Pure Go, no CGO — builds cleanly inside the live-build container
+- SVG output — crisp at any display resolution, full-width without pixelation
+- Grafana theme matches the dark web UI colour scheme
+- Active fork of the archived wcharczuk/go-chart
+
+### SAT stress-test charts
+
+The `drawGPUChartSVG` function in `platform/gpu_metrics.go` is a separate
+self-contained SVG renderer used **only** for completed SAT run reports
+(HTML export, burn-in summaries). It is not used for live metrics.
+
+### Live metrics chart endpoints
+
+| Path | Content |
+|------|---------|
+| `GET /api/metrics/chart/server.svg` | CPU temp, CPU load %, mem load %, power W, fan RPMs |
+| `GET /api/metrics/chart/gpu/{idx}.svg` | GPU temp °C, load %, mem %, power W |
+
+Charts are 1400 × 280 px SVG. The page renders them at `width: 100%` in a
+single-column layout so they always fill the viewport width.
+
+### Ring buffers
+
+Each metric is stored in a 120-sample ring buffer (2 minutes of history at 1 Hz).
+There is one set of buffers for the server and one per GPU; new per-GPU buffers are allocated on demand as GPUs appear.
--- a/bible-local/architecture/runtime-flows.md
+++ b/bible-local/architecture/runtime-flows.md
@@ -22,8 +22,8 @@ local-fs.target
  │                           creates /dev/nvidia* nodes)
  ├── bee-audit.service      (runs `bee audit` → /var/log/bee-audit.json,
  │                            never blocks boot on partial collector failures)
-  └── bee-web.service        (runs `bee web` on :80,
-                               reads the latest audit snapshot on each request)
+  ├── bee-web.service        (runs `bee web` on :80 — full interactive web UI)
+  └── bee-desktop.service    (startx → openbox + chromium http://localhost/)
 ```

 **Critical invariants:**
@@ -44,17 +44,21 @@ Local-console behavior:
 ```text
 tty1
  └── live-config autologin → bee
-        └── /home/bee/.profile
-              └── exec menu
-                    └── /usr/local/bin/bee-tui
-                          └── sudo -n /usr/local/bin/bee tui --runtime livecd
+        └── /home/bee/.profile (prints web UI URLs)
+
+display :0
+  └── bee-desktop.service (User=bee)
+        └── startx /usr/local/bin/bee-openbox-session -- :0
+              ├── tint2 (taskbar)
+              ├── chromium http://localhost/
+              └── openbox (WM)
 ```

 Rules:
 - local `tty1` lands in user `bee`, not directly in `root`
- `menu` must work without typing `sudo`
- TUI actions still run as `root` via `sudo -n`
- SSH is independent from the tty1 path
+- `bee-desktop.service` starts X11 + openbox + Chromium automatically after `bee-web.service`
+- Chromium opens `http://localhost/` — the full interactive web UI
+- SSH is independent from the desktop path
 - serial console support is enabled for VM boot debugging

 ## ISO build sequence
@@ -77,9 +81,9 @@ build-in-container.sh [--authorized-keys /path/to/keys]
  7. `build-cublas.sh`:
       a. download `libcublas`, `libcublasLt`, `libcudart` runtime + dev packages from the NVIDIA CUDA Debian repo
       b. verify packages against repo `Packages.gz`
-       c. extract headers for `bee-gpu-stress` build
+       c. extract headers for `bee-gpu-burn` worker build
       d. cache userspace libs in `dist/cublas-<version>+cuda<series>/`
-  8. build `bee-gpu-stress` against extracted cuBLASLt/cudart headers
+  8. build `bee-gpu-burn` worker against extracted cuBLASLt/cudart headers
  9. inject NVIDIA `.ko` → staged `/usr/local/lib/nvidia/`
  10. inject `nvidia-smi` → staged `/usr/local/bin/nvidia-smi`
  11. inject `libnvidia-ml` + `libcuda` + `libcublas` + `libcublasLt` + `libcudart` → staged `/usr/lib/`
@@ -100,7 +104,7 @@ Build host notes:
  1. `build-in-container.sh` / `build-nvidia-module.sh` — Debian kernel headers for module build
  2. `auto/config` — `linux-image-${DEBIAN_KERNEL_ABI}` in the ISO
 - NVIDIA modules go to staged `usr/local/lib/nvidia/` — NOT to `/lib/modules/<kver>/extra/`.
- `bee-gpu-stress` must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
+- `bee-gpu-burn` worker must be built against cached CUDA userspace headers from `build-cublas.sh`, not against random host-installed CUDA headers.
 - The live ISO must ship `libcublas`, `libcublasLt`, and `libcudart` together with `libcuda` so tensor-core stress works without internet or package installs at boot.
 - The source overlay in `iso/overlay/` is treated as immutable source. Build-time files are injected only into the staged overlay.
 - The live-build workdir under `dist/` is disposable; source files under `iso/builder/` stay clean.
@@ -122,7 +126,7 @@ Key checks: NVIDIA modules loaded, `nvidia-smi` sees all GPUs, lib symlinks pres
 systemd services running, audit completed with NVIDIA enrichment, LAN reachability.

 Current validation state:
- local/libvirt VM boot path is validated for `systemd`, SSH, `bee audit`, `bee-network`, and TUI startup
+- local/libvirt VM boot path is validated for `systemd`, SSH, `bee audit`, `bee-network`, and Web UI startup
 - real hardware validation is still required before treating the ISO as release-ready

 ## Overlay mechanism
@@ -149,48 +153,31 @@ Current validation state:
 Every collector returns `nil, nil` on tool-not-found. Errors are logged, never fatal.

 Acceptance flows:
- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + mixed-precision `bee-gpu-stress`
+- `bee sat nvidia` → diagnostic archive with `nvidia-smi -q` + `nvidia-bug-report` + lightweight `bee-gpu-burn`
+- NVIDIA GPU burn-in can use either `bee-gpu-burn` or `bee-john-gpu-stress` (John the Ripper jumbo via OpenCL)
 - `bee sat memory` → `memtester` archive
 - `bee sat storage` → SMART/NVMe diagnostic archive and short self-test trigger where supported
 - SAT `summary.txt` now includes `overall_status` and per-job `*_status` values (`OK`, `FAILED`, `UNSUPPORTED`)
- `bee-gpu-stress` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
+- `bee-gpu-burn` should prefer cuBLASLt GEMM load over the old integer/PTX burn path:
  - Ampere: `fp16` + `fp32`/TF32 tensor-core load
  - Ada / Hopper: add `fp8`
  - Blackwell+: add `fp4`
  - PTX fallback is only for missing cuBLASLt/userspace or unsupported narrow datatypes
 - Runtime overrides:
-  - `BEE_GPU_STRESS_SECONDS`
-  - `BEE_GPU_STRESS_SIZE_MB`
  - `BEE_MEMTESTER_SIZE_MB`
  - `BEE_MEMTESTER_PASSES`

-## NVIDIA SAT TUI flow (v1.0.0+)
+## NVIDIA SAT Web UI flow

 ```
-TUI: Acceptance tests → NVIDIA command pack
-  1. screenNvidiaSATSetup
-       a. enumerate GPUs via `nvidia-smi --query-gpu=index,name,memory.total`
-       b. user selects duration preset: 10 min / 1 h / 8 h / 24 h
-       c. user selects GPUs via checkboxes (all selected by default)
-       d. memory size = max(selected GPU memory) — auto-detected, not exposed to user
-  2. Start → screenNvidiaSATRunning
-       a. CUDA_VISIBLE_DEVICES set to selected GPU indices
-       b. tea.Batch: SAT goroutine + tea.ExecProcess(nvtop) launched concurrently
-       c. nvtop occupies full terminal; SAT result queues in background
-       d. [o] reopen nvtop at any time; [a] abort (cancels context → kills bee-gpu-stress)
-  3. GPU metrics collection (during bee-gpu-stress)
-       - background goroutine polls `nvidia-smi` every second
-       - per-second rows: elapsed, GPU index, temp°C, usage%, power W, clock MHz
-       - outputs: gpu-metrics.csv, gpu-metrics.html (offline SVG chart), gpu-metrics-term.txt
-  4. After SAT completes
-       - result shown in screenOutput with terminal line-chart (gpu-metrics-term.txt)
-       - chart is asciigraph-style: box-drawing chars (╭╮╰╯─│), 4 series per GPU,
-         Y axis with ticks, ANSI colours (red=temp, blue=usage, green=power, yellow=clock)
+Web UI: Acceptance Tests page → Run Test button
+  1. POST /api/sat/nvidia/run → returns job_id
+  2. GET  /api/sat/stream?job_id=... (SSE) — streams stdout/stderr lines live
+  3. After completion — archive written to /appdata/bee/export/bee-sat/
+     summary.txt contains overall_status (OK / FAILED) and per-job status values
 ```

 **Critical invariants:**
- `nvtop` must be in `iso/builder/config/package-lists/bee.list.chroot` (baked into ISO).
- `bee-gpu-stress` uses `exec.CommandContext` — aborted on cancel.
+- `bee-gpu-burn` / `bee-john-gpu-stress` are started with `exec.CommandContext`, so they are killed when the job context is cancelled.
 - Metric goroutine uses stopCh/doneCh pattern; main goroutine waits `<-doneCh` before reading rows (no mutex needed).
- If `nvtop` is not found on PATH, SAT still runs without it (graceful degradation).
 - SVG chart is fully offline: no JS, no external CSS, pure inline SVG.
--- a/bible-local/architecture/system-overview.md
+++ b/bible-local/architecture/system-overview.md
@@ -21,14 +21,14 @@ Fills gaps where Redfish/logpile is blind:
 - Read-only hardware inventory: board, CPU, memory, storage, PCIe, PSU, GPU, NIC, RAID
 - Machine-readable health summary derived from collector verdicts
 - Operator-triggered acceptance tests for NVIDIA, memory, and storage
- NVIDIA SAT includes both diagnostic collection and mixed-precision GPU stress via `bee-gpu-stress`
- `bee-gpu-stress` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
+- NVIDIA SAT includes diagnostic collection plus a lightweight in-image GPU stress step via `bee-gpu-burn`
+- `bee-gpu-burn` should exercise tensor/inference paths (`fp16`, `fp32`/TF32, `fp8`, `fp4` when supported by the GPU/userspace stack) and fall back to Driver API PTX burn only if cuBLASLt is unavailable
 - Automatic boot audit with operator-facing local console and SSH access
 - NVIDIA proprietary driver loaded at boot for GPU enrichment via `nvidia-smi`
 - SSH access (OpenSSH) always available for inspection and debugging
- Interactive Go TUI via `bee tui` for network setup, service management, and acceptance tests
- Read-only web viewer via `bee web`, rendering the latest audit snapshot through the embedded Reanimator Chart
- Local `tty1` operator UX: `bee` autologin, `menu` auto-start, privileged actions via `sudo -n`
+- Full web UI via `bee web` on port 80: interactive control panel with live metrics, SAT tests, network config, service management, export, and tools
+- Local operator desktop: openbox + Xorg + Chromium auto-opening `http://localhost/`
+- Local `tty1` operator UX: `bee` autologin, openbox desktop auto-starts with Chromium on `http://localhost/`

 ## Network isolation — CRITICAL

@@ -70,15 +70,16 @@ Fills gaps where Redfish/logpile is blind:
 | SSH | OpenSSH server |
 | NVIDIA driver | Proprietary `.run` installer, built against Debian kernel headers |
 | NVIDIA modules | Loaded via `insmod` from `/usr/local/lib/nvidia/` |
-| GPU stress backend | `bee-gpu-stress` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
+| GPU stress backend | `bee-gpu-burn` + cuBLASLt/cuBLAS/cudart mixed-precision GEMM, with Driver API PTX fallback |
 | Builder | Debian 12 host/VM or Debian 12 container image |

 ## Operator UX

 - On the live ISO, `tty1` autologins as `bee`
- The login profile auto-runs `menu`, which enters the Go TUI
- The TUI itself executes privileged actions as `root` via `sudo -n`
+- `bee-desktop.service` starts X11 + openbox + Chromium on display `:0`
+- Chromium opens `http://localhost/` — the full web UI
 - SSH remains available independently of the local console path
+- Remote operators can open `http://<ip>/` in any browser on the same LAN
 - VM-oriented builds also include `qemu-guest-agent` and serial console support for debugging
 - The ISO boots with `toram`, so loss of the original USB/BMC virtual media after boot should not break already-installed runtime binaries

@@ -103,7 +104,10 @@ Fills gaps where Redfish/logpile is blind:
 | `internal/chart/` | Git submodule with `reanimator/chart`, embedded into `bee web` |
 | `iso/builder/VERSIONS` | Pinned versions: Debian, Go, NVIDIA driver, kernel ABI |
 | `iso/builder/smoketest.sh` | Post-boot smoke test — run via SSH to verify live ISO |
-| `iso/overlay/etc/profile.d/bee.sh` | `menu` helper + tty1 auto-start policy |
-| `iso/overlay/home/bee/.profile` | `bee` shell profile for local console startup |
+| `iso/overlay/etc/profile.d/bee.sh` | tty1 welcome message with web UI URLs |
+| `iso/overlay/home/bee/.profile` | `bee` shell profile (PATH only) |
+| `iso/overlay/etc/systemd/system/bee-desktop.service` | starts X11 + openbox + chromium |
+| `iso/overlay/usr/local/bin/bee-desktop` | startx wrapper for bee-desktop.service |
+| `iso/overlay/usr/local/bin/bee-openbox-session` | xinitrc: tint2 + chromium + openbox |
 | `dist/` | Build outputs (gitignored) |
 | `iso/out/` | Downloaded ISO files (gitignored) |
--- a/bible-local/decisions/2026-03-05-nvidia-proprietary-driver.md
+++ b/bible-local/decisions/2026-03-05-nvidia-proprietary-driver.md
@@ -18,6 +18,8 @@ Use the official proprietary NVIDIA `.run` installer for both kernel modules and
 - Kernel modules and nvidia-smi come from a single verified source.
 - NVIDIA publishes `.sha256sum` alongside each installer — download and verify before use.
 - Driver version pinned in `iso/builder/VERSIONS` as `NVIDIA_DRIVER_VERSION`.
+- DCGM must track the CUDA user-mode driver major version exposed by `nvidia-smi`.
+- For NVIDIA driver branch `590` with CUDA `13.x`, use DCGM 4 package family `datacenter-gpu-manager-4-cuda13`; legacy `datacenter-gpu-manager` 3.x does not provide a working path for this stack.
 - Build process: download `.run`, extract, compile `kernel/` sources against `linux-lts-dev`.
 - Modules cached in `dist/nvidia-<version>-<kver>/` — rebuild only on version or kernel change.
 - ISO size increases by ~50MB for .ko files + nvidia-smi.
--- a/bible-local/docs/iso-build-rules.md
+++ b/bible-local/docs/iso-build-rules.md
@@ -0,0 +1,22 @@
+# ISO Build Rules
+
+## Verify package names before use
+
+ISO builds take 30–60 minutes. A wrong package name wastes an entire build cycle.
+
+**Rule: before adding any Debian package name to the ISO config, verify it exists and check its file list.**
+
+Use one of:
+- `https://packages.debian.org/bookworm/<package-name>` — existence + description
+- `https://packages.debian.org/bookworm/amd64/<package-name>/filelist` — exact files installed
+- `apt-cache show <package>` inside a Debian bookworm container
+
+This applies to:
+- `iso/builder/config/package-lists/*.list.chroot`
+- Any package referenced in bootloader configs, hooks, or overlay scripts
+
+## Memtest rule
+
+Prefer live-build's built-in memtest integration over custom hooks or hardcoded
+bootloader paths. If you ever need to reference memtest files manually, verify
+the exact package file list first for the target Debian release.
--- a/bible-local/docs/validate-vs-burn.md
+++ b/bible-local/docs/validate-vs-burn.md
@@ -0,0 +1,35 @@
+# Validate vs Burn: Hardware Impact Policy
+
+## Validate Tests (non-destructive)
+
+Tests on the **Validate** page are purely diagnostic. They:
+
+- **Do not write to disks** — no data is written to storage devices; SMART counters (power-on hours, load cycle count, reallocated sectors) are not incremented.
+- **Do not run sustained high load** — commands complete quickly (seconds to minutes) and do not push hardware to thermal or electrical limits.
+- **Do not increment hardware wear counters** — GPU memory ECC counters, NVMe wear leveling counters, and similar endurance metrics are unaffected.
+- **Are safe to run repeatedly** — on new, production-bound, or already-deployed hardware without concern for reducing lifespan.
+
+### What Validate tests actually do
+
+| Test | What it runs |
+|---|---|
+| NVIDIA GPU | `nvidia-smi`, `dcgmi diag` (levels 1–4 read-only diagnostics) |
+| Memory | `memtester` on a limited allocation; reads/writes to RAM only |
+| Storage | `smartctl -a`, `nvme smart-log` — reads SMART data only |
+| CPU | `stress-ng` for a bounded duration; CPU-only, no I/O |
+| AMD GPU | `rocm-smi --showallinfo`, `dmidecode` — read-only queries |
+
+## Burn Tests (hardware wear)
+
+Tests on the **Burn** page run hardware at maximum or near-maximum load for extended durations. They:
+
+- **Wear storage**: write-intensive patterns can reduce SSD endurance (P/E cycles).
+- **Stress GPU memory**: extended ECC stress tests may surface latent defects but also exercise memory cells.
+- **Accelerate thermal cycling**: repeated heat/cool cycles degrade solder joints and capacitors over time.
+- **May increment wear counters**: GPU power-on hours, NVMe media wear indicator, and similar metrics will advance.
+
+### Rule
+
+> Run **Validate** freely on any server, at any time, before or after deployment.
+> Run **Burn** only when explicitly required (e.g., initial acceptance after repair, or per customer SLA).
+> Document when and why Burn tests were run.
--- a/internal/chart
+++ b/internal/chart
--- a/iso/README.md
+++ b/iso/README.md
@@ -48,6 +48,7 @@ sh iso/builder/build-in-container.sh --cache-dir /path/to/cache
 - The builder image is automatically rebuilt if the local tag exists for the wrong architecture.
 - The live ISO boots with Debian `live-boot` `toram`, so the read-only medium is copied into RAM during boot and the runtime no longer depends on the original USB/BMC virtual media staying present.
 - Target systems need enough RAM for the full compressed live medium plus normal runtime overhead, or boot may fail before reaching the TUI.
+- The NVIDIA variant installs DCGM 4 packages matched to the CUDA user-mode driver major version. For driver branch `590` / CUDA `13.x`, the package family is `datacenter-gpu-manager-4-cuda13` rather than legacy `datacenter-gpu-manager`.
 - Override the container platform only if you know why:

 ```sh
--- a/iso/builder/Dockerfile
+++ b/iso/builder/Dockerfile
@@ -23,9 +23,33 @@ RUN apt-get update -qq && apt-get install -y \
    gcc \
    make \
    perl \
+    pkg-config \
+    yasm \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libgmp-dev \
+    libpcap-dev \
+    libsqlite3-dev \
+    libcurl4-openssl-dev \
+    ocl-icd-opencl-dev \
    linux-headers-amd64 \
    && rm -rf /var/lib/apt/lists/*

+# Add NVIDIA CUDA repo and install nvcc (needed to compile nccl-tests)
+RUN wget -qO /tmp/cuda-keyring.gpg \
+        https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/3bf863cc.pub \
+    && gpg --dearmor < /tmp/cuda-keyring.gpg \
+        > /usr/share/keyrings/nvidia-cuda.gpg \
+    && rm /tmp/cuda-keyring.gpg \
+    && echo "deb [signed-by=/usr/share/keyrings/nvidia-cuda.gpg] \
+https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /" \
+        > /etc/apt/sources.list.d/cuda.list \
+    && apt-get update -qq \
+    && apt-get install -y cuda-nvcc-12-8 \
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sfn /usr/local/cuda-12.8 /usr/local/cuda
+
 RUN arch="$(dpkg --print-architecture)" \
    && case "$arch" in \
        amd64) goarch=amd64 ;; \
--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -4,7 +4,20 @@ NVIDIA_DRIVER_VERSION=590.48.01
 NCCL_VERSION=2.28.9-1
 NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
+NCCL_TESTS_VERSION=2.13.10
+NVCC_VERSION=12.8
 CUBLAS_VERSION=13.0.2.14-1
 CUDA_USERSPACE_VERSION=13.0.96-1
+DCGM_VERSION=4.5.2-1
+JOHN_JUMBO_COMMIT=67fcf9fe5a
+ROCM_VERSION=6.3.4
+ROCM_SMI_VERSION=7.4.0.60304-76~22.04
+ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
+ROCM_VALIDATION_SUITE_VERSION=1.1.0.60304-76~22.04
+ROCBLAS_VERSION=4.3.0.60304-76~22.04
+ROCRAND_VERSION=3.2.0.60304-76~22.04
+HIP_RUNTIME_AMD_VERSION=6.3.42134.60304-76~22.04
+HIPBLASLT_VERSION=0.10.0.60304-76~22.04
+COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -29,9 +29,10 @@ lb config noauto \
    --security true \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
-    --memtest none \
-    --iso-volume "EASY-BEE" \
-    --iso-application "EASY-BEE" \
-    --bootappend-live "boot=live toram components console=tty2 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
+    --memtest memtest86+ \
+    --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
+    --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
+    --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=7 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
    --apt-recommends false \
+    --chroot-squashfs-compression-type zstd \
    "${@}"
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -29,8 +29,14 @@ typedef void *CUfunction;
 typedef void *CUstream;

 #define CU_SUCCESS 0
+#define CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
+#define MAX_STRESS_STREAMS 16
+#define MAX_CUBLAS_PROFILES 5
+#define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
+#define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)
+#define STRESS_LAUNCH_DEPTH 8

 static const char *ptx_source =
    ".version 6.0\n"
@@ -97,6 +103,9 @@ typedef CUresult (*cuLaunchKernel_fn)(CUfunction,
                                      CUstream,
                                      void **,
                                      void **);
+typedef CUresult (*cuMemGetInfo_fn)(size_t *, size_t *);
+typedef CUresult (*cuStreamCreate_fn)(CUstream *, unsigned int);
+typedef CUresult (*cuStreamDestroy_fn)(CUstream);
 typedef CUresult (*cuGetErrorName_fn)(CUresult, const char **);
 typedef CUresult (*cuGetErrorString_fn)(CUresult, const char **);

@@ -118,6 +127,9 @@ struct cuda_api {
    cuModuleLoadDataEx_fn cuModuleLoadDataEx;
    cuModuleGetFunction_fn cuModuleGetFunction;
    cuLaunchKernel_fn cuLaunchKernel;
+    cuMemGetInfo_fn cuMemGetInfo;
+    cuStreamCreate_fn cuStreamCreate;
+    cuStreamDestroy_fn cuStreamDestroy;
    cuGetErrorName_fn cuGetErrorName;
    cuGetErrorString_fn cuGetErrorString;
 };
@@ -128,9 +140,10 @@ struct stress_report {
    int cc_major;
    int cc_minor;
    int buffer_mb;
+    int stream_count;
    unsigned long iterations;
    uint64_t checksum;
-    char details[1024];
+    char details[16384];
 };

 static int load_symbol(void *lib, const char *name, void **out) {
@@ -144,7 +157,7 @@ static int load_cuda(struct cuda_api *api) {
    if (!api->lib) {
        return 0;
    }
-    return
+    if (!(
        load_symbol(api->lib, "cuInit", (void **)&api->cuInit) &&
        load_symbol(api->lib, "cuDeviceGetCount", (void **)&api->cuDeviceGetCount) &&
        load_symbol(api->lib, "cuDeviceGet", (void **)&api->cuDeviceGet) &&
@@ -160,7 +173,17 @@ static int load_cuda(struct cuda_api *api) {
        load_symbol(api->lib, "cuMemcpyDtoH_v2", (void **)&api->cuMemcpyDtoH) &&
        load_symbol(api->lib, "cuModuleLoadDataEx", (void **)&api->cuModuleLoadDataEx) &&
        load_symbol(api->lib, "cuModuleGetFunction", (void **)&api->cuModuleGetFunction) &&
-        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel);
+        load_symbol(api->lib, "cuLaunchKernel", (void **)&api->cuLaunchKernel))) {
+        dlclose(api->lib);
+        memset(api, 0, sizeof(*api));
+        return 0;
+    }
+    load_symbol(api->lib, "cuMemGetInfo_v2", (void **)&api->cuMemGetInfo);
+    load_symbol(api->lib, "cuStreamCreate", (void **)&api->cuStreamCreate);
+    if (!load_symbol(api->lib, "cuStreamDestroy_v2", (void **)&api->cuStreamDestroy)) {
+        load_symbol(api->lib, "cuStreamDestroy", (void **)&api->cuStreamDestroy);
+    }
+    return 1;
 }

 static const char *cu_error_name(struct cuda_api *api, CUresult rc) {
@@ -193,14 +216,12 @@ static double now_seconds(void) {
    return (double)ts.tv_sec + ((double)ts.tv_nsec / 1000000000.0);
 }

-#if HAVE_CUBLASLT_HEADERS
 static size_t round_down_size(size_t value, size_t multiple) {
    if (multiple == 0 || value < multiple) {
        return value;
    }
    return value - (value % multiple);
 }
-#endif

 static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *major, int *minor) {
    int cc_major = 0;
@@ -220,6 +241,75 @@ static int query_compute_capability(struct cuda_api *api, CUdevice dev, int *maj
    return 1;
 }

+/*
+ * Query the multiprocessor-count attribute of dev.
+ * Returns 1 and stores the value in *count on success; returns 0 and leaves
+ * *count untouched when check_rc rejects the driver call's result.
+ */
+static int query_multiprocessor_count(struct cuda_api *api, CUdevice dev, int *count) {
+    int value = 0;
+    CUresult rc = api->cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
+
+    if (check_rc(api, "cuDeviceGetAttribute(multiprocessors)", rc)) {
+        *count = value;
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Limit a requested memory budget to roughly 90% of the memory the driver
+ * reports as free.
+ *
+ * requested_bytes is returned unchanged when cuMemGetInfo was not loaded,
+ * when the call fails, or when it reports zero free bytes. Otherwise the
+ * result is min(requested_bytes, cap), where cap is ~90% of free memory but
+ * never less than MIN_PROFILE_BUDGET_BYTES.
+ */
+static size_t clamp_budget_to_free_memory(struct cuda_api *api, size_t requested_bytes) {
+    size_t free_bytes = 0;
+    size_t total_bytes = 0; /* required by cuMemGetInfo; not used here */
+    size_t max_bytes;
+
+    if (!api->cuMemGetInfo) {
+        return requested_bytes;
+    }
+    if (api->cuMemGetInfo(&free_bytes, &total_bytes) != CU_SUCCESS || free_bytes == 0) {
+        return requested_bytes;
+    }
+
+    /* Divide before multiplying so the product cannot wrap size_t. */
+    max_bytes = (free_bytes / 10u) * 9u;
+    if (max_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        max_bytes = MIN_PROFILE_BUDGET_BYTES;
+    }
+    return requested_bytes > max_bytes ? max_bytes : requested_bytes;
+}
+
+/*
+ * Pick how many streams to drive in parallel.
+ *
+ * Returns 1 when streams are unavailable or when mp_count or planned_profiles
+ * is not positive. Otherwise it starts from mp_count / 8, raises that to at
+ * least 2, caps it at MAX_STRESS_STREAMS, and then lowers it one step at a
+ * time until each stream's share of total_budget per profile reaches
+ * MIN_STREAM_BUDGET_BYTES or only a single stream is left.
+ */
+static int choose_stream_count(int mp_count, int planned_profiles, size_t total_budget, int have_streams) {
+    int lanes;
+
+    if (!have_streams || mp_count <= 0 || planned_profiles <= 0) {
+        return 1;
+    }
+
+    lanes = mp_count / 8;
+    if (lanes < 2) {
+        lanes = 2;
+    }
+    if (lanes > MAX_STRESS_STREAMS) {
+        lanes = MAX_STRESS_STREAMS;
+    }
+
+    for (; lanes > 1; lanes--) {
+        size_t share = total_budget / ((size_t)planned_profiles * (size_t)lanes);
+        if (share >= MIN_STREAM_BUDGET_BYTES) {
+            break;
+        }
+    }
+    return lanes;
+}
+
+/*
+ * Destroy the first count entries of streams that are non-NULL and reset each
+ * destroyed slot to NULL. Does nothing when cuStreamDestroy was not loaded.
+ */
+static void destroy_streams(struct cuda_api *api, CUstream *streams, int count) {
+    int i;
+
+    if (api->cuStreamDestroy == NULL) {
+        return;
+    }
+    for (i = 0; i < count; i++) {
+        if (streams[i] == NULL) {
+            continue;
+        }
+        api->cuStreamDestroy(streams[i]);
+        streams[i] = NULL;
+    }
+}
+
 #if HAVE_CUBLASLT_HEADERS
 static void append_detail(char *buf, size_t cap, const char *fmt, ...) {
    size_t len = strlen(buf);
@@ -242,12 +332,19 @@ static int run_ptx_fallback(struct cuda_api *api,
                            int size_mb,
                            struct stress_report *report) {
    CUcontext ctx = NULL;
-    CUdeviceptr device_mem = 0;
    CUmodule module = NULL;
    CUfunction kernel = NULL;
    uint32_t sample[256];
-    uint32_t words = 0;
+    CUdeviceptr device_mem[MAX_STRESS_STREAMS] = {0};
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
+    uint32_t words[MAX_STRESS_STREAMS] = {0};
+    uint32_t rounds[MAX_STRESS_STREAMS] = {0};
+    void *params[MAX_STRESS_STREAMS][3];
+    size_t bytes_per_stream[MAX_STRESS_STREAMS] = {0};
    unsigned long iterations = 0;
+    int mp_count = 0;
+    int stream_count = 1;
+    int launches_per_wave = 0;

    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "driver-ptx");
@@ -260,64 +357,109 @@ static int run_ptx_fallback(struct cuda_api *api,
        return 0;
    }

-    size_t bytes = (size_t)size_mb * 1024u * 1024u;
-    if (bytes < 4u * 1024u * 1024u) {
-        bytes = 4u * 1024u * 1024u;
+    size_t requested_bytes = (size_t)size_mb * 1024u * 1024u;
+    if (requested_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        requested_bytes = MIN_PROFILE_BUDGET_BYTES;
    }
-    if (bytes > (size_t)1024u * 1024u * 1024u) {
-        bytes = (size_t)1024u * 1024u * 1024u;
+    size_t total_bytes = clamp_budget_to_free_memory(api, requested_bytes);
+    if (total_bytes < MIN_PROFILE_BUDGET_BYTES) {
+        total_bytes = MIN_PROFILE_BUDGET_BYTES;
    }
-    words = (uint32_t)(bytes / sizeof(uint32_t));
+    report->buffer_mb = (int)(total_bytes / (1024u * 1024u));

-    if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem, bytes))) {
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (query_multiprocessor_count(api, dev, &mp_count) &&
+        api->cuStreamCreate &&
+        api->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, 1, total_bytes, 1);
    }
-    if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem, 0, bytes))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(api, "cuStreamCreate", api->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(api, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
    }
+    report->stream_count = stream_count;
+
+    for (int lane = 0; lane < stream_count; lane++) {
+        size_t slice = total_bytes / (size_t)stream_count;
+        if (lane == stream_count - 1) {
+            slice = total_bytes - ((size_t)lane * (total_bytes / (size_t)stream_count));
+        }
+        slice = round_down_size(slice, sizeof(uint32_t));
+        if (slice < MIN_PROFILE_BUDGET_BYTES) {
+            slice = MIN_PROFILE_BUDGET_BYTES;
+        }
+        bytes_per_stream[lane] = slice;
+        words[lane] = (uint32_t)(slice / sizeof(uint32_t));
+
+        if (!check_rc(api, "cuMemAlloc", api->cuMemAlloc(&device_mem[lane], slice))) {
+            goto fail;
+        }
+        if (!check_rc(api, "cuMemsetD8", api->cuMemsetD8(device_mem[lane], 0, slice))) {
+            goto fail;
+        }
+        rounds[lane] = 2048;
+        params[lane][0] = &device_mem[lane];
+        params[lane][1] = &words[lane];
+        params[lane][2] = &rounds[lane];
+    }
+
    if (!check_rc(api,
                  "cuModuleLoadDataEx",
                  api->cuModuleLoadDataEx(&module, ptx_source, 0, NULL, NULL))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
    }
    if (!check_rc(api, "cuModuleGetFunction", api->cuModuleGetFunction(&kernel, module, "burn"))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+        goto fail;
    }

    unsigned int threads = 256;
-    unsigned int blocks = (unsigned int)((words + threads - 1) / threads);
-    uint32_t rounds = 1024;
-    void *params[] = {&device_mem, &words, &rounds};

    double start = now_seconds();
    double deadline = start + (double)seconds;
    while (now_seconds() < deadline) {
-        if (!check_rc(api,
-                      "cuLaunchKernel",
-                      api->cuLaunchKernel(kernel, blocks, 1, 1, threads, 1, 1, 0, NULL, params, NULL))) {
-            api->cuMemFree(device_mem);
-            api->cuCtxDestroy(ctx);
-            return 0;
+        launches_per_wave = 0;
+        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+            int launched_this_batch = 0;
+            for (int lane = 0; lane < stream_count; lane++) {
+                unsigned int blocks = (unsigned int)((words[lane] + threads - 1) / threads);
+                if (!check_rc(api,
+                              "cuLaunchKernel",
+                              api->cuLaunchKernel(kernel,
+                                                  blocks,
+                                                  1,
+                                                  1,
+                                                  threads,
+                                                  1,
+                                                  1,
+                                                  0,
+                                                  streams[lane],
+                                                  params[lane],
+                                                  NULL))) {
+                    goto fail;
+                }
+                launches_per_wave++;
+                launched_this_batch++;
+            }
+            if (launched_this_batch <= 0) {
+                break;
+            }
        }
-        iterations++;
+        if (launches_per_wave <= 0) {
+            goto fail;
+        }
+        if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
+            goto fail;
+        }
+        iterations += (unsigned long)launches_per_wave;
    }

-    if (!check_rc(api, "cuCtxSynchronize", api->cuCtxSynchronize())) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
-    }
-    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem, sizeof(sample)))) {
-        api->cuMemFree(device_mem);
-        api->cuCtxDestroy(ctx);
-        return 0;
+    if (!check_rc(api, "cuMemcpyDtoH", api->cuMemcpyDtoH(sample, device_mem[0], sizeof(sample)))) {
+        goto fail;
    }

    for (size_t i = 0; i < sizeof(sample) / sizeof(sample[0]); i++) {
@@ -326,12 +468,34 @@ static int run_ptx_fallback(struct cuda_api *api,
    report->iterations = iterations;
    snprintf(report->details,
             sizeof(report->details),
-             "profile_int32_fallback=OK iterations=%lu\n",
+             "fallback_int32=OK requested_mb=%d actual_mb=%d streams=%d queue_depth=%d per_stream_mb=%zu iterations=%lu\n",
+             size_mb,
+             report->buffer_mb,
+             report->stream_count,
+             STRESS_LAUNCH_DEPTH,
+             bytes_per_stream[0] / (1024u * 1024u),
             iterations);

-    api->cuMemFree(device_mem);
+    for (int lane = 0; lane < stream_count; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, stream_count);
    api->cuCtxDestroy(ctx);
    return 1;
+
+fail:
+    for (int lane = 0; lane < MAX_STRESS_STREAMS; lane++) {
+        if (device_mem[lane]) {
+            api->cuMemFree(device_mem[lane]);
+        }
+    }
+    destroy_streams(api, streams, MAX_STRESS_STREAMS);
+    if (ctx) {
+        api->cuCtxDestroy(ctx);
+    }
+    return 0;
 }

 #if HAVE_CUBLASLT_HEADERS
@@ -418,6 +582,7 @@ struct profile_desc {

 struct prepared_profile {
    struct profile_desc desc;
+    CUstream stream;
    cublasLtMatmulDesc_t op_desc;
    cublasLtMatrixLayout_t a_layout;
    cublasLtMatrixLayout_t b_layout;
@@ -617,8 +782,8 @@ static uint64_t choose_square_dim(size_t budget_bytes, size_t bytes_per_cell, in
    if (dim < (uint64_t)multiple) {
        dim = (uint64_t)multiple;
    }
-    if (dim > 8192u) {
-        dim = 8192u;
+    if (dim > 65536u) {
+        dim = 65536u;
    }
    return dim;
 }
@@ -704,10 +869,12 @@ static int prepare_profile(struct cublaslt_api *cublas,
                           cublasLtHandle_t handle,
                           struct cuda_api *cuda,
                           const struct profile_desc *desc,
+                           CUstream stream,
                           size_t profile_budget_bytes,
                           struct prepared_profile *out) {
    memset(out, 0, sizeof(*out));
    out->desc = *desc;
+    out->stream = stream;

    size_t bytes_per_cell = 0;
    bytes_per_cell += bytes_for_elements(desc->a_type, 1);
@@ -935,7 +1102,7 @@ static int run_cublas_profile(cublasLtHandle_t handle,
                                               &profile->heuristic.algo,
                                               (void *)(uintptr_t)profile->workspace_dev,
                                               profile->workspace_size,
-                                               (cudaStream_t)0));
+                                               profile->stream));
 }

 static int run_cublaslt_stress(struct cuda_api *cuda,
@@ -947,13 +1114,22 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int size_mb,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[sizeof(k_profiles) / sizeof(k_profiles[0])];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
+    CUstream streams[MAX_STRESS_STREAMS] = {0};
    uint16_t sample[256];
    int cc = cc_major * 10 + cc_minor;
    int planned = 0;
    int active = 0;
+    int mp_count = 0;
+    int stream_count = 1;
+    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int prepared_count = 0;
+    int wave_launches = 0;
+    size_t requested_budget = 0;
+    size_t total_budget = 0;
+    size_t per_profile_budget = 0;

    memset(report, 0, sizeof(*report));
    snprintf(report->backend, sizeof(report->backend), "cublasLt");
@@ -986,16 +1162,46 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

-    size_t total_budget = (size_t)size_mb * 1024u * 1024u;
-    if (total_budget < (size_t)planned * 4u * 1024u * 1024u) {
-        total_budget = (size_t)planned * 4u * 1024u * 1024u;
+    requested_budget = (size_t)size_mb * 1024u * 1024u;
+    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
    }
-    size_t per_profile_budget = total_budget / (size_t)planned;
-    if (per_profile_budget < 4u * 1024u * 1024u) {
-        per_profile_budget = 4u * 1024u * 1024u;
+    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
+    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
    }
+    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
+        cuda->cuStreamCreate &&
+        cuda->cuStreamDestroy) {
+        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+    }
+    if (stream_count > 1) {
+        int created = 0;
+        for (; created < stream_count; created++) {
+            if (!check_rc(cuda, "cuStreamCreate", cuda->cuStreamCreate(&streams[created], 0))) {
+                destroy_streams(cuda, streams, created);
+                stream_count = 1;
+                break;
+            }
+        }
+    }
+    report->stream_count = stream_count;
+    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
+        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
+    }
+    report->buffer_mb = (int)(total_budget / (1024u * 1024u));
+    append_detail(report->details,
+                  sizeof(report->details),
+                  "requested_mb=%d actual_mb=%d streams=%d queue_depth=%d mp_count=%d per_worker_mb=%zu\n",
+                  size_mb,
+                  report->buffer_mb,
+                  report->stream_count,
+                  STRESS_LAUNCH_DEPTH,
+                  mp_count,
+                  per_profile_budget / (1024u * 1024u));

-    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+    for (int i = 0; i < profile_count; i++) {
        const struct profile_desc *desc = &k_profiles[i];
        if (!(desc->enabled && cc >= desc->min_cc)) {
            append_detail(report->details,
@@ -1005,63 +1211,87 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
-        if (prepare_profile(&cublas, handle, cuda, desc, per_profile_budget, &prepared[i])) {
-            active++;
-            append_detail(report->details,
-                          sizeof(report->details),
-                          "%s=READY dim=%llux%llux%llu block=%s\n",
-                          desc->name,
-                          (unsigned long long)prepared[i].m,
-                          (unsigned long long)prepared[i].n,
-                          (unsigned long long)prepared[i].k,
-                          desc->block_label);
-        } else {
-            append_detail(report->details, sizeof(report->details), "%s=SKIPPED unsupported\n", desc->name);
+        for (int lane = 0; lane < stream_count; lane++) {
+            CUstream stream = streams[lane];
+            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
+                break;
+            }
+            if (prepare_profile(&cublas, handle, cuda, desc, stream, per_profile_budget, &prepared[prepared_count])) {
+                active++;
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=READY dim=%llux%llux%llu block=%s stream=%d\n",
+                              desc->name,
+                              lane,
+                              (unsigned long long)prepared[prepared_count].m,
+                              (unsigned long long)prepared[prepared_count].n,
+                              (unsigned long long)prepared[prepared_count].k,
+                              desc->block_label,
+                              lane);
+                prepared_count++;
+            } else {
+                append_detail(report->details,
+                              sizeof(report->details),
+                              "%s[%d]=SKIPPED unsupported\n",
+                              desc->name,
+                              lane);
+            }
        }
    }

    if (active <= 0) {
        cublas.cublasLtDestroy(handle);
+        destroy_streams(cuda, streams, stream_count);
        cuda->cuCtxDestroy(ctx);
        return 0;
    }

    double deadline = now_seconds() + (double)seconds;
    while (now_seconds() < deadline) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
-            if (!prepared[i].ready) {
-                continue;
-            }
-            if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
-                append_detail(report->details,
-                              sizeof(report->details),
-                              "%s=FAILED runtime\n",
-                              prepared[i].desc.name);
-                for (size_t j = 0; j < sizeof(prepared) / sizeof(prepared[0]); j++) {
-                    destroy_profile(&cublas, cuda, &prepared[j]);
+        wave_launches = 0;
+        for (int depth = 0; depth < STRESS_LAUNCH_DEPTH && now_seconds() < deadline; depth++) {
+            int launched_this_batch = 0;
+            for (int i = 0; i < prepared_count; i++) {
+                if (!prepared[i].ready) {
+                    continue;
                }
-                cublas.cublasLtDestroy(handle);
-                cuda->cuCtxDestroy(ctx);
-                return 0;
+                if (!run_cublas_profile(handle, &cublas, &prepared[i])) {
+                    append_detail(report->details,
+                                  sizeof(report->details),
+                                  "%s=FAILED runtime\n",
+                                  prepared[i].desc.name);
+                    for (int j = 0; j < prepared_count; j++) {
+                        destroy_profile(&cublas, cuda, &prepared[j]);
+                    }
+                    cublas.cublasLtDestroy(handle);
+                    destroy_streams(cuda, streams, stream_count);
+                    cuda->cuCtxDestroy(ctx);
+                    return 0;
+                }
+                prepared[i].iterations++;
+                report->iterations++;
+                wave_launches++;
+                launched_this_batch++;
            }
-            prepared[i].iterations++;
-            report->iterations++;
-            if (now_seconds() >= deadline) {
+            if (launched_this_batch <= 0) {
                break;
            }
        }
-    }
-
-    if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
-        for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
-            destroy_profile(&cublas, cuda, &prepared[i]);
+        if (wave_launches <= 0) {
+            break;
+        }
+        if (!check_rc(cuda, "cuCtxSynchronize", cuda->cuCtxSynchronize())) {
+            for (int i = 0; i < prepared_count; i++) {
+                destroy_profile(&cublas, cuda, &prepared[i]);
+            }
+            cublas.cublasLtDestroy(handle);
+            destroy_streams(cuda, streams, stream_count);
+            cuda->cuCtxDestroy(ctx);
+            return 0;
        }
-        cublas.cublasLtDestroy(handle);
-        cuda->cuCtxDestroy(ctx);
-        return 0;
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        if (!prepared[i].ready) {
            continue;
        }
@@ -1072,7 +1302,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                      prepared[i].iterations);
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        if (prepared[i].ready) {
            if (check_rc(cuda, "cuMemcpyDtoH", cuda->cuMemcpyDtoH(sample, prepared[i].d_dev, sizeof(sample)))) {
                for (size_t j = 0; j < sizeof(sample) / sizeof(sample[0]); j++) {
@@ -1083,10 +1313,11 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }

-    for (size_t i = 0; i < sizeof(prepared) / sizeof(prepared[0]); i++) {
+    for (int i = 0; i < prepared_count; i++) {
        destroy_profile(&cublas, cuda, &prepared[i]);
    }
    cublas.cublasLtDestroy(handle);
+    destroy_streams(cuda, streams, stream_count);
    cuda->cuCtxDestroy(ctx);
    return 1;
 }
@@ -1095,13 +1326,16 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
+    int device_index = 0;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--size-mb") == 0 || strcmp(argv[i], "-m") == 0) && i + 1 < argc) {
            size_mb = atoi(argv[++i]);
+        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
+            device_index = atoi(argv[++i]);
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N]\n", argv[0]);
+            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
            return 2;
        }
    }
@@ -1111,6 +1345,9 @@ int main(int argc, char **argv) {
    if (size_mb <= 0) {
        size_mb = 64;
    }
+    if (device_index < 0) {
+        device_index = 0;
+    }

    struct cuda_api cuda;
    if (!load_cuda(&cuda)) {
@@ -1133,8 +1370,13 @@ int main(int argc, char **argv) {
        return 1;
    }

+    if (device_index >= count) {
+        fprintf(stderr, "device index %d out of range (found %d CUDA device(s))\n", device_index, count);
+        return 1;
+    }
+
    CUdevice dev = 0;
-    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, 0))) {
+    if (!check_rc(&cuda, "cuDeviceGet", cuda.cuDeviceGet(&dev, device_index))) {
        return 1;
    }

@@ -1162,10 +1404,12 @@ int main(int argc, char **argv) {
    }

    printf("device=%s\n", report.device);
+    printf("device_index=%d\n", device_index);
    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
    printf("backend=%s\n", report.backend);
    printf("duration_s=%d\n", seconds);
    printf("buffer_mb=%d\n", report.buffer_mb);
+    printf("streams=%d\n", report.stream_count);
    printf("iterations=%lu\n", report.iterations);
    printf("checksum=%llu\n", (unsigned long long)report.checksum);
    if (report.details[0] != '\0') {
--- a/iso/builder/build-cublas.sh
+++ b/iso/builder/build-cublas.sh
@@ -1,9 +1,9 @@
 #!/bin/sh
-# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-stress.
+# build-cublas.sh — download cuBLASLt/cuBLAS/cudart runtime + headers for bee-gpu-burn worker.
 #
 # Downloads .deb packages from NVIDIA's CUDA apt repository (Debian 12, x86_64),
 # verifies them against Packages.gz, and extracts the small subset we need:
-#   - headers for compiling bee-gpu-stress against cuBLASLt
+#   - headers for compiling bee-gpu-burn worker against cuBLASLt
 #   - runtime libs for libcublas, libcublasLt, libcudart inside the ISO

 set -e
@@ -28,6 +28,8 @@ PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz"
 echo "=== cuBLAS ${CUBLAS_VERSION} / cudart ${CUDA_USERSPACE_VERSION} / CUDA ${CUDA_SERIES} ==="

 if [ -f "${CACHE_DIR}/include/cublasLt.h" ] && [ -f "${CACHE_DIR}/include/cuda_runtime_api.h" ] \
+    && [ -f "${CACHE_DIR}/include/crt/host_defines.h" ] \
+    && [ -f "${CACHE_DIR}/include/nv/target" ] \
    && [ "$(find "${CACHE_DIR}/lib" \( -name 'libcublas.so*' -o -name 'libcublasLt.so*' -o -name 'libcudart.so*' \) 2>/dev/null | wc -l)" -gt 0 ]; then
    echo "=== cuBLAS cached, skipping download ==="
    echo "cache: $CACHE_DIR"
@@ -41,21 +43,22 @@ wget -q -O "${PACKAGES_GZ}" "${REPO_BASE}/Packages.gz"

 lookup_pkg() {
    pkg="$1"
-    ver="$2"
+    ver="$2"  # if empty, match any version (first found)
    gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" -v ver="$ver" '
-        /^Package: / { cur_pkg=$2 }
-        /^Version: / { cur_ver=$2 }
-        /^Filename: / { cur_file=$2 }
-        /^SHA256: / { cur_sha=$2 }
+        /^Package: / { cur_pkg=$2; gsub(/\r/, "", cur_pkg) }
+        /^Version: / { cur_ver=$2; gsub(/\r/, "", cur_ver) }
+        /^Filename: / { cur_file=$2; gsub(/\r/, "", cur_file) }
+        /^SHA256: / { cur_sha=$2; gsub(/\r/, "", cur_sha) }
        /^$/ {
-            if (cur_pkg == pkg && cur_ver == ver) {
+            if (cur_pkg == pkg && (ver == "" || cur_ver == ver)) {
                print cur_file " " cur_sha
+                printed=1
                exit
            }
            cur_pkg=""; cur_ver=""; cur_file=""; cur_sha=""
        }
        END {
-            if (cur_pkg == pkg && cur_ver == ver) {
+            if (!printed && cur_pkg == pkg && (ver == "" || cur_ver == ver)) {
                print cur_file " " cur_sha
            }
        }'
@@ -77,26 +80,26 @@ download_verified_pkg() {
    if [ -f "$out" ]; then
        actual_sha="$(sha256sum "$out" | awk '{print $1}')"
        if [ "$actual_sha" = "$repo_sha" ]; then
-            echo "=== using cached $(basename "$repo_file") ==="
+            echo "=== using cached $(basename "$repo_file") ===" >&2
            printf '%s\n' "$out"
            return 0
        fi
-        echo "=== removing stale $(basename "$repo_file") (sha256 mismatch) ==="
+        echo "=== removing stale $(basename "$repo_file") (sha256 mismatch) ===" >&2
        rm -f "$out"
    fi

-    echo "=== downloading $(basename "$repo_file") ==="
+    echo "=== downloading $(basename "$repo_file") ===" >&2
    wget --show-progress -O "$out" "${REPO_BASE}/$(basename "$repo_file")"

    actual_sha="$(sha256sum "$out" | awk '{print $1}')"
    if [ "$actual_sha" != "$repo_sha" ]; then
-        echo "ERROR: sha256 mismatch for $(basename "$repo_file")"
-        echo "  expected: $repo_sha"
-        echo "  actual:   $actual_sha"
+        echo "ERROR: sha256 mismatch for $(basename "$repo_file")" >&2
+        echo "  expected: $repo_sha" >&2
+        echo "  actual:   $actual_sha" >&2
        rm -f "$out"
        exit 1
    fi
-    echo "sha256 OK: $(basename "$repo_file")"
+    echo "sha256 OK: $(basename "$repo_file")" >&2
    printf '%s\n' "$out"
 }

@@ -118,6 +121,17 @@ copy_headers() {
    if [ -d "${from}/usr/include" ]; then
        cp -a "${from}/usr/include/." "${CACHE_DIR}/include/"
    fi
+    # NVIDIA CUDA packages install headers under /usr/local/cuda-X.Y/targets/x86_64-linux/include/
+    find "$from" -type d -name include | while read -r inc_dir; do
+        case "$inc_dir" in
+            */usr/include) ;;  # already handled above
+            *)
+                if find "${inc_dir}" -maxdepth 3 \( -name '*.h' -o -type f \) | grep -q .; then
+                    cp -a "${inc_dir}/." "${CACHE_DIR}/include/"
+                fi
+                ;;
+        esac
+    done
 }

 copy_libs() {
@@ -143,14 +157,20 @@ CUBLAS_RT_DEB=$(download_verified_pkg "libcublas-${CUDA_SERIES_DASH}" "${CUBLAS_
 CUBLAS_DEV_DEB=$(download_verified_pkg "libcublas-dev-${CUDA_SERIES_DASH}" "${CUBLAS_VERSION}")
 CUDART_RT_DEB=$(download_verified_pkg "cuda-cudart-${CUDA_SERIES_DASH}" "${CUDA_USERSPACE_VERSION}")
 CUDART_DEV_DEB=$(download_verified_pkg "cuda-cudart-dev-${CUDA_SERIES_DASH}" "${CUDA_USERSPACE_VERSION}")
+CUDA_CRT_DEB=$(download_verified_pkg "cuda-crt-${CUDA_SERIES_DASH}" "")
+CUDA_CCCL_DEB=$(download_verified_pkg "cuda-cccl-${CUDA_SERIES_DASH}" "")

 extract_deb "$CUBLAS_RT_DEB" "${TMP_DIR}/cublas-rt"
 extract_deb "$CUBLAS_DEV_DEB" "${TMP_DIR}/cublas-dev"
 extract_deb "$CUDART_RT_DEB" "${TMP_DIR}/cudart-rt"
 extract_deb "$CUDART_DEV_DEB" "${TMP_DIR}/cudart-dev"
+extract_deb "$CUDA_CRT_DEB" "${TMP_DIR}/cuda-crt"
+extract_deb "$CUDA_CCCL_DEB" "${TMP_DIR}/cuda-cccl"

 copy_headers "${TMP_DIR}/cublas-dev"
 copy_headers "${TMP_DIR}/cudart-dev"
+copy_headers "${TMP_DIR}/cuda-crt"
+copy_headers "${TMP_DIR}/cuda-cccl"
 copy_libs "${TMP_DIR}/cublas-rt"
 copy_libs "${TMP_DIR}/cudart-rt"

--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -11,6 +11,8 @@ BUILDER_PLATFORM="${BEE_BUILDER_PLATFORM:-linux/amd64}"
 CACHE_DIR="${BEE_BUILDER_CACHE_DIR:-${REPO_ROOT}/dist/container-cache}"
 AUTH_KEYS=""
 REBUILD_IMAGE=0
+CLEAN_CACHE=0
+VARIANT="all"

 . "${BUILDER_DIR}/VERSIONS"

@@ -28,14 +30,42 @@ while [ $# -gt 0 ]; do
            AUTH_KEYS="$2"
            shift 2
            ;;
+        --clean-build)
+            CLEAN_CACHE=1
+            REBUILD_IMAGE=1
+            shift
+            ;;
+        --variant)
+            VARIANT="$2"
+            shift 2
+            ;;
        *)
            echo "unknown arg: $1" >&2
-            echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--authorized-keys /path/to/authorized_keys]" >&2
+            echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|amd|nogpu|all]" >&2  # variant list mirrors the VARIANT validation case
            exit 1
            ;;
    esac
 done

+case "$VARIANT" in  # accept only the four supported variant names, otherwise exit 1
+    nvidia|amd|nogpu|all) ;;
+    *) echo "unknown variant: $VARIANT (expected nvidia, amd, nogpu, or all)" >&2; exit 1 ;;
+esac
+
+if [ "$CLEAN_CACHE" = "1" ]; then  # set by --clean-build
+    echo "=== cleaning build cache: ${CACHE_DIR} ==="  # ${CACHE_DIR:?} below aborts if CACHE_DIR is empty or unset
+    rm -rf "${CACHE_DIR:?}/go-build" \
+           "${CACHE_DIR:?}/go-mod" \
+           "${CACHE_DIR:?}/tmp" \
+           "${CACHE_DIR:?}/bee" \
+           "${CACHE_DIR:?}/lb-packages"
+    echo "=== cleaning live-build work dirs ==="  # all three variants' work dirs go, whatever --variant was given
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-amd"
+    rm -rf "${REPO_ROOT}/dist/live-build-work-nogpu"
+    echo "=== caches cleared, proceeding with build ==="
+fi
+
 if ! command -v "$CONTAINER_TOOL" >/dev/null 2>&1; then
    echo "container tool not found: $CONTAINER_TOOL" >&2
    exit 1
@@ -90,34 +120,75 @@ else
    echo "=== using existing builder image ${IMAGE_REF} (${BUILDER_PLATFORM}) ==="
 fi

-set -- \
-    run --rm --privileged \
-    --platform "${BUILDER_PLATFORM}" \
-    -v "${REPO_ROOT}:/work" \
-    -v "${CACHE_DIR}:/cache" \
-    -e BEE_CONTAINER_BUILD=1 \
-    -e GOCACHE=/cache/go-build \
-    -e GOMODCACHE=/cache/go-mod \
-    -e TMPDIR=/cache/tmp \
-    -e BEE_CACHE_DIR=/cache/bee \
-    -w /work \
-    "${IMAGE_REF}" \
-    sh /work/iso/builder/build.sh
-
-if [ -n "$AUTH_KEYS" ]; then
-    set -- run --rm --privileged \
-        --platform "${BUILDER_PLATFORM}" \
-        -v "${REPO_ROOT}:/work" \
-        -v "${CACHE_DIR}:/cache" \
-        -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro" \
+# Print the container run arguments for one variant as a single string (adds the authorized-keys mount and --authorized-keys flag when AUTH_KEYS is set)
+build_run_args() {
+    _variant="$1"
+    _auth_arg=""
+    if [ -n "$AUTH_KEYS" ]; then
+        _auth_arg="--authorized-keys /tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    fi
+    echo "run --rm --privileged \
+        --platform ${BUILDER_PLATFORM} \
+        -v ${REPO_ROOT}:/work \
+        -v ${CACHE_DIR}:/cache \
+        ${AUTH_KEYS:+-v ${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro} \
        -e BEE_CONTAINER_BUILD=1 \
        -e GOCACHE=/cache/go-build \
        -e GOMODCACHE=/cache/go-mod \
        -e TMPDIR=/cache/tmp \
        -e BEE_CACHE_DIR=/cache/bee \
        -w /work \
-        "${IMAGE_REF}" \
-        sh /work/iso/builder/build.sh --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
-fi
+        ${IMAGE_REF} \
+        sh /work/iso/builder/build.sh --variant ${_variant} ${_auth_arg}"
+}

-"$CONTAINER_TOOL" "$@"
+run_variant() {
+    # Build one ISO variant inside the builder container.
+    #   $1 - variant name, forwarded to build.sh as --variant
+    # When AUTH_KEYS is set, AUTH_KEYS_DIR is mounted read-only at
+    # /tmp/bee-authkeys and build.sh also receives --authorized-keys.
+    # The command line is collected in this function's own positional
+    # parameters, so every path stays a single, properly quoted argument.
+    _v="$1"
+    echo "=== building variant: ${_v} ==="
+
+    set -- run --rm --privileged \
+        --platform "${BUILDER_PLATFORM}" \
+        -v "${REPO_ROOT}:/work" \
+        -v "${CACHE_DIR}:/cache"
+    if [ -n "$AUTH_KEYS" ]; then
+        set -- "$@" -v "${AUTH_KEYS_DIR}:/tmp/bee-authkeys:ro"
+    fi
+    set -- "$@" \
+        -e BEE_CONTAINER_BUILD=1 \
+        -e GOCACHE=/cache/go-build \
+        -e GOMODCACHE=/cache/go-mod \
+        -e TMPDIR=/cache/tmp \
+        -e BEE_CACHE_DIR=/cache/bee \
+        -w /work \
+        "${IMAGE_REF}" \
+        sh /work/iso/builder/build.sh --variant "${_v}"
+    if [ -n "$AUTH_KEYS" ]; then
+        set -- "$@" --authorized-keys "/tmp/bee-authkeys/${AUTH_KEYS_BASE}"
+    fi
+
+    # Same argv, in the same order, as spelling the command out per case.
+    "$CONTAINER_TOOL" "$@"
+}
+
+# Run the build for the requested variant. "all" builds every variant in
+# the fixed order nvidia, amd, nogpu; a single variant name builds only
+# that one. Any other value matches no arm, so nothing is built here and
+# the case statement itself exits with status 0.
+case "$VARIANT" in
+    all)
+        # One container run per variant, one after another.
+        for _each_variant in nvidia amd nogpu; do
+            run_variant "$_each_variant"
+        done
+        ;;
+    nvidia|amd|nogpu)
+        # VARIANT already holds exactly the name run_variant expects.
+        run_variant "$VARIANT"
+        ;;
+esac
--- a/iso/builder/build-john.sh
+++ b/iso/builder/build-john.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# build-john.sh — build John the Ripper jumbo with OpenCL support for the LiveCD.
+#
+# Downloads a pinned source snapshot from the official openwall/john repository,
+# builds it inside the builder container, and caches the resulting run/ tree.
+#
+# Usage: build-john.sh <john-commit> <dist-dir>
+#
+# Output layout:
+#   <dist-dir>/john-<john-commit>/run/   — copy of the source tree's run/ directory
+#
+# The downloaded tarball is kept under ${BEE_CACHE_DIR:-<dist-dir>/cache}/john-downloads
+# so a rebuild does not have to fetch it again.
+
+set -e
+
+JOHN_COMMIT="$1"
+DIST_DIR="$2"
+
+[ -n "$JOHN_COMMIT" ] || { echo "usage: $0 <john-commit> <dist-dir>" >&2; exit 1; }
+[ -n "$DIST_DIR" ] || { echo "usage: $0 <john-commit> <dist-dir>" >&2; exit 1; }
+
+echo "=== John the Ripper jumbo ${JOHN_COMMIT} ==="
+
+CACHE_DIR="${DIST_DIR}/john-${JOHN_COMMIT}"
+CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
+DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/john-downloads"
+SRC_TAR="${DOWNLOAD_CACHE_DIR}/john-${JOHN_COMMIT}.tar.gz"
+SRC_URL="https://github.com/openwall/john/archive/${JOHN_COMMIT}.tar.gz"
+
+# A previous successful build leaves an executable john and its config behind.
+if [ -x "${CACHE_DIR}/run/john" ] && [ -f "${CACHE_DIR}/run/john.conf" ]; then
+    echo "=== john cached, skipping build ==="
+    echo "run dir: ${CACHE_DIR}/run"
+    exit 0
+fi
+
+mkdir -p "${DOWNLOAD_CACHE_DIR}"
+# -s rather than -f: an empty file left by an earlier failed transfer must not
+# be mistaken for a cached download.
+if [ ! -s "${SRC_TAR}" ]; then
+    echo "=== downloading john source snapshot ==="
+    # Fetch under a temporary name and rename only on success, so a failed or
+    # interrupted wget never leaves a truncated tarball at the cached path.
+    rm -f "${SRC_TAR}.part"
+    wget --show-progress -O "${SRC_TAR}.part" "${SRC_URL}"
+    mv "${SRC_TAR}.part" "${SRC_TAR}"
+fi
+
+BUILD_TMP=$(mktemp -d)
+trap 'rm -rf "${BUILD_TMP}"' EXIT INT TERM
+
+cd "${BUILD_TMP}"
+tar xf "${SRC_TAR}"
+# The archive unpacks into a single top-level john-* directory.
+SRC_DIR=$(find . -maxdepth 1 -type d -name 'john-*' | head -1)
+[ -n "${SRC_DIR}" ] || { echo "ERROR: john source directory not found" >&2; exit 1; }
+
+cd "${SRC_DIR}/src"
+echo "=== configuring john ==="
+./configure
+echo "=== building john ==="
+# Best-effort clean; a fresh tree may have nothing to clean.
+make clean >/dev/null 2>&1 || true
+make -j"$(nproc)"
+
+mkdir -p "${CACHE_DIR}"
+# Remove any stale or partial tree first: copying onto an existing directory
+# would nest the new tree as run/run instead of replacing it.
+rm -rf "${CACHE_DIR}/run"
+cp -a "../run" "${CACHE_DIR}/run"
+chmod +x "${CACHE_DIR}/run/john"
+
+echo "=== john build complete ==="
+echo "run dir: ${CACHE_DIR}/run"
--- a/iso/builder/build-nccl-tests.sh
+++ b/iso/builder/build-nccl-tests.sh
@@ -0,0 +1,164 @@
+#!/bin/sh
+# build-nccl-tests.sh — build nccl-tests all_reduce_perf for the LiveCD.
+#
+# Fetches the nccl-tests source from GitHub and the libnccl-dev .deb (for
+# nccl.h), then compiles all_reduce_perf with nvcc (cuda-nvcc-13-0).
+#
+# The result is cached in DIST_DIR/nccl-tests-<version>/, so later builds are
+# instant unless NCCL_TESTS_VERSION changes.
+#
+# Output layout:
+#   $CACHE_DIR/bin/all_reduce_perf
+#   $CACHE_DIR/lib/libcudart.so* copied from the nvcc toolchain used to build nccl-tests
+
+set -e
+
+NCCL_TESTS_VERSION="$1"
+NCCL_VERSION="$2"
+NCCL_CUDA_VERSION="$3"
+DIST_DIR="$4"
+NVCC_VERSION="${5:-}"
+DEBIAN_VERSION="${6:-12}"
+
+# Print the usage line and abort with status 1.
+usage() {
+    echo "usage: $0 <nccl-tests-version> <nccl-version> <cuda-version> <dist-dir> [nvcc-version] [debian-version]"
+    exit 1
+}
+
+# The first four arguments are mandatory.
+for _required_arg in "$NCCL_TESTS_VERSION" "$NCCL_VERSION" "$NCCL_CUDA_VERSION" "$DIST_DIR"; do
+    [ -n "$_required_arg" ] || usage
+done
+
+echo "=== nccl-tests ${NCCL_TESTS_VERSION} ==="
+
+CACHE_DIR="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
+CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
+DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"
+
+# The cache counts as complete when both the binary and at least one
+# libcudart.so* entry are present.
+_cached_cudart_count="$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' 2>/dev/null | wc -l)"
+if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ] && [ "${_cached_cudart_count}" -gt 0 ]; then
+    echo "=== nccl-tests cached, skipping build ==="
+    echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
+    exit 0
+fi
+
+# Resolve nvcc path (cuda-nvcc-X-Y installs to /usr/local/cuda-X.Y/bin/nvcc).
+# NVCC_VERSION may be written X.Y or X-Y; the install directory uses X.Y.
+NVCC_VERSION_PATH="$(echo "${NVCC_VERSION}" | tr '-' '.')"
+NVCC=""
+for candidate in nvcc "/usr/local/cuda-${NVCC_VERSION_PATH}/bin/nvcc" /usr/local/cuda-12/bin/nvcc /usr/local/cuda/bin/nvcc; do
+    # command -v prints the full path for a bare name found on PATH and echoes
+    # an executable absolute path back unchanged. Storing that output (not the
+    # bare word "nvcc") keeps the CUDA_HOME derivation below meaningful.
+    resolved_nvcc="$(command -v "$candidate" 2>/dev/null || true)"
+    if [ -n "$resolved_nvcc" ]; then
+        NVCC="$resolved_nvcc"
+        break
+    fi
+done
+[ -n "$NVCC" ] || { echo "ERROR: nvcc not found — install cuda-nvcc-$(echo "${NVCC_VERSION}" | tr '.' '-')"; exit 1; }
+echo "nvcc: $NVCC"
+
+# Determine CUDA_HOME from nvcc location (<CUDA_HOME>/bin/nvcc)
+CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
+echo "CUDA_HOME: $CUDA_HOME"
+
+# find_cudart_dir — print the first candidate directory under CUDA_HOME that
+# directly contains a regular file matching libcudart.so*. Returns 0 when one
+# is found and 1 when none of the candidates qualifies.
+find_cudart_dir() {
+    for _cudart_candidate in \
+        "${CUDA_HOME}/targets/x86_64-linux/lib" \
+        "${CUDA_HOME}/targets/x86_64-linux/lib/stubs" \
+        "${CUDA_HOME}/lib64" \
+        "${CUDA_HOME}/lib"; do
+        [ -d "$_cudart_candidate" ] || continue
+        if find "$_cudart_candidate" -maxdepth 1 -name 'libcudart.so*' -type f | grep -q .; then
+            printf '%s\n' "$_cudart_candidate"
+            return 0
+        fi
+    done
+    return 1
+}
+
+if ! CUDART_DIR="$(find_cudart_dir)"; then
+    echo "ERROR: libcudart.so* not found under ${CUDA_HOME}"
+    exit 1
+fi
+echo "cudart dir: $CUDART_DIR"
+
+# Download libnccl-dev for nccl.h
+REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian${DEBIAN_VERSION}/x86_64"
+DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
+DEV_URL="${REPO_BASE}/${DEV_PKG}"
+
+mkdir -p "$DOWNLOAD_CACHE_DIR"
+DEV_DEB="${DOWNLOAD_CACHE_DIR}/${DEV_PKG}"
+
+# -s rather than -f: an empty file left by an earlier failed transfer must not
+# be mistaken for a cached package.
+if [ ! -s "$DEV_DEB" ]; then
+    echo "=== downloading libnccl-dev ==="
+    # Fetch under a temporary name and rename only on success, so a failed or
+    # interrupted wget never leaves a truncated .deb at the cached path.
+    rm -f "${DEV_DEB}.part"
+    wget --show-progress -O "${DEV_DEB}.part" "$DEV_URL"
+    mv "${DEV_DEB}.part" "$DEV_DEB"
+fi
+
+# Extract nccl.h from libnccl-dev
+NCCL_INCLUDE_TMP=$(mktemp -d)
+# One cleanup trap covers both scratch directories. BUILD_TMP is only assigned
+# further down; until then it expands to an empty string inside the handler.
+trap 'rm -rf "$NCCL_INCLUDE_TMP" "$BUILD_TMP"' EXIT INT TERM
+
+cd "$NCCL_INCLUDE_TMP"
+# Unpack the .deb (an ar archive) and then its data.tar.* payload.
+ar x "$DEV_DEB"
+DATA_TAR=$(ls data.tar.* 2>/dev/null | head -1)
+[ -n "$DATA_TAR" ] || { echo "ERROR: data.tar.* not found in libnccl-dev .deb"; exit 1; }
+tar xf "$DATA_TAR"
+
+# nccl.h lands in ./usr/include/ or ./usr/local/cuda-X.Y/targets/.../include/
+NCCL_H=$(find . -name 'nccl.h' -type f 2>/dev/null | head -1)
+[ -n "$NCCL_H" ] || { echo "ERROR: nccl.h not found in libnccl-dev package"; exit 1; }
+# Absolute include directory: the build below runs from a different directory.
+NCCL_INCLUDE_DIR="$(pwd)/$(dirname "$NCCL_H")"
+echo "nccl.h: $NCCL_H"
+
+# libnccl.so comes from the already-built NCCL cache (build-nccl.sh ran first)
+NCCL_LIB_DIR="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}/lib"
+[ -d "$NCCL_LIB_DIR" ] || { echo "ERROR: NCCL lib dir not found at $NCCL_LIB_DIR — run build-nccl.sh first"; exit 1; }
+echo "nccl lib: $NCCL_LIB_DIR"
+
+# Download nccl-tests source
+SRC_TAR="${DOWNLOAD_CACHE_DIR}/nccl-tests-v${NCCL_TESTS_VERSION}.tar.gz"
+SRC_URL="https://github.com/NVIDIA/nccl-tests/archive/refs/tags/v${NCCL_TESTS_VERSION}.tar.gz"
+
+# -s rather than -f: an empty file left by an earlier failed transfer must not
+# be mistaken for a cached tarball.
+if [ ! -s "$SRC_TAR" ]; then
+    echo "=== downloading nccl-tests v${NCCL_TESTS_VERSION} ==="
+    # Fetch under a temporary name and rename only on success, so a failed or
+    # interrupted wget never leaves a truncated tarball at the cached path.
+    rm -f "${SRC_TAR}.part"
+    wget --show-progress -O "${SRC_TAR}.part" "$SRC_URL"
+    mv "${SRC_TAR}.part" "$SRC_TAR"
+fi
+
+# Extract and build
+# BUILD_TMP is removed by the cleanup trap installed earlier in this script.
+BUILD_TMP=$(mktemp -d)
+cd "$BUILD_TMP"
+tar xf "$SRC_TAR"
+# The archive unpacks into a single nccl-tests-* directory.
+SRC_DIR=$(ls -d nccl-tests-* 2>/dev/null | head -1)
+[ -n "$SRC_DIR" ] || { echo "ERROR: source directory not found in archive"; exit 1; }
+cd "$SRC_DIR"
+
+echo "=== building all_reduce_perf ==="
+# Pick gencode based on the actual nvcc version:
+#   CUDA 12.x — Volta..Blackwell (sm_70..sm_100)
+#   CUDA 13.x — Hopper..Blackwell (sm_90..sm_100, Pascal/Volta/Ampere dropped)
+# NOTE(review): the CUDA 13 release notes appear to list only Maxwell, Pascal
+# and Volta as removed; Ampere (sm_80/sm_86) seems to remain supported. Confirm
+# whether leaving it out of the CUDA 13 list is intentional.
+NVCC_MAJOR=$("$NVCC" --version 2>/dev/null | grep -oE 'release [0-9]+' | awk '{print $2}' | head -1)
+echo "nvcc major version: ${NVCC_MAJOR:-unknown}"
+# An unparsable version falls through to the CUDA 12 list (test error is muted).
+if [ "${NVCC_MAJOR:-0}" -ge 13 ] 2>/dev/null; then
+    GENCODE="-gencode=arch=compute_90,code=sm_90 \
+             -gencode=arch=compute_100,code=sm_100"
+    echo "gencode: sm_90 sm_100 (CUDA 13+)"
+else
+    GENCODE="-gencode=arch=compute_70,code=sm_70 \
+             -gencode=arch=compute_80,code=sm_80 \
+             -gencode=arch=compute_86,code=sm_86 \
+             -gencode=arch=compute_90,code=sm_90 \
+             -gencode=arch=compute_100,code=sm_100"
+    echo "gencode: sm_70..sm_100 (CUDA 12)"
+fi
+# LIBRARY_PATH is extended for this make invocation only, so the linker can
+# find libnccl in the NCCL cache directory.
+LIBRARY_PATH="$NCCL_LIB_DIR${LIBRARY_PATH:+:$LIBRARY_PATH}" \
+make MPI=0 \
+    NVCC="$NVCC" \
+    CUDA_HOME="$CUDA_HOME" \
+    NCCL_HOME="$NCCL_INCLUDE_DIR/.." \
+    NCCL_LIB="$NCCL_LIB_DIR" \
+    NVCC_GENCODE="$GENCODE" \
+    BUILDDIR="./build"
+
+[ -f "./build/all_reduce_perf" ] || { echo "ERROR: all_reduce_perf not found after build"; exit 1; }
+
+# Install the binary into the cache directory.
+mkdir -p "${CACHE_DIR}/bin"
+cp "./build/all_reduce_perf" "${CACHE_DIR}/bin/all_reduce_perf"
+chmod +x "${CACHE_DIR}/bin/all_reduce_perf"
+
+# Copy the libcudart.so* regular files from the toolchain next to the binary
+# and verify that at least one was copied.
+mkdir -p "${CACHE_DIR}/lib"
+find "${CUDART_DIR}" -maxdepth 1 -name 'libcudart.so*' -type f -exec cp -a {} "${CACHE_DIR}/lib/" \;
+[ "$(find "${CACHE_DIR}/lib" -maxdepth 1 -name 'libcudart.so*' -type f | wc -l)" -gt 0 ] || { echo "ERROR: libcudart runtime copy failed"; exit 1; }
+
+echo "=== nccl-tests build complete ==="
+echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
+ls -lh "${CACHE_DIR}/bin/all_reduce_perf"
+ls -lh "${CACHE_DIR}/lib/"libcudart.so* 2>/dev/null || true
--- a/iso/builder/build-nvidia-module.sh
+++ b/iso/builder/build-nvidia-module.sh
@@ -10,7 +10,7 @@
 # Output layout:
 #   $CACHE_DIR/modules/   — nvidia*.ko files
 #   $CACHE_DIR/bin/       — nvidia-smi, nvidia-debugdump
-#   $CACHE_DIR/lib/       — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
+#   $CACHE_DIR/lib/       — libnvidia-ml.so*, libcuda.so*, OpenCL-related libs

 set -e

@@ -133,7 +133,14 @@ fi
 # Copy ALL userspace library files.
 # libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
 # (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
+for lib in \
+    libnvidia-ml \
+    libcuda \
+    libnvidia-ptxjitcompiler \
+    libnvidia-opencl \
+    libnvidia-compiler \
+    libnvidia-nvvm \
+    libnvidia-fatbinaryloader; do
    count=0
    for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
        cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
@@ -150,7 +157,14 @@ ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
 [ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }

 # Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
-for lib in libnvidia-ml libcuda libnvidia-ptxjitcompiler; do
+for lib in \
+    libnvidia-ml \
+    libcuda \
+    libnvidia-ptxjitcompiler \
+    libnvidia-opencl \
+    libnvidia-compiler \
+    libnvidia-nvvm \
+    libnvidia-fatbinaryloader; do
    versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
    [ -n "$versioned" ] || continue
    base=$(basename "$versioned")
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -13,21 +13,34 @@ BUILDER_DIR="${REPO_ROOT}/iso/builder"
 OVERLAY_DIR="${REPO_ROOT}/iso/overlay"
 DIST_DIR="${REPO_ROOT}/dist"
 VENDOR_DIR="${REPO_ROOT}/iso/vendor"
-BUILD_WORK_DIR="${DIST_DIR}/live-build-work"
-OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage"
 CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
 AUTH_KEYS=""
+BEE_GPU_VENDOR="nvidia"

 # parse args
 while [ $# -gt 0 ]; do
    case "$1" in
        --authorized-keys) AUTH_KEYS="$2"; shift 2 ;;
+        --variant) BEE_GPU_VENDOR="$2"; shift 2 ;;
        *) echo "unknown arg: $1"; exit 1 ;;
    esac
 done

+case "$BEE_GPU_VENDOR" in
+    nvidia|amd|nogpu) ;;
+    *) echo "unknown variant: $BEE_GPU_VENDOR (expected nvidia, amd, or nogpu)" >&2; exit 1 ;;
+esac
+
+BUILD_WORK_DIR="${DIST_DIR}/live-build-work-${BEE_GPU_VENDOR}"
+OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BEE_GPU_VENDOR}"
+
+export BEE_GPU_VENDOR
+
 . "${BUILDER_DIR}/VERSIONS"
 export PATH="$PATH:/usr/local/go/bin"
+
+# Allow git to read the bind-mounted repo (different UID inside container).
+git config --global safe.directory "${REPO_ROOT}"
 mkdir -p "${DIST_DIR}"
 mkdir -p "${CACHE_ROOT}"
 : "${GOCACHE:=${CACHE_ROOT}/go-build}"
@@ -42,7 +55,7 @@ resolve_audit_version() {

    tag="$(git -C "${REPO_ROOT}" describe --tags --match 'audit/v*' --abbrev=7 --dirty 2>/dev/null || true)"
    if [ -z "${tag}" ]; then
-        tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v*' --abbrev=7 --dirty 2>/dev/null || true)"
+        tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
    fi
    case "${tag}" in
        audit/v*)
@@ -76,6 +89,16 @@ resolve_iso_version() {
        return 0
    fi

+    # Plain v* tags (e.g. v2.7) take priority — this is the current tagging scheme
+    tag="$(git -C "${REPO_ROOT}" describe --tags --match 'v[0-9]*' --abbrev=7 --dirty 2>/dev/null || true)"
+    case "${tag}" in
+        v*)
+            echo "${tag#v}"
+            return 0
+            ;;
+    esac
+
+    # Legacy iso/v* tags fallback
    tag="$(git -C "${REPO_ROOT}" describe --tags --match 'iso/v*' --abbrev=7 --dirty 2>/dev/null || true)"
    case "${tag}" in
        iso/v*)
@@ -88,6 +111,61 @@ resolve_iso_version() {
    resolve_audit_version
 }

+# memtest_validation_fail MESSAGE — report a validate_iso_memtest failure on
+# stderr (prefixed with "ERROR: ") and abort the build with status 1.
+memtest_validation_fail() {
+    echo "ERROR: $1" >&2
+    exit 1
+}
+
+# validate_iso_memtest ISO_PATH — check that the built ISO ships memtest86+.
+#
+# Verifies, in order, that:
+#   - the ISO file exists and bsdtar is available;
+#   - boot/memtest86+x64.bin and boot/memtest86+x64.efi are listed in the ISO;
+#   - boot/grub/grub.cfg contains the memtest menu entry plus both binary paths;
+#   - isolinux/live.cfg contains the memtest menu entry plus the BIOS path.
+# The first failed check prints an error to stderr and exits with status 1.
+validate_iso_memtest() {
+    iso_path="$1"
+    echo "=== validating memtest in ISO ==="
+
+    if [ ! -f "$iso_path" ]; then
+        memtest_validation_fail "ISO not found for validation: $iso_path"
+    fi
+    if ! command -v bsdtar >/dev/null 2>&1; then
+        memtest_validation_fail "bsdtar is required for ISO validation"
+    fi
+
+    if ! bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.bin$'; then
+        memtest_validation_fail "memtest BIOS binary missing in ISO: boot/memtest86+x64.bin"
+    fi
+    if ! bsdtar -tf "$iso_path" | grep -q '^boot/memtest86+x64\.efi$'; then
+        memtest_validation_fail "memtest EFI binary missing in ISO: boot/memtest86+x64.efi"
+    fi
+
+    # Scratch copies of the two boot menus; removed by the trap on any exit path.
+    grub_cfg="$(mktemp)"
+    isolinux_cfg="$(mktemp)"
+    trap 'rm -f "$grub_cfg" "$isolinux_cfg"' EXIT INT TERM
+
+    if ! bsdtar -xOf "$iso_path" boot/grub/grub.cfg > "$grub_cfg"; then
+        memtest_validation_fail "failed to extract boot/grub/grub.cfg from ISO"
+    fi
+    if ! bsdtar -xOf "$iso_path" isolinux/live.cfg > "$isolinux_cfg"; then
+        memtest_validation_fail "failed to extract isolinux/live.cfg from ISO"
+    fi
+
+    if ! grep -q 'Memory Test (memtest86+)' "$grub_cfg"; then
+        memtest_validation_fail "GRUB menu entry for memtest is missing"
+    fi
+    if ! grep -q '/boot/memtest86+x64\.efi' "$grub_cfg"; then
+        memtest_validation_fail "GRUB memtest EFI path is missing"
+    fi
+    if ! grep -q '/boot/memtest86+x64\.bin' "$grub_cfg"; then
+        memtest_validation_fail "GRUB memtest BIOS path is missing"
+    fi
+    if ! grep -q 'Memory Test (memtest86+)' "$isolinux_cfg"; then
+        memtest_validation_fail "isolinux menu entry for memtest is missing"
+    fi
+    if ! grep -q '/boot/memtest86+x64\.bin' "$isolinux_cfg"; then
+        memtest_validation_fail "isolinux memtest path is missing"
+    fi
+
+    # Success path: clean up explicitly and drop the trap again.
+    rm -f "$grub_cfg" "$isolinux_cfg"
+    trap - EXIT INT TERM
+    echo "=== memtest validation OK ==="
+}
+
 AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)"
 ISO_VERSION_EFFECTIVE="$(resolve_iso_version)"

@@ -119,7 +197,7 @@ if [ ! -d "/usr/src/linux-headers-${KVER}" ]; then
    apt-get install -y "linux-headers-${KVER}"
 fi

-echo "=== bee ISO build ==="
+echo "=== bee ISO build (variant: ${BEE_GPU_VENDOR}) ==="
 echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERSION}"
 echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}, ISO version: ${ISO_VERSION_EFFECTIVE}"
 echo ""
@@ -128,8 +206,8 @@ echo "=== syncing git submodules ==="
 git -C "${REPO_ROOT}" submodule update --init --recursive

 # --- compile bee binary (static, Linux amd64) ---
+# Shared between variants — built once, reused on second pass.
 BEE_BIN="${DIST_DIR}/bee-linux-amd64"
-GPU_STRESS_BIN="${DIST_DIR}/bee-gpu-stress-linux-amd64"
 NEED_BUILD=1
 if [ -f "$BEE_BIN" ]; then
    NEWEST_SRC=$(find "${REPO_ROOT}/audit" -name '*.go' -newer "$BEE_BIN" | head -1)
@@ -159,45 +237,82 @@ else
    echo "=== bee binary up to date, skipping build ==="
 fi

-echo ""
-echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
-sh "${BUILDER_DIR}/build-cublas.sh" \
-    "${CUBLAS_VERSION}" \
-    "${CUDA_USERSPACE_VERSION}" \
-    "${NCCL_CUDA_VERSION}" \
-    "${DIST_DIR}"
+# --- NVIDIA-only build steps ---
+GPU_BURN_WORKER_BIN="${DIST_DIR}/bee-gpu-burn-worker-linux-amd64"
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    echo ""
+    echo "=== downloading cuBLAS/cuBLASLt/cudart ${NCCL_CUDA_VERSION} userspace ==="
+    sh "${BUILDER_DIR}/build-cublas.sh" \
+        "${CUBLAS_VERSION}" \
+        "${CUDA_USERSPACE_VERSION}" \
+        "${NCCL_CUDA_VERSION}" \
+        "${DIST_DIR}"

-CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"
+    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"

-GPU_STRESS_NEED_BUILD=1
-if [ -f "$GPU_STRESS_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_STRESS_BIN" ]; then
-    GPU_STRESS_NEED_BUILD=0
+    GPU_STRESS_NEED_BUILD=1
+    if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then
+        GPU_STRESS_NEED_BUILD=0
+    fi
+
+    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
+        echo "=== building bee-gpu-burn worker ==="
+        gcc -O2 -s -Wall -Wextra \
+            -I"${CUBLAS_CACHE}/include" \
+            -o "$GPU_BURN_WORKER_BIN" \
+            "${BUILDER_DIR}/bee-gpu-stress.c" \
+            -ldl -lm
+        echo "binary: $GPU_BURN_WORKER_BIN"
+    else
+        echo "=== bee-gpu-burn worker up to date, skipping build ==="
+    fi
 fi

-if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
-    echo "=== building bee-gpu-stress ==="
-    gcc -O2 -s -Wall -Wextra \
-        -I"${CUBLAS_CACHE}/include" \
-        -o "$GPU_STRESS_BIN" \
-        "${BUILDER_DIR}/bee-gpu-stress.c" \
-        -ldl
-    echo "binary: $GPU_STRESS_BIN"
-else
-    echo "=== bee-gpu-stress up to date, skipping build ==="
-fi
-
-echo "=== preparing staged overlay ==="
-rm -rf "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
+echo "=== preparing staged overlay (${BEE_GPU_VENDOR}) ==="
 mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}"
-rsync -a "${BUILDER_DIR}/" "${BUILD_WORK_DIR}/"
+
+# Sync builder config into variant work dir, preserving lb cache.
+# --delete drops files from the work dir that no longer exist in the builder
+# directory; the excluded paths (live-build cache/, chroot/, .build/ and the
+# generated *.iso / *.packages / *.contents / *.files outputs) are neither
+# copied nor deleted.
+rsync -a --delete \
+    --exclude='cache/' \
+    --exclude='chroot/' \
+    --exclude='.build/' \
+    --exclude='*.iso' \
+    --exclude='*.packages' \
+    --exclude='*.contents' \
+    --exclude='*.files' \
+    "${BUILDER_DIR}/" "${BUILD_WORK_DIR}/"
+
+# Share deb package cache across variants.
+# Before the build:
+#   - if this variant's work dir already has cache/packages.chroot, mirror it
+#     into the shared cache;
+#   - otherwise, if the shared cache is non-empty, copy it into the work dir.
+# Persist: sync back after build (done after lb build below).
+# NOTE(review): the first branch runs rsync with --delete, so packages present
+# only in the shared cache (e.g. fetched by another variant) are removed from
+# it — confirm this pruning is intended.
+LB_PKG_CACHE="${CACHE_ROOT}/lb-packages"
+mkdir -p "${LB_PKG_CACHE}"
+if [ -d "${BUILD_WORK_DIR}/cache/packages.chroot" ]; then
+    rsync -a --delete "${BUILD_WORK_DIR}/cache/packages.chroot/" "${LB_PKG_CACHE}/"
+elif [ -d "${LB_PKG_CACHE}" ] && [ "$(ls -A "${LB_PKG_CACHE}" 2>/dev/null)" ]; then
+    mkdir -p "${BUILD_WORK_DIR}/cache/packages.chroot"
+    rsync -a "${LB_PKG_CACHE}/" "${BUILD_WORK_DIR}/cache/packages.chroot/"
+fi
+
 rsync -a "${OVERLAY_DIR}/" "${OVERLAY_STAGE_DIR}/"
 rm -f \
    "${OVERLAY_STAGE_DIR}/etc/bee-ssh-password-fallback" \
    "${OVERLAY_STAGE_DIR}/etc/bee-release" \
    "${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
-    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
-    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/john" \
+    "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest" \
+    "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+rm -rf \
+    "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
+
+# Non-nvidia variants must not ship the NVIDIA loader script or its unit file.
+case "$BEE_GPU_VENDOR" in
+    nvidia) ;;
+    *)
+        rm -f \
+            "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nvidia-load" \
+            "${OVERLAY_STAGE_DIR}/etc/systemd/system/bee-nvidia.service"
+        ;;
+esac

 # --- inject authorized_keys for SSH access ---
 AUTHORIZED_KEYS_FILE="${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys"
@@ -236,8 +351,15 @@ fi
 mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin"
 cp "${DIST_DIR}/bee-linux-amd64" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
 chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee"
-cp "${GPU_STRESS_BIN}" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
+
+# nvidia variant only: install the compiled GPU burn worker into the staged
+# overlay and make the wrapper scripts executable where they exist.
+if [ "$BEE_GPU_VENDOR" = "nvidia" ] && [ -f "$GPU_BURN_WORKER_BIN" ]; then
+    _bee_lib_dir="${OVERLAY_STAGE_DIR}/usr/local/lib/bee"
+    _bee_bin_dir="${OVERLAY_STAGE_DIR}/usr/local/bin"
+    mkdir -p "${_bee_lib_dir}" "${_bee_bin_dir}"
+    cp "${GPU_BURN_WORKER_BIN}" "${_bee_lib_dir}/bee-gpu-burn-worker"
+    chmod +x "${_bee_lib_dir}/bee-gpu-burn-worker"
+    # Best-effort: a wrapper that is absent from the overlay is skipped silently.
+    for _stress_script in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress; do
+        chmod +x "${_bee_bin_dir}/${_stress_script}" 2>/dev/null || true
+    done
+fi

 # --- inject smoketest into overlay so it runs directly on the live CD ---
 cp "${BUILDER_DIR}/smoketest.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-smoketest"
@@ -254,76 +376,171 @@ for tool in storcli64 sas2ircu sas3ircu arcconf ssacli; do
    fi
 done

-# --- build NVIDIA kernel modules ---
-echo ""
-echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
-sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"
+# --- NVIDIA kernel modules and userspace libs ---
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    echo ""
+    echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
+    sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}"

-KVER="${DEBIAN_KERNEL_ABI}-amd64"
-NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
+    KVER="${DEBIAN_KERNEL_ABI}-amd64"
+    NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"

-# Inject .ko files into overlay at /usr/local/lib/nvidia/
-OVERLAY_KMOD_DIR="${OVERLAY_DIR}/usr/local/lib/nvidia"
-OVERLAY_KMOD_DIR="${OVERLAY_STAGE_DIR}/usr/local/lib/nvidia"
-mkdir -p "${OVERLAY_KMOD_DIR}"
-cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/"
+    # Inject .ko files into overlay at /usr/local/lib/nvidia/
+    OVERLAY_KMOD_DIR="${OVERLAY_STAGE_DIR}/usr/local/lib/nvidia"
+    mkdir -p "${OVERLAY_KMOD_DIR}"
+    cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/"

-# Inject nvidia-smi and libnvidia-ml
-mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin" "${OVERLAY_STAGE_DIR}/usr/lib"
-cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_STAGE_DIR}/usr/local/bin/"
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-smi"
-cp "${NVIDIA_CACHE}/bin/nvidia-bug-report.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/" 2>/dev/null || true
-chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-bug-report.sh" 2>/dev/null || true
-cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    # Inject nvidia-smi and libnvidia-ml
+    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/bin" "${OVERLAY_STAGE_DIR}/usr/lib"
+    cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_STAGE_DIR}/usr/local/bin/"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-smi"
+    cp "${NVIDIA_CACHE}/bin/nvidia-bug-report.sh" "${OVERLAY_STAGE_DIR}/usr/local/bin/" 2>/dev/null || true
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/nvidia-bug-report.sh" 2>/dev/null || true
+    cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    mkdir -p "${OVERLAY_STAGE_DIR}/etc/OpenCL/vendors"
+    printf 'libnvidia-opencl.so.1\n' > "${OVERLAY_STAGE_DIR}/etc/OpenCL/vendors/nvidia.icd"

-# Inject GSP firmware into /lib/firmware/nvidia/<version>/
-if [ -d "${NVIDIA_CACHE}/firmware" ] && [ "$(ls -A "${NVIDIA_CACHE}/firmware" 2>/dev/null)" ]; then
-    mkdir -p "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}"
-    cp "${NVIDIA_CACHE}/firmware/"* "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/"
-    echo "=== firmware: $(ls "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/" | wc -l) files injected ==="
+    # Inject GSP firmware into /lib/firmware/nvidia/<version>/
+    if [ -d "${NVIDIA_CACHE}/firmware" ] && [ "$(ls -A "${NVIDIA_CACHE}/firmware" 2>/dev/null)" ]; then
+        mkdir -p "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}"
+        cp "${NVIDIA_CACHE}/firmware/"* "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/"
+        echo "=== firmware: $(ls "${OVERLAY_STAGE_DIR}/lib/firmware/nvidia/${NVIDIA_DRIVER_VERSION}/" | wc -l) files injected ==="
+    fi
+
+    # --- build / download NCCL ---
+    echo ""
+    echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
+    sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
+
+    NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
+
+    # Inject libnccl.so.* into overlay alongside other NVIDIA userspace libs
+    cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
+    echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
+
+    # Inject cuBLAS/cuBLASLt/cudart runtime libs used by the bee-gpu-burn worker tensor-core GEMM path
+    cp "${CUBLAS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
+    echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
+
+    # --- build nccl-tests ---
+    echo ""
+    echo "=== building nccl-tests ${NCCL_TESTS_VERSION} ==="
+    sh "${BUILDER_DIR}/build-nccl-tests.sh" \
+        "${NCCL_TESTS_VERSION}" \
+        "${NCCL_VERSION}" \
+        "${NCCL_CUDA_VERSION}" \
+        "${DIST_DIR}" \
+        "${NVCC_VERSION}" \
+        "${DEBIAN_VERSION}"
+
+    NCCL_TESTS_CACHE="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
+    cp "${NCCL_TESTS_CACHE}/bin/all_reduce_perf" "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
+    cp "${NCCL_TESTS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
+    echo "=== all_reduce_perf injected ==="
+
+    echo ""
+    echo "=== building john jumbo ${JOHN_JUMBO_COMMIT} ==="
+    sh "${BUILDER_DIR}/build-john.sh" "${JOHN_JUMBO_COMMIT}" "${DIST_DIR}"
+    JOHN_CACHE="${DIST_DIR}/john-${JOHN_JUMBO_COMMIT}"
+    mkdir -p "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john"
+    rsync -a --delete "${JOHN_CACHE}/run/" "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/"
+    ln -sfn ../lib/bee/john/run/john "${OVERLAY_STAGE_DIR}/usr/local/bin/john"
+    chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john/run/john"
+    echo "=== john injected ==="
 fi

-# --- build / download NCCL ---
-echo ""
-echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
-sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
-
-NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
-
-# Inject libnccl.so.* into overlay alongside other NVIDIA userspace libs
-cp "${NCCL_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
-echo "=== NCCL: $(ls "${NCCL_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
-
-# Inject cuBLAS/cuBLASLt/cudart runtime libs used by bee-gpu-stress tensor-core GEMM path
-cp "${CUBLAS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/"
-echo "=== cuBLAS: $(ls "${CUBLAS_CACHE}/lib/" | wc -l) files injected into /usr/lib/ ==="
-
 # --- embed build metadata ---
 mkdir -p "${OVERLAY_STAGE_DIR}/etc"
 BUILD_DATE="$(date +%Y-%m-%d)"
 GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
-cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" <<EOF
-BEE_ISO_VERSION=${ISO_VERSION_EFFECTIVE}
-BEE_AUDIT_VERSION=${AUDIT_VERSION_EFFECTIVE}
-BUILD_DATE=${BUILD_DATE}
-GIT_COMMIT=${GIT_COMMIT}
-DEBIAN_VERSION=${DEBIAN_VERSION}
-DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
-NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
+
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    GPU_VERSION_LINE="NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
 NCCL_VERSION=${NCCL_VERSION}
 NCCL_CUDA_VERSION=${NCCL_CUDA_VERSION}
 CUBLAS_VERSION=${CUBLAS_VERSION}
 CUDA_USERSPACE_VERSION=${CUDA_USERSPACE_VERSION}
+NCCL_TESTS_VERSION=${NCCL_TESTS_VERSION}
+JOHN_JUMBO_COMMIT=${JOHN_JUMBO_COMMIT}"
+    GPU_BUILD_INFO="nvidia:${NVIDIA_DRIVER_VERSION}"
+elif [ "$BEE_GPU_VENDOR" = "amd" ]; then
+    GPU_VERSION_LINE="ROCM_VERSION=${ROCM_VERSION}"
+    GPU_BUILD_INFO="rocm:${ROCM_VERSION}"
+else
+    GPU_VERSION_LINE=""
+    GPU_BUILD_INFO="nogpu"
+fi
+
+cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" <<EOF
+BEE_ISO_VERSION=${ISO_VERSION_EFFECTIVE}
+BEE_AUDIT_VERSION=${AUDIT_VERSION_EFFECTIVE}
+BEE_GPU_VENDOR=${BEE_GPU_VENDOR}
+BUILD_DATE=${BUILD_DATE}
+GIT_COMMIT=${GIT_COMMIT}
+DEBIAN_VERSION=${DEBIAN_VERSION}
+DEBIAN_KERNEL_ABI=${DEBIAN_KERNEL_ABI}
+${GPU_VERSION_LINE}
 EOF

+# Write GPU vendor marker for hooks
+echo "${BEE_GPU_VENDOR}" > "${OVERLAY_STAGE_DIR}/etc/bee-gpu-vendor"
+
 # Patch motd with build info
-BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} debian:${DEBIAN_VERSION} nvidia:${NVIDIA_DRIVER_VERSION}"
+BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} debian:${DEBIAN_VERSION} ${GPU_BUILD_INFO}"
 if [ -f "${OVERLAY_STAGE_DIR}/etc/motd" ]; then
    sed "s/%%BUILD_INFO%%/${BEE_BUILD_INFO}/" "${OVERLAY_STAGE_DIR}/etc/motd" \
        > "${OVERLAY_STAGE_DIR}/etc/motd.patched"
    mv "${OVERLAY_STAGE_DIR}/etc/motd.patched" "${OVERLAY_STAGE_DIR}/etc/motd"
 fi

+# --- copy variant-specific package list, remove all other variant lists ---
+# live-build picks up ALL .list.chroot files, so the chosen list is copied to
+# bee-gpu.list.chroot and every per-variant source list is then deleted.
+_pkg_list_dir="${BUILD_WORK_DIR}/config/package-lists"
+cp "${_pkg_list_dir}/bee-${BEE_GPU_VENDOR}.list.chroot" \
+   "${_pkg_list_dir}/bee-gpu.list.chroot"
+for _list_variant in nvidia amd nogpu; do
+    rm -f "${_pkg_list_dir}/bee-${_list_variant}.list.chroot"
+done
+
+# --- remove archives for the other vendor(s) ---
+_archives_dir="${BUILD_WORK_DIR}/config/archives"
+case "$BEE_GPU_VENDOR" in
+    nvidia)
+        rm -f "${_archives_dir}/rocm.list.chroot" \
+              "${_archives_dir}/rocm.key.chroot"
+        ;;
+    amd)
+        rm -f "${_archives_dir}/nvidia-cuda.list.chroot" \
+              "${_archives_dir}/nvidia-cuda.key.chroot"
+        ;;
+    *)
+        # nogpu: neither vendor repository is needed
+        rm -f "${_archives_dir}/rocm.list.chroot" \
+              "${_archives_dir}/rocm.key.chroot" \
+              "${_archives_dir}/nvidia-cuda.list.chroot" \
+              "${_archives_dir}/nvidia-cuda.key.chroot"
+        ;;
+esac
+
+# --- substitute version placeholders in package list and archive ---
+# %%NAME%% tokens are replaced in place with the matching variable's value;
+# the nogpu variant has nothing to substitute.
+_gpu_pkg_list="${BUILD_WORK_DIR}/config/package-lists/bee-gpu.list.chroot"
+case "$BEE_GPU_VENDOR" in
+    nvidia)
+        sed -i \
+            -e "s/%%DCGM_VERSION%%/${DCGM_VERSION}/g" \
+            "${_gpu_pkg_list}"
+        ;;
+    amd)
+        sed -i \
+            -e "s/%%ROCM_VERSION%%/${ROCM_VERSION}/g" \
+            -e "s/%%ROCM_SMI_VERSION%%/${ROCM_SMI_VERSION}/g" \
+            -e "s/%%ROCM_BANDWIDTH_TEST_VERSION%%/${ROCM_BANDWIDTH_TEST_VERSION}/g" \
+            -e "s/%%ROCM_VALIDATION_SUITE_VERSION%%/${ROCM_VALIDATION_SUITE_VERSION}/g" \
+            -e "s/%%ROCBLAS_VERSION%%/${ROCBLAS_VERSION}/g" \
+            -e "s/%%ROCRAND_VERSION%%/${ROCRAND_VERSION}/g" \
+            -e "s/%%HIP_RUNTIME_AMD_VERSION%%/${HIP_RUNTIME_AMD_VERSION}/g" \
+            -e "s/%%HIPBLASLT_VERSION%%/${HIPBLASLT_VERSION}/g" \
+            -e "s/%%COMGR_VERSION%%/${COMGR_VERSION}/g" \
+            "${_gpu_pkg_list}"
+        # The ROCm apt source also carries a version placeholder.
+        _rocm_archive="${BUILD_WORK_DIR}/config/archives/rocm.list.chroot"
+        if [ -f "${_rocm_archive}" ]; then
+            sed -i \
+                -e "s/%%ROCM_VERSION%%/${ROCM_VERSION}/g" \
+                "${_rocm_archive}"
+        fi
+        ;;
+esac
+
 # --- sync overlay into live-build includes.chroot ---
 LB_DIR="${BUILD_WORK_DIR}"
 LB_INCLUDES="${LB_DIR}/config/includes.chroot"
@@ -338,20 +555,32 @@ fi

 # --- build ISO using live-build ---
 echo ""
-echo "=== building ISO (live-build) ==="
+echo "=== building ISO (live-build, variant: ${BEE_GPU_VENDOR}) ==="
+
+# Export for auto/config
+BEE_GPU_VENDOR_UPPER="$(echo "${BEE_GPU_VENDOR}" | tr 'a-z' 'A-Z')"
+export BEE_GPU_VENDOR_UPPER

 cd "${LB_DIR}"
 lb clean 2>&1 | tail -3
 lb config 2>&1 | tail -5
 lb build 2>&1

+# --- persist deb package cache back to shared location ---
+# This allows the second variant to reuse all downloaded packages.
+if [ -d "${BUILD_WORK_DIR}/cache/packages.chroot" ]; then
+    rsync -a "${BUILD_WORK_DIR}/cache/packages.chroot/" "${LB_PKG_CACHE}/"
+    echo "=== package cache synced to ${LB_PKG_CACHE} ==="
+fi
+
 # live-build outputs live-image-amd64.hybrid.iso in LB_DIR
 ISO_RAW="${LB_DIR}/live-image-amd64.hybrid.iso"
-ISO_OUT="${DIST_DIR}/bee-debian${DEBIAN_VERSION}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
+ISO_OUT="${DIST_DIR}/easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64.iso"
 if [ -f "$ISO_RAW" ]; then
+    validate_iso_memtest "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
-    echo "=== done ==="
+    echo "=== done (${BEE_GPU_VENDOR}) ==="
    echo "ISO: $ISO_OUT"
    if command -v stat >/dev/null 2>&1; then
        ISO_SIZE_BYTES="$(stat -c '%s' "$ISO_OUT" 2>/dev/null || stat -f '%z' "$ISO_OUT")"
--- a/iso/builder/config/archives/nvidia-cuda.key.chroot
+++ b/iso/builder/config/archives/nvidia-cuda.key.chroot
@@ -0,0 +1,29 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v2.0.22 (GNU/Linux)
+
+mQINBGJYmlEBEAC6nJmeqByeReM+MSy4palACCnfOg4pOxffrrkldxz4jrDOZNK4
+q8KG+ZbXrkdP0e9qTFRvZzN+A6Jw3ySfoiKXRBw5l2Zp81AYkghV641OpWNjZOyL
+syKEtST9LR1ttHv1ZI71pj8NVG/EnpimZPOblEJ1OpibJJCXLrbn+qcJ8JNuGTSK
+6v2aLBmhR8VR/aSJpmkg7fFjcGklweTI8+Ibj72HuY9JRD/+dtUoSh7z037mWo56
+ee02lPFRD0pHOEAlLSXxFO/SDqRVMhcgHk0a8roCF+9h5Ni7ZUyxlGK/uHkqN7ED
+/U/ATpGKgvk4t23eTpdRC8FXAlBZQyf/xnhQXsyF/z7+RV5CL0o1zk1LKgo+5K32
+5ka5uZb6JSIrEPUaCPEMXu6EEY8zSFnCrRS/Vjkfvc9ViYZWzJ387WTjAhMdS7wd
+PmdDWw2ASGUP4FrfCireSZiFX+ZAOspKpZdh0P5iR5XSx14XDt3jNK2EQQboaJAD
+uqksItatOEYNu4JsCbc24roJvJtGhpjTnq1/dyoy6K433afU0DS2ZPLthLpGqeyK
+MKNY7a2WjxhRmCSu5Zok/fGKcO62XF8a3eSj4NzCRv8LM6mG1Oekz6Zz+tdxHg19
+ufHO0et7AKE5q+5VjE438Xpl4UWbM/Voj6VPJ9uzywDcnZXpeOqeTQh2pQARAQAB
+tCBjdWRhdG9vbHMgPGN1ZGF0b29sc0BudmlkaWEuY29tPokCOQQTAQIAIwUCYlia
+UQIbAwcLCQgHAwIBBhUIAgkKCwQWAgMBAh4BAheAAAoJEKS0aZY7+GPM1y4QALKh
+BqSozrYbe341Qu7SyxHQgjRCGi4YhI3bHCMj5F6vEOHnwiFH6YmFkxCYtqcGjca6
+iw7cCYMow/hgKLAPwkwSJ84EYpGLWx62+20rMM4OuZwauSUcY/kE2WgnQ74zbh3+
+MHs56zntJFfJ9G+NYidvwDWeZn5HIzR4CtxaxRgpiykg0s3ps6X0U+vuVcLnutBF
+7r81astvlVQERFbce/6KqHK+yj843Qrhb3JEolUoOETK06nD25bVtnAxe0QEyA90
+9MpRNLfR6BdjPpxqhphDcMOhJfyubAroQUxG/7S+Yw+mtEqHrL/dz9iEYqodYiSo
+zfi0b+HFI59sRkTfOBDBwb3kcARExwnvLJmqijiVqWkoJ3H67oA0XJN2nelucw+A
+Hb+Jt9BWjyzKWlLFDnVHdGicyRJ0I8yqi32w8hGeXmu3tU58VWJrkXEXadBftmci
+pemb6oZ/r5SCkW6kxr2PsNWcJoebUdynyOQGbVwpMtJAnjOYp0ObKOANbcIg+tsi
+kyCIO5TiY3ADbBDPCeZK8xdcugXoW5WFwACGC0z+Cn0mtw8z3VGIPAMSCYmLusgW
+t2+EpikwrP2inNp5Pc+YdczRAsa4s30Jpyv/UHEG5P9GKnvofaxJgnU56lJIRPzF
+iCUGy6cVI0Fq777X/ME1K6A/bzZ4vRYNx8rUmVE5
+=DO7z
+-----END PGP PUBLIC KEY BLOCK-----
--- a/iso/builder/config/archives/nvidia-cuda.list.chroot
+++ b/iso/builder/config/archives/nvidia-cuda.list.chroot
@@ -0,0 +1 @@
+deb https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /
--- a/iso/builder/config/archives/rocm.key.chroot
+++ b/iso/builder/config/archives/rocm.key.chroot
--- a/iso/builder/config/archives/rocm.list.chroot
+++ b/iso/builder/config/archives/rocm.list.chroot
@@ -0,0 +1 @@
+deb https://repo.radeon.com/rocm/apt/%%ROCM_VERSION%% jammy main
--- a/iso/builder/config/bootloaders/grub-pc/config.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/config.cfg
@@ -8,7 +8,7 @@ else
 fi

 if loadfont $font ; then
-    set gfxmode=800x600
+    set gfxmode=1920x1080,1280x1024,auto
    set gfxpayload=keep
    insmod efi_gop
    insmod efi_uga
--- a/iso/builder/config/bootloaders/grub-pc/grub.cfg
+++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg
@@ -10,20 +10,35 @@ echo "  ╚══════╝╚═╝  ╚═╝╚══════╝
 echo ""

 menuentry "EASY-BEE" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=normal
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
+    initrd  @INITRD_LIVE@
+}
+
+menuentry "EASY-BEE (load to RAM)" {
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

 menuentry "EASY-BEE (NVIDIA GSP=off)" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable nowatchdog nosoftlockup
    initrd  @INITRD_LIVE@
 }

 menuentry "EASY-BEE (fail-safe)" {
-    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
+    linux   @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
    initrd  @INITRD_LIVE@
 }

+if [ "${grub_platform}" = "efi" ]; then
+    menuentry "Memory Test (memtest86+)" {
+        chainloader /boot/memtest86+x64.efi
+    }
+else
+    menuentry "Memory Test (memtest86+)" {
+        linux16 /boot/memtest86+x64.bin
+    }
+fi
+
 if [ "${grub_platform}" = "efi" ]; then
    menuentry "UEFI Firmware Settings" {
        fwsetup
--- a/iso/builder/config/bootloaders/isolinux/live.cfg.in
+++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in
@@ -5,14 +5,24 @@ label live-@FLAVOUR@-normal
    initrd @INITRD@
    append @APPEND_LIVE@ bee.nvidia.mode=normal

+label live-@FLAVOUR@-toram
+    menu label EASY-BEE (^load to RAM)
+    linux @LINUX@
+    initrd @INITRD@
+    append @APPEND_LIVE@ toram bee.nvidia.mode=normal
+
 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux @LINUX@
    initrd @INITRD@
-    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off
+    append @APPEND_LIVE@ nomodeset bee.nvidia.mode=gsp-off

 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
    linux @LINUX@
    initrd @INITRD@
    append @APPEND_LIVE@ bee.nvidia.mode=gsp-off memtest noapic noapm nodma nomce nolapic nosmp vga=normal
+
+label memtest
+    menu label ^Memory Test (memtest86+)
+    linux /boot/memtest86+x64.bin
--- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
+++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot
@@ -5,42 +5,80 @@ set -e
 
 echo "=== bee chroot setup ==="
 
+# Read the GPU variant marker; fall back to nvidia when /etc/bee-gpu-vendor is
+# missing or unreadable.
+GPU_VENDOR=$(cat /etc/bee-gpu-vendor 2>/dev/null || echo nvidia)
+echo "=== GPU vendor: ${GPU_VENDOR} ==="
+
 ensure_bee_console_user() {
     if id bee >/dev/null 2>&1; then
-        usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true
+        usermod -d /home/bee -s /bin/bash bee 2>/dev/null || true
     else
-        useradd -d /home/bee -m -s /bin/sh -U bee
+        useradd -d /home/bee -m -s /bin/bash -U bee
     fi
 
     mkdir -p /home/bee
     chown -R bee:bee /home/bee
     echo "bee:eeb" | chpasswd
-    usermod -aG sudo bee 2>/dev/null || true
+    groupadd -f ipmi 2>/dev/null || true
+    # Add supplementary groups one at a time: a single "usermod -aG a,b,c" is
+    # rejected as a whole when any listed group is missing, which would also
+    # silently drop the sudo membership.
+    for grp in sudo video input render ipmi; do
+        usermod -aG "${grp}" bee 2>/dev/null \
+            || echo "WARN: could not add bee to group ${grp}" >&2
+    done
 }
 
 ensure_bee_console_user
 
-# Enable bee services
+# Enable common bee services
 systemctl enable bee-network.service
-systemctl enable bee-nvidia.service
 systemctl enable bee-preflight.service
-systemctl enable bee-audit.service
+# bee-audit.service is on-demand and has no [Install] section, so there is
+# nothing to enable for it.
 systemctl enable bee-web.service
 systemctl enable bee-sshsetup.service
 systemctl enable ssh.service
+systemctl enable lightdm.service 2>/dev/null || true
 systemctl enable qemu-guest-agent.service 2>/dev/null || true
 systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
 systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
 systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
 
+# Enable GPU-vendor specific services
+if [ "$GPU_VENDOR" = "nvidia" ]; then
+    systemctl enable nvidia-dcgm.service 2>/dev/null || true
+    systemctl enable bee-nvidia.service
+elif [ "$GPU_VENDOR" = "amd" ]; then
+    # ROCm symlinks (packages install to /opt/rocm-*/bin/)
+    # Match symlinks as well as regular files so a tool that is shipped in
+    # bin/ as a link is still picked up.
+    for tool in rocm-smi rocm-bandwidth-test rvs; do
+        if [ ! -e "/usr/local/bin/${tool}" ]; then
+            bin_path="$(find /opt -path "*/bin/${tool}" \( -type f -o -type l \) 2>/dev/null | sort | tail -1)"
+            if [ -n "${bin_path}" ]; then
+                ln -sf "${bin_path}" "/usr/local/bin/${tool}"
+            else
+                echo "WARN: ${tool} not found under /opt" >&2
+            fi
+        fi
+    done
+fi
+# nogpu: no GPU services needed
+
 # Ensure scripts are executable
 chmod +x /usr/local/bin/bee-network.sh  2>/dev/null || true
-chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
 chmod +x /usr/local/bin/bee-sshsetup   2>/dev/null || true
 chmod +x /usr/local/bin/bee-smoketest  2>/dev/null || true
-chmod +x /usr/local/bin/bee-tui        2>/dev/null || true
 chmod +x /usr/local/bin/bee            2>/dev/null || true
 chmod +x /usr/local/bin/bee-log-run    2>/dev/null || true
+if [ "$GPU_VENDOR" = "nvidia" ]; then
+    chmod +x /usr/local/bin/bee-nvidia-load 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-gpu-burn 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-john-gpu-stress 2>/dev/null || true
+    chmod +x /usr/local/bin/bee-nccl-gpu-stress 2>/dev/null || true
+fi
 
 # Reload udev rules
 udevadm control --reload-rules 2>/dev/null || true
@@ -52,4 +75,4 @@ if [ -f /etc/sudoers.d/bee ]; then
    chmod 0440 /etc/sudoers.d/bee
 fi

-echo "=== bee chroot setup complete ==="
+echo "=== bee chroot setup complete (${GPU_VENDOR}) ==="
--- a/iso/builder/config/hooks/normal/9001-amd-rocm.hook.chroot
+++ b/iso/builder/config/hooks/normal/9001-amd-rocm.hook.chroot
@@ -1,103 +0,0 @@
-#!/bin/sh
-# 9001-amd-rocm.hook.chroot — install AMD ROCm SMI tool for Instinct GPU monitoring.
-# Runs inside the live-build chroot. Adds AMD's apt repository and installs
-# rocm-smi-lib which provides the `rocm-smi` CLI (analogous to nvidia-smi).
-#
-# AMD does NOT publish Debian Bookworm packages. The repo uses Ubuntu codenames
-# (jammy/noble). We use jammy (Ubuntu 22.04) — its packages install cleanly on
-# Debian 12 (Bookworm) due to compatible glibc/libstdc++.
-# Tried versions newest-first; falls back if a point release is missing.
-
-set -e
-
-# Ubuntu codename to use for the AMD repo (Debian has no AMD packages).
-ROCM_UBUNTU_DIST="jammy"
-
-# ROCm point-releases to try newest-first. AMD drops old point releases
-# from the repo, so we walk backwards until one responds 200.
-ROCM_CANDIDATES="6.3.4 6.3.3 6.3.2 6.3.1 6.3 6.2.4 6.2.3 6.2.2 6.2.1 6.2"
-
-ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
-ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
-APT_UPDATED=0
-
-mkdir -p /etc/apt/keyrings
-
-ensure_tool() {
-    tool="$1"
-    pkg="$2"
-    if command -v "${tool}" >/dev/null 2>&1; then
-        return 0
-    fi
-    if [ "${APT_UPDATED}" -eq 0 ]; then
-        apt-get update -qq
-        APT_UPDATED=1
-    fi
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
-}
-
-ensure_cert_bundle() {
-    if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
-        return 0
-    fi
-    if [ "${APT_UPDATED}" -eq 0 ]; then
-        apt-get update -qq
-        APT_UPDATED=1
-    fi
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
-}
-
-# live-build chroot may not include fetch/signing tools yet
-if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
-    echo "WARN: failed to install wget/gpg/ca-certificates prerequisites — skipping ROCm install"
-    exit 0
-fi
-
-# Download and import AMD GPG key
-if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
-        | gpg --dearmor --yes --output "${ROCM_KEYRING}"; then
-    echo "WARN: failed to fetch AMD ROCm GPG key — skipping ROCm install"
-    exit 0
-fi
-
-# Try each ROCm version until apt-get update succeeds.
-# AMD repo uses Ubuntu codenames; bookworm is not published — use jammy.
-ROCM_VERSION=""
-for candidate in ${ROCM_CANDIDATES}; do
-    cat > "${ROCM_LIST}" <<EOF
-deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${candidate} ${ROCM_UBUNTU_DIST} main
-EOF
-    if apt-get update -qq 2>/dev/null; then
-        ROCM_VERSION="${candidate}"
-        echo "=== AMD ROCm ${ROCM_VERSION} (${ROCM_UBUNTU_DIST}): repository available ==="
-        break
-    fi
-    echo "WARN: ROCm ${candidate} not available, trying next..."
-    rm -f "${ROCM_LIST}"
-done
-
-if [ -z "${ROCM_VERSION}" ]; then
-    echo "WARN: no ROCm apt repository available — skipping ROCm install"
-    rm -f "${ROCM_KEYRING}"
-    exit 0
-fi
-
-# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
-if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib; then
-    echo "=== AMD ROCm: rocm-smi-lib installed ==="
-    if [ -x /opt/rocm/bin/rocm-smi ]; then
-        ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
-    else
-        smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
-        if [ -n "${smi_path}" ]; then
-            ln -sf "${smi_path}" /usr/local/bin/rocm-smi
-        fi
-    fi
-    rocm-smi --version 2>/dev/null || true
-else
-    echo "WARN: rocm-smi-lib install failed — AMD GPU monitoring unavailable"
-fi
-
-# Clean up apt lists to keep ISO size down
-rm -f "${ROCM_LIST}"
-apt-get clean
--- a/iso/builder/config/hooks/normal/9999-slim.hook.chroot
+++ b/iso/builder/config/hooks/normal/9999-slim.hook.chroot
@@ -0,0 +1,38 @@
+#!/bin/sh
+# 9999-slim.hook.chroot — strip non-essential files to reduce squashfs size.
+# Every removal is best-effort (errors are discarded) except the APT cleanup.
+set -e
+
+# Delete everything below directory $1, keeping the directory itself.
+purge_contents() {
+    find "$1" -mindepth 1 -delete 2>/dev/null || true
+}
+
+# ── Man pages and documentation ───────────────────────────────────────────────
+for dir in /usr/share/man /usr/share/info /usr/share/groff /usr/share/lintian; do
+    purge_contents "${dir}"
+done
+# Under /usr/share/doc, entries named "copyright" are not deleted.
+find /usr/share/doc -mindepth 1 ! -name 'copyright' -delete 2>/dev/null || true
+
+# ── Locales — keep only C and en_US ──────────────────────────────────────────
+find /usr/share/locale -mindepth 1 -maxdepth 1 \
+    ! \( -name 'en' -o -name 'en_US' -o -name 'locale.alias' \) \
+    -exec rm -rf {} + 2>/dev/null || true
+find /usr/share/i18n/locales -mindepth 1 \
+    ! \( -name 'en_US' -o -name 'i18n' -o -name 'iso14651_t1' -o -name 'iso14651_t1_common' \) \
+    -delete 2>/dev/null || true
+
+# ── Python cache ──────────────────────────────────────────────────────────────
+find /usr /opt -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null || true
+find /usr /opt -name '*.pyc' -delete 2>/dev/null || true
+
+# ── APT cache and lists ───────────────────────────────────────────────────────
+apt-get clean
+rm -rf /var/lib/apt/lists/*
+
+# ── Misc ──────────────────────────────────────────────────────────────────────
+rm -rf /tmp/* /var/tmp/* 2>/dev/null || true
+find /var/log -type f -delete 2>/dev/null || true
+
+echo "=== slim: done ==="
--- a/iso/builder/config/package-lists/bee-amd.list.chroot
+++ b/iso/builder/config/package-lists/bee-amd.list.chroot
@@ -0,0 +1,9 @@
+# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
+rocm-smi-lib=%%ROCM_SMI_VERSION%%
+rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
+rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
+rocblas=%%ROCBLAS_VERSION%%
+rocrand=%%ROCRAND_VERSION%%
+hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
+hipblaslt=%%HIPBLASLT_VERSION%%
+comgr=%%COMGR_VERSION%%
--- a/iso/builder/config/package-lists/bee-nogpu.list.chroot
+++ b/iso/builder/config/package-lists/bee-nogpu.list.chroot
@@ -0,0 +1 @@
+# No GPU variant — no NVIDIA, no AMD/ROCm packages
--- a/iso/builder/config/package-lists/bee-nvidia.list.chroot
+++ b/iso/builder/config/package-lists/bee-nvidia.list.chroot
@@ -0,0 +1,8 @@
+# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing.
+# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with CUDA 13 userspace,
+# so install the CUDA 13 build plus proprietary diagnostic components explicitly.
+datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
+datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
+datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
+ocl-icd-libopencl1
+clinfo
--- a/iso/builder/config/package-lists/bee.list.chroot
+++ b/iso/builder/config/package-lists/bee.list.chroot
@@ -18,6 +18,19 @@ qemu-guest-agent
 # SSH
 openssh-server

+# Disk installer
+squashfs-tools
+parted
+# Keep GRUB install tools without selecting a single active platform package.
+# grub-pc and grub-efi-amd64 conflict with each other, but grub2-common
+# provides grub-install/update-grub and the *-bin packages provide BIOS/UEFI modules.
+grub2-common
+grub-pc-bin
+grub-efi-amd64-bin
+grub-efi-amd64-signed
+shim-signed
+efibootmgr
+
 # Filesystem support for USB export targets
 exfatprogs
 exfat-fuse
@@ -33,18 +46,38 @@ vim-tiny
 mc
 htop
 nvtop
+btop
 sudo
 zstd
 mstflint
 memtester
 stress-ng
+stressapptest

 # QR codes (for displaying audit results)
 qrencode

+# Local desktop (openbox + chromium kiosk)
+openbox
+tint2
+xorg
+xterm
+chromium
+xserver-xorg-video-fbdev
+xserver-xorg-video-vesa
+lightdm
+
 # Firmware
 firmware-linux-free
+firmware-linux-nonfree
+firmware-misc-nonfree
 firmware-amd-graphics
+firmware-realtek
+firmware-intel-sound
+firmware-bnx2
+firmware-bnx2x
+firmware-cavium
+firmware-qlogic

 # glibc compat helpers (for any external binaries that need it)
 libc6
--- a/iso/builder/smoketest.sh
+++ b/iso/builder/smoketest.sh
@@ -39,7 +39,7 @@ info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
 # --- PATH & binaries ---
 echo "-- PATH & binaries --"
 for tool in dmidecode smartctl nvme ipmitool lspci bee; do
-    if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
+    if p=$(PATH="/usr/local/bin:/usr/sbin:/sbin:$PATH" command -v "$tool" 2>/dev/null); then
        ok "$tool found: $p"
    else
        fail "$tool: NOT FOUND"
--- a/iso/overlay/etc/X11/Xwrapper.config
+++ b/iso/overlay/etc/X11/Xwrapper.config
@@ -0,0 +1,2 @@
+allowed_users=anybody
+needs_root_rights=yes
--- a/iso/overlay/etc/X11/xorg.conf.d/10-fbdev.conf
+++ b/iso/overlay/etc/X11/xorg.conf.d/10-fbdev.conf
@@ -0,0 +1,11 @@
+Section "Device"
+    Identifier  "fbdev"
+    Driver      "fbdev"
+    Option      "fbdev" "/dev/fb0"
+EndSection
+
+Section "Screen"
+    Identifier  "screen0"
+    Device      "fbdev"
+    DefaultDepth 24
+EndSection
--- a/iso/overlay/etc/lightdm/lightdm.conf.d/50-bee-autologin.conf
+++ b/iso/overlay/etc/lightdm/lightdm.conf.d/50-bee-autologin.conf
@@ -0,0 +1,5 @@
+[Seat:*]
+autologin-user=bee
+autologin-user-timeout=0
+autologin-session=openbox
+user-session=openbox
--- a/iso/overlay/etc/modules-load.d/bee-ipmi.conf
+++ b/iso/overlay/etc/modules-load.d/bee-ipmi.conf
@@ -0,0 +1,3 @@
+# Load IPMI modules for fan/sensor/power monitoring via ipmitool
+ipmi_si
+ipmi_devintf
--- a/iso/overlay/etc/motd
+++ b/iso/overlay/etc/motd
@@ -12,6 +12,6 @@
  Export dir:    /appdata/bee/export
  Self-check:    /appdata/bee/export/runtime-health.json

-  Open TUI:      bee-tui
+  Web UI:        http://<ip>/

  SSH access:    key auth (developers) or bee/eeb (password fallback)
--- a/iso/overlay/etc/profile.d/bee.sh
+++ b/iso/overlay/etc/profile.d/bee.sh
@@ -1,21 +1,18 @@
-export PATH="$PATH:/usr/local/bin:/opt/rocm/bin:/opt/rocm/sbin"
+export PATH="$PATH:/usr/local/bin:/usr/sbin:/sbin:/opt/rocm/bin:/opt/rocm/sbin"

-menu() {
-    if [ -x /usr/local/bin/bee-tui ]; then
-        /usr/local/bin/bee-tui "$@"
-    else
-        echo "bee-tui is not installed"
-        return 1
-    fi
-}
-
-# On the local console, keep the shell visible and let the operator
-# start the TUI explicitly. This avoids black-screen failures if the
-# terminal implementation does not support the TUI well.
+# Print web UI URLs on the local console at login.
 if [ -z "${SSH_CONNECTION:-}" ] \
-    && [ -z "${SSH_TTY:-}" ] \
-    && [ "$(tty 2>/dev/null)" = "/dev/tty1" ]; then
+    && [ -z "${SSH_TTY:-}" ]; then
    echo "Bee live environment ready."
-    echo "Run 'menu' to open the TUI."
-    echo "Kernel logs: Alt+F2  |  Extra shell: Alt+F3"
+    echo ""
+    echo "  Web UI (local):  http://localhost/"
+    # Print IP addresses for remote access
+    _ips=$(ip -4 addr show scope global 2>/dev/null | awk '/inet /{print $2}' | cut -d/ -f1)
+    for _ip in $_ips; do
+        echo "  Web UI (remote): http://$_ip/"
+    done
+    unset _ips _ip
+    echo ""
+    echo "  Network setup: netconf"
+    echo "  Kernel logs:   Alt+F2  |  Extra shell: Alt+F3"
 fi
--- a/iso/overlay/etc/systemd/journald.conf.d/bee.conf
+++ b/iso/overlay/etc/systemd/journald.conf.d/bee.conf
@@ -0,0 +1,4 @@
+[Journal]
+# Do not forward service logs to the console — prevents log spam on
+# physical monitors and the local openbox desktop.
+ForwardToConsole=no
--- a/iso/overlay/etc/systemd/system/bee-audit.service
+++ b/iso/overlay/etc/systemd/system/bee-audit.service
@@ -1,14 +1,11 @@
 [Unit]
-Description=Bee: run hardware audit
-After=bee-network.service bee-nvidia.service bee-preflight.service
-Before=bee-web.service
+Description=Bee: on-demand hardware audit (not started automatically)
 
 [Service]
 Type=oneshot
-ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
+# No RemainAfterExit=yes: the unit must return to "inactive" after each run,
+# otherwise a later "systemctl start bee-audit" is a no-op because the unit
+# still counts as active.
+ExecStart=/bin/sh -c 'curl -sf -X POST http://localhost/api/audit/run >/dev/null'
 StandardOutput=journal
 StandardError=journal
-RemainAfterExit=yes
-
-[Install]
-WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/bee-web.service
+++ b/iso/overlay/etc/systemd/system/bee-web.service
@@ -1,7 +1,5 @@
 [Unit]
 Description=Bee: hardware audit web viewer
-After=bee-network.service bee-audit.service
-Wants=bee-audit.service

 [Service]
 Type=simple
@@ -10,6 +8,10 @@ Restart=always
 RestartSec=2
 StandardOutput=journal
 StandardError=journal
+LimitMEMLOCK=infinity
+# Keep the web server responsive during GPU/CPU stress: child jobs are reniced
+# to +10 via Setpriority in runCmdJob, while the bee-web parent stays at 0.
+Nice=0

 [Install]
 WantedBy=multi-user.target
--- a/iso/overlay/etc/systemd/system/lightdm.service.d/bee-limits.conf
+++ b/iso/overlay/etc/systemd/system/lightdm.service.d/bee-limits.conf
@@ -0,0 +1,13 @@
+[Unit]
+# On server hardware without a usable framebuffer X may fail to start.
+# Limit restarts so the console is not flooded on headless deployments.
+# Start-rate limiting is configured in [Unit]; systemd ignores
+# StartLimitIntervalSec= when it appears in [Service].
+StartLimitIntervalSec=60
+StartLimitBurst=3
+
+[Service]
+RestartSec=10
+# Raise scheduling priority of the X server so the graphical console (KVM/IPMI)
+# stays responsive during GPU/CPU stress tests running at nice+10.
+Nice=-5
--- a/iso/overlay/etc/udev/rules.d/99-ipmi.rules
+++ b/iso/overlay/etc/udev/rules.d/99-ipmi.rules
@@ -0,0 +1,2 @@
+# Allow ipmi group to access IPMI device without root
+KERNEL=="ipmi[0-9]*", GROUP="ipmi", MODE="0660"
--- a/iso/overlay/home/bee/.profile
+++ b/iso/overlay/home/bee/.profile
@@ -1,13 +1 @@
 export PATH="/usr/local/bin:$PATH"
-
-if [ -z "${SSH_CONNECTION:-}" ] \
-    && [ -z "${SSH_TTY:-}" ] \
-    && [ "$(tty 2>/dev/null)" = "/dev/tty1" ]; then
-    if command -v menu >/dev/null 2>&1; then
-        menu
-    elif [ -x /usr/local/bin/bee-tui ]; then
-        /usr/local/bin/bee-tui
-    else
-        echo "Bee menu is unavailable."
-    fi
-fi
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -0,0 +1,128 @@
+#!/bin/sh
+# bee-gpu-burn — run one bee-gpu-burn-worker per selected NVIDIA GPU in
+# parallel, then print a per-GPU OK/FAILED line followed by that worker's log.
+#
+# Options:
+#   --seconds N, -t N   passed to every worker as --seconds (default 5)
+#   --size-mb N, -m N   passed to every worker as --size-mb (default 64)
+#   --devices 0,1       GPU indices to use (default: all listed by nvidia-smi)
+#   --exclude 2,3       GPU indices to skip
+#
+# Exit status: 0 if every worker succeeded, 1 if any worker failed or no GPU
+# was selected, 2 on a usage error.
+set -eu
+
+# Deliberately not called SECONDS: bash treats SECONDS as a self-incrementing
+# timer (even when run as sh), so the value handed to the workers would drift.
+DURATION_SEC=5
+SIZE_MB=64
+DEVICES=""
+EXCLUDE=""
+WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"
+TMP_DIR=""
+WORKER_PIDS=""
+
+usage() {
+    echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+# Normalize a comma-separated list: strip whitespace, drop empty items, sort
+# numerically and remove duplicates.
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+# Succeed when $1 is an exact element of the comma-separated list $2.
+contains_csv() {
+    case ",${2:-}," in
+        *",$1,"*) return 0 ;;
+        *) return 1 ;;
+    esac
+}
+
+# Stop workers that are still running and remove the temporary log directory.
+cleanup() {
+    for wpid in ${WORKER_PIDS}; do
+        kill "${wpid}" 2>/dev/null || true
+    done
+    [ -z "${TMP_DIR}" ] || rm -rf "${TMP_DIR}"
+}
+trap cleanup EXIT
+# Exit explicitly on INT/TERM: otherwise the shell resumes after the handler,
+# reports results from deleted logs and leaves the workers loading the GPUs.
+trap 'exit 130' INT
+trap 'exit 143' TERM
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+        --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+[ -x "${WORKER}" ] || { echo "bee-gpu-burn worker not found: ${WORKER}" >&2; exit 1; }
+
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+echo "loader=bee-gpu-burn"
+echo "selected_gpus=${FINAL}"
+
+TMP_DIR=$(mktemp -d)
+
+# Each entry is "pid:gpu-index:logfile".
+WORKERS=""
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    log="${TMP_DIR}/gpu-${id}.log"
+    echo "starting gpu ${id}"
+    "${WORKER}" --device "${id}" --seconds "${DURATION_SEC}" --size-mb "${SIZE_MB}" >"${log}" 2>&1 &
+    pid=$!
+    WORKER_PIDS="${WORKER_PIDS} ${pid}"
+    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+done
+
+status=0
+for spec in ${WORKERS}; do
+    pid=${spec%%:*}
+    rest=${spec#*:}
+    id=${rest%%:*}
+    log=${rest#*:}
+    if wait "${pid}"; then
+        echo "gpu ${id} finished: OK"
+    else
+        rc=$?
+        echo "gpu ${id} finished: FAILED rc=${rc}"
+        status=1
+    fi
+    sed "s/^/[gpu ${id}] /" "${log}" || true
+done
+# Every worker has been reaped; do not signal their (possibly reused) PIDs.
+WORKER_PIDS=""
+
+exit "${status}"
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`deb https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /`
				`@@ -0,0 +1 @@`
				`deb https://repo.radeon.com/rocm/apt/%%ROCM_VERSION%% jammy main`
				`@@ -0,0 +1 @@`
				`# No GPU variant — no NVIDIA, no AMD/ROCm packages`