Disable fp64/fp4 in mixed gpu burn

Disable unstable fp4/fp64 benchmark phases
Power bench: compare GPU-reported TDP vs IPMI server power delta
2026-04-16 10:00:03 +03:00 · 2026-04-16 09:58:02 +03:00 · 2026-04-16 07:21:02 +03:00 · 2026-04-16 06:57:23 +03:00 · 2026-04-16 06:54:13 +03:00 · 2026-04-16 06:46:45 +03:00
207 changed files with 41211 additions and 1980 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .DS_Store
 dist/
 iso/out/
+build-cache/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
 [submodule "bible"]
 	path = bible
 	url = https://git.mchus.pro/mchus/bible.git
+[submodule "internal/chart"]
+	path = internal/chart
+	url = https://git.mchus.pro/reanimator/chart.git
--- a/PLAN.md
+++ b/PLAN.md
@@ -4,13 +4,13 @@ Hardware audit LiveCD for offline server inventory.
 Produces `HardwareIngestRequest` JSON compatible with core/reanimator.

 **Principle:** OS-level collection — reads hardware directly, not through BMC.
-Fully unattended — no user interaction required at any stage. Boot → update → audit → output → done.
-All errors are logged, never presented interactively. Every failure path has a silent fallback.
+Automatic boot audit plus operator console. Boot runs audit immediately, but local/SSH operators can rerun checks through the TUI and CLI.
+Errors are logged and should not block boot on partial collector failures.
 Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials, physical disks behind RAID, full SMART, NIC firmware.

 ---

-## Status snapshot (2026-03-06)
+## Status snapshot (2026-03-14)

 ### Phase 1 — Go Audit Binary

@@ -23,8 +23,10 @@ Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials,
 - 1.7 PSU collector — **DONE (basic FRU path)**
 - 1.8 NVIDIA GPU enrichment — **DONE**
 - 1.8b Component wear / age telemetry — **DONE** (storage + NVMe + NVIDIA + NIC SFP/DOM + NIC packet stats)
+- 1.8c Storage health verdicts — **DONE** (SMART/NVMe warning/failed status derivation)
 - 1.9 Mellanox/NVIDIA NIC enrichment — **DONE** (mstflint + ethtool firmware fallback)
 - 1.10 RAID controller enrichment — **DONE (initial multi-tool support)** (storcli + sas2/3ircu + arcconf + ssacli + VROC/mdstat)
+- 1.10b PSU SDR health — **DONE** (`ipmitool sdr` merged with FRU inventory)
 - 1.11 Output and export workflow — **DONE** (explicit file output + manual removable export via TUI)
 - 1.12 Integration test (local) — **DONE** (`scripts/test-local.sh`)

@@ -33,9 +35,14 @@ Fills the gaps where logpile/Redfish is blind: NVMe, DIMM serials, GPU serials,
 - Current implementation uses Debian 12 `live-build`, `systemd`, and OpenSSH.
 - Network bring-up on boot — **DONE**
 - Boot services (`bee-network`, `bee-nvidia`, `bee-audit`, `bee-sshsetup`) — **DONE**
+- Local console UX (`bee` autologin on `tty1`, `menu` auto-start, TUI privilege escalation via `sudo -n`) — **DONE**
+- VM/debug support (`qemu-guest-agent`, serial console, virtual GPU initramfs modules) — **DONE**
 - Vendor utilities in overlay — **DONE**
 - Build metadata + staged overlay injection — **DONE**
+- Builder container cache persisted outside container writable layer — **DONE**
+- ISO volume label `BEE` — **DONE**
 - Auto-update flow remains deferred; current focus is deterministic offline audit ISO behavior.
+- Real-hardware validation remains **PENDING**; current validation is limited to local/libvirt VM boot + service checks.

 ---

@@ -265,13 +272,10 @@ ISO image bootable via BMC virtual media or USB. Runs boot services automaticall

 ### 2.1 — Builder environment

-`iso/builder/setup-builder.sh` prepares a Debian 12 host/VM with:
- `live-build`, `debootstrap`, bootloader tooling, kernel headers
- Go toolchain
- everything needed to compile the `bee` binary and NVIDIA modules
-
-`iso/builder/build-in-container.sh` offers the same builder stack in a Debian 12 container image.
-The container run is privileged because `live-build` needs mount/chroot/loop capabilities.
+`iso/builder/build-in-container.sh` is the only supported builder entrypoint.
+It builds a Debian 12 builder image with `live-build`, toolchains, and pinned kernel headers,
+then runs the ISO assembly in a privileged container because `live-build` needs
+mount/chroot/loop capabilities.

 `iso/builder/build.sh` orchestrates the full ISO build:
 1. compile the Go `bee` binary
@@ -334,8 +338,14 @@ Planned code shape:
 ### 2.5 — Operator workflows

 - Automatic boot audit writes JSON to `/var/log/bee-audit.json`
+- `tty1` autologins into `bee` and auto-runs `menu`
+- `menu` launches the LiveCD wrapper `bee-tui`, which escalates to `root` via `sudo -n`
 - `bee tui` can rerun the audit manually
 - `bee tui` can export the latest audit JSON to removable media
+- `bee tui` can show health summary and run NVIDIA/memory/storage acceptance tests
+- NVIDIA SAT now includes a lightweight in-image GPU stress step via `bee-gpu-burn`
+- SAT summaries now expose `overall_status` plus per-job `OK/FAILED/UNSUPPORTED`
+- Memory SAT runtime defaults can be overridden via `BEE_MEMTESTER_*`
 - removable export requires explicit target selection, mount, confirmation, copy, and cleanup

 ### 2.6 — Vendor utilities and optional assets
@@ -343,7 +353,9 @@ Planned code shape:
 Optional binaries live in `iso/vendor/` and are included when present:
 - `storcli64`
 - `sas2ircu`, `sas3ircu`
- `mstflint`
+- `arcconf`
+- `ssacli`
+- `mstflint` (via Debian package set)

 Missing optional tools do not fail the build or boot.

@@ -358,6 +370,7 @@ Missing optional tools do not fail the build or boot.
 Current release model:
 - shipping a new ISO means a full rebuild
 - build metadata is embedded into `/etc/bee-release` and `motd`
+- current ISO label is `BEE`
 - binary self-update remains deferred; no automatic USB/network patching is part of the current runtime

 ---
@@ -374,9 +387,9 @@ No "works on my Mac" drift.
 1.2  board collector               → first real data
 1.3  CPU collector                 → +CPUs

--- BUILDER + DEBUG ISO (unblock real-hardware testing) ---
+--- BUILDER + BEE ISO (unblock real-hardware testing) ---

-2.1  builder setup                 → Debian host/VM or privileged container with build deps
+2.1  builder setup                 → privileged container with build deps
 2.2  debug ISO profile             → minimal Debian ISO: `bee` binary + OpenSSH + all packages
 2.3  boot on real server           → SSH in, verify packages present, run audit manually

@@ -397,7 +410,7 @@ No "works on my Mac" drift.
 2.4  NVIDIA driver build           → driver compiled into overlay
 2.5  network bring-up on boot      → DHCP on all interfaces
 2.6  systemd boot service          → audit runs on boot automatically
-2.7  vendor utilities              → storcli/sas2ircu/mstflint in image
+2.7  vendor utilities              → storcli/sas2ircu/arcconf/ssacli in image
 2.8  release workflow              → versioning + release notes
 2.9  operator export flow          → explicit TUI export to removable media
 ```
--- a/audit/Makefile
+++ b/audit/Makefile
@@ -0,0 +1,22 @@
+# Developer shortcuts for the bee audit binary.
+#
+# Overridable variables:
+#   LISTEN      listen address passed to `bee web --listen`
+#   AUDIT_PATH  optional audit JSON path; `--audit-path` is only passed when set
+#   EXPORT_DIR  directory passed to `bee web --export-dir` (created by `make run`)
+#   VERSION     value linked into main.Version
+LISTEN ?= :8080
+AUDIT_PATH ?=
+EXPORT_DIR ?= $(CURDIR)/.tmp/export
+VERSION ?= $(shell sh ./scripts/resolve-version.sh)
+GO_LDFLAGS := -X main.Version=$(VERSION)
+
+# Paths are quoted because RUN_ARGS is expanded verbatim into a shell command;
+# an unquoted checkout path containing spaces would split into several words.
+RUN_ARGS := web --listen $(LISTEN) --export-dir "$(EXPORT_DIR)"
+ifneq ($(AUDIT_PATH),)
+RUN_ARGS += --audit-path "$(AUDIT_PATH)"
+endif
+
+.PHONY: run build test
+
+# run: start the web UI from source.
+run:
+	mkdir -p "$(EXPORT_DIR)"
+	go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)
+
+# build: produce the ./bee binary with the version baked in.
+build:
+	go build -ldflags "$(GO_LDFLAGS)" -o bee ./cmd/bee
+
+# test: run the whole Go test suite.
+test:
+	go test ./...
--- a/audit/bee
+++ b/audit/bee
--- a/audit/cmd/bee/main.go
+++ b/audit/cmd/bee/main.go
@@ -1,64 +1,123 @@
 package main

 import (
+	"context"
 	"flag"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
+	"runtime/debug"
+	"strconv"
 	"strings"

 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/runtimeenv"
-	"bee/audit/internal/tui"
+	"bee/audit/internal/webui"
 )

 var Version = "dev"

+// buildLabel returns Version with surrounding whitespace removed, falling back
+// to "dev" when nothing remains. runWeb hands the result to the web UI handler
+// as its build label.
+func buildLabel() string {
+	if trimmed := strings.TrimSpace(Version); trimmed != "" {
+		return trimmed
+	}
+	return "dev"
+}
+
 func main() {
 	os.Exit(run(os.Args[1:], os.Stdout, os.Stderr))
 }

-func run(args []string, stdout, stderr io.Writer) int {
+func run(args []string, stdout, stderr io.Writer) (exitCode int) {
 	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
 		Level: slog.LevelInfo,
 	})))
+	defer func() {
+		if rec := recover(); rec != nil {
+			slog.Error("fatal panic",
+				"panic", fmt.Sprint(rec),
+				"stack", string(debug.Stack()),
+			)
+			exitCode = 1
+		}
+	}()

 	if len(args) == 0 {
 		printRootUsage(stderr)
-		return 1
+		return 2
 	}

 	switch args[0] {
 	case "help", "--help", "-h":
+		if len(args) > 1 {
+			return runHelp(args[1:], stdout, stderr)
+		}
 		printRootUsage(stdout)
 		return 0
 	case "audit":
 		return runAudit(args[1:], stdout, stderr)
-	case "tui":
-		return runTUI(args[1:], stdout, stderr)
 	case "export":
 		return runExport(args[1:], stdout, stderr)
+	case "preflight":
+		return runPreflight(args[1:], stdout, stderr)
+	case "support-bundle":
+		return runSupportBundle(args[1:], stdout, stderr)
+	case "web":
+		return runWeb(args[1:], stdout, stderr)
 	case "sat":
 		return runSAT(args[1:], stdout, stderr)
+	case "benchmark":
+		return runBenchmark(args[1:], stdout, stderr)
 	case "version", "--version", "-version":
 		fmt.Fprintln(stdout, Version)
 		return 0
 	default:
 		fmt.Fprintf(stderr, "bee: unknown command %q\n\n", args[0])
 		printRootUsage(stderr)
-		return 1
+		return 2
 	}
 }

 func printRootUsage(w io.Writer) {
 	fmt.Fprintln(w, `bee commands:
  bee audit   --runtime auto|local|livecd --output stdout|file:<path>
-  bee tui     --runtime auto|local|livecd
+  bee preflight --output stdout|file:<path>
  bee export  --target <device>
-  bee sat nvidia
-  bee version`)
+  bee support-bundle --output stdout|file:<path>
+  bee web     --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
+  bee sat nvidia|memory|storage|cpu [--duration <seconds>]
+  bee benchmark nvidia [--profile standard|stability|overnight]
+  bee version
+  bee help [command]`)
+}
+
+// runHelp implements "bee help <command>". args[0] names the command; the
+// caller guarantees args is non-empty. Known commands are re-invoked with
+// "--help" and both writers set to stdout, so their usage text lands on
+// stdout. An unknown command prints an error plus the root usage on stderr
+// and returns 2.
+func runHelp(args []string, stdout, stderr io.Writer) int {
+	topic := args[0]
+	if topic == "version" {
+		fmt.Fprintln(stdout, "usage: bee version")
+		return 0
+	}
+
+	handlers := map[string]func([]string, io.Writer, io.Writer) int{
+		"audit":          runAudit,
+		"export":         runExport,
+		"preflight":      runPreflight,
+		"support-bundle": runSupportBundle,
+		"web":            runWeb,
+		"sat":            runSAT,
+		"benchmark":      runBenchmark,
+	}
+	showHelp, known := handlers[topic]
+	if !known {
+		fmt.Fprintf(stderr, "bee help: unknown command %q\n\n", topic)
+		printRootUsage(stderr)
+		return 2
+	}
+	return showHelp([]string{"--help"}, stdout, stdout)
+}

 func runAudit(args []string, stdout, stderr io.Writer) int {
@@ -72,6 +131,13 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
 		fs.PrintDefaults()
 	}
 	if err := fs.Parse(args); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fs.Usage()
 		return 2
 	}
 	if *showVersion {
@@ -98,32 +164,6 @@ func runAudit(args []string, stdout, stderr io.Writer) int {
 	return 0
 }

-func runTUI(args []string, stdout, stderr io.Writer) int {
-	fs := flag.NewFlagSet("tui", flag.ContinueOnError)
-	fs.SetOutput(stderr)
-	runtimeFlag := fs.String("runtime", "auto", "runtime environment: auto, local, livecd")
-	fs.Usage = func() {
-		fmt.Fprintln(stderr, "usage: bee tui [--runtime auto|local|livecd]")
-		fs.PrintDefaults()
-	}
-	if err := fs.Parse(args); err != nil {
-		return 2
-	}
-
-	runtimeInfo, err := runtimeenv.Detect(*runtimeFlag)
-	if err != nil {
-		slog.Error("resolve runtime", "err", err)
-		return 1
-	}
-
-	application := app.New(platform.New())
-	if err := tui.Run(application, runtimeInfo.Mode); err != nil {
-		slog.Error("run tui", "err", err)
-		return 1
-	}
-	return 0
-}
-
 func runExport(args []string, stdout, stderr io.Writer) int {
 	fs := flag.NewFlagSet("export", flag.ContinueOnError)
 	fs.SetOutput(stderr)
@@ -133,6 +173,13 @@ func runExport(args []string, stdout, stderr io.Writer) int {
 		fs.PrintDefaults()
 	}
 	if err := fs.Parse(args); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fs.Usage()
 		return 2
 	}
 	if strings.TrimSpace(*targetDevice) == "" {
@@ -164,22 +211,273 @@ func runExport(args []string, stdout, stderr io.Writer) int {
 	return 1
 }

-func runSAT(args []string, stdout, stderr io.Writer) int {
-	if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
-		fmt.Fprintln(stderr, "usage: bee sat nvidia")
+func runPreflight(args []string, stdout, stderr io.Writer) int {
+	fs := flag.NewFlagSet("preflight", flag.ContinueOnError)
+	fs.SetOutput(stderr)
+	output := fs.String("output", "stdout", "output destination: stdout or file:<path>")
+	fs.Usage = func() {
+		fmt.Fprintf(stderr, "usage: bee preflight [--output stdout|file:%s]\n", app.DefaultRuntimeJSONPath)
+		fs.PrintDefaults()
+	}
+	if err := fs.Parse(args); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
 		return 2
 	}
-	if args[0] != "nvidia" {
-		fmt.Fprintf(stderr, "bee sat: unknown target %q\n", args[0])
-		fmt.Fprintln(stderr, "usage: bee sat nvidia")
+	if fs.NArg() != 0 {
+		fs.Usage()
 		return 2
 	}
 	application := app.New(platform.New())
-	archive, err := application.RunNvidiaAcceptancePack("")
+	path, err := application.RunRuntimePreflight(*output)
 	if err != nil {
-		slog.Error("run nvidia sat", "err", err)
+		slog.Error("run preflight", "err", err)
 		return 1
 	}
-	slog.Info("nvidia sat archive written", "path", archive)
+	if path != "stdout" {
+		slog.Info("runtime health written", "path", path)
+	}
 	return 0
 }
+
+func runSupportBundle(args []string, stdout, stderr io.Writer) int {
+	fs := flag.NewFlagSet("support-bundle", flag.ContinueOnError)
+	fs.SetOutput(stderr)
+	output := fs.String("output", "stdout", "output destination: stdout or file:<path>")
+	fs.Usage = func() {
+		fmt.Fprintln(stderr, "usage: bee support-bundle [--output stdout|file:<path>]")
+		fs.PrintDefaults()
+	}
+	if err := fs.Parse(args); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fs.Usage()
+		return 2
+	}
+	path, err := app.BuildSupportBundle(app.DefaultExportDir)
+	if err != nil {
+		slog.Error("build support bundle", "err", err)
+		return 1
+	}
+	defer os.Remove(path)
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		slog.Error("read support bundle", "err", err)
+		return 1
+	}
+	switch {
+	case *output == "stdout":
+		if _, err := stdout.Write(raw); err != nil {
+			slog.Error("write support bundle stdout", "err", err)
+			return 1
+		}
+	case strings.HasPrefix(*output, "file:"):
+		dst := strings.TrimPrefix(*output, "file:")
+		if err := os.WriteFile(dst, raw, 0644); err != nil {
+			slog.Error("write support bundle", "err", err)
+			return 1
+		}
+		slog.Info("support bundle written", "path", dst)
+	default:
+		fmt.Fprintln(stderr, "bee support-bundle: unknown output destination")
+		fs.Usage()
+		return 2
+	}
+	return 0
+}
+
+// runWeb implements "bee web": it parses --listen, --audit-path, --export-dir
+// and --title, then serves the web UI through webui.ListenAndServe.
+//
+// It returns 0 when ListenAndServe returns without error (or for --help), 1
+// when ListenAndServe fails, and 2 on usage errors.
+func runWeb(args []string, stdout, stderr io.Writer) int {
+	fs := flag.NewFlagSet("web", flag.ContinueOnError)
+	fs.SetOutput(stderr)
+	// NOTE(review): the flag default is :8080 while the usage line below shows
+	// :80 — confirm which one is intended.
+	listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
+	auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot")
+	exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
+	title := fs.String("title", "Bee Hardware Audit", "page title")
+	fs.Usage = func() {
+		fmt.Fprintf(stderr, "usage: bee web [--listen :80] [--audit-path %s] [--export-dir %s] [--title \"Bee Hardware Audit\"]\n", app.DefaultAuditJSONPath, app.DefaultExportDir)
+		fs.PrintDefaults()
+	}
+	if err := fs.Parse(args); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fs.Usage()
+		return 2
+	}
+
+	slog.Info("starting bee web", "listen", *listenAddr, "audit_path", *auditPath)
+
+	// A detection failure is only logged; the server still starts.
+	// NOTE(review): runtimeInfo.Mode is read below even when err != nil —
+	// confirm Detect returns a usable value (not a nil pointer) on error.
+	runtimeInfo, err := runtimeenv.Detect("auto")
+	if err != nil {
+		slog.Warn("resolve runtime for web", "err", err)
+	}
+
+	if err := webui.ListenAndServe(*listenAddr, webui.HandlerOptions{
+		Title:       *title,
+		BuildLabel:  buildLabel(),
+		AuditPath:   *auditPath,
+		ExportDir:   *exportDir,
+		App:         app.New(platform.New()),
+		RuntimeMode: runtimeInfo.Mode,
+	}); err != nil {
+		slog.Error("run web", "err", err)
+		return 1
+	}
+	return 0
+}
+
+// runSAT implements "bee sat <target>" for the nvidia, memory, storage and cpu
+// acceptance packs. Progress lines from the pack are written to stderr.
+//
+// Flags: --duration applies to cpu and falls back to 60 seconds when unset or
+// <= 0; --diag-level applies to nvidia, where a value > 0 selects
+// RunNvidiaAcceptancePackWithOptions instead of RunNvidiaAcceptancePack.
+//
+// It returns 0 on success (or when help is requested), 1 when the pack fails,
+// and 2 on usage errors: no target, unknown target, bad flags, extra arguments.
+func runSAT(args []string, stdout, stderr io.Writer) int {
+	// One usage line for every branch so the documented flags never drift apart.
+	const usage = "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]"
+
+	if len(args) == 0 {
+		fmt.Fprintln(stderr, usage)
+		return 2
+	}
+
+	// The target is checked before flag parsing, matching runBenchmark, so a
+	// mistyped target is reported as such rather than as a flag error.
+	target := args[0]
+	switch target {
+	case "help", "--help", "-h":
+		fmt.Fprintln(stdout, usage)
+		return 0
+	case "nvidia", "memory", "storage", "cpu":
+	default:
+		fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
+		fmt.Fprintln(stderr, usage)
+		return 2
+	}
+
+	fs := flag.NewFlagSet("sat", flag.ContinueOnError)
+	fs.SetOutput(stderr)
+	duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
+	diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
+	fs.Usage = func() {
+		fmt.Fprintln(stderr, usage)
+		fs.PrintDefaults()
+	}
+	if err := fs.Parse(args[1:]); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fmt.Fprintf(stderr, "bee sat: unexpected arguments\n")
+		return 2
+	}
+
+	application := app.New(platform.New())
+	var (
+		archive string
+		err     error
+	)
+	// Progress goes through the injected writer so callers and tests can capture it.
+	logLine := func(s string) { fmt.Fprintln(stderr, s) }
+	ctx := context.Background()
+	switch target {
+	case "nvidia":
+		if *diagLevel > 0 {
+			// NOTE(review): the first result is discarded, so archive stays empty
+			// in this mode — if that result is the archive path, assign it here.
+			_, err = application.RunNvidiaAcceptancePackWithOptions(ctx, "", *diagLevel, nil, logLine)
+		} else {
+			archive, err = application.RunNvidiaAcceptancePack("", logLine)
+		}
+	case "memory":
+		archive, err = application.RunMemoryAcceptancePackCtx(ctx, "", 256, 1, logLine)
+	case "storage":
+		archive, err = application.RunStorageAcceptancePackCtx(ctx, "", false, logLine)
+	case "cpu":
+		seconds := *duration
+		if seconds <= 0 {
+			seconds = 60
+		}
+		archive, err = application.RunCPUAcceptancePackCtx(ctx, "", seconds, logLine)
+	}
+	if err != nil {
+		slog.Error("run sat", "target", target, "err", err)
+		return 1
+	}
+	// Do not claim an archive was written when no path is known.
+	if archive == "" {
+		slog.Info("sat completed", "target", target)
+		return 0
+	}
+	slog.Info("sat archive written", "target", target, "path", archive)
+	return 0
+}
+
+func runBenchmark(args []string, stdout, stderr io.Writer) int {
+	if len(args) == 0 {
+		fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
+		return 2
+	}
+	if args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
+		fmt.Fprintln(stdout, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
+		return 0
+	}
+	target := args[0]
+	if target != "nvidia" {
+		fmt.Fprintf(stderr, "bee benchmark: unknown target %q\n", target)
+		fmt.Fprintln(stderr, "usage: bee benchmark nvidia [--profile standard|stability|overnight] [--devices 0,1] [--exclude 2,3] [--size-mb N] [--skip-nccl]")
+		return 2
+	}
+
+	fs := flag.NewFlagSet("benchmark", flag.ContinueOnError)
+	fs.SetOutput(stderr)
+	profile := fs.String("profile", platform.NvidiaBenchmarkProfileStandard, "benchmark profile: standard, stability, overnight")
+	devices := fs.String("devices", "", "comma-separated GPU indices to include")
+	exclude := fs.String("exclude", "", "comma-separated GPU indices to exclude")
+	sizeMB := fs.Int("size-mb", 0, "per-GPU benchmark buffer size in MB (0 = auto)")
+	skipNCCL := fs.Bool("skip-nccl", false, "skip multi-GPU NCCL interconnect benchmark")
+	if err := fs.Parse(args[1:]); err != nil {
+		if err == flag.ErrHelp {
+			return 0
+		}
+		return 2
+	}
+	if fs.NArg() != 0 {
+		fmt.Fprintf(stderr, "bee benchmark: unexpected arguments\n")
+		return 2
+	}
+
+	includeIndices, err := parseBenchmarkIndexCSV(*devices)
+	if err != nil {
+		fmt.Fprintf(stderr, "bee benchmark: invalid --devices: %v\n", err)
+		return 2
+	}
+	excludeIndices, err := parseBenchmarkIndexCSV(*exclude)
+	if err != nil {
+		fmt.Fprintf(stderr, "bee benchmark: invalid --exclude: %v\n", err)
+		return 2
+	}
+
+	application := app.New(platform.New())
+	logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
+	archive, err := application.RunNvidiaBenchmark("", platform.NvidiaBenchmarkOptions{
+		Profile:           *profile,
+		SizeMB:            *sizeMB,
+		GPUIndices:        includeIndices,
+		ExcludeGPUIndices: excludeIndices,
+		RunNCCL:           !*skipNCCL,
+	}, logLine)
+	if err != nil {
+		slog.Error("run benchmark", "target", target, "err", err)
+		return 1
+	}
+	slog.Info("benchmark archive written", "target", target, "path", archive)
+	return 0
+}
+
+// parseBenchmarkIndexCSV turns a comma-separated list such as "0, 2,3" into GPU
+// indices. Whitespace around entries is ignored and empty entries are skipped;
+// an input with no entries yields a nil slice and no error. Any entry that is
+// not a non-negative decimal integer aborts parsing with an error naming it.
+func parseBenchmarkIndexCSV(raw string) ([]int, error) {
+	var indices []int
+	for _, field := range strings.Split(raw, ",") {
+		token := strings.TrimSpace(field)
+		if token == "" {
+			continue
+		}
+		idx, err := strconv.Atoi(token)
+		if err != nil || idx < 0 {
+			return nil, fmt.Errorf("bad gpu index %q", token)
+		}
+		indices = append(indices, idx)
+	}
+	return indices, nil
+}
--- a/audit/cmd/bee/main_test.go
+++ b/audit/cmd/bee/main_test.go
@@ -24,8 +24,8 @@ func TestRunNoArgsPrintsUsage(t *testing.T) {

 	var stdout, stderr bytes.Buffer
 	rc := run(nil, &stdout, &stderr)
-	if rc != 1 {
-		t.Fatalf("rc=%d want 1", rc)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
 	}
 	if !strings.Contains(stderr.String(), "bee commands:") {
 		t.Fatalf("stderr missing root usage:\n%s", stderr.String())
@@ -37,8 +37,8 @@ func TestRunUnknownCommand(t *testing.T) {

 	var stdout, stderr bytes.Buffer
 	rc := run([]string{"wat"}, &stdout, &stderr)
-	if rc != 1 {
-		t.Fatalf("rc=%d want 1", rc)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
 	}
 	if !strings.Contains(stderr.String(), `unknown command "wat"`) {
 		t.Fatalf("stderr missing unknown command message:\n%s", stderr.String())
@@ -46,8 +46,6 @@ func TestRunUnknownCommand(t *testing.T) {
 }

 func TestRunVersion(t *testing.T) {
-	t.Parallel()
-
 	old := Version
 	Version = "test-version"
 	t.Cleanup(func() { Version = old })
@@ -62,6 +60,16 @@ func TestRunVersion(t *testing.T) {
 	}
 }

+// TestBuildLabelUsesVersionAsIs checks that buildLabel returns a plain version
+// string unchanged. It does not call t.Parallel because it temporarily
+// overwrites the package-level Version.
+func TestBuildLabelUsesVersionAsIs(t *testing.T) {
+	old := Version
+	Version = "1.2.3"
+	t.Cleanup(func() { Version = old })
+
+	if got := buildLabel(); got != "1.2.3" {
+		t.Fatalf("buildLabel=%q want %q", got, "1.2.3")
+	}
+}
+
 func TestRunExportRequiresTarget(t *testing.T) {
 	t.Parallel()

@@ -86,11 +94,63 @@ func TestRunSATUsage(t *testing.T) {
 	if rc != 2 {
 		t.Fatalf("rc=%d want 2", rc)
 	}
-	if !strings.Contains(stderr.String(), "usage: bee sat nvidia") {
+	if !strings.Contains(stderr.String(), "usage: bee sat nvidia|memory|storage") {
 		t.Fatalf("stderr missing sat usage:\n%s", stderr.String())
 	}
 }

+// TestRunPreflightRejectsExtraArgs checks that a positional argument after
+// "preflight" yields exit code 2 and the preflight usage on stderr.
+func TestRunPreflightRejectsExtraArgs(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"preflight", "extra"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), "usage: bee preflight") {
+		t.Fatalf("stderr missing preflight usage:\n%s", stderr.String())
+	}
+}
+
+// TestRunSupportBundleRejectsExtraArgs checks that a positional argument after
+// "support-bundle" yields exit code 2 and the support-bundle usage on stderr.
+func TestRunSupportBundleRejectsExtraArgs(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"support-bundle", "extra"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), "usage: bee support-bundle") {
+		t.Fatalf("stderr missing support-bundle usage:\n%s", stderr.String())
+	}
+}
+
+// TestRunHelpForSubcommand checks that "help export" exits 0 and prints the
+// export usage on stdout rather than stderr.
+func TestRunHelpForSubcommand(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"help", "export"}, &stdout, &stderr)
+	if rc != 0 {
+		t.Fatalf("rc=%d want 0", rc)
+	}
+	if !strings.Contains(stdout.String(), "usage: bee export --target <device>") {
+		t.Fatalf("stdout missing export help:\n%s", stdout.String())
+	}
+}
+
+// TestRunHelpUnknownSubcommand checks that "help" with an unknown command
+// exits 2 and reports the unknown command on stderr.
+func TestRunHelpUnknownSubcommand(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"help", "wat"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), `bee help: unknown command "wat"`) {
+		t.Fatalf("stderr missing help error:\n%s", stderr.String())
+	}
+}
+
 func TestRunSATUnknownTarget(t *testing.T) {
 	t.Parallel()

@@ -104,6 +164,32 @@ func TestRunSATUnknownTarget(t *testing.T) {
 	}
 }

+// TestRunSATHelp checks that "sat --help" exits 0 and prints the sat usage on
+// stdout.
+func TestRunSATHelp(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"sat", "--help"}, &stdout, &stderr)
+	if rc != 0 {
+		t.Fatalf("rc=%d want 0", rc)
+	}
+	if !strings.Contains(stdout.String(), "usage: bee sat nvidia|memory|storage|cpu") {
+		t.Fatalf("stdout missing sat help:\n%s", stdout.String())
+	}
+}
+
+// TestRunSATRejectsExtraArgs checks that a positional argument after a valid
+// sat target yields exit code 2 and an "unexpected arguments" error on stderr.
+func TestRunSATRejectsExtraArgs(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"sat", "memory", "extra"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), "bee sat: unexpected arguments") {
+		t.Fatalf("stderr missing sat error:\n%s", stderr.String())
+	}
+}
+
 func TestRunAuditInvalidRuntime(t *testing.T) {
 	t.Parallel()

@@ -113,3 +199,29 @@ func TestRunAuditInvalidRuntime(t *testing.T) {
 		t.Fatalf("rc=%d want 1", rc)
 	}
 }
+
+// TestRunAuditRejectsExtraArgs checks that a positional argument after "audit"
+// yields exit code 2 and the audit usage on stderr.
+func TestRunAuditRejectsExtraArgs(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"audit", "extra"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), "usage: bee audit") {
+		t.Fatalf("stderr missing audit usage:\n%s", stderr.String())
+	}
+}
+
+// TestRunExportRejectsExtraArgs checks that a positional argument following a
+// valid --target yields exit code 2 and the export usage on stderr.
+func TestRunExportRejectsExtraArgs(t *testing.T) {
+	t.Parallel()
+
+	var stdout, stderr bytes.Buffer
+	rc := run([]string{"export", "--target", "/dev/sdb1", "extra"}, &stdout, &stderr)
+	if rc != 2 {
+		t.Fatalf("rc=%d want 2", rc)
+	}
+	if !strings.Contains(stderr.String(), "usage: bee export --target <device>") {
+		t.Fatalf("stderr missing export usage:\n%s", stderr.String())
+	}
+}
--- a/audit/go.mod
+++ b/audit/go.mod
@@ -1,24 +1,22 @@
 module bee/audit

-go 1.23
+go 1.25.0

-require github.com/charmbracelet/bubbletea v1.3.4
+replace reanimator/chart => ../internal/chart

 require (
-	github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
-	github.com/charmbracelet/lipgloss v1.0.0 // indirect
-	github.com/charmbracelet/x/ansi v0.8.0 // indirect
-	github.com/charmbracelet/x/term v0.2.1 // indirect
-	github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
-	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/mattn/go-localereader v0.0.1 // indirect
-	github.com/mattn/go-runewidth v0.0.16 // indirect
-	github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
-	github.com/muesli/cancelreader v0.2.2 // indirect
-	github.com/muesli/termenv v0.15.2 // indirect
-	github.com/rivo/uniseg v0.4.7 // indirect
-	golang.org/x/sync v0.11.0 // indirect
-	golang.org/x/sys v0.30.0 // indirect
-	golang.org/x/text v0.3.8 // indirect
+	modernc.org/sqlite v1.48.0
+	reanimator/chart v0.0.0-00010101000000-000000000000
+)
+
+require (
+	github.com/dustin/go-humanize v1.0.1 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
+	github.com/ncruces/go-strftime v1.0.0 // indirect
+	github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
+	golang.org/x/sys v0.42.0 // indirect
+	modernc.org/libc v1.72.0 // indirect
+	modernc.org/mathutil v1.7.1 // indirect
+	modernc.org/memory v1.11.0 // indirect
 )
--- a/audit/go.sum
+++ b/audit/go.sum
@@ -1,37 +1,51 @@
-github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
-github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
-github.com/charmbracelet/bubbletea v1.3.4 h1:kCg7B+jSCFPLYRA52SDZjr51kG/fMUEoPoZrkaDHyoI=
-github.com/charmbracelet/bubbletea v1.3.4/go.mod h1:dtcUCyCGEX3g9tosuYiut3MXgY/Jsv9nKVdibKKRRXo=
-github.com/charmbracelet/lipgloss v1.0.0 h1:O7VkGDvqEdGi93X+DeqsQ7PKHDgtQfF8j8/O2qFMQNg=
-github.com/charmbracelet/lipgloss v1.0.0/go.mod h1:U5fy9Z+C38obMs+T+tJqst9VGzlOYGj4ri9reL3qUlo=
-github.com/charmbracelet/x/ansi v0.8.0 h1:9GTq3xq9caJW8ZrBTe0LIe2fvfLR/bYXKTx2llXn7xE=
-github.com/charmbracelet/x/ansi v0.8.0/go.mod h1:wdYl/ONOLHLIVmQaxbIYEC/cRKOQyjTkowiI4blgS9Q=
-github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
-github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
-github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
-github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
-github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
-github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
+github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
+github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
-github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
-github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
-github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
-github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
-github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
-github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
-github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
-github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo=
-github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8=
-github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
-github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
-github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
-golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w=
-golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
-golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
+github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
+github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
+golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
+golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
+golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
+golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
-golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
-golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
+golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
+golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
+golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
+modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
+modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
+modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
+modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
+modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
+modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
+modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
+modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
+modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
+modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
+modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
+modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
+modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
+modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
+modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
+modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
+modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
+modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
+modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
+modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
+modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
+modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
+modernc.org/sqlite v1.48.0 h1:ElZyLop3Q2mHYk5IFPPXADejZrlHu7APbpB0sF78bq4=
+modernc.org/sqlite v1.48.0/go.mod h1:hWjRO6Tj/5Ik8ieqxQybiEOUXy0NJFNp2tpvVpKlvig=
+modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
+modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
+modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
+modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -1,10 +1,18 @@
 package app

 import (
+	"archive/tar"
+	"compress/gzip"
+	"context"
+	"encoding/json"
 	"errors"
+	"io"
+	"os"
+	"path/filepath"
 	"testing"

 	"bee/audit/internal/platform"
+	"bee/audit/internal/schema"
 )

 type fakeNetwork struct {
@@ -35,6 +43,13 @@ func (f fakeNetwork) SetStaticIPv4(cfg platform.StaticIPv4Config) (string, error
 	return f.setStaticIPv4Fn(cfg)
 }

+func (f fakeNetwork) SetInterfaceState(_ string, _ bool) error { return nil }
+func (f fakeNetwork) GetInterfaceState(_ string) (bool, error) { return true, nil }
+func (f fakeNetwork) CaptureNetworkSnapshot() (platform.NetworkSnapshot, error) {
+	return platform.NetworkSnapshot{}, nil
+}
+func (f fakeNetwork) RestoreNetworkSnapshot(platform.NetworkSnapshot) error { return nil }
+
 type fakeServices struct {
 	serviceStatusFn func(string) (string, error)
 	serviceDoFn     func(string, platform.ServiceAction) (string, error)
@@ -44,6 +59,10 @@ func (f fakeServices) ListBeeServices() ([]string, error) {
 	return nil, nil
 }

+func (f fakeServices) ServiceState(name string) string {
+	return "active"
+}
+
 func (f fakeServices) ServiceStatus(name string) (string, error) {
 	return f.serviceStatusFn(name)
 }
@@ -52,16 +71,41 @@ func (f fakeServices) ServiceDo(name string, action platform.ServiceAction) (str
 	return f.serviceDoFn(name, action)
 }

-type fakeExports struct{}
+type fakeExports struct {
+	listTargetsFn    func() ([]platform.RemovableTarget, error)
+	exportToTargetFn func(string, platform.RemovableTarget) (string, error)
+}

 func (f fakeExports) ListRemovableTargets() ([]platform.RemovableTarget, error) {
+	if f.listTargetsFn != nil {
+		return f.listTargetsFn()
+	}
 	return nil, nil
 }

 func (f fakeExports) ExportFileToTarget(src string, target platform.RemovableTarget) (string, error) {
+	if f.exportToTargetFn != nil {
+		return f.exportToTargetFn(src, target)
+	}
 	return "", nil
 }

+type fakeRuntime struct {
+	collectFn func(string) (schema.RuntimeHealth, error)
+	dumpFn    func(string) error
+}
+
+func (f fakeRuntime) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, error) {
+	return f.collectFn(exportDir) // NOTE(review): no nil guard, unlike CaptureTechnicalDump; a fakeRuntime without collectFn panics here
+}
+
+func (f fakeRuntime) CaptureTechnicalDump(baseDir string) error {
+	if f.dumpFn != nil {
+		return f.dumpFn(baseDir)
+	}
+	return nil
+}
+
 type fakeTools struct {
 	tailFileFn   func(string, int) string
 	checkToolsFn func([]string) []platform.ToolStatus
@@ -76,11 +120,175 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 }

 type fakeSAT struct {
-	runFn func(string) (string, error)
+	runNvidiaFn               func(string) (string, error)
+	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
+	runNvidiaComputeFn        func(string, int, []int) (string, error)
+	runNvidiaPowerFn          func(string, int, []int) (string, error)
+	runNvidiaPulseFn          func(string, int, []int) (string, error)
+	runNvidiaBandwidthFn      func(string, []int) (string, error)
+	runNvidiaTargetedStressFn func(string, int, []int) (string, error)
+	runMemoryFn               func(string) (string, error)
+	runStorageFn              func(string) (string, error)
+	runCPUFn                  func(string, int) (string, error)
+	detectVendorFn            func() string
+	listAMDGPUsFn             func() ([]platform.AMDGPUInfo, error)
+	runAMDPackFn              func(string) (string, error)
+	listNvidiaGPUsFn          func() ([]platform.NvidiaGPU, error)
+	listNvidiaGPUStatusesFn   func() ([]platform.NvidiaGPUStatus, error)
+	resetNvidiaGPUFn          func(int) (string, error)
 }

-func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string) (string, error) {
-	return f.runFn(baseDir)
+func (f fakeSAT) RunNvidiaAcceptancePack(baseDir string, _ func(string)) (string, error) {
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaAcceptancePackWithOptions(_ context.Context, baseDir string, _ int, _ []int, _ func(string)) (string, error) {
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
+	if f.runNvidiaBenchmarkFn != nil {
+		return f.runNvidiaBenchmarkFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
+	if f.runNvidiaPowerBenchFn != nil {
+		return f.runNvidiaPowerBenchFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if f.runNvidiaTargetedStressFn != nil {
+		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ int, _ func(string)) (string, error) {
+	if f.runNvidiaComputeFn != nil {
+		return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaTargetedPowerPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if f.runNvidiaPowerFn != nil {
+		return f.runNvidiaPowerFn(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaPulseTestPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if f.runNvidiaPulseFn != nil {
+		return f.runNvidiaPulseFn(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaBandwidthPack(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
+	if f.runNvidiaBandwidthFn != nil {
+		return f.runNvidiaBandwidthFn(baseDir, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
+	if f.runNvidiaStressFn != nil {
+		return f.runNvidiaStressFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
+func (f fakeSAT) ListNvidiaGPUs() ([]platform.NvidiaGPU, error) {
+	if f.listNvidiaGPUsFn != nil {
+		return f.listNvidiaGPUsFn()
+	}
+	return nil, nil
+}
+
+func (f fakeSAT) ListNvidiaGPUStatuses() ([]platform.NvidiaGPUStatus, error) {
+	if f.listNvidiaGPUStatusesFn != nil {
+		return f.listNvidiaGPUStatusesFn()
+	}
+	return nil, nil
+}
+
+func (f fakeSAT) ResetNvidiaGPU(index int) (string, error) {
+	if f.resetNvidiaGPUFn != nil {
+		return f.resetNvidiaGPUFn(index)
+	}
+	return "", nil
+}
+
+func (f fakeSAT) RunMemoryAcceptancePack(_ context.Context, baseDir string, _, _ int, _ func(string)) (string, error) {
+	return f.runMemoryFn(baseDir)
+}
+
+func (f fakeSAT) RunStorageAcceptancePack(_ context.Context, baseDir string, _ bool, _ func(string)) (string, error) {
+	return f.runStorageFn(baseDir)
+}
+
+func (f fakeSAT) RunCPUAcceptancePack(_ context.Context, baseDir string, durationSec int, _ func(string)) (string, error) {
+	if f.runCPUFn != nil {
+		return f.runCPUFn(baseDir, durationSec)
+	}
+	return "", nil
+}
+
+func (f fakeSAT) DetectGPUVendor() string {
+	if f.detectVendorFn != nil {
+		return f.detectVendorFn()
+	}
+	return ""
+}
+
+func (f fakeSAT) ListAMDGPUs() ([]platform.AMDGPUInfo, error) {
+	if f.listAMDGPUsFn != nil {
+		return f.listAMDGPUsFn()
+	}
+	return nil, nil
+}
+
+func (f fakeSAT) RunAMDAcceptancePack(_ context.Context, baseDir string, _ func(string)) (string, error) {
+	if f.runAMDPackFn != nil {
+		return f.runAMDPackFn(baseDir)
+	}
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDMemIntegrityPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDMemBandwidthPack(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunAMDStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
+	return "", nil
+}
+func (f fakeSAT) RunMemoryStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
+	return "", nil
+}
+func (f fakeSAT) RunSATStressPack(_ context.Context, _ string, _ int, _ func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunFanStressTest(_ context.Context, _ string, _ platform.FanStressOptions) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunPlatformStress(_ context.Context, _ string, _ platform.PlatformStressOptions, _ func(string)) (string, error) {
+	return "", nil
+}
+
+func (f fakeSAT) RunNCCLTests(_ context.Context, _ string, _ func(string)) (string, error) {
+	return "", nil
 }

 func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
@@ -96,6 +304,9 @@ func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 			},
 			defaultRouteFn: func() string { return "10.0.0.1" },
 		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
 	}

 	result, err := a.NetworkStatus()
@@ -116,6 +327,28 @@ func TestNetworkStatusFormatsInterfacesAndRoute(t *testing.T) {
 	}
 }

+func TestNetworkStatusHandlesNoInterfaces(t *testing.T) {
+	t.Parallel()
+
+	a := &App{
+		network: fakeNetwork{
+			listInterfacesFn: func() ([]platform.InterfaceInfo, error) { return nil, nil },
+			defaultRouteFn:   func() string { return "" },
+		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
+	}
+
+	result, err := a.NetworkStatus()
+	if err != nil {
+		t.Fatalf("NetworkStatus error: %v", err)
+	}
+	if result.Body != "No physical interfaces found." {
+		t.Fatalf("body=%q want %q", result.Body, "No physical interfaces found.")
+	}
+}
+
 func TestNetworkStatusPropagatesListError(t *testing.T) {
 	t.Parallel()

@@ -126,6 +359,9 @@ func TestNetworkStatusPropagatesListError(t *testing.T) {
 			},
 			defaultRouteFn: func() string { return "" },
 		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
 	}

 	result, err := a.NetworkStatus()
@@ -150,6 +386,9 @@ func TestParseStaticIPv4ConfigAndDefaults(t *testing.T) {
 			dhcpAllFn:       func() (string, error) { return "", nil },
 			setStaticIPv4Fn: func(platform.StaticIPv4Config) (string, error) { return "", nil },
 		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
 	}

 	defaults := a.DefaultStaticIPv4FormFields("eth0")
@@ -186,13 +425,16 @@ func TestServiceActionResults(t *testing.T) {
 				return string(action) + " ok", nil
 			},
 		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
 	}

 	statusResult, err := a.ServiceStatusResult("bee-audit")
 	if err != nil {
 		t.Fatalf("ServiceStatusResult error: %v", err)
 	}
-	if statusResult.Title != "service: bee-audit" || statusResult.Body != "active" {
+	if statusResult.Title != "service status: bee-audit" || statusResult.Body != "active" {
 		t.Fatalf("unexpected status result: %#v", statusResult)
 	}

@@ -200,7 +442,7 @@ func TestServiceActionResults(t *testing.T) {
 	if err != nil {
 		t.Fatalf("ServiceActionResult error: %v", err)
 	}
-	if actionResult.Title != "service: bee-audit" || actionResult.Body != "restart ok" {
+	if actionResult.Title != "service restart: bee-audit" || actionResult.Body != "restart ok" {
 		t.Fatalf("unexpected action result: %#v", actionResult)
 	}
 }
@@ -242,17 +484,156 @@ func TestToolCheckAndLogTailResults(t *testing.T) {
 	}
 }

+func TestActionResultsUseFallbackBody(t *testing.T) {
+	t.Parallel()
+
+	a := &App{
+		network: fakeNetwork{
+			dhcpOneFn:       func(string) (string, error) { return "   ", nil },
+			dhcpAllFn:       func() (string, error) { return "", nil },
+			setStaticIPv4Fn: func(platform.StaticIPv4Config) (string, error) { return "", nil },
+			listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
+				return nil, nil
+			},
+			defaultRouteFn: func() string { return "" },
+		},
+		services: fakeServices{
+			serviceStatusFn: func(string) (string, error) { return "", nil },
+			serviceDoFn:     func(string, platform.ServiceAction) (string, error) { return "", nil },
+		},
+		tools: fakeTools{
+			tailFileFn:   func(string, int) string { return "   " },
+			checkToolsFn: func([]string) []platform.ToolStatus { return nil },
+		},
+		sat: fakeSAT{
+			runNvidiaFn:  func(string) (string, error) { return "", nil },
+			runMemoryFn:  func(string) (string, error) { return "", nil },
+			runStorageFn: func(string) (string, error) { return "", nil },
+		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) {
+				return schema.RuntimeHealth{Status: "PARTIAL", ExportDir: "/tmp/export"}, nil
+			},
+		},
+	}
+
+	if got, _ := a.DHCPOneResult("eth0"); got.Body != "DHCP completed." {
+		t.Fatalf("dhcp one body=%q", got.Body)
+	}
+	if got, _ := a.DHCPAllResult(); got.Body != "DHCP completed." {
+		t.Fatalf("dhcp all body=%q", got.Body)
+	}
+	if got, _ := a.SetStaticIPv4Result(platform.StaticIPv4Config{Interface: "eth0"}); got.Body != "Static IPv4 updated." {
+		t.Fatalf("static body=%q", got.Body)
+	}
+	if got, _ := a.ServiceStatusResult("bee-audit"); got.Body != "No status output." {
+		t.Fatalf("status body=%q", got.Body)
+	}
+	if got, _ := a.ServiceActionResult("bee-audit", platform.ServiceRestart); got.Body != "Action completed." {
+		t.Fatalf("action body=%q", got.Body)
+	}
+	if got := a.ToolCheckResult(nil); got.Body != "No tools checked." {
+		t.Fatalf("tool body=%q", got.Body)
+	}
+	if got := a.AuditLogTailResult(); got.Body != "No audit logs found." {
+		t.Fatalf("log body=%q", got.Body)
+	}
+	if got, _ := a.RunNvidiaAcceptancePackResult(""); got.Body != "Archive written." {
+		t.Fatalf("sat body=%q", got.Body)
+	}
+	if got, _ := a.RunMemoryAcceptancePackResult(""); got.Body != "No output produced." {
+		t.Fatalf("memory sat body=%q", got.Body)
+	}
+	if got, _ := a.RunStorageAcceptancePackResult(""); got.Body != "No output produced." {
+		t.Fatalf("storage sat body=%q", got.Body)
+	}
+}
+
+func TestExportSupportBundleResultMentionsUnmountedUSB(t *testing.T) {
+	tmp := t.TempDir()
+	oldExportDir := DefaultExportDir
+	DefaultExportDir = tmp // swaps a package-level variable, so this test must stay serial (no t.Parallel)
+	t.Cleanup(func() { DefaultExportDir = oldExportDir })
+
+	if err := os.WriteFile(filepath.Join(tmp, "bee-audit.json"), []byte("{}\n"), 0644); err != nil {
+		t.Fatalf("write bee-audit.json: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(tmp, "bee-audit.log"), []byte("audit ok\n"), 0644); err != nil {
+		t.Fatalf("write bee-audit.log: %v", err)
+	}
+
+	a := &App{
+		exports: fakeExports{
+			exportToTargetFn: func(src string, target platform.RemovableTarget) (string, error) {
+				if src == "" { // filepath.Base("") is ".", never "", so the raw path is what must be checked
+					t.Fatalf("expected non-empty source path")
+				}
+				return "/media/bee/" + filepath.Base(src), nil
+			},
+		},
+	}
+
+	result, err := a.ExportSupportBundleResult(platform.RemovableTarget{Device: "/dev/sdb1"})
+	if err != nil {
+		t.Fatalf("ExportSupportBundleResult error: %v", err)
+	}
+	if result.Title != "Export support bundle" {
+		t.Fatalf("title=%q want %q", result.Title, "Export support bundle")
+	}
+	if want := "USB target unmounted and safe to remove."; !contains(result.Body, want) {
+		t.Fatalf("body missing %q\nbody=%s", want, result.Body)
+	}
+}
+
+func TestExportSupportBundleResultDoesNotPretendSuccessOnError(t *testing.T) {
+	tmp := t.TempDir()
+	oldExportDir := DefaultExportDir
+	DefaultExportDir = tmp // swaps a package-level variable; restored by the Cleanup below
+	t.Cleanup(func() { DefaultExportDir = oldExportDir })
+
+	if err := os.WriteFile(filepath.Join(tmp, "bee-audit.json"), []byte("{}\n"), 0644); err != nil {
+		t.Fatalf("write bee-audit.json: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(tmp, "bee-audit.log"), []byte("audit ok\n"), 0644); err != nil {
+		t.Fatalf("write bee-audit.log: %v", err)
+	}
+
+	a := &App{
+		exports: fakeExports{
+			exportToTargetFn: func(string, platform.RemovableTarget) (string, error) {
+				return "", errors.New("mount /dev/sda1: exFAT support is missing in this ISO build")
+			},
+		},
+	}
+
+	result, err := a.ExportSupportBundleResult(platform.RemovableTarget{Device: "/dev/sda1", FSType: "exfat"})
+	if err == nil {
+		t.Fatal("expected export error")
+	}
+	if contains(result.Body, "exported to") { // success wording must not leak into the failure body
+		t.Fatalf("body should not claim success:\n%s", result.Body)
+	}
+	if result.Body != "Support bundle export failed." {
+		t.Fatalf("body=%q want %q", result.Body, "Support bundle export failed.")
+	}
+}
+
 func TestRunNvidiaAcceptancePackResult(t *testing.T) {
 	t.Parallel()

 	a := &App{
 		sat: fakeSAT{
-			runFn: func(baseDir string) (string, error) {
+			runNvidiaFn: func(baseDir string) (string, error) {
 				if baseDir != "/tmp/sat" {
 					t.Fatalf("baseDir=%q want %q", baseDir, "/tmp/sat")
 				}
 				return "/tmp/sat/out.tar.gz", nil
 			},
+			runMemoryFn:  func(string) (string, error) { return "", nil },
+			runStorageFn: func(string) (string, error) { return "", nil },
+		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
 		},
 	}

@@ -265,6 +646,341 @@ func TestRunNvidiaAcceptancePackResult(t *testing.T) {
 	}
 }

+func TestRunSATDefaultsToExportDir(t *testing.T) {
+	oldSATBaseDir := DefaultSATBaseDir
+	DefaultSATBaseDir = "/tmp/export/bee-sat"
+	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
+
+	a := &App{
+		sat: fakeSAT{
+			runNvidiaFn: func(baseDir string) (string, error) {
+				if baseDir != "/tmp/export/bee-sat" {
+					t.Fatalf("nvidia baseDir=%q", baseDir)
+				}
+				return "", nil
+			},
+			runMemoryFn: func(baseDir string) (string, error) {
+				if baseDir != "/tmp/export/bee-sat" {
+					t.Fatalf("memory baseDir=%q", baseDir)
+				}
+				return "", nil
+			},
+			runStorageFn: func(baseDir string) (string, error) {
+				if baseDir != "/tmp/export/bee-sat" {
+					t.Fatalf("storage baseDir=%q", baseDir)
+				}
+				return "", nil
+			},
+		},
+		runtime: fakeRuntime{
+			collectFn: func(string) (schema.RuntimeHealth, error) { return schema.RuntimeHealth{}, nil },
+		},
+	}
+
+	if _, err := a.RunNvidiaAcceptancePack("", nil); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := a.RunMemoryAcceptancePack("", nil); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := a.RunStorageAcceptancePack("", nil); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestFormatSATSummary(t *testing.T) {
+	t.Parallel()
+
+	got := formatSATSummary("Memory SAT", "overall_status=PARTIAL\njob_ok=2\njob_failed=0\njob_unsupported=1\ndevices=3\n")
+	want := "Memory SAT: PARTIAL ok=2 failed=0 unsupported=1\nDevices: 3"
+	if got != want {
+		t.Fatalf("got %q want %q", got, want)
+	}
+}
+
+func TestHealthSummaryResultIncludesCompactSATSummary(t *testing.T) {
+	tmp := t.TempDir()
+	oldAuditPath := DefaultAuditJSONPath
+	oldSATBaseDir := DefaultSATBaseDir
+	DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
+	DefaultSATBaseDir = filepath.Join(tmp, "sat")
+	t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
+	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
+
+	satDir := filepath.Join(DefaultSATBaseDir, "memory-testcase")
+	if err := os.MkdirAll(satDir, 0755); err != nil {
+		t.Fatalf("mkdir sat dir: %v", err)
+	}
+
+	raw := `{"collected_at":"2026-03-15T10:00:00Z","hardware":{"board":{"serial_number":"SRV123"},"storage":[{"serial_number":"DISK1","status":"Warning"}]}}`
+	if err := os.WriteFile(DefaultAuditJSONPath, []byte(raw), 0644); err != nil {
+		t.Fatalf("write audit json: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(satDir, "summary.txt"), []byte("overall_status=OK\njob_ok=3\njob_failed=0\njob_unsupported=0\n"), 0644); err != nil {
+		t.Fatalf("write sat summary: %v", err)
+	}
+
+	result := (&App{}).HealthSummaryResult()
+	if !contains(result.Body, "Memory SAT: OK ok=3 failed=0") {
+		t.Fatalf("body missing compact sat summary:\n%s", result.Body)
+	}
+}
+
+func TestApplySATOverlayFiltersIgnoredLegacyDevices(t *testing.T) {
+	tmp := t.TempDir()
+	oldSATBaseDir := DefaultSATBaseDir
+	DefaultSATBaseDir = filepath.Join(tmp, "sat") // swaps a package-level variable, so this test stays serial
+	t.Cleanup(func() { DefaultSATBaseDir = oldSATBaseDir })
+
+	raw := `{
+	  "collected_at": "2026-03-15T10:00:00Z",
+	  "hardware": {
+	    "board": {"serial_number": "SRV123"},
+	    "storage": [
+	      {"model": "Virtual HDisk0", "serial_number": "AAAABBBBCCCC3"},
+	      {"model": "PASCARI", "serial_number": "DISK1", "status": "OK"}
+	    ],
+	    "pcie_devices": [
+	      {"device_class": "Co-processor", "model": "402xx Series QAT", "status": "OK"},
+	      {"device_class": "VideoController", "model": "NVIDIA H100", "status": "OK"}
+	    ]
+	  }
+	}`
+
+	got, err := ApplySATOverlay([]byte(raw))
+	if err != nil {
+		t.Fatalf("ApplySATOverlay error: %v", err)
+	}
+	text := string(got)
+	if contains(text, "Virtual HDisk0") {
+		t.Fatalf("overlaid audit should drop virtual hdisk:\n%s", text)
+	}
+	if contains(text, "Co-processor") { // bare value: the check must not depend on the encoder's key/value spacing
+		t.Fatalf("overlaid audit should drop co-processors:\n%s", text)
+	}
+	if !contains(text, "PASCARI") || !contains(text, "NVIDIA H100") {
+		t.Fatalf("overlaid audit should keep real devices:\n%s", text)
+	}
+}
+
+func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
+	tmp := t.TempDir()
+	exportDir := filepath.Join(tmp, "export")
+	if err := os.MkdirAll(filepath.Join(exportDir, "bee-sat", "memory-run"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(exportDir, "bee-audit.json"), []byte(`{"collected_at":"2026-03-15T10:00:00Z","hardware":{"board":{"serial_number":"SRV123"},"storage":[{"model":"Virtual HDisk0","serial_number":"AAAABBBBCCCC3"},{"model":"PASCARI","serial_number":"DISK1"}],"pcie_devices":[{"device_class":"Co-processor","model":"402xx Series QAT"},{"device_class":"VideoController","model":"NVIDIA H100"}]}}`), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	archive, err := BuildSupportBundle(exportDir)
+	if err != nil {
+		t.Fatalf("BuildSupportBundle error: %v", err)
+	}
+	if _, err := os.Stat(archive); err != nil {
+		t.Fatalf("archive stat: %v", err)
+	}
+
+	file, err := os.Open(archive)
+	if err != nil {
+		t.Fatalf("open archive: %v", err)
+	}
+	defer file.Close()
+
+	gzr, err := gzip.NewReader(file)
+	if err != nil {
+		t.Fatalf("gzip reader: %v", err)
+	}
+	defer gzr.Close()
+
+	tr := tar.NewReader(gzr)
+	var names []string
+	var auditJSON string
+	for {
+		hdr, err := tr.Next()
+		if errors.Is(err, io.EOF) {
+			break
+		}
+		if err != nil {
+			t.Fatalf("read tar entry: %v", err)
+		}
+		names = append(names, hdr.Name)
+		if contains(hdr.Name, "/export/bee-audit.json") { // keep the bundled audit payload for the content checks below
+			body, err := io.ReadAll(tr)
+			if err != nil {
+				t.Fatalf("read audit entry: %v", err)
+			}
+			auditJSON = string(body)
+		}
+	}
+
+	for _, want := range []string{
+		"/system/ip-link.txt",
+		"/system/ip-link-stats.txt",
+		"/system/kernel-aer-nvidia.txt",
+		"/system/lspci-nvidia-bridges-vv.txt",
+		"/system/pcie-aer-sysfs.txt",
+		"/system/ethtool-info.txt",
+		"/system/ethtool-link.txt",
+		"/system/ethtool-module.txt",
+		"/system/mstflint-query.txt",
+	} {
+		var found bool
+		for _, name := range names {
+			if contains(name, want) {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Fatalf("support bundle missing %s, names=%v", want, names)
+		}
+	}
+
+	var foundRaw bool
+	for _, name := range names {
+		if contains(name, "/export/bee-sat/memory-run/verbose.log") {
+			foundRaw = true
+		}
+		if contains(name, "/export/bee-sat/memory-run.tar.gz") {
+			t.Fatalf("support bundle should not contain nested SAT archive: %s", name)
+		}
+	}
+	if !foundRaw {
+		t.Fatalf("support bundle missing raw SAT log, names=%v", names)
+	}
+	if contains(auditJSON, "Virtual HDisk0") || contains(auditJSON, "Co-processor") { // bare value: the compact fixture would never match a spaced "key": "value" pattern
+		t.Fatalf("support bundle should normalize ignored devices:\n%s", auditJSON)
+	}
+	if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
+		t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
+	}
+}
+
+// TestMainBanner checks the banner App.MainBanner renders from an audit JSON
+// file on disk plus the interfaces reported by the network layer.
+//
+// DefaultAuditJSONPath is redirected into a temp dir for the duration of the
+// test and restored via t.Cleanup.
+func TestMainBanner(t *testing.T) {
+	tmp := t.TempDir()
+	oldAuditPath := DefaultAuditJSONPath
+	DefaultAuditJSONPath = filepath.Join(tmp, "audit.json")
+	t.Cleanup(func() { DefaultAuditJSONPath = oldAuditPath })
+
+	trueValue := true
+	manufacturer := "Dell"
+	product := "PowerEdge R760"
+	cpuModel := "Intel Xeon Gold 6430"
+	memoryType := "DDR5"
+	memorySerialA := "DIMM-A"
+	memorySerialB := "DIMM-B"
+	storageSerialA := "DISK-A"
+	storageSerialB := "DISK-B"
+	gpuClass := "VideoController"
+	gpuModel := "NVIDIA H100"
+
+	// Every component class appears twice so each banner line has to show an
+	// aggregated count or total rather than a single entry.
+	payload := schema.HardwareIngestRequest{
+		Hardware: schema.HardwareSnapshot{
+			Board: schema.HardwareBoard{
+				Manufacturer: &manufacturer,
+				ProductName:  &product,
+				SerialNumber: "SRV123",
+			},
+			CPUs: []schema.HardwareCPU{
+				{Model: &cpuModel},
+				{Model: &cpuModel},
+			},
+			Memory: []schema.HardwareMemory{
+				{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialA},
+				{Present: &trueValue, SizeMB: intPtr(524288), Type: &memoryType, SerialNumber: &memorySerialB},
+			},
+			Storage: []schema.HardwareStorage{
+				{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialA},
+				{Present: &trueValue, SizeGB: intPtr(3840), SerialNumber: &storageSerialB},
+			},
+			PCIeDevices: []schema.HardwarePCIeDevice{
+				{DeviceClass: &gpuClass, Model: &gpuModel},
+				{DeviceClass: &gpuClass, Model: &gpuModel},
+			},
+		},
+	}
+
+	raw, err := json.Marshal(payload)
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	if err := os.WriteFile(DefaultAuditJSONPath, raw, 0644); err != nil {
+		t.Fatalf("write audit json: %v", err)
+	}
+
+	// The fake network layer supplies the addresses expected on the "IP:" line.
+	a := &App{
+		network: fakeNetwork{
+			listInterfacesFn: func() ([]platform.InterfaceInfo, error) {
+				return []platform.InterfaceInfo{
+					{Name: "eth0", IPv4: []string{"10.0.0.10"}},
+					{Name: "eth1", IPv4: []string{"192.168.1.10"}},
+				}, nil
+			},
+		},
+	}
+
+	got := a.MainBanner()
+	// Expected lines: 2 x 524288 MB DIMMs render as "1.0 TB" and
+	// 2 x 3840 GB drives render as "7.5 TB".
+	for _, want := range []string{
+		"System: Dell PowerEdge R760 | S/N SRV123",
+		"CPU: 2 x Intel Xeon Gold 6430",
+		"Memory: 1.0 TB DDR5 (2 DIMMs)",
+		"Storage: 2 drives / 7.5 TB",
+		"GPU: 2 x NVIDIA H100",
+		"IP: 10.0.0.10, 192.168.1.10",
+	} {
+		if !contains(got, want) {
+			t.Fatalf("banner missing %q:\n%s", want, got)
+		}
+	}
+}
+
+// TestRuntimeHealthResultUsesAMDLabels checks that App.RuntimeHealthResult
+// labels the driver/compute readiness flags with AMD wording ("AMDGPU",
+// "ROCm SMI") and does not mention CUDA when the SAT layer reports the GPU
+// vendor as "amd".
+//
+// DefaultRuntimeJSONPath is redirected into a temp dir and restored via t.Cleanup.
+func TestRuntimeHealthResultUsesAMDLabels(t *testing.T) {
+	tmp := t.TempDir()
+	oldRuntimePath := DefaultRuntimeJSONPath
+	DefaultRuntimeJSONPath = filepath.Join(tmp, "runtime-health.json")
+	t.Cleanup(func() { DefaultRuntimeJSONPath = oldRuntimePath })
+
+	// The stored health record uses the vendor-neutral DriverReady/CUDAReady
+	// fields; only the rendered labels are expected to change for AMD.
+	raw, err := json.Marshal(schema.RuntimeHealth{
+		Status:        "OK",
+		ExportDir:     "/appdata/bee/export",
+		DriverReady:   true,
+		CUDAReady:     true,
+		NetworkStatus: "OK",
+	})
+	if err != nil {
+		t.Fatalf("marshal runtime health: %v", err)
+	}
+	if err := os.WriteFile(DefaultRuntimeJSONPath, raw, 0644); err != nil {
+		t.Fatalf("write runtime health: %v", err)
+	}
+
+	a := &App{
+		sat: fakeSAT{
+			detectVendorFn: func() string { return "amd" },
+		},
+	}
+
+	result := a.RuntimeHealthResult()
+	if !contains(result.Body, "AMDGPU ready: true") {
+		t.Fatalf("body missing AMD driver label:\n%s", result.Body)
+	}
+	if !contains(result.Body, "ROCm SMI ready: true") {
+		t.Fatalf("body missing ROCm label:\n%s", result.Body)
+	}
+	if contains(result.Body, "CUDA ready") {
+		t.Fatalf("body should not mention CUDA on AMD:\n%s", result.Body)
+	}
+}
+
+// intPtr returns a pointer to a fresh int holding v, for filling optional
+// *int fields in test fixtures.
+func intPtr(v int) *int {
+	n := v
+	return &n
+}
+
 func contains(haystack, needle string) bool {
 	return len(needle) == 0 || (len(haystack) >= len(needle) && (haystack == needle || containsAt(haystack, needle)))
 }
--- a/audit/internal/app/atomic_write.go
+++ b/audit/internal/app/atomic_write.go
@@ -0,0 +1,48 @@
+package app
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// atomicWriteFile replaces the file at path with data without exposing a
+// partially written target: the bytes are written to "<path>.tmp", fsynced,
+// and the temp file is then renamed over path.
+//
+// The parent directory is created with mode 0755 when missing. perm is the
+// mode passed to OpenFile for the temp file. On any failure the temp file is
+// removed and an error naming the failing step (wrapping the cause) is
+// returned. The final sync of the parent directory is best-effort.
+func atomicWriteFile(path string, data []byte, perm os.FileMode) error {
+	parent := filepath.Dir(path)
+	if err := os.MkdirAll(parent, 0755); err != nil {
+		return fmt.Errorf("mkdir %s: %w", parent, err)
+	}
+
+	staging := path + ".tmp"
+	out, err := os.OpenFile(staging, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
+	if err != nil {
+		return fmt.Errorf("open temp %s: %w", staging, err)
+	}
+
+	committed := false
+	defer func() {
+		// A second Close after the explicit one below only returns an error,
+		// which is deliberately ignored.
+		_ = out.Close()
+		if committed {
+			return
+		}
+		_ = os.Remove(staging)
+	}()
+
+	if _, err := out.Write(data); err != nil {
+		return fmt.Errorf("write temp %s: %w", staging, err)
+	}
+	if err := out.Sync(); err != nil {
+		return fmt.Errorf("sync temp %s: %w", staging, err)
+	}
+	if err := out.Close(); err != nil {
+		return fmt.Errorf("close temp %s: %w", staging, err)
+	}
+	if err := os.Rename(staging, path); err != nil {
+		return fmt.Errorf("rename %s -> %s: %w", staging, path, err)
+	}
+	committed = true
+
+	// Best-effort: sync the directory so the rename itself is persisted.
+	if d, err := os.Open(parent); err == nil {
+		_ = d.Sync()
+		_ = d.Close()
+	}
+	return nil
+}
--- a/audit/internal/app/atomic_write_test.go
+++ b/audit/internal/app/atomic_write_test.go
@@ -0,0 +1,71 @@
+package app
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+// TestAtomicWriteFileReplacesTargetWithoutLeavingTmp seeds an existing target
+// file, overwrites it through atomicWriteFile, and checks that the target
+// holds the new content and that no "<path>.tmp" file is left behind.
+func TestAtomicWriteFileReplacesTargetWithoutLeavingTmp(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "bee-audit.json")
+	if err := os.WriteFile(path, []byte("old\n"), 0644); err != nil {
+		t.Fatalf("seed file: %v", err)
+	}
+
+	if err := atomicWriteFile(path, []byte("new\n"), 0644); err != nil {
+		t.Fatalf("atomicWriteFile: %v", err)
+	}
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read final: %v", err)
+	}
+	if string(raw) != "new\n" {
+		t.Fatalf("final content=%q want %q", string(raw), "new\n")
+	}
+	// The staging file must have been renamed away, not copied.
+	if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
+		t.Fatalf("tmp file should be absent after success, err=%v", err)
+	}
+}
+
+// TestRunRuntimePreflightWritesAtomically runs App.RunRuntimePreflight with a
+// "file:<path>" destination and a fake runtime collector, then checks that the
+// returned path equals the requested one, that no "<path>.tmp" file remains,
+// and that the written file decodes to the collected RuntimeHealth.
+func TestRunRuntimePreflightWritesAtomically(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "runtime-health.json")
+	a := &App{
+		runtime: fakeRuntime{
+			collectFn: func(exportDir string) (schema.RuntimeHealth, error) {
+				return schema.RuntimeHealth{
+					Status:      "OK",
+					ExportDir:   exportDir,
+					DriverReady: true,
+					CUDAReady:   true,
+				}, nil
+			},
+		},
+	}
+
+	got, err := a.RunRuntimePreflight("file:" + path)
+	if err != nil {
+		t.Fatalf("RunRuntimePreflight: %v", err)
+	}
+	if got != path {
+		t.Fatalf("path=%q want %q", got, path)
+	}
+	// Absence of the staging file is the observable sign of the atomic write path.
+	if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
+		t.Fatalf("tmp file should be absent after success, err=%v", err)
+	}
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read runtime file: %v", err)
+	}
+	var health schema.RuntimeHealth
+	if err := json.Unmarshal(raw, &health); err != nil {
+		t.Fatalf("json unmarshal: %v", err)
+	}
+	if health.Status != "OK" {
+		t.Fatalf("status=%q want OK", health.Status)
+	}
+}
--- a/audit/internal/app/component_status_db.go
+++ b/audit/internal/app/component_status_db.go
@@ -0,0 +1,268 @@
+package app
+
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+)
+
+// ComponentStatusDB is a persistent store of hardware component health records,
+// kept in memory and rewritten to a JSON file after every Record call.
+// Records are keyed by component identity strings (e.g. "pcie:0000:c8:00.0", "storage:nvme0n1").
+// Each record's history is append-only, and its status only ever moves to a
+// higher severity: once a component is marked Warning or Critical, subsequent
+// OK entries do not downgrade it.
+// NOTE(review): no reset operation is defined in this file — confirm where a
+// component is expected to be cleared.
+type ComponentStatusDB struct {
+	path    string                            // JSON file the records are loaded from and saved to
+	mu      sync.Mutex                        // guards records
+	records map[string]*ComponentStatusRecord // current record per component key
+}
+
+// ComponentStatusRecord holds the current and historical health of one hardware component.
+//
+// LastCheckedAt is updated on every observation; LastChangedAt only when the
+// status is first set or raised. ErrorSummary holds the detail text of the
+// observation that last raised the status.
+type ComponentStatusRecord struct {
+	ComponentKey  string                 `json:"component_key"`
+	Status        string                 `json:"status"` // "OK", "Warning", "Critical", "Unknown"
+	LastCheckedAt time.Time              `json:"last_checked_at"`
+	LastChangedAt time.Time              `json:"last_changed_at"`
+	ErrorSummary  string                 `json:"error_summary,omitempty"`
+	History       []ComponentStatusEntry `json:"history"`
+}
+
+// ComponentStatusEntry is one observation written to a component's history.
+// Entries are appended for every observation, including ones that do not
+// change the component's effective status.
+type ComponentStatusEntry struct {
+	At     time.Time `json:"at"`     // observation time (UTC)
+	Status string    `json:"status"` // status reported by this observation
+	Source string    `json:"source"` // e.g. "sat:nvidia", "sat:memory", "watchdog:kmsg"
+	Detail string    `json:"detail,omitempty"`
+}
+
+// OpenComponentStatusDB opens (or creates) the JSON status DB at path.
+//
+// The parent directory is created when missing. A missing or empty file
+// yields an empty DB. A file that does not decode as a JSON array of records
+// is ignored (the DB starts empty) rather than reported as an error; any
+// other read error is returned.
+func OpenComponentStatusDB(path string) (*ComponentStatusDB, error) {
+	store := &ComponentStatusDB{
+		path:    path,
+		records: map[string]*ComponentStatusRecord{},
+	}
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		return nil, err
+	}
+
+	raw, err := os.ReadFile(path)
+	switch {
+	case err == nil:
+		// fall through to decoding
+	case os.IsNotExist(err):
+		return store, nil
+	default:
+		return nil, err
+	}
+	if len(raw) == 0 {
+		return store, nil
+	}
+
+	var loaded []ComponentStatusRecord
+	if json.Unmarshal(raw, &loaded) != nil {
+		// Undecodable content is tolerated: start with an empty DB.
+		return store, nil
+	}
+	for i := range loaded {
+		store.records[loaded[i].ComponentKey] = &loaded[i]
+	}
+	return store, nil
+}
+
+// Record writes one observation for the given component key.
+// source is a short label like "sat:nvidia" or "watchdog:kmsg".
+// status is "OK", "Warning", "Critical", or "Unknown".
+//
+// The observation is always appended to the history and LastCheckedAt is
+// always refreshed. The record's status only changes when it is unset or when
+// the new status has a strictly higher severity, so OK never downgrades an
+// existing Warning or Critical. A nil receiver or a blank key is a no-op.
+// The DB file is rewritten after each call; a save error is discarded.
+func (db *ComponentStatusDB) Record(key, source, status, detail string) {
+	if db == nil {
+		return
+	}
+	if strings.TrimSpace(key) == "" {
+		return
+	}
+	db.mu.Lock()
+	defer db.mu.Unlock()
+
+	stamp := time.Now().UTC()
+	rec, known := db.records[key]
+	if !known {
+		rec = &ComponentStatusRecord{ComponentKey: key}
+		db.records[key] = rec
+	}
+	rec.LastCheckedAt = stamp
+	rec.History = append(rec.History, ComponentStatusEntry{
+		At:     stamp,
+		Status: status,
+		Source: source,
+		Detail: detail,
+	})
+
+	// Status merge: only a strictly higher severity replaces the current one.
+	switch {
+	case componentSeverity(status) > componentSeverity(rec.Status):
+		rec.Status = status
+		rec.LastChangedAt = stamp
+		rec.ErrorSummary = detail
+	case rec.Status == "":
+		// First observation with a severity-0 status (e.g. "Unknown").
+		rec.Status = status
+		rec.LastChangedAt = stamp
+	}
+
+	_ = db.saveLocked()
+}
+
+// Get returns the current record for a component key.
+func (db *ComponentStatusDB) Get(key string) (ComponentStatusRecord, bool) {
+	if db == nil {
+		return ComponentStatusRecord{}, false
+	}
+	db.mu.Lock()
+	defer db.mu.Unlock()
+	r, ok := db.records[key]
+	if !ok {
+		return ComponentStatusRecord{}, false
+	}
+	return *r, true
+}
+
+// All returns a snapshot of all records as shallow copies, in map iteration
+// (i.e. unspecified) order. A nil receiver yields nil.
+func (db *ComponentStatusDB) All() []ComponentStatusRecord {
+	if db == nil {
+		return nil
+	}
+	db.mu.Lock()
+	defer db.mu.Unlock()
+
+	snapshot := make([]ComponentStatusRecord, len(db.records))
+	next := 0
+	for _, rec := range db.records {
+		snapshot[next] = *rec
+		next++
+	}
+	return snapshot
+}
+
+// saveLocked serialises every record as an indented JSON array and writes it
+// to db.path. The caller must hold db.mu.
+//
+// The file is replaced through atomicWriteFile (temp file + rename) so that a
+// crash or power loss mid-write cannot leave truncated JSON behind; a
+// truncated file would be discarded by OpenComponentStatusDB on the next
+// start, losing the whole history.
+func (db *ComponentStatusDB) saveLocked() error {
+	records := make([]ComponentStatusRecord, 0, len(db.records))
+	for _, r := range db.records {
+		records = append(records, *r)
+	}
+	data, err := json.MarshalIndent(records, "", "  ")
+	if err != nil {
+		return err
+	}
+	return atomicWriteFile(db.path, data, 0644)
+}
+
+// componentSeverity ranks a DB status so that higher values win when merging:
+// Critical=3, Warning=2, OK=1, anything else (including "Unknown" and "")=0.
+// Surrounding whitespace is ignored; matching is case-sensitive.
+func componentSeverity(status string) int {
+	rank := 0
+	switch strings.TrimSpace(status) {
+	case "OK":
+		rank = 1
+	case "Warning":
+		rank = 2
+	case "Critical":
+		rank = 3
+	}
+	return rank
+}
+
+// ApplySATResultToDB reads a SAT summary.txt from the run directory next to archivePath
+// and writes component status records to db for the given SAT target.
+// archivePath may be either a bare .tar.gz path or "Archive written to /path/foo.tar.gz".
+//
+// The run directory is the archive path with its ".tar.gz" suffix removed.
+// Nothing is recorded when db is nil, the path is blank, summary.txt cannot
+// be read, or it has no overall_status. Targets not listed in the switch
+// below are ignored.
+func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) {
+	if db == nil || strings.TrimSpace(archivePath) == "" {
+		return
+	}
+	archivePath = extractArchivePath(archivePath)
+	if archivePath == "" {
+		return
+	}
+	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
+	data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
+	if err != nil {
+		return
+	}
+	kv := parseSATKV(string(data))
+	overall := strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
+	if overall == "" {
+		return
+	}
+
+	source := "sat:" + target
+	dbStatus := satStatusToDBStatus(overall)
+
+	// Map SAT target to component keys.
+	switch target {
+	case "nvidia", "nvidia-targeted-stress", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-interconnect", "nvidia-bandwidth", "amd", "nvidia-stress",
+		"amd-stress", "amd-mem", "amd-bandwidth":
+		// GPU results are keyed by SAT target, not by PCIe address.
+		db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall)
+	case "memory", "memory-stress", "sat-stress":
+		db.Record("memory:all", source, dbStatus, target+" SAT: "+overall)
+	case "cpu", "platform-stress":
+		db.Record("cpu:all", source, dbStatus, target+" SAT: "+overall)
+	case "storage":
+		// Try to record per-device if available in summary.
+		// Keys look like "<device>_<step>_status"; the device name is the
+		// text before the first underscore.
+		// NOTE(review): kv is a map, so the order in which steps are recorded
+		// (and therefore history order) varies between runs.
+		recordedAny := false
+		for key, val := range kv {
+			if !strings.HasSuffix(key, "_status") || key == "overall_status" {
+				continue
+			}
+			base := strings.TrimSuffix(key, "_status")
+			idx := strings.Index(base, "_")
+			if idx <= 0 {
+				continue
+			}
+			devName := base[:idx]
+			devStatus := satStatusToDBStatus(strings.ToUpper(strings.TrimSpace(val)))
+			db.Record("storage:"+devName, source, devStatus, "storage SAT: "+val)
+			recordedAny = true
+		}
+		// No per-device lines: fall back to one record covering all storage.
+		if !recordedAny {
+			db.Record("storage:all", source, dbStatus, "storage SAT: "+overall)
+		}
+	}
+}
+
+// satStatusToDBStatus maps an upper-cased SAT status to a DB status:
+// "OK" stays "OK", "FAILED" becomes "Warning", and everything else
+// (including "PARTIAL" and "UNSUPPORTED") becomes "Unknown".
+func satStatusToDBStatus(overall string) string {
+	if overall == "OK" {
+		return "OK"
+	}
+	if overall == "FAILED" {
+		return "Warning"
+	}
+	return "Unknown"
+}
+
+// ExtractArchivePath extracts a bare .tar.gz path from a string that may be
+// "Archive written to /path/foo.tar.gz" or already a bare path.
+// It is the exported form of extractArchivePath: when the trimmed input ends
+// in ".tar.gz" the last whitespace-separated token is returned, otherwise the
+// trimmed input is returned unchanged.
+func ExtractArchivePath(s string) string {
+	return extractArchivePath(s)
+}
+
+// ReadSATOverallStatus reads the overall_status value from the summary.txt
+// file located in the run directory alongside archivePath (the archive path
+// with its ".tar.gz" suffix removed). The value is returned upper-cased.
+//
+// archivePath is normalised the same way ApplySATResultToDB normalises it, so
+// both a bare path and the "Archive written to /path/foo.tar.gz" message (or a
+// path with surrounding whitespace) are accepted.
+// Returns "" if the path is blank or the file cannot be read.
+func ReadSATOverallStatus(archivePath string) string {
+	archivePath = extractArchivePath(archivePath)
+	if archivePath == "" {
+		return ""
+	}
+	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
+	data, err := os.ReadFile(filepath.Join(runDir, "summary.txt"))
+	if err != nil {
+		return ""
+	}
+	kv := parseSATKV(string(data))
+	return strings.ToUpper(strings.TrimSpace(kv["overall_status"]))
+}
+
+// extractArchivePath trims s and, when the result ends in ".tar.gz", returns
+// its last whitespace-separated token (so "Archive written to /x/y.tar.gz"
+// yields "/x/y.tar.gz"). Any other input is returned trimmed but otherwise
+// unchanged.
+func extractArchivePath(s string) string {
+	trimmed := strings.TrimSpace(s)
+	if !strings.HasSuffix(trimmed, ".tar.gz") {
+		return trimmed
+	}
+	tokens := strings.Fields(trimmed)
+	if len(tokens) == 0 {
+		return trimmed
+	}
+	return tokens[len(tokens)-1]
+}
+
+// parseSATKV parses "key=value" lines into a map. Each line is split at its
+// first "="; keys and values are whitespace-trimmed. Lines without "=" are
+// skipped, and a later duplicate key overwrites an earlier one.
+func parseSATKV(raw string) map[string]string {
+	pairs := map[string]string{}
+	for _, line := range strings.Split(raw, "\n") {
+		line = strings.TrimSpace(line)
+		sep := strings.Index(line, "=")
+		if sep < 0 {
+			continue
+		}
+		pairs[strings.TrimSpace(line[:sep])] = strings.TrimSpace(line[sep+1:])
+	}
+	return pairs
+}
--- a/audit/internal/app/sat_overlay.go
+++ b/audit/internal/app/sat_overlay.go
@@ -0,0 +1,421 @@
+package app
+
+import (
+	"os"
+	"path/filepath"
+	"strconv"
+	"sort"
+	"strings"
+
+	"bee/audit/internal/schema"
+)
+
+// applyLatestSATStatuses overlays the results of the most recent SAT runs
+// found under baseDir onto the component statuses in snap, then overlays the
+// persistent component status DB.
+//
+// Run directories are located by name prefix ("gpu-amd-", "gpu-nvidia-",
+// "memory-", "cpu-", "storage-"). For NVIDIA the vendor-wide result is
+// applied first and the per-GPU status files second. A nil snap or blank
+// baseDir is a no-op; db may be nil.
+func applyLatestSATStatuses(snap *schema.HardwareSnapshot, baseDir string, db *ComponentStatusDB) {
+	if snap == nil || strings.TrimSpace(baseDir) == "" {
+		return
+	}
+	if summary, ok := loadLatestSATSummary(baseDir, "gpu-amd-"); ok {
+		applyGPUVendorSAT(snap.PCIeDevices, "amd", summary)
+	}
+	if summary, ok := loadLatestSATSummary(baseDir, "gpu-nvidia-"); ok {
+		applyGPUVendorSAT(snap.PCIeDevices, "nvidia", summary)
+		applyNvidiaPerGPUStatus(snap.PCIeDevices, baseDir)
+	}
+	if summary, ok := loadLatestSATSummary(baseDir, "memory-"); ok {
+		applyMemorySAT(snap.Memory, summary)
+	}
+	if summary, ok := loadLatestSATSummary(baseDir, "cpu-"); ok {
+		applyCPUSAT(snap.CPUs, summary)
+	}
+	if summary, ok := loadLatestSATSummary(baseDir, "storage-"); ok {
+		applyStorageSAT(snap.Storage, summary)
+	}
+	// Apply unified component status DB — overlaid last so it can only upgrade severity.
+	applyComponentStatusDB(snap, db)
+}
+
+// nvidiaPerGPUStatus is the content of one gpu-*-status.txt file from an
+// NVIDIA SAT run directory.
+type nvidiaPerGPUStatus struct {
+	runStatus string // upper-cased "run_status" value
+	reason    string // trimmed "reason" value; may be empty
+}
+
+// applyNvidiaPerGPUStatus overlays per-GPU results from the latest NVIDIA SAT
+// run onto the matching PCIe devices. A device matches when its telemetry
+// carries an "nvidia_gpu_index" equal to the gpu_index of a status file.
+// The per-GPU reason (or "nvidia GPU SAT" when blank) is used as the label
+// for the error description, and at equal severity it replaces the existing
+// description.
+func applyNvidiaPerGPUStatus(devs []schema.HardwarePCIeDevice, baseDir string) {
+	perGPU, runAt, found := loadLatestNvidiaPerGPUStatus(baseDir)
+	if !found {
+		return
+	}
+	for i := range devs {
+		dev := &devs[i]
+		if dev.Telemetry == nil {
+			continue
+		}
+		rawIndex, present := dev.Telemetry["nvidia_gpu_index"]
+		if !present {
+			continue
+		}
+		gpuIndex, valid := telemetryInt(rawIndex)
+		if !valid {
+			continue
+		}
+		entry, known := perGPU[gpuIndex]
+		if !known {
+			continue
+		}
+		label := firstNonEmpty(strings.TrimSpace(entry.reason), "nvidia GPU SAT")
+		status, description, usable := satKeyStatus(entry.runStatus, label)
+		if !usable {
+			continue
+		}
+		mergeComponentStatusPreferDetail(&dev.HardwareComponentStatus, runAt, status, description)
+	}
+}
+
+// loadLatestNvidiaPerGPUStatus reads the gpu-*-status.txt files of the latest
+// NVIDIA SAT run under baseDir.
+//
+// It returns the statuses keyed by gpu_index, the run's run_at_utc value from
+// summary.txt, and whether any usable status file was found.
+//
+// The latest run is chosen by globbing "gpu-nvidia-*/summary.txt", the same
+// selection loadLatestSATSummary uses. Globbing "gpu-nvidia-*" alone would
+// also match the run's sibling "gpu-nvidia-….tar.gz" archive, which sorts
+// after the run directory and would be picked as the "latest run", so no
+// per-GPU status would ever be loaded once an archive exists.
+func loadLatestNvidiaPerGPUStatus(baseDir string) (map[int]nvidiaPerGPUStatus, string, bool) {
+	summaries, err := filepath.Glob(filepath.Join(baseDir, "gpu-nvidia-*", "summary.txt"))
+	if err != nil || len(summaries) == 0 {
+		return nil, "", false
+	}
+	sort.Strings(summaries)
+	summaryPath := summaries[len(summaries)-1]
+	runDir := filepath.Dir(summaryPath)
+
+	summaryRaw, err := os.ReadFile(summaryPath)
+	if err != nil {
+		return nil, "", false
+	}
+	summaryKV := parseKeyValueSummary(string(summaryRaw))
+	runAtUTC := strings.TrimSpace(summaryKV["run_at_utc"])
+
+	files, err := filepath.Glob(filepath.Join(runDir, "gpu-*-status.txt"))
+	if err != nil || len(files) == 0 {
+		return nil, "", false
+	}
+	out := make(map[int]nvidiaPerGPUStatus, len(files))
+	for _, file := range files {
+		raw, err := os.ReadFile(file)
+		if err != nil {
+			continue
+		}
+		kv := parseKeyValueSummary(string(raw))
+		// Files without a numeric gpu_index cannot be matched to a device.
+		idx, err := strconv.Atoi(strings.TrimSpace(kv["gpu_index"]))
+		if err != nil {
+			continue
+		}
+		out[idx] = nvidiaPerGPUStatus{
+			runStatus: strings.ToUpper(strings.TrimSpace(kv["run_status"])),
+			reason:    strings.TrimSpace(kv["reason"]),
+		}
+	}
+	if len(out) == 0 {
+		return nil, "", false
+	}
+	return out, runAtUTC, true
+}
+
+// telemetryInt converts a telemetry value to int. It accepts int, int32,
+// int64, float64 (truncated by conversion) and decimal strings (whitespace
+// trimmed). Any other type, or an unparsable string, yields (0, false).
+func telemetryInt(v any) (int, bool) {
+	if text, isString := v.(string); isString {
+		parsed, err := strconv.Atoi(strings.TrimSpace(text))
+		if err != nil {
+			return 0, false
+		}
+		return parsed, true
+	}
+	switch num := v.(type) {
+	case int:
+		return num, true
+	case int32:
+		return int(num), true
+	case int64:
+		return int(num), true
+	case float64:
+		return int(num), true
+	}
+	return 0, false
+}
+
+// satSummary is the parsed content of one SAT run's summary.txt.
+type satSummary struct {
+	runAtUTC string            // trimmed "run_at_utc" value
+	overall  string            // upper-cased "overall_status" value
+	kv       map[string]string // every key/value pair from the file
+}
+
+func loadLatestSATSummary(baseDir, prefix string) (satSummary, bool) {
+	matches, err := filepath.Glob(filepath.Join(baseDir, prefix+"*/summary.txt"))
+	if err != nil || len(matches) == 0 {
+		return satSummary{}, false
+	}
+	sort.Strings(matches)
+	raw, err := os.ReadFile(matches[len(matches)-1])
+	if err != nil {
+		return satSummary{}, false
+	}
+	kv := parseKeyValueSummary(string(raw))
+	return satSummary{
+		runAtUTC: strings.TrimSpace(kv["run_at_utc"]),
+		overall:  strings.ToUpper(strings.TrimSpace(kv["overall_status"])),
+		kv:       kv,
+	}, true
+}
+
+// applyGPUVendorSAT applies the overall result of a vendor's GPU SAT run to
+// every PCIe device that matchesGPUVendor accepts for that vendor. Nothing is
+// applied when the overall status is not a recognised SAT status.
+func applyGPUVendorSAT(devs []schema.HardwarePCIeDevice, vendor string, summary satSummary) {
+	status, description, usable := satSummaryStatus(summary, vendor+" GPU SAT")
+	if !usable {
+		return
+	}
+	for i := range devs {
+		if matchesGPUVendor(devs[i], vendor) {
+			mergeComponentStatus(&devs[i].HardwareComponentStatus, summary.runAtUTC, status, description)
+		}
+	}
+}
+
+// applyMemorySAT applies the overall memory SAT result to every DIMM; the
+// summary carries no per-DIMM breakdown that is used here.
+func applyMemorySAT(dimms []schema.HardwareMemory, summary satSummary) {
+	status, description, usable := satSummaryStatus(summary, "memory SAT")
+	if !usable {
+		return
+	}
+	for i := range dimms {
+		dimm := &dimms[i]
+		mergeComponentStatus(&dimm.HardwareComponentStatus, summary.runAtUTC, status, description)
+	}
+}
+
+// applyCPUSAT applies the overall CPU SAT result to every CPU in the snapshot.
+func applyCPUSAT(cpus []schema.HardwareCPU, summary satSummary) {
+	status, description, usable := satSummaryStatus(summary, "CPU SAT")
+	if !usable {
+		return
+	}
+	for i := range cpus {
+		cpu := &cpus[i]
+		mergeComponentStatus(&cpu.HardwareComponentStatus, summary.runAtUTC, status, description)
+	}
+}
+
+// applyStorageSAT applies per-device storage SAT results to the disks whose
+// "linux_device" telemetry (e.g. "/dev/nvme0n1") has a base name matching a
+// device in the summary. Disks without a device path, or without a result,
+// are left untouched.
+func applyStorageSAT(disks []schema.HardwareStorage, summary satSummary) {
+	byDevice := parseStorageSATStatus(summary)
+	if len(byDevice) == 0 {
+		return
+	}
+	for i := range disks {
+		devPath, _ := disks[i].Telemetry["linux_device"].(string)
+		devPath = strings.TrimSpace(devPath)
+		// Check the path, not its base name: filepath.Base("") is ".", so a
+		// base-name emptiness check never fires.
+		if devPath == "" {
+			continue
+		}
+		result, ok := byDevice[filepath.Base(devPath)]
+		if !ok {
+			continue
+		}
+		mergeComponentStatus(&disks[i].HardwareComponentStatus, summary.runAtUTC, result.status, result.description)
+	}
+}
+
+// satStatusResult is the worst SAT step outcome recorded for one storage device.
+type satStatusResult struct {
+	status      string // component status: "OK", "Unknown" or "Critical"
+	description string // error description; empty unless the step failed
+	ok          bool   // true once a recognised step status has been stored
+}
+
+// parseStorageSATStatus collapses the "<device>_<step>_status" lines of a
+// storage SAT summary into one result per device, keeping the most severe
+// step. The device name is the text before the first underscore; the step
+// name (underscores replaced by dashes) is used in the description.
+//
+// Keys are visited in sorted order so that, when several steps share the
+// highest severity, the reported description is the same on every run rather
+// than depending on map iteration order.
+func parseStorageSATStatus(summary satSummary) map[string]satStatusResult {
+	keys := make([]string, 0, len(summary.kv))
+	for key := range summary.kv {
+		if key != "overall_status" && strings.HasSuffix(key, "_status") {
+			keys = append(keys, key)
+		}
+	}
+	sort.Strings(keys)
+
+	result := make(map[string]satStatusResult)
+	for _, key := range keys {
+		base := strings.TrimSuffix(key, "_status")
+		idx := strings.Index(base, "_")
+		if idx <= 0 {
+			// No "<device>_" prefix: not a per-device step line.
+			continue
+		}
+		devName := base[:idx]
+		step := strings.ReplaceAll(base[idx+1:], "_", "-")
+		value := summary.kv[key]
+		stepStatus, desc, ok := satKeyStatus(strings.ToUpper(strings.TrimSpace(value)), "storage "+step)
+		if !ok {
+			continue
+		}
+		current := result[devName]
+		if !current.ok || statusSeverity(stepStatus) > statusSeverity(current.status) {
+			result[devName] = satStatusResult{status: stepStatus, description: desc, ok: true}
+		}
+	}
+	return result
+}
+
+// satSummaryStatus converts a summary's overall status into a component
+// status and error description via satKeyStatus; label names the test in the
+// description.
+func satSummaryStatus(summary satSummary, label string) (string, string, bool) {
+	return satKeyStatus(summary.overall, label)
+}
+
+// satKeyStatus maps a raw SAT status (case-insensitive, whitespace ignored)
+// to a component status and error description. The boolean is false for an
+// unrecognised status.
+//
+//	OK                                        -> "OK", no description
+//	PARTIAL, UNSUPPORTED, CANCELED, CANCELLED -> "Unknown", no description
+//	FAILED                                    -> "Critical", "<label> failed"
+func satKeyStatus(rawStatus, label string) (string, string, bool) {
+	normalized := strings.ToUpper(strings.TrimSpace(rawStatus))
+	if normalized == "FAILED" {
+		return "Critical", label + " failed", true
+	}
+	if normalized == "OK" {
+		// No error description on success — error_description is for problems only.
+		return "OK", "", true
+	}
+	switch normalized {
+	case "PARTIAL", "UNSUPPORTED", "CANCELED", "CANCELLED":
+		// Tool couldn't run or test was incomplete — we can't assert hardware health.
+		return "Unknown", "", true
+	}
+	return "", "", false
+}
+
+// mergeComponentStatus overlays satStatus onto component when the component
+// has no status, has status "Unknown", or the new status is strictly more
+// severe. The error description is replaced only by a non-blank description.
+// StatusChangedAt and a history entry are written only when changedAt is
+// non-blank. A nil component or empty satStatus is a no-op.
+func mergeComponentStatus(component *schema.HardwareComponentStatus, changedAt, satStatus, description string) {
+	if component == nil || satStatus == "" {
+		return
+	}
+	existing := strings.TrimSpace(ptrString(component.Status))
+	replaceable := existing == "" || existing == "Unknown"
+	if !replaceable && statusSeverity(satStatus) <= statusSeverity(existing) {
+		return
+	}
+
+	component.Status = appStringPtr(satStatus)
+	if strings.TrimSpace(description) != "" {
+		component.ErrorDescription = appStringPtr(description)
+	}
+	if strings.TrimSpace(changedAt) == "" {
+		return
+	}
+	component.StatusChangedAt = appStringPtr(changedAt)
+	component.StatusHistory = append(component.StatusHistory, schema.HardwareStatusHistory{
+		Status:    satStatus,
+		ChangedAt: changedAt,
+		Details:   appStringPtr(description),
+	})
+}
+
+// mergeComponentStatusPreferDetail behaves like mergeComponentStatus, and
+// additionally lets a status of equal severity replace the current one when
+// it carries a non-blank description, so a more specific message can
+// supersede a generic one.
+func mergeComponentStatusPreferDetail(component *schema.HardwareComponentStatus, changedAt, satStatus, description string) {
+	if component == nil || satStatus == "" {
+		return
+	}
+	existing := strings.TrimSpace(ptrString(component.Status))
+	incoming, held := statusSeverity(satStatus), statusSeverity(existing)
+
+	switch {
+	case existing == "" || existing == "Unknown" || incoming > held:
+		mergeComponentStatus(component, changedAt, satStatus, description)
+	case incoming == held && strings.TrimSpace(description) != "":
+		component.Status = appStringPtr(satStatus)
+		component.ErrorDescription = appStringPtr(description)
+		if strings.TrimSpace(changedAt) == "" {
+			return
+		}
+		component.StatusChangedAt = appStringPtr(changedAt)
+		component.StatusHistory = append(component.StatusHistory, schema.HardwareStatusHistory{
+			Status:    satStatus,
+			ChangedAt: changedAt,
+			Details:   appStringPtr(description),
+		})
+	}
+}
+
+// statusSeverity ranks a component status for overlay decisions:
+// Critical=3, Warning=2, OK=1, Unknown=1, anything else=0.
+// Unlike componentSeverity, "Unknown" ranks the same as "OK" here so that it
+// does not override an OK reported by another source.
+func statusSeverity(status string) int {
+	switch strings.TrimSpace(status) {
+	case "Critical":
+		return 3
+	case "Warning":
+		return 2
+	case "OK", "Unknown":
+		return 1
+	}
+	return 0
+}
+
+// matchesGPUVendor reports whether dev should receive the GPU SAT result of
+// the given vendor ("amd" or "nvidia").
+//
+// The device class must contain one of "Controller", "Accelerator",
+// "Display" or "Video", and the lower-cased manufacturer must contain the
+// vendor's marker string.
+// NOTE(review): "Controller" is a broad match — any "...Controller" class
+// from the same manufacturer is accepted; confirm that is intended.
+func matchesGPUVendor(dev schema.HardwarePCIeDevice, vendor string) bool {
+	if dev.DeviceClass == nil {
+		return false
+	}
+	class := strings.TrimSpace(*dev.DeviceClass)
+	classMatches := false
+	for _, marker := range []string{"Controller", "Accelerator", "Display", "Video"} {
+		if strings.Contains(class, marker) {
+			classMatches = true
+			break
+		}
+	}
+	if !classMatches {
+		return false
+	}
+
+	maker := strings.ToLower(strings.TrimSpace(ptrString(dev.Manufacturer)))
+	if vendor == "amd" {
+		return strings.Contains(maker, "advanced micro devices") || strings.Contains(maker, "amd/ati")
+	}
+	if vendor == "nvidia" {
+		return strings.Contains(maker, "nvidia")
+	}
+	return false
+}
+
+// applyComponentStatusDB overlays every record of the persistent component
+// status DB onto the matching components in snap. Matching is by key prefix:
+//
+//	pcie:<bdf>      -> PCIe devices with that BDF (short BDFs get domain 0000)
+//	storage:all     -> every storage device
+//	storage:<name>  -> storage devices whose linux_device base name is <name>
+//	memory:*        -> every DIMM
+//	cpu:*           -> every CPU
+//
+// The record's error summary is passed on as the error description only for
+// problem statuses (Warning and above), so an OK record never produces an
+// error description. A nil snap or db is a no-op.
+func applyComponentStatusDB(snap *schema.HardwareSnapshot, db *ComponentStatusDB) {
+	if snap == nil || db == nil {
+		return
+	}
+	for _, rec := range db.All() {
+		key := rec.ComponentKey
+		status := dbStatusToSATStatus(rec.Status)
+		if status == "" {
+			continue
+		}
+		detail := rec.ErrorSummary
+		if statusSeverity(status) < statusSeverity("Warning") {
+			// error_description is for problems only; Record stores
+			// "<target> SAT: OK" style text for healthy components too.
+			detail = ""
+		}
+		ts := rec.LastChangedAt.UTC().Format("2006-01-02T15:04:05Z")
+
+		switch {
+		case strings.HasPrefix(key, "pcie:"):
+			bdf := strings.TrimPrefix(key, "pcie:")
+			bdf = strings.TrimPrefix(bdf, "gpu:") // strip sub-type if present
+			normalized := sanitizeBDFForLookup(bdf)
+			if normalized == "" {
+				break
+			}
+			// Keys such as "pcie:gpu:nvidia" carry a SAT target instead of a
+			// BDF; they normalise to a string that matches no device below.
+			for i := range snap.PCIeDevices {
+				if snap.PCIeDevices[i].BDF == nil {
+					continue
+				}
+				if sanitizeBDFForLookup(*snap.PCIeDevices[i].BDF) == normalized {
+					mergeComponentStatus(&snap.PCIeDevices[i].HardwareComponentStatus, ts, status, detail)
+				}
+			}
+		case strings.HasPrefix(key, "storage:"):
+			devName := strings.TrimPrefix(key, "storage:")
+			if devName == "all" {
+				for i := range snap.Storage {
+					mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
+				}
+			} else {
+				for i := range snap.Storage {
+					linuxDev, _ := snap.Storage[i].Telemetry["linux_device"].(string)
+					if filepath.Base(strings.TrimSpace(linuxDev)) == devName {
+						mergeComponentStatus(&snap.Storage[i].HardwareComponentStatus, ts, status, detail)
+					}
+				}
+			}
+		case strings.HasPrefix(key, "memory:"):
+			for i := range snap.Memory {
+				mergeComponentStatus(&snap.Memory[i].HardwareComponentStatus, ts, status, detail)
+			}
+		case strings.HasPrefix(key, "cpu:"):
+			for i := range snap.CPUs {
+				mergeComponentStatus(&snap.CPUs[i].HardwareComponentStatus, ts, status, detail)
+			}
+		}
+	}
+}
+
+// dbStatusToSATStatus converts ComponentStatusDB status strings to the format
+// expected by mergeComponentStatus (which uses "OK", "Warning", "Critical", "Unknown").
+// Surrounding whitespace is ignored and the canonical, trimmed value is
+// returned, so a stored " OK " is not written into a snapshot verbatim.
+// Any other value yields "".
+func dbStatusToSATStatus(s string) string {
+	switch trimmed := strings.TrimSpace(s); trimmed {
+	case "OK", "Warning", "Critical", "Unknown":
+		return trimmed
+	default:
+		return ""
+	}
+}
+
+// sanitizeBDFForLookup normalises a PCIe BDF address to a canonical lower-case form
+// suitable for comparison. "c8:00.0" → "0000:c8:00.0"; already-full BDFs are left as-is.
+// It returns "" for blank input, the literal "gpu", or input containing a
+// space or tab.
+func sanitizeBDFForLookup(bdf string) string {
+	addr := strings.ToLower(strings.TrimSpace(bdf))
+	switch {
+	case addr == "", addr == "gpu", strings.ContainsAny(addr, " \t"):
+		return ""
+	case strings.Count(addr, ":") == 1:
+		// Short form without a PCI domain: assume domain 0000.
+		return "0000:" + addr
+	}
+	return addr
+}
+
+// ptrString dereferences v, treating a nil pointer as the empty string.
+func ptrString(v *string) string {
+	if v != nil {
+		return *v
+	}
+	return ""
+}
+
+// appStringPtr returns a pointer to a fresh string holding value.
+func appStringPtr(value string) *string {
+	s := value
+	return &s
+}
--- a/audit/internal/app/sat_overlay_test.go
+++ b/audit/internal/app/sat_overlay_test.go
@@ -0,0 +1,109 @@
+package app
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+// TestApplyLatestSATStatusesMarksStorageByDevice checks that a storage SAT
+// summary with per-device step lines is applied device by device: the disk
+// whose step is OK becomes "OK" and the disk whose step FAILED becomes
+// "Critical", regardless of the overall status.
+func TestApplyLatestSATStatusesMarksStorageByDevice(t *testing.T) {
+	baseDir := t.TempDir()
+	runDir := filepath.Join(baseDir, "storage-20260325-161151")
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	// Step keys have the form "<device>_<step>_status".
+	raw := "run_at_utc=2026-03-25T16:11:51Z\nnvme0n1_nvme_smart_log_status=OK\nsda_smartctl_health_status=FAILED\noverall_status=FAILED\n"
+	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(raw), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Disks are matched to summary devices by the base name of linux_device.
+	nvme := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/nvme0n1"}}
+	usb := schema.HardwareStorage{Telemetry: map[string]any{"linux_device": "/dev/sda"}}
+	snap := schema.HardwareSnapshot{Storage: []schema.HardwareStorage{nvme, usb}}
+
+	applyLatestSATStatuses(&snap, baseDir, nil)
+
+	if snap.Storage[0].Status == nil || *snap.Storage[0].Status != "OK" {
+		t.Fatalf("nvme status=%v want OK", snap.Storage[0].Status)
+	}
+	if snap.Storage[1].Status == nil || *snap.Storage[1].Status != "Critical" {
+		t.Fatalf("sda status=%v want Critical", snap.Storage[1].Status)
+	}
+}
+
+// TestApplyLatestSATStatusesMarksAMDGPUs checks that a FAILED AMD GPU SAT
+// summary marks an AMD display-class PCIe device as "Critical".
+func TestApplyLatestSATStatusesMarksAMDGPUs(t *testing.T) {
+	baseDir := t.TempDir()
+	runDir := filepath.Join(baseDir, "gpu-amd-20260325-161436")
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	raw := "run_at_utc=2026-03-25T16:14:36Z\noverall_status=FAILED\n"
+	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(raw), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Class and manufacturer are the two fields the vendor match looks at.
+	class := "DisplayController"
+	manufacturer := "Advanced Micro Devices, Inc. [AMD/ATI]"
+	snap := schema.HardwareSnapshot{
+		PCIeDevices: []schema.HardwarePCIeDevice{{
+			DeviceClass:  &class,
+			Manufacturer: &manufacturer,
+		}},
+	}
+
+	applyLatestSATStatuses(&snap, baseDir, nil)
+
+	if snap.PCIeDevices[0].Status == nil || *snap.PCIeDevices[0].Status != "Critical" {
+		t.Fatalf("gpu status=%v want Critical", snap.PCIeDevices[0].Status)
+	}
+}
+
+// TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile checks that a
+// gpu-<n>-status.txt file in the latest NVIDIA run directory is applied to the
+// device whose "nvidia_gpu_index" telemetry matches, and that the per-GPU
+// reason replaces the generic vendor-wide error description.
+func TestApplyLatestSATStatusesMarksNvidiaGPUByPerGPUStatusFile(t *testing.T) {
+	baseDir := t.TempDir()
+	runDir := filepath.Join(baseDir, "gpu-nvidia-20260407-162123")
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte("run_at_utc=2026-04-07T16:21:23Z\noverall_status=FAILED\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	// Only GPU index 1 has a per-GPU status file.
+	if err := os.WriteFile(filepath.Join(runDir, "gpu-1-status.txt"), []byte("gpu_index=1\ngpu_name=NVIDIA H100 PCIe\nrun_status=FAILED\nreason=GPU requires reset\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	class := "VideoController"
+	manufacturer := "NVIDIA Corporation"
+	bdf0 := "0000:4b:00.0"
+	bdf1 := "0000:4f:00.0"
+	snap := schema.HardwareSnapshot{
+		PCIeDevices: []schema.HardwarePCIeDevice{
+			{
+				DeviceClass:  &class,
+				Manufacturer: &manufacturer,
+				BDF:          &bdf0,
+				Telemetry:    map[string]any{"nvidia_gpu_index": 0},
+			},
+			{
+				DeviceClass:  &class,
+				Manufacturer: &manufacturer,
+				BDF:          &bdf1,
+				Telemetry:    map[string]any{"nvidia_gpu_index": 1},
+			},
+		},
+	}
+
+	applyLatestSATStatuses(&snap, baseDir, nil)
+
+	if snap.PCIeDevices[1].Status == nil || *snap.PCIeDevices[1].Status != "Critical" {
+		t.Fatalf("gpu1 status=%v want Critical", snap.PCIeDevices[1].Status)
+	}
+	// The description is the per-GPU reason with the " failed" suffix that
+	// satKeyStatus appends to its label.
+	if snap.PCIeDevices[1].ErrorDescription == nil || *snap.PCIeDevices[1].ErrorDescription != "GPU requires reset failed" {
+		got := "<nil>"
+		if snap.PCIeDevices[1].ErrorDescription != nil {
+			got = *snap.PCIeDevices[1].ErrorDescription
+		}
+		t.Fatalf("gpu1 error=%q want per-gpu reason", got)
+	}
+}
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -0,0 +1,699 @@
+package app
+
+import (
+	"archive/tar"
+	"compress/gzip"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+	"time"
+)
+
+// supportBundleServices lists the systemd units whose `systemctl status`
+// output and journalctl logs are captured into the support bundle.
+var supportBundleServices = []string{
+	"bee-audit.service",
+	"bee-web.service",
+	"bee-network.service",
+	"bee-nvidia.service",
+	"bee-preflight.service",
+	"bee-selfheal.service",
+	"bee-selfheal.timer",
+	"bee-sshsetup.service",
+	"nvidia-dcgm.service",
+	"nvidia-fabricmanager.service",
+}
+
+// supportBundleCommands describes the diagnostic commands captured into the
+// support bundle. For each entry, cmd is executed and its combined output is
+// written to the file `name`, relative to the bundle staging root.
+//
+// Multi-line entries are POSIX sh scripts. They are written to degrade
+// gracefully: a missing tool or an empty result produces an explanatory line
+// instead of an empty file.
+var supportBundleCommands = []struct {
+	name string
+	cmd  []string
+}{
+	{name: "system/uname.txt", cmd: []string{"uname", "-a"}},
+	{name: "system/cmdline.txt", cmd: []string{"cat", "/proc/cmdline"}},
+	{name: "system/lsmod.txt", cmd: []string{"lsmod"}},
+	{name: "system/lspci-nn.txt", cmd: []string{"lspci", "-nn"}},
+	{name: "system/lspci-vvv.txt", cmd: []string{"lspci", "-vvv"}},
+	{name: "system/ip-addr.txt", cmd: []string{"ip", "addr"}},
+	{name: "system/ip-link.txt", cmd: []string{"ip", "-details", "link", "show"}},
+	{name: "system/ip-link-stats.txt", cmd: []string{"ip", "-s", "link", "show"}},
+	{name: "system/ip-route.txt", cmd: []string{"ip", "route"}},
+	{name: "system/mount.txt", cmd: []string{"mount"}},
+	{name: "system/df-h.txt", cmd: []string{"df", "-h"}},
+	{name: "system/dmesg.txt", cmd: []string{"dmesg"}},
+	{name: "system/kernel-aer-nvidia.txt", cmd: []string{"sh", "-c", `
+if command -v dmesg >/dev/null 2>&1; then
+  dmesg | grep -iE 'AER|NVRM|Xid|pcieport|nvidia' || echo "no AER/NVRM/Xid kernel messages found"
+else
+  echo "dmesg not found"
+fi
+`}},
+	{name: "system/nvidia-smi-q.txt", cmd: []string{"nvidia-smi", "-q"}},
+	{name: "system/nvidia-smi-topo.txt", cmd: []string{"sh", "-c", `
+if command -v nvidia-smi >/dev/null 2>&1; then
+  nvidia-smi topo -m 2>&1 || true
+else
+  echo "nvidia-smi not found"
+fi
+`}},
+	{name: "system/systemctl-nvidia-units.txt", cmd: []string{"sh", "-c", `
+if ! command -v systemctl >/dev/null 2>&1; then
+  echo "systemctl not found"
+  exit 0
+fi
+echo "=== unit files ==="
+systemctl list-unit-files --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
+echo
+echo "=== active units ==="
+systemctl list-units --no-pager --all 'nvidia*' 'fabric*' 2>&1 || true
+echo
+echo "=== failed units ==="
+systemctl --failed --no-pager 2>&1 | grep -iE 'nvidia|fabric' || echo "no failed nvidia/fabric units"
+`}},
+	// A found flag is used here rather than a trailing `ls` over all
+	// candidates: ls exits non-zero when ANY path is missing, which would
+	// print the "not found" line even after a binary had been listed.
+	{name: "system/fabric-manager-paths.txt", cmd: []string{"sh", "-c", `
+found=0
+for candidate in \
+  /usr/bin/nvidia-fabricmanager \
+  /usr/bin/nv-fabricmanager \
+  /usr/bin/nvidia-fabricmanagerd \
+  /usr/bin/nvlsm; do
+  if [ -e "$candidate" ]; then
+    found=1
+    echo "=== $candidate ==="
+    ls -l "$candidate" 2>&1 || true
+    echo
+  fi
+done
+if [ "$found" -eq 0 ]; then
+  echo "no fabric manager binaries found"
+fi
+`}},
+	{name: "system/lspci-nvidia-bridges-vv.txt", cmd: []string{"sh", "-c", `
+if ! command -v lspci >/dev/null 2>&1; then
+  echo "lspci not found"
+  exit 0
+fi
+found=0
+	for gpu in $(lspci -Dn | awk '$2 ~ /^03(00|02):$/ && $3 ~ /^10de:/ {print $1}'); do
+  found=1
+  echo "=== GPU $gpu ==="
+  lspci -s "$gpu" -vv 2>&1 || true
+  bridge=$(basename "$(readlink -f "/sys/bus/pci/devices/$gpu/.." 2>/dev/null)" 2>/dev/null)
+  if [ -n "$bridge" ] && [ "$bridge" != "$gpu" ]; then
+    echo
+    echo "=== UPSTREAM $bridge for $gpu ==="
+    lspci -s "$bridge" -vv 2>&1 || true
+  fi
+  echo
+done
+if [ "$found" -eq 0 ]; then
+  echo "no NVIDIA PCI devices found"
+fi
+`}},
+	{name: "system/pcie-nvidia-link.txt", cmd: []string{"sh", "-c", `
+for d in /sys/bus/pci/devices/*/; do
+  vendor=$(cat "$d/vendor" 2>/dev/null)
+	  [ "$vendor" = "0x10de" ] || continue
+	  class=$(cat "$d/class" 2>/dev/null)
+	  case "$class" in
+	    0x030000|0x030200) ;;
+	    *) continue ;;
+	  esac
+	  dev=$(basename "$d")
+  echo "=== $dev ==="
+  for f in current_link_speed current_link_width max_link_speed max_link_width; do
+    printf "  %-22s %s\n" "$f" "$(cat "$d/$f" 2>/dev/null)"
+  done
+done
+`}},
+	{name: "system/pcie-aer-sysfs.txt", cmd: []string{"sh", "-c", `
+found=0
+for dev in /sys/bus/pci/devices/*; do
+  [ -e "$dev" ] || continue
+  bdf=$(basename "$dev")
+  block=""
+  for f in aer_dev_correctable aer_dev_fatal aer_dev_nonfatal aer_rootport_total_err_cor aer_rootport_total_err_fatal aer_rootport_total_err_nonfatal; do
+    if [ -r "$dev/$f" ]; then
+      if [ -z "$block" ]; then
+        block=1
+        found=1
+        echo "=== $bdf ==="
+      fi
+      printf "  %-30s %s\n" "$f" "$(cat "$dev/$f" 2>/dev/null)"
+    fi
+  done
+  if [ -n "$block" ]; then
+    echo
+  fi
+done
+if [ "$found" -eq 0 ]; then
+  echo "no PCIe AER sysfs counters found"
+fi
+`}},
+	{name: "system/ethtool-info.txt", cmd: []string{"sh", "-c", `
+if ! command -v ethtool >/dev/null 2>&1; then
+  echo "ethtool not found"
+  exit 0
+fi
+found=0
+for path in /sys/class/net/*; do
+  [ -e "$path" ] || continue
+  iface=$(basename "$path")
+  [ "$iface" = "lo" ] && continue
+  found=1
+  echo "=== $iface ==="
+  ethtool -i "$iface" 2>&1 || true
+  echo
+done
+if [ "$found" -eq 0 ]; then
+  echo "no interfaces found"
+fi
+`}},
+	{name: "system/ethtool-link.txt", cmd: []string{"sh", "-c", `
+if ! command -v ethtool >/dev/null 2>&1; then
+  echo "ethtool not found"
+  exit 0
+fi
+found=0
+for path in /sys/class/net/*; do
+  [ -e "$path" ] || continue
+  iface=$(basename "$path")
+  [ "$iface" = "lo" ] && continue
+  found=1
+  echo "=== $iface ==="
+  ethtool "$iface" 2>&1 || true
+  echo
+done
+if [ "$found" -eq 0 ]; then
+  echo "no interfaces found"
+fi
+`}},
+	{name: "system/ethtool-module.txt", cmd: []string{"sh", "-c", `
+if ! command -v ethtool >/dev/null 2>&1; then
+  echo "ethtool not found"
+  exit 0
+fi
+found=0
+for path in /sys/class/net/*; do
+  [ -e "$path" ] || continue
+  iface=$(basename "$path")
+  [ "$iface" = "lo" ] && continue
+  found=1
+  echo "=== $iface ==="
+  ethtool -m "$iface" 2>&1 || true
+  echo
+done
+if [ "$found" -eq 0 ]; then
+  echo "no interfaces found"
+fi
+`}},
+	{name: "system/mstflint-query.txt", cmd: []string{"sh", "-c", `
+if ! command -v mstflint >/dev/null 2>&1; then
+  echo "mstflint not found"
+  exit 0
+fi
+found=0
+for path in /sys/bus/pci/devices/*; do
+  [ -e "$path/vendor" ] || continue
+  vendor=$(cat "$path/vendor" 2>/dev/null)
+  [ "$vendor" = "0x15b3" ] || continue
+  bdf=$(basename "$path")
+  found=1
+  echo "=== $bdf ==="
+  mstflint -d "$bdf" q 2>&1 || true
+  echo
+done
+if [ "$found" -eq 0 ]; then
+  echo "no Mellanox/NVIDIA networking devices found"
+fi
+`}},
+}
+
+// supportBundleOptionalFiles maps host log files (src) to their location
+// inside the bundle (name). They are copied on a best-effort basis: a file
+// that cannot be copied is simply left out of the bundle.
+var supportBundleOptionalFiles = []struct {
+	name string
+	src  string
+}{
+	{name: "system/kern.log", src: "/var/log/kern.log"},
+	{name: "system/syslog.txt", src: "/var/log/syslog"},
+	{name: "system/fabricmanager.log", src: "/var/log/fabricmanager.log"},
+	{name: "system/nvlsm.log", src: "/var/log/nvlsm.log"},
+	{name: "system/fabricmanager/fabricmanager.log", src: "/var/log/fabricmanager/fabricmanager.log"},
+	{name: "system/fabricmanager/nvlsm.log", src: "/var/log/fabricmanager/nvlsm.log"},
+}
+
+// supportBundleGlob is the filepath.Glob pattern used to find existing bundle
+// archives. It matches the "<date> (BEE-SP v<ver>) ... .tar.gz" names that
+// BuildSupportBundle generates.
+const supportBundleGlob = "????-??-?? (BEE-SP*)*.tar.gz"
+
+// BuildSupportBundle assembles a support archive and returns its path.
+//
+// A blank exportDir falls back to DefaultExportDir. The function stages a
+// filtered copy of exportDir, systemd status/journal dumps for every unit in
+// supportBundleServices, the outputs of supportBundleCommands and any readable
+// supportBundleOptionalFiles under a temporary directory, writes a manifest,
+// and packs the staging tree into a tar.gz inside os.TempDir(). Old bundles in
+// the temp dir are pruned first; the staging directory is removed on return.
+func BuildSupportBundle(exportDir string) (string, error) {
+	if exportDir = strings.TrimSpace(exportDir); exportDir == "" {
+		exportDir = DefaultExportDir
+	}
+	if err := os.MkdirAll(exportDir, 0755); err != nil {
+		return "", err
+	}
+	if err := cleanupOldSupportBundles(os.TempDir()); err != nil {
+		return "", err
+	}
+
+	stamp := time.Now().UTC()
+	archiveName := fmt.Sprintf(
+		"%s (BEE-SP v%s) %s %s %s.tar.gz",
+		stamp.Format("2006-01-02"),
+		bundleVersion(),
+		serverModelForBundle(),
+		serverSerialForBundle(),
+		stamp.Format("150405"),
+	)
+
+	stageName := fmt.Sprintf("bee-support-stage-%s-%s", sanitizeFilename(hostnameOr("unknown")), stamp.Format("20060102-150405"))
+	stageRoot := filepath.Join(os.TempDir(), stageName)
+	if err := os.MkdirAll(stageRoot, 0755); err != nil {
+		return "", err
+	}
+	defer os.RemoveAll(stageRoot)
+
+	if err := copyExportDirForSupportBundle(exportDir, filepath.Join(stageRoot, "export")); err != nil {
+		return "", err
+	}
+
+	systemdDir := filepath.Join(stageRoot, "systemd")
+	if err := writeJournalDump(filepath.Join(systemdDir, "combined.journal.log")); err != nil {
+		return "", err
+	}
+	for _, unit := range supportBundleServices {
+		statusCmd := []string{"systemctl", "status", unit, "--no-pager"}
+		if err := writeCommandOutput(filepath.Join(systemdDir, unit+".status.txt"), statusCmd); err != nil {
+			return "", err
+		}
+		journalCmd := []string{"journalctl", "--no-pager", "-u", unit}
+		if err := writeCommandOutput(filepath.Join(systemdDir, unit+".journal.log"), journalCmd); err != nil {
+			return "", err
+		}
+	}
+
+	for _, capture := range supportBundleCommands {
+		if err := writeCommandOutput(filepath.Join(stageRoot, capture.name), capture.cmd); err != nil {
+			return "", err
+		}
+	}
+	for _, logFile := range supportBundleOptionalFiles {
+		// Optional logs: absence or a copy failure must not abort the bundle.
+		_ = copyOptionalFile(logFile.src, filepath.Join(stageRoot, logFile.name))
+	}
+	if err := writeManifest(filepath.Join(stageRoot, "manifest.txt"), exportDir, stageRoot); err != nil {
+		return "", err
+	}
+
+	archivePath := filepath.Join(os.TempDir(), archiveName)
+	if err := createSupportTarGz(archivePath, stageRoot); err != nil {
+		return "", err
+	}
+	return archivePath, nil
+}
+
+// LatestSupportBundlePath returns the most recently modified support bundle
+// archive in os.TempDir(), or os.ErrNotExist when none is present.
+func LatestSupportBundlePath() (string, error) {
+	return latestSupportBundlePath(os.TempDir())
+}
+
+// cleanupOldSupportBundles prunes bundle archives in dir: anything modified
+// more than 24 hours ago is removed, and of the remainder only the three most
+// recent are kept. Removal failures are ignored; only a malformed glob
+// pattern is reported as an error.
+func cleanupOldSupportBundles(dir string) error {
+	const (
+		maxAge  = 24 * time.Hour
+		maxKeep = 3
+	)
+	found, err := filepath.Glob(filepath.Join(dir, supportBundleGlob))
+	if err != nil {
+		return err
+	}
+	fresh := supportBundleEntries(found)
+	for bundle, modTime := range fresh {
+		if time.Since(modTime) <= maxAge {
+			continue
+		}
+		_ = os.Remove(bundle)
+		delete(fresh, bundle)
+	}
+	for rank, bundle := range orderSupportBundles(fresh) {
+		if rank >= maxKeep {
+			_ = os.Remove(bundle)
+		}
+	}
+	return nil
+}
+
+// latestSupportBundlePath returns the newest bundle archive found in dir, or
+// os.ErrNotExist when no archive matches supportBundleGlob.
+func latestSupportBundlePath(dir string) (string, error) {
+	pattern := filepath.Join(dir, supportBundleGlob)
+	found, err := filepath.Glob(pattern)
+	if err != nil {
+		return "", err
+	}
+	if newestFirst := orderSupportBundles(supportBundleEntries(found)); len(newestFirst) > 0 {
+		return newestFirst[0], nil
+	}
+	return "", os.ErrNotExist
+}
+
+// supportBundleEntries maps each path in matches to its modification time.
+// Paths that cannot be stat'ed are silently left out.
+func supportBundleEntries(matches []string) map[string]time.Time {
+	modTimes := make(map[string]time.Time, len(matches))
+	for i := range matches {
+		if st, err := os.Stat(matches[i]); err == nil {
+			modTimes[matches[i]] = st.ModTime()
+		}
+	}
+	return modTimes
+}
+
+// orderSupportBundles returns the paths in entries sorted newest first by
+// modification time.
+//
+// Entries with identical modification times are ordered by path, descending,
+// so the result is deterministic. Without the tie-break, which bundle counts
+// as "latest" and which gets pruned would depend on map iteration order.
+func orderSupportBundles(entries map[string]time.Time) []string {
+	ordered := make([]string, 0, len(entries))
+	for path := range entries {
+		ordered = append(ordered, path)
+	}
+	sort.Slice(ordered, func(i, j int) bool {
+		ti, tj := entries[ordered[i]], entries[ordered[j]]
+		if !ti.Equal(tj) {
+			return ti.After(tj)
+		}
+		return ordered[i] > ordered[j]
+	})
+	return ordered
+}
+
+// writeJournalDump writes the combined journalctl output for all units in
+// supportBundleServices to dst, creating parent directories as needed. When
+// journalctl produces no output, the file holds the command error text or a
+// "no journal output" placeholder instead.
+func writeJournalDump(dst string) error {
+	args := make([]string, 0, 1+2*len(supportBundleServices))
+	args = append(args, "--no-pager")
+	for _, unit := range supportBundleServices {
+		args = append(args, "-u", unit)
+	}
+	payload, runErr := exec.Command("journalctl", args...).CombinedOutput()
+	if len(payload) == 0 {
+		if runErr != nil {
+			payload = []byte(runErr.Error() + "\n")
+		} else {
+			payload = []byte("no journal output\n")
+		}
+	}
+	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+		return err
+	}
+	return os.WriteFile(dst, payload, 0644)
+}
+
+// writeCommandOutput runs cmd and stores its combined stdout/stderr in dst,
+// creating parent directories as needed. An empty cmd is a no-op. A command
+// failure is not an error for the caller: when there is no output, the file
+// holds the error text, or "no output" if the command succeeded silently.
+// Only filesystem errors are returned.
+func writeCommandOutput(dst string, cmd []string) error {
+	if len(cmd) == 0 {
+		return nil
+	}
+	payload, runErr := exec.Command(cmd[0], cmd[1:]...).CombinedOutput()
+	switch {
+	case len(payload) > 0:
+		// Keep whatever the command printed, even if it exited non-zero.
+	case runErr != nil:
+		payload = []byte(runErr.Error() + "\n")
+	default:
+		payload = []byte("no output\n")
+	}
+	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+		return err
+	}
+	return os.WriteFile(dst, payload, 0644)
+}
+
+// copyOptionalFile copies src to dst, creating dst's parent directories.
+// It returns an error when src cannot be opened (dst is not created in that
+// case), when dst cannot be created, or when writing fails.
+//
+// The destination's Close error is returned as well: buffered data may only
+// reach the filesystem at close time, so ignoring it could report a truncated
+// copy as success.
+func copyOptionalFile(src, dst string) error {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+		return err
+	}
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	if _, err := io.Copy(out, in); err != nil {
+		// The copy error is the one worth reporting; close just releases the fd.
+		_ = out.Close()
+		return err
+	}
+	return out.Close()
+}
+
+// writeManifest writes a plain-text manifest to dst. It starts with key=value
+// header lines (bee version, host, UTC generation time, export dir), followed
+// by a sorted "files:" section with one "<relative path>\t<size>" line per
+// regular entry under stageRoot. The manifest file itself is not listed.
+func writeManifest(dst, exportDir, stageRoot string) error {
+	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+		return err
+	}
+	var manifest strings.Builder
+	fmt.Fprintf(&manifest, "bee_version=%s\n", buildVersion())
+	fmt.Fprintf(&manifest, "host=%s\n", hostnameOr("unknown"))
+	fmt.Fprintf(&manifest, "generated_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
+	fmt.Fprintf(&manifest, "export_dir=%s\n", exportDir)
+	fmt.Fprintf(&manifest, "\nfiles:\n")
+
+	self := filepath.Clean(dst)
+	var listing []string
+	walkErr := filepath.Walk(stageRoot, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() || filepath.Clean(path) == self {
+			return nil
+		}
+		rel, relErr := filepath.Rel(stageRoot, path)
+		if relErr != nil {
+			return relErr
+		}
+		listing = append(listing, fmt.Sprintf("%s\t%d", rel, info.Size()))
+		return nil
+	})
+	if walkErr != nil {
+		return walkErr
+	}
+	sort.Strings(listing)
+	if len(listing) > 0 {
+		manifest.WriteString(strings.Join(listing, "\n"))
+		manifest.WriteByte('\n')
+	}
+	return os.WriteFile(dst, []byte(manifest.String()), 0644)
+}
+
+// bundleVersion returns buildVersion() with a leading "v" and then a leading
+// "V" stripped, or "0.0" when the result is empty or "unknown".
+func bundleVersion() string {
+	version := strings.TrimPrefix(strings.TrimPrefix(buildVersion(), "v"), "V")
+	switch version {
+	case "", "unknown":
+		return "0.0"
+	}
+	return version
+}
+
+// serverModelForBundle returns the "Product Name" reported by
+// `dmidecode -t 1`, made safe for use inside the bundle file name: spaces and
+// path separators ("/") are replaced with underscores. A "/" left in place
+// would turn part of the archive name into a non-existent directory and make
+// archive creation fail.
+//
+// It returns "unknown" when dmidecode fails, the field is missing, or the
+// value is blank.
+func serverModelForBundle() string {
+	raw, err := exec.Command("dmidecode", "-t", "1").Output()
+	if err != nil {
+		return "unknown"
+	}
+	for _, line := range strings.Split(string(raw), "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), ": ")
+		if !ok || strings.TrimSpace(key) != "Product Name" {
+			continue
+		}
+		val = strings.TrimSpace(val)
+		if val == "" {
+			return "unknown"
+		}
+		return strings.NewReplacer(" ", "_", "/", "_").Replace(val)
+	}
+	return "unknown"
+}
+
+// serverSerialForBundle returns the "Serial Number" reported by
+// `dmidecode -t 1` for use inside the bundle file name. Path separators ("/")
+// are replaced with underscores: placeholder serials such as "N/A" would
+// otherwise turn part of the archive name into a non-existent directory and
+// make archive creation fail.
+//
+// It returns "unknown" when dmidecode fails, the field is missing, or the
+// value is blank.
+func serverSerialForBundle() string {
+	raw, err := exec.Command("dmidecode", "-t", "1").Output()
+	if err != nil {
+		return "unknown"
+	}
+	for _, line := range strings.Split(string(raw), "\n") {
+		key, val, ok := strings.Cut(strings.TrimSpace(line), ": ")
+		if !ok || strings.TrimSpace(key) != "Serial Number" {
+			continue
+		}
+		val = strings.TrimSpace(val)
+		if val == "" {
+			return "unknown"
+		}
+		return strings.ReplaceAll(val, "/", "_")
+	}
+	return "unknown"
+}
+
+// buildVersion returns the trimmed combined output of `bee version`, or
+// "unknown" when the command cannot be run or exits with an error.
+func buildVersion() string {
+	if out, err := exec.Command("bee", "version").CombinedOutput(); err == nil {
+		return strings.TrimSpace(string(out))
+	}
+	return "unknown"
+}
+
+// copyDirContents copies every entry of srcDir into dstDir via copyPath.
+// A missing srcDir is not an error; nothing is copied in that case.
+func copyDirContents(srcDir, dstDir string) error {
+	children, err := os.ReadDir(srcDir)
+	switch {
+	case err == nil:
+	case os.IsNotExist(err):
+		return nil
+	default:
+		return err
+	}
+	for _, child := range children {
+		name := child.Name()
+		if err := copyPath(filepath.Join(srcDir, name), filepath.Join(dstDir, name)); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// copyExportDirForSupportBundle copies srcDir into dstDir, leaving out
+// ".tar.gz" archives that live under "bee-sat/" or whose file name starts
+// with "bee-support-". The copied bee-audit.json, if any, is then normalized
+// via normalizeSupportBundleAuditJSON.
+func copyExportDirForSupportBundle(srcDir, dstDir string) error {
+	keep := func(rel string, _ os.FileInfo) bool {
+		cleanRel := filepath.ToSlash(strings.TrimPrefix(filepath.Clean(rel), "./"))
+		if cleanRel == "" || !strings.HasSuffix(cleanRel, ".tar.gz") {
+			return true
+		}
+		satArchive := strings.HasPrefix(cleanRel, "bee-sat/")
+		supportArchive := strings.HasPrefix(filepath.Base(cleanRel), "bee-support-")
+		return !satArchive && !supportArchive
+	}
+	if err := copyDirContentsFiltered(srcDir, dstDir, keep); err != nil {
+		return err
+	}
+	return normalizeSupportBundleAuditJSON(filepath.Join(dstDir, "bee-audit.json"))
+}
+
+// normalizeSupportBundleAuditJSON rewrites the audit JSON at path with the
+// result of ApplySATOverlay. A missing file is not an error, and if the
+// overlay itself fails the file is left untouched and nil is returned.
+func normalizeSupportBundleAuditJSON(path string) error {
+	data, err := os.ReadFile(path)
+	if os.IsNotExist(err) {
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+	normalized, overlayErr := ApplySATOverlay(data)
+	if overlayErr != nil {
+		// Best effort: keep the original JSON rather than fail the bundle.
+		return nil
+	}
+	return os.WriteFile(path, normalized, 0644)
+}
+
+// copyDirContentsFiltered copies the entries of srcDir into dstDir through
+// copyPathFiltered, passing srcDir as the root that keep's relative paths are
+// computed against. A missing srcDir is not an error.
+func copyDirContentsFiltered(srcDir, dstDir string, keep func(rel string, info os.FileInfo) bool) error {
+	children, err := os.ReadDir(srcDir)
+	switch {
+	case err == nil:
+	case os.IsNotExist(err):
+		return nil
+	default:
+		return err
+	}
+	for _, child := range children {
+		name := child.Name()
+		if err := copyPathFiltered(srcDir, filepath.Join(srcDir, name), filepath.Join(dstDir, name), keep); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// copyPath copies src to dst. Directories are copied recursively and created
+// with the source directory's permission bits; files are created (or
+// truncated) with the source file's permission bits, and missing parent
+// directories of dst are created. src is resolved with os.Stat, so symbolic
+// links are followed.
+//
+// The destination file's Close error is returned: buffered data may only hit
+// the filesystem at close time, so ignoring it could report a truncated copy
+// as success.
+func copyPath(src, dst string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+	if info.IsDir() {
+		if err := os.MkdirAll(dst, info.Mode().Perm()); err != nil {
+			return err
+		}
+		entries, err := os.ReadDir(src)
+		if err != nil {
+			return err
+		}
+		for _, entry := range entries {
+			if err := copyPath(filepath.Join(src, entry.Name()), filepath.Join(dst, entry.Name())); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
+		return err
+	}
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+
+	out, err := os.OpenFile(dst, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, info.Mode().Perm())
+	if err != nil {
+		return err
+	}
+	if _, err := io.Copy(out, in); err != nil {
+		// The copy error is the one worth reporting; close just releases the fd.
+		_ = out.Close()
+		return err
+	}
+	return out.Close()
+}
+
+// copyPathFiltered copies src to dst like copyPath, but consults keep first.
+// keep receives the path of src relative to rootSrc together with its
+// FileInfo; a false result skips the entry, including everything below it
+// when it is a directory. A nil keep accepts everything.
+func copyPathFiltered(rootSrc, src, dst string, keep func(rel string, info os.FileInfo) bool) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+	rel, err := filepath.Rel(rootSrc, src)
+	if err != nil {
+		return err
+	}
+	switch {
+	case keep != nil && !keep(rel, info):
+		return nil
+	case !info.IsDir():
+		return copyPath(src, dst)
+	}
+
+	if err := os.MkdirAll(dst, info.Mode().Perm()); err != nil {
+		return err
+	}
+	children, err := os.ReadDir(src)
+	if err != nil {
+		return err
+	}
+	for _, child := range children {
+		name := child.Name()
+		if err := copyPathFiltered(rootSrc, filepath.Join(src, name), filepath.Join(dst, name), keep); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// createSupportTarGz packs every non-directory entry found under srcDir into
+// a gzip-compressed tar archive at dst. Entry names are slash-separated paths
+// relative to the parent of srcDir, so they start with srcDir's base name.
+//
+// The tar writer, gzip writer and file are closed in that order and the first
+// close error is returned, because those closes flush the final archive
+// bytes. If anything fails, the partially written dst is removed so that a
+// corrupt archive is never left behind.
+func createSupportTarGz(dst, srcDir string) (err error) {
+	file, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	gz := gzip.NewWriter(file)
+	tw := tar.NewWriter(gz)
+	defer func() {
+		for _, closeFn := range []func() error{tw.Close, gz.Close, file.Close} {
+			if cerr := closeFn(); cerr != nil && err == nil {
+				err = cerr
+			}
+		}
+		if err != nil {
+			_ = os.Remove(dst)
+		}
+	}()
+
+	base := filepath.Dir(srcDir)
+	return filepath.Walk(srcDir, func(path string, info os.FileInfo, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		if info.IsDir() {
+			return nil
+		}
+
+		header, hdrErr := tar.FileInfoHeader(info, "")
+		if hdrErr != nil {
+			return hdrErr
+		}
+		rel, relErr := filepath.Rel(base, path)
+		if relErr != nil {
+			return relErr
+		}
+		// Tar entry names are always slash-separated, whatever the host OS.
+		header.Name = filepath.ToSlash(rel)
+		if writeErr := tw.WriteHeader(header); writeErr != nil {
+			return writeErr
+		}
+
+		f, openErr := os.Open(path)
+		if openErr != nil {
+			return openErr
+		}
+		defer f.Close()
+
+		_, copyErr := io.Copy(tw, f)
+		return copyErr
+	})
+}
--- a/audit/internal/collector/amdgpu.go
+++ b/audit/internal/collector/amdgpu.go
@@ -0,0 +1,252 @@
+package collector
+
+import (
+	"encoding/csv"
+	"log/slog"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+
+	"bee/audit/internal/schema"
+)
+
+// Package-level indirections over os/exec and path/filepath used when
+// locating and running rocm-smi. The tests in this package replace the exec
+// and LookPath hooks with fakes.
+var (
+	amdSMIExecCommand = exec.Command
+	amdSMILookPath    = exec.LookPath
+	amdSMIGlob        = filepath.Glob
+)
+
+// amdSMIExecutableGlobs lists glob patterns that are tried, in order, to
+// locate rocm-smi when it is not found on PATH.
+var amdSMIExecutableGlobs = []string{
+	"/opt/rocm/bin/rocm-smi",
+	"/opt/rocm-*/bin/rocm-smi",
+	"/usr/local/bin/rocm-smi",
+}
+
+// amdGPUInfo holds the per-GPU values gathered from rocm-smi: BDF comes from
+// --showbus (normalized), Serial from --showserial, Product from
+// --showproductname, Firmware from --showvbios, and PowerW/TempC from the
+// numeric values parsed out of --showpower/--showtemp. The pointer fields
+// stay nil when no number could be parsed.
+type amdGPUInfo struct {
+	BDF      string
+	Serial   string
+	Product  string
+	Firmware string
+	PowerW   *float64
+	TempC    *float64
+}
+
+// enrichPCIeWithAMD fills serial number, firmware, model (only if unset),
+// power and temperature on AMD GPU entries of devs, using rocm-smi data
+// matched by normalized BDF. devs is modified in place and returned. When no
+// AMD GPU is present or rocm-smi cannot be queried, devs is returned as is.
+func enrichPCIeWithAMD(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
+	if !hasAMDGPUDevices(devs) {
+		return devs
+	}
+	byBDF, err := queryAMDGPUs()
+	if err != nil {
+		slog.Info("amdgpu: enrichment skipped", "err", err)
+		return devs
+	}
+	var matched int
+	for i := range devs {
+		dev := &devs[i]
+		if dev.BDF == nil || !isAMDGPUDevice(*dev) {
+			continue
+		}
+		gpu, found := byBDF[normalizePCIeBDF(*dev.BDF)]
+		if !found {
+			continue
+		}
+		matched++
+		if strings.TrimSpace(gpu.Serial) != "" {
+			dev.SerialNumber = &gpu.Serial
+		}
+		if strings.TrimSpace(gpu.Firmware) != "" {
+			dev.Firmware = &gpu.Firmware
+		}
+		// An already collected model name wins over the rocm-smi one.
+		if dev.Model == nil && strings.TrimSpace(gpu.Product) != "" {
+			dev.Model = &gpu.Product
+		}
+		if gpu.PowerW != nil {
+			dev.PowerW = gpu.PowerW
+		}
+		if gpu.TempC != nil {
+			dev.TemperatureC = gpu.TempC
+		}
+	}
+	if matched > 0 {
+		slog.Info("amdgpu: enriched", "count", matched)
+	}
+	return devs
+}
+
+// hasAMDGPUDevices reports whether devs contains at least one AMD GPU.
+func hasAMDGPUDevices(devs []schema.HardwarePCIeDevice) bool {
+	for i := range devs {
+		if isAMDGPUDevice(devs[i]) {
+			return true
+		}
+	}
+	return false
+}
+
+// isAMDGPUDevice reports whether dev has a GPU device class and a
+// manufacturer containing "advanced micro devices" (case-insensitive).
+// Devices missing either field never match.
+func isAMDGPUDevice(dev schema.HardwarePCIeDevice) bool {
+	if dev.DeviceClass == nil || dev.Manufacturer == nil {
+		return false
+	}
+	vendor := strings.ToLower(strings.TrimSpace(*dev.Manufacturer))
+	if !strings.Contains(vendor, "advanced micro devices") {
+		return false
+	}
+	return isGPUClass(strings.TrimSpace(*dev.DeviceClass))
+}
+
+// queryAMDGPUs collects rocm-smi data for every card that reports a usable
+// bus address and returns it keyed by normalized BDF. Only a failure of the
+// initial --showbus query is returned as an error; the remaining fields are
+// merged on a best-effort basis.
+func queryAMDGPUs() (map[string]amdGPUInfo, error) {
+	busByCard, err := queryAMDField("--showbus")
+	if err != nil {
+		return nil, err
+	}
+	cards := make(map[string]amdGPUInfo, len(busByCard))
+	for card, bus := range busByCard {
+		if bdf := normalizePCIeBDF(bus); bdf != "" {
+			cards[card] = amdGPUInfo{BDF: bdf}
+		}
+	}
+	byBDF := make(map[string]amdGPUInfo, len(cards))
+	if len(cards) == 0 {
+		return byBDF, nil
+	}
+
+	textFields := []struct {
+		flag string
+		set  func(*amdGPUInfo, string)
+	}{
+		{"--showserial", func(info *amdGPUInfo, v string) { info.Serial = v }},
+		{"--showproductname", func(info *amdGPUInfo, v string) { info.Product = v }},
+		{"--showvbios", func(info *amdGPUInfo, v string) { info.Firmware = v }},
+	}
+	for _, field := range textFields {
+		mergeAMDField(cards, field.flag, field.set)
+	}
+	numericFields := []struct {
+		flag string
+		set  func(*amdGPUInfo, float64)
+	}{
+		{"--showpower", func(info *amdGPUInfo, v float64) { info.PowerW = &v }},
+		{"--showtemp", func(info *amdGPUInfo, v float64) { info.TempC = &v }},
+	}
+	for _, field := range numericFields {
+		mergeAMDNumericField(cards, field.flag, field.set)
+	}
+
+	for _, info := range cards {
+		if info.BDF != "" {
+			byBDF[info.BDF] = info
+		}
+	}
+	return byBDF, nil
+}
+
+// mergeAMDField queries one rocm-smi text field and applies each non-blank
+// value to the matching card already present in infoByCard. Query errors and
+// unknown cards are ignored.
+func mergeAMDField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, string)) {
+	values, err := queryAMDField(flag)
+	if err != nil {
+		return
+	}
+	for card, raw := range values {
+		trimmed := strings.TrimSpace(raw)
+		info, known := infoByCard[card]
+		if !known || trimmed == "" {
+			continue
+		}
+		apply(&info, trimmed)
+		// Map values are copies, so the updated struct must be stored back.
+		infoByCard[card] = info
+	}
+}
+
+// mergeAMDNumericField queries one rocm-smi numeric field and applies each
+// parsed value to the matching card already present in infoByCard. Query
+// errors and unknown cards are ignored.
+func mergeAMDNumericField(infoByCard map[string]amdGPUInfo, flag string, apply func(*amdGPUInfo, float64)) {
+	values, err := queryAMDNumericField(flag)
+	if err != nil {
+		return
+	}
+	for card, number := range values {
+		if info, known := infoByCard[card]; known {
+			apply(&info, number)
+			// Map values are copies, so the updated struct must be stored back.
+			infoByCard[card] = info
+		}
+	}
+}
+
+// queryAMDField runs `rocm-smi <flag> --csv` and returns the parsed value per
+// card key. It fails when rocm-smi cannot be located or exits with an error.
+//
+// Only stdout is parsed. Diagnostics written to stderr are deliberately kept
+// out of the CSV stream: mixed in, a single malformed line would make the CSV
+// parser reject the whole output and every row would be lost.
+func queryAMDField(flag string) (map[string]string, error) {
+	cmd, err := resolveAMDSMICmd(flag, "--csv")
+	if err != nil {
+		return nil, err
+	}
+	out, err := amdSMIExecCommand(cmd[0], cmd[1:]...).Output()
+	if err != nil {
+		return nil, err
+	}
+	return parseROCmSingleValueCSV(string(out)), nil
+}
+
+// queryAMDNumericField queries a rocm-smi field and keeps, per card, the
+// number that firstFloat extracts from the raw value. Cards whose value
+// yields no number are omitted.
+func queryAMDNumericField(flag string) (map[string]float64, error) {
+	rawByCard, err := queryAMDField(flag)
+	if err != nil {
+		return nil, err
+	}
+	parsed := make(map[string]float64, len(rawByCard))
+	for card, text := range rawByCard {
+		number, ok := firstFloat(text)
+		if !ok {
+			continue
+		}
+		parsed[card] = number
+	}
+	return parsed, nil
+}
+
+// resolveAMDSMICmd returns the argv for invoking rocm-smi with args. The
+// binary is taken from PATH when available, otherwise from the first
+// (lexically smallest) match of the first amdSMIExecutableGlobs pattern that
+// matches anything. exec.ErrNotFound is returned when nothing is found.
+func resolveAMDSMICmd(args ...string) ([]string, error) {
+	if onPath, err := amdSMILookPath("rocm-smi"); err == nil {
+		return append([]string{onPath}, args...), nil
+	}
+	for _, pattern := range amdSMIExecutableGlobs {
+		candidates, err := amdSMIGlob(pattern)
+		if err != nil || len(candidates) == 0 {
+			continue
+		}
+		sort.Strings(candidates)
+		return append([]string{candidates[0]}, args...), nil
+	}
+	return nil, exec.ErrNotFound
+}
+
+// parseROCmSingleValueCSV parses rocm-smi CSV output of the form
+// "<card>,<value...>" into a map from normalized card key to value. Extra
+// columns are re-joined with commas. Rows with an unrecognized card key, a
+// blank value, or a value that looks like a header are skipped. If the input
+// is not valid CSV, an empty map is returned.
+func parseROCmSingleValueCSV(raw string) map[string]string {
+	parsed := map[string]string{}
+	r := csv.NewReader(strings.NewReader(raw))
+	r.FieldsPerRecord = -1 // rows may have differing column counts
+	records, err := r.ReadAll()
+	if err != nil {
+		return parsed
+	}
+	for _, fields := range records {
+		if len(fields) < 2 {
+			continue
+		}
+		key := normalizeROCmCardKey(fields[0])
+		value := strings.TrimSpace(strings.Join(fields[1:], ","))
+		if key == "" || value == "" || looksLikeCSVHeaderValue(value) {
+			continue
+		}
+		parsed[key] = value
+	}
+	return parsed
+}
+
+// normalizeROCmCardKey converts the first CSV column into a canonical
+// "card<N>" key. Input is lower-cased, trimmed and stripped of surrounding
+// double quotes. Values already starting with "card" are returned as is, and
+// bare integers get a "card" prefix. The header words "device", "gpu" and
+// "card", as well as anything else, yield "".
+func normalizeROCmCardKey(raw string) string {
+	key := strings.Trim(strings.ToLower(strings.TrimSpace(raw)), "\"")
+	switch key {
+	case "", "device", "gpu", "card":
+		return ""
+	}
+	if strings.HasPrefix(key, "card") {
+		return key
+	}
+	if _, err := strconv.Atoi(key); err != nil {
+		return ""
+	}
+	return "card" + key
+}
+
+// looksLikeCSVHeaderValue reports whether value contains (case-insensitively)
+// one of the words "product", "serial", "vbios" or "bus", which is taken to
+// mean it is a CSV header cell rather than data.
+func looksLikeCSVHeaderValue(value string) bool {
+	lowered := strings.ToLower(strings.TrimSpace(value))
+	for _, marker := range [...]string{"product", "serial", "vbios", "bus"} {
+		if strings.Contains(lowered, marker) {
+			return true
+		}
+	}
+	return false
+}
--- a/audit/internal/collector/amdgpu_test.go
+++ b/audit/internal/collector/amdgpu_test.go
@@ -0,0 +1,56 @@
+package collector
+
+import (
+	"os/exec"
+	"testing"
+)
+
+// TestParseROCmSingleValueCSV checks that the header row is dropped and each
+// card row is returned under its card key.
+func TestParseROCmSingleValueCSV(t *testing.T) {
+	got := parseROCmSingleValueCSV("device,Serial Number\ncard0,ABC123\ncard1,XYZ789\n")
+	if serial := got["card0"]; serial != "ABC123" {
+		t.Fatalf("card0=%q want ABC123", serial)
+	}
+	if serial := got["card1"]; serial != "XYZ789" {
+		t.Fatalf("card1=%q want XYZ789", serial)
+	}
+}
+
+// TestQueryAMDNumericFieldParsesUnits stubs the rocm-smi lookup and execution
+// hooks and checks that values carrying a unit suffix ("45.5c") are parsed
+// into plain floats.
+func TestQueryAMDNumericFieldParsesUnits(t *testing.T) {
+	prevExec, prevLookPath := amdSMIExecCommand, amdSMILookPath
+	t.Cleanup(func() {
+		amdSMIExecCommand, amdSMILookPath = prevExec, prevLookPath
+	})
+
+	amdSMILookPath = func(string) (string, error) { return "/usr/bin/rocm-smi", nil }
+	amdSMIExecCommand = func(string, ...string) *exec.Cmd {
+		return exec.Command("sh", "-c", "printf 'device,Temperature\\ncard0,45.5c\\ncard1,67.0c\\n'")
+	}
+
+	temps, err := queryAMDNumericField("--showtemp")
+	if err != nil {
+		t.Fatalf("queryAMDNumericField: %v", err)
+	}
+	if v := temps["card0"]; v != 45.5 {
+		t.Fatalf("card0=%v want 45.5", v)
+	}
+	if v := temps["card1"]; v != 67.0 {
+		t.Fatalf("card1=%v want 67.0", v)
+	}
+}
+
+// TestNormalizeROCmCardKey covers bare indices, already-prefixed keys, the
+// header word and the empty string.
+func TestNormalizeROCmCardKey(t *testing.T) {
+	cases := []struct{ input, want string }{
+		{"0", "card0"},
+		{"card1", "card1"},
+		{"Device", ""},
+		{"", ""},
+	}
+	for _, tc := range cases {
+		got := normalizeROCmCardKey(tc.input)
+		if got == tc.want {
+			continue
+		}
+		t.Fatalf("normalizeROCmCardKey(%q)=%q want %q", tc.input, got, tc.want)
+	}
+}
--- a/audit/internal/collector/board.go
+++ b/audit/internal/collector/board.go
@@ -4,10 +4,27 @@ import (
 	"bee/audit/internal/schema"
 	"bufio"
 	"log/slog"
+	"os"
 	"os/exec"
 	"strings"
 )

+// execDmidecode runs `dmidecode -t <typeNum>` and returns its stdout. It is a
+// package-level variable rather than a plain function, so it can be reassigned.
+var execDmidecode = func(typeNum string) (string, error) {
+	stdout, err := exec.Command("dmidecode", "-t", typeNum).Output()
+	if err == nil {
+		return string(stdout), nil
+	}
+	return "", err
+}
+
+// execIpmitool runs ipmitool with args and returns its stdout. It is a
+// package-level variable rather than a plain function, so it can be reassigned.
+var execIpmitool = func(args ...string) (string, error) {
+	stdout, err := exec.Command("ipmitool", args...).Output()
+	if err == nil {
+		return string(stdout), nil
+	}
+	return "", err
+}
+
 // collectBoard runs dmidecode for types 0, 1, 2 and returns the board record
 // plus the BIOS firmware entry. Any failure is logged and returns zero values.
 func collectBoard() (schema.HardwareBoard, []schema.HardwareFirmwareRecord) {
@@ -61,6 +78,45 @@ func parseBoard(type1, type2 string) schema.HardwareBoard {
 	return board
 }

+// collectBMCFirmware reports the BMC firmware revision taken from
+// `ipmitool mc info` as a single "BMC" firmware record.
+// It returns nil when ipmitool is not on PATH, /dev/ipmi0 cannot be stat'ed,
+// the command fails, or the output has no "Firmware Revision" value.
+func collectBMCFirmware() []schema.HardwareFirmwareRecord {
+	if _, err := exec.LookPath("ipmitool"); err != nil {
+		return nil
+	}
+	if _, err := os.Stat("/dev/ipmi0"); err != nil {
+		return nil
+	}
+	info, err := execIpmitool("mc", "info")
+	if err != nil {
+		slog.Info("bmc: ipmitool mc info unavailable", "err", err)
+		return nil
+	}
+	if version := parseBMCFirmwareRevision(info); version != "" {
+		slog.Info("bmc: collected", "version", version)
+		return []schema.HardwareFirmwareRecord{{DeviceName: "BMC", Version: version}}
+	}
+	return nil
+}
+
+// parseBMCFirmwareRevision returns the trimmed value of the first
+// "Firmware Revision" line in `ipmitool mc info` output, or "" if there is
+// no such line. The key must match exactly, so e.g. "Aux Firmware Rev Info"
+// lines are not picked up.
+func parseBMCFirmwareRevision(out string) string {
+	const field = "Firmware Revision"
+	for _, line := range strings.Split(out, "\n") {
+		name, value, found := strings.Cut(strings.TrimSpace(line), ":")
+		if found && strings.TrimSpace(name) == field {
+			return strings.TrimSpace(value)
+		}
+	}
+	return ""
+}
+
 // parseBIOSFirmware extracts BIOS version from dmidecode type 0 output.
 func parseBIOSFirmware(type0 string) []schema.HardwareFirmwareRecord {
 	fields := parseDMIFields(type0, "BIOS Information")
@@ -141,9 +197,5 @@ func cleanDMIValue(v string) string {

 // runDmidecode executes dmidecode -t <typeNum> and returns its stdout.
 func runDmidecode(typeNum string) (string, error) {
-	out, err := exec.Command("dmidecode", "-t", typeNum).Output()
-	if err != nil {
-		return "", err
-	}
-	return string(out), nil
+	return execDmidecode(typeNum)
 }
--- a/audit/internal/collector/collector.go
+++ b/audit/internal/collector/collector.go
@@ -7,13 +7,15 @@ import (
 	"bee/audit/internal/runtimeenv"
 	"bee/audit/internal/schema"
 	"log/slog"
+	"os"
 	"time"
 )

 // Run executes all collectors and returns the combined snapshot.
 // Partial failures are logged as warnings; collection always completes.
-func Run(runtimeMode runtimeenv.Mode) schema.HardwareIngestRequest {
+func Run(_ runtimeenv.Mode) schema.HardwareIngestRequest {
 	start := time.Now()
+	collectedAt := time.Now().UTC().Format(time.RFC3339)
 	slog.Info("audit started")

 	snap := schema.HardwareSnapshot{}
@@ -21,32 +23,45 @@ func Run(runtimeMode runtimeenv.Mode) schema.HardwareIngestRequest {
 	board, biosFW := collectBoard()
 	snap.Board = board
 	snap.Firmware = append(snap.Firmware, biosFW...)
+	snap.Firmware = append(snap.Firmware, collectBMCFirmware()...)

-	cpus, cpuFW := collectCPUs(snap.Board.SerialNumber)
-	snap.CPUs = cpus
-	snap.Firmware = append(snap.Firmware, cpuFW...)
+	snap.CPUs = collectCPUs()

 	snap.Memory = collectMemory()
+	sensorDoc, err := readSensorsJSONDoc()
+	if err != nil {
+		slog.Info("sensors: unavailable for enrichment", "err", err)
+	}
+	snap.CPUs = enrichCPUsWithTelemetry(snap.CPUs, sensorDoc)
+	snap.Memory = enrichMemoryWithTelemetry(snap.Memory, sensorDoc)
 	snap.Storage = collectStorage()
 	snap.PCIeDevices = collectPCIe()
-	snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices, snap.Board.SerialNumber)
+	snap.PCIeDevices = enrichPCIeWithAMD(snap.PCIeDevices)
+	snap.PCIeDevices = enrichPCIeWithPCISerials(snap.PCIeDevices)
+	snap.PCIeDevices = enrichPCIeWithNVIDIA(snap.PCIeDevices)
 	snap.PCIeDevices = enrichPCIeWithMellanox(snap.PCIeDevices)
 	snap.PCIeDevices = enrichPCIeWithNICTelemetry(snap.PCIeDevices)
+	snap.PCIeDevices = enrichPCIeWithRAIDTelemetry(snap.PCIeDevices)
 	snap.Storage = enrichStorageWithVROC(snap.Storage, snap.PCIeDevices)
 	snap.Storage = appendUniqueStorage(snap.Storage, collectRAIDStorage(snap.PCIeDevices))
 	snap.PowerSupplies = collectPSUs()
+	snap.PowerSupplies = enrichPSUsWithTelemetry(snap.PowerSupplies, sensorDoc)
+	snap.Sensors = buildSensorsFromDoc(sensorDoc)
+	finalizeSnapshot(&snap, collectedAt)

 	// remaining collectors added in steps 1.8 – 1.10

 	slog.Info("audit completed", "duration", time.Since(start).Round(time.Millisecond))

-	sourceType := string(runtimeMode)
-	protocol := "os-direct"
-
+	sourceType := "manual"
+	var targetHost *string
+	if hostname, err := os.Hostname(); err == nil && hostname != "" {
+		targetHost = &hostname
+	}
 	return schema.HardwareIngestRequest{
 		SourceType:  &sourceType,
-		Protocol:    &protocol,
-		CollectedAt: time.Now().UTC().Format(time.RFC3339),
+		TargetHost:  targetHost,
+		CollectedAt: collectedAt,
 		Hardware:    snap,
 	}
 }
--- a/audit/internal/collector/contract.go
+++ b/audit/internal/collector/contract.go
@@ -0,0 +1,64 @@
+package collector
+
+import "strings"
+
+// Status strings stored in a component's Status field by the collectors in
+// this package. statusEmpty marks an unpopulated CPU socket or memory slot.
+const (
+	statusOK       = "OK"
+	statusWarning  = "Warning"
+	statusCritical = "Critical"
+	statusUnknown  = "Unknown"
+	statusEmpty    = "Empty"
+)
+
+// mapPCIeDeviceClass converts a free-form PCI class description into one of
+// the canonical class names recognised by isNICClass, isGPUClass and
+// isRAIDClass.
+//
+// Matching is case-insensitive and substring based. The first matching case
+// wins, so the order of the cases is significant (for example
+// "serial attached scsi" is tested before "raid"). Blank input yields "".
+// Unrecognised classes are passed through with surrounding whitespace
+// removed, so the result is normalised the same way on every path.
+func mapPCIeDeviceClass(raw string) string {
+	trimmed := strings.TrimSpace(raw)
+	normalized := strings.ToLower(trimmed)
+	switch {
+	case normalized == "":
+		return ""
+	case strings.Contains(normalized, "ethernet controller"):
+		return "EthernetController"
+	case strings.Contains(normalized, "fibre channel"):
+		return "FibreChannelController"
+	case strings.Contains(normalized, "network controller"), strings.Contains(normalized, "infiniband controller"):
+		return "NetworkController"
+	case strings.Contains(normalized, "serial attached scsi"), strings.Contains(normalized, "storage controller"):
+		return "StorageController"
+	case strings.Contains(normalized, "raid"), strings.Contains(normalized, "mass storage"):
+		return "MassStorageController"
+	case strings.Contains(normalized, "display controller"):
+		return "DisplayController"
+	case strings.Contains(normalized, "vga"), strings.Contains(normalized, "3d controller"), strings.Contains(normalized, "video controller"):
+		return "VideoController"
+	case strings.Contains(normalized, "processing accelerator"):
+		// The singular form also matches the plural "processing accelerators".
+		return "ProcessingAccelerator"
+	default:
+		return trimmed
+	}
+}
+
+// isNICClass reports whether class, ignoring surrounding whitespace, is one of
+// the canonical network-adapter classes ("EthernetController" or
+// "NetworkController"). The comparison is case-sensitive.
+func isNICClass(class string) bool {
+	name := strings.TrimSpace(class)
+	return name == "EthernetController" || name == "NetworkController"
+}
+
+// isGPUClass reports whether class, ignoring surrounding whitespace, is one of
+// the canonical GPU/accelerator classes ("VideoController",
+// "DisplayController" or "ProcessingAccelerator"). The comparison is
+// case-sensitive.
+func isGPUClass(class string) bool {
+	name := strings.TrimSpace(class)
+	for _, candidate := range [...]string{"VideoController", "DisplayController", "ProcessingAccelerator"} {
+		if name == candidate {
+			return true
+		}
+	}
+	return false
+}
+
+// isRAIDClass reports whether class, ignoring surrounding whitespace, is one
+// of the canonical storage-controller classes ("MassStorageController" or
+// "StorageController"). The comparison is case-sensitive.
+func isRAIDClass(class string) bool {
+	name := strings.TrimSpace(class)
+	if name == "MassStorageController" {
+		return true
+	}
+	return name == "StorageController"
+}
--- a/audit/internal/collector/cpu.go
+++ b/audit/internal/collector/cpu.go
@@ -3,42 +3,39 @@ package collector
 import (
 	"bee/audit/internal/schema"
 	"bufio"
-	"fmt"
 	"log/slog"
 	"os"
+	"path/filepath"
 	"strconv"
 	"strings"
 )

-// collectCPUs runs dmidecode -t 4 and reads microcode version from sysfs.
-func collectCPUs(boardSerial string) ([]schema.HardwareCPU, []schema.HardwareFirmwareRecord) {
+// collectCPUs runs dmidecode -t 4 and enriches CPUs with microcode from sysfs.
+func collectCPUs() []schema.HardwareCPU {
 	out, err := runDmidecode("4")
 	if err != nil {
 		slog.Warn("cpu: dmidecode type 4 failed", "err", err)
-		return nil, nil
+		return nil
 	}

-	cpus := parseCPUs(out, boardSerial)
-
-	var firmware []schema.HardwareFirmwareRecord
+	cpus := parseCPUs(out)
 	if mc := readMicrocode(); mc != "" {
-		firmware = append(firmware, schema.HardwareFirmwareRecord{
-			DeviceName: "CPU Microcode",
-			Version:    mc,
-		})
+		for i := range cpus {
+			cpus[i].Firmware = &mc
+		}
 	}

 	slog.Info("cpu: collected", "count", len(cpus))
-	return cpus, firmware
+	return cpus
 }

 // parseCPUs splits dmidecode output into per-processor sections and parses each.
-func parseCPUs(output, boardSerial string) []schema.HardwareCPU {
+func parseCPUs(output string) []schema.HardwareCPU {
 	sections := splitDMISections(output, "Processor Information")
 	cpus := make([]schema.HardwareCPU, 0, len(sections))

 	for _, section := range sections {
-		cpu, ok := parseCPUSection(section, boardSerial)
+		cpu, ok := parseCPUSection(section)
 		if !ok {
 			continue
 		}
@@ -49,14 +46,16 @@ func parseCPUs(output, boardSerial string) []schema.HardwareCPU {

 // parseCPUSection parses one "Processor Information" block into a HardwareCPU.
 // Returns false if the socket is unpopulated.
-func parseCPUSection(fields map[string]string, boardSerial string) (schema.HardwareCPU, bool) {
+func parseCPUSection(fields map[string]string) (schema.HardwareCPU, bool) {
 	status := parseCPUStatus(fields["Status"])
-	if status == "EMPTY" {
+	if status == statusEmpty {
 		return schema.HardwareCPU{}, false
 	}

 	cpu := schema.HardwareCPU{}
 	cpu.Status = &status
+	present := true
+	cpu.Present = &present

 	if socket, ok := parseSocketIndex(fields["Socket Designation"]); ok {
 		cpu.Socket = &socket
@@ -70,11 +69,6 @@ func parseCPUSection(fields map[string]string, boardSerial string) (schema.Hardw
 	}
 	if v := cleanDMIValue(fields["Serial Number"]); v != "" {
 		cpu.SerialNumber = &v
-	} else if boardSerial != "" && cpu.Socket != nil {
-		// Intel Xeon never exposes serial via DMI — generate stable fallback
-		// matching core's generateCPUVendorSerial() logic
-		fb := fmt.Sprintf("%s-CPU-%d", boardSerial, *cpu.Socket)
-		cpu.SerialNumber = &fb
 	}

 	if v := parseMHz(fields["Max Speed"]); v > 0 {
@@ -99,15 +93,15 @@ func parseCPUStatus(raw string) string {
 	upper := strings.ToUpper(raw)
 	switch {
 	case upper == "" || upper == "UNKNOWN":
-		return "UNKNOWN"
+		return statusUnknown
 	case strings.Contains(upper, "UNPOPULATED") || strings.Contains(upper, "NOT POPULATED"):
-		return "EMPTY"
+		return statusEmpty
 	case strings.Contains(upper, "ENABLED"):
-		return "OK"
+		return statusOK
 	case strings.Contains(upper, "DISABLED"):
-		return "WARNING"
+		return statusWarning
 	default:
-		return "UNKNOWN"
+		return statusUnknown
 	}
 }

@@ -178,7 +172,7 @@ func parseInt(v string) int {
 // readMicrocode reads the CPU microcode revision from sysfs.
 // Returns empty string if unavailable.
 func readMicrocode() string {
-	data, err := os.ReadFile("/sys/devices/system/cpu/cpu0/microcode/version")
+	data, err := os.ReadFile(filepath.Join(cpuSysBaseDir, "cpu0", "microcode", "version"))
 	if err != nil {
 		return ""
 	}
--- a/audit/internal/collector/cpu_telemetry.go
+++ b/audit/internal/collector/cpu_telemetry.go
@@ -0,0 +1,196 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+var (
+	// cpuSysBaseDir is the sysfs directory that holds the per-CPU cpuN entries.
+	// It is a variable so tests can point it at a temporary directory.
+	cpuSysBaseDir = "/sys/devices/system/cpu"
+	// socketIndexRe captures the number that follows "package id", "socket" or
+	// "cpu" (case-insensitive, with optional whitespace in between).
+	socketIndexRe = regexp.MustCompile(`(?i)(?:package id|socket|cpu)\s*([0-9]+)`)
+)
+
+func enrichCPUsWithTelemetry(cpus []schema.HardwareCPU, doc sensorsDoc) []schema.HardwareCPU {
+	if len(cpus) == 0 {
+		return cpus
+	}
+
+	tempBySocket := cpuTempsFromSensors(doc, len(cpus))
+	powerBySocket := cpuPowerFromSensors(doc, len(cpus))
+	throttleBySocket := cpuThrottleBySocket()
+
+	for i := range cpus {
+		socket := 0
+		if cpus[i].Socket != nil {
+			socket = *cpus[i].Socket
+		}
+		if value, ok := tempBySocket[socket]; ok {
+			cpus[i].TemperatureC = &value
+		}
+		if value, ok := powerBySocket[socket]; ok {
+			cpus[i].PowerW = &value
+		}
+		if value, ok := throttleBySocket[socket]; ok {
+			cpus[i].Throttled = &value
+		}
+	}
+
+	return cpus
+}
+
+// cpuTempsFromSensors builds a socket-index -> temperature map from the
+// sensors document.
+//
+// Only features classified as "temp" that yield an "_input" reading are
+// considered. A feature whose chip or feature name carries a socket number
+// (see detectCPUSocket) supplies the value for that socket; the first such
+// feature wins. Features that merely look CPU-related (isLikelyCPUTemp) are
+// kept as a fallback, which is used for socket 0 only when nothing was matched
+// by socket number and the machine has exactly one CPU.
+//
+// Features that look like memory sensors are skipped: labels such as
+// "CPU0 DIMM A1" contain a "CPU<n>" token and would otherwise be reported as
+// the temperature of that CPU socket.
+func cpuTempsFromSensors(doc sensorsDoc, cpuCount int) map[int]float64 {
+	out := map[int]float64{}
+	if len(doc) == 0 {
+		return out
+	}
+
+	// Map iteration order is unspecified. Walk chips and features in sorted
+	// order so that "first match wins" selects the same sensor on every run.
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips)
+
+	var fallback []float64
+	for _, chip := range chips {
+		features := doc[chip]
+		featureNames := make([]string, 0, len(features))
+		for featureName := range features {
+			featureNames = append(featureNames, featureName)
+		}
+		sort.Strings(featureNames)
+
+		for _, featureName := range featureNames {
+			feature, ok := features[featureName].(map[string]any)
+			if !ok {
+				continue
+			}
+			if classifySensorFeature(feature) != "temp" {
+				continue
+			}
+			if isLikelyMemoryTemp(chip, featureName) {
+				// DIMM sensor; handled by the memory enrichment instead.
+				continue
+			}
+			temp, ok := firstFeatureFloat(feature, "_input")
+			if !ok {
+				continue
+			}
+			if socket, ok := detectCPUSocket(chip, featureName); ok {
+				if _, exists := out[socket]; !exists {
+					out[socket] = temp
+				}
+				continue
+			}
+			if isLikelyCPUTemp(chip, featureName) {
+				fallback = append(fallback, temp)
+			}
+		}
+	}
+	if len(out) == 0 && cpuCount == 1 && len(fallback) > 0 {
+		out[0] = fallback[0]
+	}
+	return out
+}
+
+// cpuPowerFromSensors builds a socket-index -> power map from the sensors
+// document.
+//
+// Only features classified as "power" are considered; the reading is whatever
+// firstFeatureFloatWithContains returns for the "power" key fragment. A
+// feature whose chip or feature name carries a socket number (see
+// detectCPUSocket) supplies the value for that socket; the first such feature
+// wins. Features that merely look CPU-related (isLikelyCPUPower) are kept as a
+// fallback, which is used for socket 0 only when nothing was matched by socket
+// number and the machine has exactly one CPU.
+func cpuPowerFromSensors(doc sensorsDoc, cpuCount int) map[int]float64 {
+	out := map[int]float64{}
+	if len(doc) == 0 {
+		return out
+	}
+
+	// Map iteration order is unspecified. Walk chips and features in sorted
+	// order so that "first match wins" selects the same sensor on every run.
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips)
+
+	var fallback []float64
+	for _, chip := range chips {
+		features := doc[chip]
+		featureNames := make([]string, 0, len(features))
+		for featureName := range features {
+			featureNames = append(featureNames, featureName)
+		}
+		sort.Strings(featureNames)
+
+		for _, featureName := range featureNames {
+			feature, ok := features[featureName].(map[string]any)
+			if !ok {
+				continue
+			}
+			if classifySensorFeature(feature) != "power" {
+				continue
+			}
+			power, ok := firstFeatureFloatWithContains(feature, []string{"power"})
+			if !ok {
+				continue
+			}
+			if socket, ok := detectCPUSocket(chip, featureName); ok {
+				if _, exists := out[socket]; !exists {
+					out[socket] = power
+				}
+				continue
+			}
+			if isLikelyCPUPower(chip, featureName) {
+				fallback = append(fallback, power)
+			}
+		}
+	}
+	if len(out) == 0 && cpuCount == 1 && len(fallback) > 0 {
+		out[0] = fallback[0]
+	}
+	return out
+}
+
+// detectCPUSocket scans parts in order and returns the socket number captured
+// by socketIndexRe from the first part that yields a parsable integer. The
+// boolean is false when no part matches.
+func detectCPUSocket(parts ...string) (int, bool) {
+	for idx := range parts {
+		groups := socketIndexRe.FindStringSubmatch(strings.ToLower(parts[idx]))
+		if len(groups) != 2 {
+			continue
+		}
+		// A number that does not fit in an int is ignored and the next part tried.
+		if socket, err := strconv.Atoi(groups[1]); err == nil {
+			return socket, true
+		}
+	}
+	return 0, false
+}
+
+// isLikelyCPUTemp reports whether the chip name or feature name contains one
+// of the substrings this package treats as a CPU temperature marker. The match
+// is case-insensitive.
+func isLikelyCPUTemp(chip, feature string) bool {
+	haystack := strings.ToLower(chip + " " + feature)
+	markers := [...]string{"coretemp", "k10temp", "package id", "tdie", "tctl", "cpu temp"}
+	for _, marker := range markers {
+		if strings.Contains(haystack, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// isLikelyCPUPower reports whether the chip name or feature name contains one
+// of the substrings this package treats as a CPU power marker. The match is
+// case-insensitive.
+func isLikelyCPUPower(chip, feature string) bool {
+	haystack := strings.ToLower(chip + " " + feature)
+	markers := [...]string{"intel-rapl", "package id", "package-", "cpu power"}
+	for _, marker := range markers {
+		if strings.Contains(haystack, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// cpuThrottleBySocket walks the cpuN directories under cpuSysBaseDir and
+// returns a map holding true for every socket that has at least one CPU whose
+// throttle counters are non-zero. Sockets without throttling are simply absent
+// from the map.
+func cpuThrottleBySocket() map[int]bool {
+	throttled := make(map[int]bool)
+	dirs, err := filepath.Glob(filepath.Join(cpuSysBaseDir, "cpu[0-9]*"))
+	if err != nil {
+		return throttled
+	}
+	sort.Strings(dirs)
+	for _, dir := range dirs {
+		socket, known := readSocketIndex(dir)
+		if known && cpuPackageThrottled(dir) {
+			throttled[socket] = true
+		}
+	}
+	return throttled
+}
+
+// readSocketIndex reads topology/physical_package_id below cpuDir and returns
+// it as an integer. The boolean is false when the file cannot be read, does
+// not hold an integer, or holds a negative value.
+func readSocketIndex(cpuDir string) (int, bool) {
+	data, err := os.ReadFile(filepath.Join(cpuDir, "topology", "physical_package_id"))
+	if err != nil {
+		return 0, false
+	}
+	id, convErr := strconv.Atoi(strings.TrimSpace(string(data)))
+	if convErr == nil && id >= 0 {
+		return id, true
+	}
+	return 0, false
+}
+
+// cpuPackageThrottled reports whether either thermal_throttle counter below
+// cpuDir (package_throttle_count, then core_throttle_count) holds a value
+// greater than zero. Unreadable or unparsable counters are skipped.
+func cpuPackageThrottled(cpuDir string) bool {
+	throttleDir := filepath.Join(cpuDir, "thermal_throttle")
+	for _, counter := range [...]string{"package_throttle_count", "core_throttle_count"} {
+		data, err := os.ReadFile(filepath.Join(throttleDir, counter))
+		if err != nil {
+			continue
+		}
+		count, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
+		if err != nil || count <= 0 {
+			continue
+		}
+		return true
+	}
+	return false
+}
--- a/audit/internal/collector/cpu_telemetry_test.go
+++ b/audit/internal/collector/cpu_telemetry_test.go
@@ -0,0 +1,71 @@
+package collector
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+// TestEnrichCPUsWithTelemetry checks that a dual-socket CPU list picks up
+// per-socket temperature and power from a sensors document and the throttle
+// flag from a fake sysfs tree.
+func TestEnrichCPUsWithTelemetry(t *testing.T) {
+	sysRoot := t.TempDir()
+	savedBase := cpuSysBaseDir
+	cpuSysBaseDir = sysRoot
+	t.Cleanup(func() { cpuSysBaseDir = savedBase })
+
+	// cpu0 belongs to package 0 and has throttled; cpu1 belongs to package 1
+	// and has not.
+	cpu0Dir := filepath.Join(sysRoot, "cpu0")
+	cpu1Dir := filepath.Join(sysRoot, "cpu1")
+	mustWriteFile(t, filepath.Join(cpu0Dir, "topology", "physical_package_id"), "0\n")
+	mustWriteFile(t, filepath.Join(cpu0Dir, "thermal_throttle", "package_throttle_count"), "3\n")
+	mustWriteFile(t, filepath.Join(cpu1Dir, "topology", "physical_package_id"), "1\n")
+	mustWriteFile(t, filepath.Join(cpu1Dir, "thermal_throttle", "package_throttle_count"), "0\n")
+
+	sensors := sensorsDoc{
+		"coretemp-isa-0000": {
+			"Package id 0": map[string]any{"temp1_input": 61.5},
+			"Package id 1": map[string]any{"temp2_input": 58.0},
+		},
+		"intel-rapl-mmio-0": {
+			"Package id 0": map[string]any{"power1_average": 180.0},
+			"Package id 1": map[string]any{"power2_average": 175.0},
+		},
+	}
+
+	okStatus := statusOK
+	firstSocket, secondSocket := 0, 1
+	input := []schema.HardwareCPU{
+		{Socket: &firstSocket, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &okStatus}},
+		{Socket: &secondSocket, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &okStatus}},
+	}
+
+	enriched := enrichCPUsWithTelemetry(input, sensors)
+	first, second := enriched[0], enriched[1]
+
+	if first.TemperatureC == nil || *first.TemperatureC != 61.5 {
+		t.Fatalf("cpu0 temperature mismatch: %#v", first.TemperatureC)
+	}
+	if first.PowerW == nil || *first.PowerW != 180.0 {
+		t.Fatalf("cpu0 power mismatch: %#v", first.PowerW)
+	}
+	if first.Throttled == nil || !*first.Throttled {
+		t.Fatalf("cpu0 throttled mismatch: %#v", first.Throttled)
+	}
+	if second.TemperatureC == nil || *second.TemperatureC != 58.0 {
+		t.Fatalf("cpu1 temperature mismatch: %#v", second.TemperatureC)
+	}
+	if second.PowerW == nil || *second.PowerW != 175.0 {
+		t.Fatalf("cpu1 power mismatch: %#v", second.PowerW)
+	}
+	// An unset flag is acceptable for cpu1; only an explicit true is wrong.
+	if second.Throttled != nil && *second.Throttled {
+		t.Fatalf("cpu1 throttled mismatch: %#v", second.Throttled)
+	}
+}
+
+// mustWriteFile creates the parent directory of path as needed and writes
+// content to path, failing the test immediately on any error.
+func mustWriteFile(t *testing.T, path, content string) {
+	t.Helper()
+	err := os.MkdirAll(filepath.Dir(path), 0755)
+	if err != nil {
+		t.Fatalf("mkdir %s: %v", path, err)
+	}
+	err = os.WriteFile(path, []byte(content), 0644)
+	if err != nil {
+		t.Fatalf("write %s: %v", path, err)
+	}
+}
--- a/audit/internal/collector/cpu_test.go
+++ b/audit/internal/collector/cpu_test.go
@@ -1,12 +1,14 @@
 package collector

 import (
+	"os"
+	"path/filepath"
 	"testing"
 )

 func TestParseCPUs_dual_socket(t *testing.T) {
 	out := mustReadFile(t, "testdata/dmidecode_type4.txt")
-	cpus := parseCPUs(out, "CAR315KA0803B90")
+	cpus := parseCPUs(out)

 	if len(cpus) != 2 {
 		t.Fatalf("expected 2 CPUs, got %d", len(cpus))
@@ -37,23 +39,22 @@ func TestParseCPUs_dual_socket(t *testing.T) {
 	if cpu0.Status == nil || *cpu0.Status != "OK" {
 		t.Errorf("cpu0 status: got %v, want OK", cpu0.Status)
 	}
-	// Intel Xeon serial not available → fallback
-	if cpu0.SerialNumber == nil || *cpu0.SerialNumber != "CAR315KA0803B90-CPU-0" {
-		t.Errorf("cpu0 serial fallback: got %v, want CAR315KA0803B90-CPU-0", cpu0.SerialNumber)
+	if cpu0.SerialNumber != nil {
+		t.Errorf("cpu0 serial should stay nil without source data, got %v", cpu0.SerialNumber)
 	}

 	cpu1 := cpus[1]
 	if cpu1.Socket == nil || *cpu1.Socket != 1 {
 		t.Errorf("cpu1 socket: got %v, want 1", cpu1.Socket)
 	}
-	if cpu1.SerialNumber == nil || *cpu1.SerialNumber != "CAR315KA0803B90-CPU-1" {
-		t.Errorf("cpu1 serial fallback: got %v, want CAR315KA0803B90-CPU-1", cpu1.SerialNumber)
+	if cpu1.SerialNumber != nil {
+		t.Errorf("cpu1 serial should stay nil without source data, got %v", cpu1.SerialNumber)
 	}
 }

 func TestParseCPUs_unpopulated_skipped(t *testing.T) {
 	out := mustReadFile(t, "testdata/dmidecode_type4_disabled.txt")
-	cpus := parseCPUs(out, "BOARD-001")
+	cpus := parseCPUs(out)

 	if len(cpus) != 1 {
 		t.Fatalf("expected 1 CPU (unpopulated skipped), got %d", len(cpus))
@@ -63,18 +64,51 @@ func TestParseCPUs_unpopulated_skipped(t *testing.T) {
 	}
 }

+func TestCollectCPUsSetsFirmwareFromMicrocode(t *testing.T) {
+	tmp := t.TempDir()
+	origBase := cpuSysBaseDir
+	cpuSysBaseDir = tmp
+	t.Cleanup(func() { cpuSysBaseDir = origBase })
+
+	if err := os.MkdirAll(filepath.Join(tmp, "cpu0", "microcode"), 0755); err != nil {
+		t.Fatalf("mkdir microcode dir: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(tmp, "cpu0", "microcode", "version"), []byte("0x2b000643\n"), 0644); err != nil {
+		t.Fatalf("write microcode version: %v", err)
+	}
+
+	origRun := execDmidecode
+	execDmidecode = func(typeNum string) (string, error) {
+		if typeNum != "4" {
+			t.Fatalf("unexpected dmidecode type: %s", typeNum)
+		}
+		return mustReadFile(t, "testdata/dmidecode_type4.txt"), nil
+	}
+	t.Cleanup(func() { execDmidecode = origRun })
+
+	cpus := collectCPUs()
+	if len(cpus) != 2 {
+		t.Fatalf("expected 2 CPUs, got %d", len(cpus))
+	}
+	for i, cpu := range cpus {
+		if cpu.Firmware == nil || *cpu.Firmware != "0x2b000643" {
+			t.Fatalf("cpu[%d] firmware=%v want microcode", i, cpu.Firmware)
+		}
+	}
+}
+
 func TestParseCPUStatus(t *testing.T) {
 	tests := []struct {
 		input string
 		want  string
 	}{
 		{"Populated, Enabled", "OK"},
-		{"Populated, Disabled By User", "WARNING"},
-		{"Populated, Disabled By BIOS", "WARNING"},
-		{"Unpopulated", "EMPTY"},
-		{"Not Populated", "EMPTY"},
-		{"Unknown", "UNKNOWN"},
-		{"", "UNKNOWN"},
+		{"Populated, Disabled By User", statusWarning},
+		{"Populated, Disabled By BIOS", statusWarning},
+		{"Unpopulated", statusEmpty},
+		{"Not Populated", statusEmpty},
+		{"Unknown", statusUnknown},
+		{"", statusUnknown},
 	}
 	for _, tt := range tests {
 		got := parseCPUStatus(tt.input)
--- a/audit/internal/collector/finalize.go
+++ b/audit/internal/collector/finalize.go
@@ -0,0 +1,110 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"strings"
+)
+
+// NormalizeSnapshot is the exported entry point for finalizeSnapshot; it
+// forwards snap and collectedAt unchanged.
+func NormalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
+	finalizeSnapshot(snap, collectedAt)
+}
+
+// finalizeSnapshot applies the last clean-up pass to snap in place. The
+// memory, storage, PCIe and power-supply lists are replaced by their filtered
+// versions, and then every remaining component that carries a status
+// (including CPUs, which are not filtered here) gets collectedAt as its
+// StatusCheckedAt unless one is already set.
+func finalizeSnapshot(snap *schema.HardwareSnapshot, collectedAt string) {
+	snap.Memory = filterMemory(snap.Memory)
+	snap.Storage = filterStorage(snap.Storage)
+	snap.PCIeDevices = filterPCIe(snap.PCIeDevices)
+	snap.PowerSupplies = filterPSUs(snap.PowerSupplies)
+
+	// Stamp after filtering so only surviving components are touched.
+	setComponentStatusMetadata(snap, collectedAt)
+}
+
+// filterMemory returns a new slice holding only the DIMMs that are not marked
+// absent, do not have the Empty status, and carry a non-empty serial number.
+func filterMemory(dimms []schema.HardwareMemory) []schema.HardwareMemory {
+	kept := make([]schema.HardwareMemory, 0, len(dimms))
+	for idx := range dimms {
+		dimm := dimms[idx]
+		absent := dimm.Present != nil && !*dimm.Present
+		emptySlot := dimm.Status != nil && *dimm.Status == statusEmpty
+		noSerial := dimm.SerialNumber == nil || *dimm.SerialNumber == ""
+		if absent || emptySlot || noSerial {
+			continue
+		}
+		kept = append(kept, dimm)
+	}
+	return kept
+}
+
+// filterStorage returns a new slice holding only the disks that carry a
+// non-empty serial number and whose model, when present, is not recognised by
+// isVirtualHDiskModel.
+func filterStorage(disks []schema.HardwareStorage) []schema.HardwareStorage {
+	kept := make([]schema.HardwareStorage, 0, len(disks))
+	for idx := range disks {
+		disk := disks[idx]
+		hasSerial := disk.SerialNumber != nil && *disk.SerialNumber != ""
+		if !hasSerial {
+			continue
+		}
+		if disk.Model == nil || !isVirtualHDiskModel(*disk.Model) {
+			kept = append(kept, disk)
+		}
+	}
+	return kept
+}
+
+// filterPCIe returns a new slice without the devices whose device class
+// contains "co-processor" (case-insensitive). Devices without a class are
+// kept.
+func filterPCIe(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
+	kept := make([]schema.HardwarePCIeDevice, 0, len(devs))
+	for idx := range devs {
+		dev := devs[idx]
+		if dev.DeviceClass != nil {
+			class := strings.ToLower(strings.TrimSpace(*dev.DeviceClass))
+			if strings.Contains(class, "co-processor") {
+				continue
+			}
+		}
+		kept = append(kept, dev)
+	}
+	return kept
+}
+
+// filterPSUs returns a new slice holding only the power supplies that have at
+// least one non-empty identifying field: serial number, slot, model or vendor.
+func filterPSUs(psus []schema.HardwarePowerSupply) []schema.HardwarePowerSupply {
+	kept := make([]schema.HardwarePowerSupply, 0, len(psus))
+	for idx := range psus {
+		psu := psus[idx]
+		identified := (psu.SerialNumber != nil && *psu.SerialNumber != "") ||
+			(psu.Slot != nil && *psu.Slot != "") ||
+			(psu.Model != nil && *psu.Model != "") ||
+			(psu.Vendor != nil && *psu.Vendor != "")
+		if identified {
+			kept = append(kept, psu)
+		}
+	}
+	return kept
+}
+
+// setComponentStatusMetadata stamps collectedAt onto the status block of every
+// CPU, DIMM, disk, PCIe device and power supply in snap, in that order, via
+// setStatusCheckedAt.
+func setComponentStatusMetadata(snap *schema.HardwareSnapshot, collectedAt string) {
+	total := len(snap.CPUs) + len(snap.Memory) + len(snap.Storage) + len(snap.PCIeDevices) + len(snap.PowerSupplies)
+	blocks := make([]*schema.HardwareComponentStatus, 0, total)
+
+	// Gather pointers into the snapshot so updates land on the real elements.
+	for idx := range snap.CPUs {
+		blocks = append(blocks, &snap.CPUs[idx].HardwareComponentStatus)
+	}
+	for idx := range snap.Memory {
+		blocks = append(blocks, &snap.Memory[idx].HardwareComponentStatus)
+	}
+	for idx := range snap.Storage {
+		blocks = append(blocks, &snap.Storage[idx].HardwareComponentStatus)
+	}
+	for idx := range snap.PCIeDevices {
+		blocks = append(blocks, &snap.PCIeDevices[idx].HardwareComponentStatus)
+	}
+	for idx := range snap.PowerSupplies {
+		blocks = append(blocks, &snap.PowerSupplies[idx].HardwareComponentStatus)
+	}
+
+	for _, block := range blocks {
+		setStatusCheckedAt(block, collectedAt)
+	}
+}
+
+// setStatusCheckedAt records collectedAt as the check time of status. It does
+// nothing when status is nil, when no non-empty Status is set, or when a check
+// time is already present.
+func setStatusCheckedAt(status *schema.HardwareComponentStatus, collectedAt string) {
+	switch {
+	case status == nil:
+		return
+	case status.Status == nil || *status.Status == "":
+		return
+	case status.StatusCheckedAt != nil:
+		return
+	}
+	status.StatusCheckedAt = &collectedAt
+}
--- a/audit/internal/collector/finalize_test.go
+++ b/audit/internal/collector/finalize_test.go
@@ -0,0 +1,93 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"testing"
+)
+
+func TestFinalizeSnapshotFiltersComponentsWithoutRequiredSerials(t *testing.T) {
+	collectedAt := "2026-03-15T12:00:00Z"
+	present := true
+	status := statusOK
+	serial := "SN-1"
+	virtualModel := "Virtual HDisk1"
+	realModel := "PASCARI"
+	coProcessorClass := "Co-processor"
+	gpuClass := "VideoController"
+
+	snap := schema.HardwareSnapshot{
+		Memory: []schema.HardwareMemory{
+			{Present: &present, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{Present: &present, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+		},
+		Storage: []schema.HardwareStorage{
+			{Model: &virtualModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{Model: &realModel, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+		},
+		PCIeDevices: []schema.HardwarePCIeDevice{
+			{DeviceClass: &coProcessorClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{DeviceClass: &gpuClass, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+		},
+		PowerSupplies: []schema.HardwarePowerSupply{
+			{SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+		},
+	}
+
+	finalizeSnapshot(&snap, collectedAt)
+
+	if len(snap.Memory) != 1 || snap.Memory[0].StatusCheckedAt == nil || *snap.Memory[0].StatusCheckedAt != collectedAt {
+		t.Fatalf("memory finalize mismatch: %+v", snap.Memory)
+	}
+	if len(snap.Storage) != 2 || snap.Storage[0].StatusCheckedAt == nil || *snap.Storage[0].StatusCheckedAt != collectedAt {
+		t.Fatalf("storage finalize mismatch: %+v", snap.Storage)
+	}
+	if len(snap.PCIeDevices) != 1 || snap.PCIeDevices[0].DeviceClass == nil || *snap.PCIeDevices[0].DeviceClass != gpuClass {
+		t.Fatalf("pcie finalize mismatch: %+v", snap.PCIeDevices)
+	}
+	if len(snap.PowerSupplies) != 1 || snap.PowerSupplies[0].StatusCheckedAt == nil || *snap.PowerSupplies[0].StatusCheckedAt != collectedAt {
+		t.Fatalf("psu finalize mismatch: %+v", snap.PowerSupplies)
+	}
+}
+
+// TestFinalizeSnapshotPreservesDuplicateSerials checks that finalizeSnapshot
+// neither drops nor rewrites storage entries that share a serial number.
+func TestFinalizeSnapshotPreservesDuplicateSerials(t *testing.T) {
+	collectedAt := "2026-03-15T12:00:00Z"
+	status := statusOK
+	model := "Device"
+	serial := "DUPLICATE"
+
+	snap := schema.HardwareSnapshot{
+		Storage: []schema.HardwareStorage{
+			{Model: &model, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+			{Model: &model, SerialNumber: &serial, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status}},
+		},
+	}
+
+	finalizeSnapshot(&snap, collectedAt)
+
+	// Guard the indexing below: a dropped entry should fail the test with a
+	// clear message instead of panicking on an out-of-range index.
+	if len(snap.Storage) != 2 {
+		t.Fatalf("len(snap.Storage)=%d want 2", len(snap.Storage))
+	}
+	if snap.Storage[0].SerialNumber == nil || snap.Storage[1].SerialNumber == nil {
+		t.Fatalf("serial numbers must be preserved: %+v", snap.Storage)
+	}
+
+	if got := *snap.Storage[0].SerialNumber; got != serial {
+		t.Fatalf("first serial changed: %q", got)
+	}
+	if got := *snap.Storage[1].SerialNumber; got != serial {
+		t.Fatalf("duplicate serial should stay unchanged: %q", got)
+	}
+}
+
+// TestFilterPSUsKeepsSlotOnlyEntries checks that a PSU identified only by its
+// slot survives filterPSUs while a PSU with no identifying field is dropped.
+func TestFilterPSUsKeepsSlotOnlyEntries(t *testing.T) {
+	slotName := "0"
+	okStatus := statusOK
+	ok := schema.HardwareComponentStatus{Status: &okStatus}
+
+	input := []schema.HardwarePowerSupply{
+		{Slot: &slotName, HardwareComponentStatus: ok},
+		{HardwareComponentStatus: ok},
+	}
+	got := filterPSUs(input)
+
+	if len(got) != 1 {
+		t.Fatalf("len(got)=%d want 1", len(got))
+	}
+	if kept := got[0]; kept.Slot == nil || *kept.Slot != "0" {
+		t.Fatalf("unexpected kept PSU: %+v", got[0])
+	}
+}
--- a/audit/internal/collector/memory.go
+++ b/audit/internal/collector/memory.go
@@ -47,12 +47,12 @@ func parseMemorySection(fields map[string]string) schema.HardwareMemory {
 	dimm.Present = &present

 	if !present {
-		status := "EMPTY"
+		status := statusEmpty
 		dimm.Status = &status
 		return dimm
 	}

-	status := "OK"
+	status := statusOK
 	dimm.Status = &status

 	if mb := parseMemorySizeMB(rawSize); mb > 0 {
--- a/audit/internal/collector/memory_telemetry.go
+++ b/audit/internal/collector/memory_telemetry.go
@@ -0,0 +1,203 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// edacBaseDir is the sysfs directory scanned for mc*/dimm* EDAC entries. It is
+// a variable so tests can point it at a temporary directory.
+var edacBaseDir = "/sys/devices/system/edac/mc"
+
+// edacDIMMStats holds the label and error counters read from one EDAC dimm
+// directory. A nil counter means the corresponding file could not be read or
+// parsed.
+type edacDIMMStats struct {
+	Label   string // trimmed contents of dimm_label (or label)
+	CECount *int64 // correctable error count
+	UECount *int64 // uncorrectable error count
+}
+
+// enrichMemoryWithTelemetry adds temperature and ECC error counters to each
+// DIMM. Sensors and EDAC entries are matched to a DIMM through the canonical
+// form of its slot and location labels; the first label that matches is used.
+//
+// Uncorrectable errors mark the DIMM Critical and flag data loss. Correctable
+// errors downgrade a DIMM that is OK (or has no status) to Warning. An
+// existing ErrorDescription is never overwritten. The slice is modified in
+// place and returned.
+func enrichMemoryWithTelemetry(dimms []schema.HardwareMemory, doc sensorsDoc) []schema.HardwareMemory {
+	if len(dimms) == 0 {
+		return dimms
+	}
+
+	temps := memoryTempsFromSensors(doc)
+	edac := readEDACStats()
+
+	for idx := range dimms {
+		dimm := &dimms[idx]
+		keys := dimmMatchKeys(dimm.Slot, dimm.Location)
+
+		for _, key := range keys {
+			temp, found := temps[key]
+			if !found {
+				continue
+			}
+			dimm.TemperatureC = &temp
+			break
+		}
+
+		for _, key := range keys {
+			stat, found := edac[key]
+			if !found {
+				continue
+			}
+			if stat.CECount != nil {
+				dimm.CorrectableECCErrorCount = stat.CECount
+			}
+			if stat.UECount != nil {
+				dimm.UncorrectableECCErrorCount = stat.UECount
+			}
+
+			hasUE := stat.UECount != nil && *stat.UECount > 0
+			hasCE := stat.CECount != nil && *stat.CECount > 0
+			switch {
+			case hasUE:
+				dimm.DataLossDetected = boolPtr(true)
+				critical := statusCritical
+				dimm.Status = &critical
+				if dimm.ErrorDescription == nil {
+					dimm.ErrorDescription = stringPtr("EDAC reports uncorrectable ECC errors")
+				}
+			case hasCE && (dimm.Status == nil || *dimm.Status == statusOK):
+				warning := statusWarning
+				dimm.Status = &warning
+				if dimm.ErrorDescription == nil {
+					dimm.ErrorDescription = stringPtr("EDAC reports correctable ECC errors")
+				}
+			}
+			// Only the first matching EDAC entry is applied.
+			break
+		}
+	}
+
+	return dimms
+}
+
+// memoryTempsFromSensors builds a canonical-label -> temperature map from the
+// sensors document.
+//
+// Only features classified as "temp" whose chip or feature name looks like a
+// memory sensor (isLikelyMemoryTemp) and that yield an "_input" reading are
+// used. The map key is canonicalLabel of the feature name; when several
+// features share a key, the first one wins.
+func memoryTempsFromSensors(doc sensorsDoc) map[string]float64 {
+	out := map[string]float64{}
+	if len(doc) == 0 {
+		return out
+	}
+
+	// Map iteration order is unspecified. Walk chips and features in sorted
+	// order so that "first one wins" selects the same sensor on every run.
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips)
+
+	for _, chip := range chips {
+		features := doc[chip]
+		featureNames := make([]string, 0, len(features))
+		for featureName := range features {
+			featureNames = append(featureNames, featureName)
+		}
+		sort.Strings(featureNames)
+
+		for _, featureName := range featureNames {
+			feature, ok := features[featureName].(map[string]any)
+			if !ok || classifySensorFeature(feature) != "temp" {
+				continue
+			}
+			if !isLikelyMemoryTemp(chip, featureName) {
+				continue
+			}
+			temp, ok := firstFeatureFloat(feature, "_input")
+			if !ok {
+				continue
+			}
+			key := canonicalLabel(featureName)
+			if key == "" {
+				continue
+			}
+			if _, exists := out[key]; !exists {
+				out[key] = temp
+			}
+		}
+	}
+	return out
+}
+
+// readEDACStats scans the mc*/dimm* directories under edacBaseDir and returns
+// the DIMM statistics keyed by canonicalLabel of each DIMM label. Directories
+// are visited in sorted order; a later entry with the same key replaces an
+// earlier one.
+func readEDACStats() map[string]edacDIMMStats {
+	byLabel := make(map[string]edacDIMMStats)
+	controllers, err := filepath.Glob(filepath.Join(edacBaseDir, "mc*"))
+	if err != nil {
+		return byLabel
+	}
+	sort.Strings(controllers)
+	for _, controller := range controllers {
+		modules, globErr := filepath.Glob(filepath.Join(controller, "dimm*"))
+		if globErr != nil {
+			continue
+		}
+		sort.Strings(modules)
+		for _, module := range modules {
+			if stat, ok := readEDACDIMMStats(module); ok {
+				if key := canonicalLabel(stat.Label); key != "" {
+					byLabel[key] = stat
+				}
+			}
+		}
+	}
+	return byLabel
+}
+
+func readEDACDIMMStats(dimmDir string) (edacDIMMStats, bool) {
+	labelBytes, err := os.ReadFile(filepath.Join(dimmDir, "dimm_label"))
+	if err != nil {
+		labelBytes, err = os.ReadFile(filepath.Join(dimmDir, "label"))
+		if err != nil {
+			return edacDIMMStats{}, false
+		}
+	}
+	label := strings.TrimSpace(string(labelBytes))
+	if label == "" {
+		return edacDIMMStats{}, false
+	}
+
+	stat := edacDIMMStats{Label: label}
+	if value, ok := readEDACCount(dimmDir, []string{"dimm_ce_count", "ce_count"}); ok {
+		stat.CECount = &value
+	}
+	if value, ok := readEDACCount(dimmDir, []string{"dimm_ue_count", "ue_count"}); ok {
+		stat.UECount = &value
+	}
+	return stat, true
+}
+
+// readEDACCount tries each file name in dir in order and returns the first
+// one that holds a non-negative integer. Files that are missing, unparsable or
+// negative are skipped. The boolean is false when no file qualifies.
+func readEDACCount(dir string, names []string) (int64, bool) {
+	for idx := range names {
+		data, readErr := os.ReadFile(filepath.Join(dir, names[idx]))
+		if readErr != nil {
+			continue
+		}
+		count, parseErr := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
+		if parseErr != nil || count < 0 {
+			continue
+		}
+		return count, true
+	}
+	return 0, false
+}
+
+// dimmMatchKeys returns the distinct, non-empty canonical labels derived from
+// slot and location, in that order. The result is nil when neither yields a
+// label.
+func dimmMatchKeys(slot, location *string) []string {
+	var keys []string
+	for _, candidate := range [...]*string{slot, location} {
+		key := canonicalLabel(derefString(candidate))
+		if key == "" {
+			continue
+		}
+		duplicate := false
+		for _, known := range keys {
+			if known == key {
+				duplicate = true
+				break
+			}
+		}
+		if !duplicate {
+			keys = append(keys, key)
+		}
+	}
+	return keys
+}
+
+func canonicalLabel(value string) string {
+	value = strings.ToUpper(strings.TrimSpace(value))
+	if value == "" {
+		return ""
+	}
+	var b strings.Builder
+	for _, r := range value {
+		if (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') {
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
+
+// isLikelyMemoryTemp reports whether the chip name or feature name contains
+// "dimm" or "sodimm", ignoring case.
+func isLikelyMemoryTemp(chip, feature string) bool {
+	haystack := strings.ToLower(chip + " " + feature)
+	for _, marker := range [...]string{"dimm", "sodimm"} {
+		if strings.Contains(haystack, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// boolPtr returns a pointer to a fresh bool holding value.
+func boolPtr(value bool) *bool {
+	ptr := new(bool)
+	*ptr = value
+	return ptr
+}
--- a/audit/internal/collector/memory_telemetry_test.go
+++ b/audit/internal/collector/memory_telemetry_test.go
@@ -0,0 +1,61 @@
+package collector
+
+import (
+	"path/filepath"
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+// TestEnrichMemoryWithTelemetry checks that DIMMs pick up temperature from a
+// sensors document and ECC counters from a fake EDAC tree, and that
+// correctable errors yield Warning while uncorrectable errors yield Critical
+// with the data-loss flag set.
+func TestEnrichMemoryWithTelemetry(t *testing.T) {
+	edacRoot := t.TempDir()
+	savedBase := edacBaseDir
+	edacBaseDir = edacRoot
+	t.Cleanup(func() { edacBaseDir = savedBase })
+
+	// dimm0 has correctable errors only; dimm1 has uncorrectable errors.
+	dimm0Dir := filepath.Join(edacRoot, "mc0", "dimm0")
+	dimm1Dir := filepath.Join(edacRoot, "mc0", "dimm1")
+	mustWriteFile(t, filepath.Join(dimm0Dir, "dimm_label"), "CPU0_DIMM_A1\n")
+	mustWriteFile(t, filepath.Join(dimm0Dir, "dimm_ce_count"), "7\n")
+	mustWriteFile(t, filepath.Join(dimm0Dir, "dimm_ue_count"), "0\n")
+	mustWriteFile(t, filepath.Join(dimm1Dir, "dimm_label"), "CPU1_DIMM_B2\n")
+	mustWriteFile(t, filepath.Join(dimm1Dir, "dimm_ce_count"), "0\n")
+	mustWriteFile(t, filepath.Join(dimm1Dir, "dimm_ue_count"), "2\n")
+
+	sensors := sensorsDoc{
+		"jc42-i2c-0-18": {
+			"CPU0 DIMM A1": map[string]any{"temp1_input": 43.0},
+			"CPU1 DIMM B2": map[string]any{"temp2_input": 46.0},
+		},
+	}
+
+	okStatus := statusOK
+	firstSlot, secondSlot := "CPU0_DIMM_A1", "CPU1_DIMM_B2"
+	input := []schema.HardwareMemory{
+		{Slot: &firstSlot, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &okStatus}},
+		{Slot: &secondSlot, HardwareComponentStatus: schema.HardwareComponentStatus{Status: &okStatus}},
+	}
+
+	enriched := enrichMemoryWithTelemetry(input, sensors)
+	first, second := enriched[0], enriched[1]
+
+	if first.TemperatureC == nil || *first.TemperatureC != 43.0 {
+		t.Fatalf("dimm0 temperature mismatch: %#v", first.TemperatureC)
+	}
+	if first.CorrectableECCErrorCount == nil || *first.CorrectableECCErrorCount != 7 {
+		t.Fatalf("dimm0 ce mismatch: %#v", first.CorrectableECCErrorCount)
+	}
+	if first.Status == nil || *first.Status != statusWarning {
+		t.Fatalf("dimm0 status mismatch: %#v", first.Status)
+	}
+	if second.TemperatureC == nil || *second.TemperatureC != 46.0 {
+		t.Fatalf("dimm1 temperature mismatch: %#v", second.TemperatureC)
+	}
+	if second.UncorrectableECCErrorCount == nil || *second.UncorrectableECCErrorCount != 2 {
+		t.Fatalf("dimm1 ue mismatch: %#v", second.UncorrectableECCErrorCount)
+	}
+	if second.Status == nil || *second.Status != statusCritical {
+		t.Fatalf("dimm1 status mismatch: %#v", second.Status)
+	}
+	if second.DataLossDetected == nil || !*second.DataLossDetected {
+		t.Fatalf("dimm1 data_loss_detected mismatch: %#v", second.DataLossDetected)
+	}
+}
--- a/audit/internal/collector/nic_mellanox.go
+++ b/audit/internal/collector/nic_mellanox.go
@@ -2,18 +2,21 @@ package collector

 import (
 	"bee/audit/internal/schema"
+	"context"
 	"log/slog"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"time"
 )

 const mellanoxVendorID = 0x15b3
+const nicProbeTimeout = 2 * time.Second

 var (
 	mstflintQuery = func(bdf string) (string, error) {
-		out, err := exec.Command("mstflint", "-d", bdf, "q").Output()
+		out, err := commandOutputWithTimeout(nicProbeTimeout, "mstflint", "-d", bdf, "q")
 		if err != nil {
 			return "", err
 		}
@@ -21,7 +24,7 @@ var (
 	}

 	ethtoolInfoQuery = func(iface string) (string, error) {
-		out, err := exec.Command("ethtool", "-i", iface).Output()
+		out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-i", iface)
 		if err != nil {
 			return "", err
 		}
@@ -29,6 +32,14 @@ var (
 	}

 	netIfacesByBDF = listNetIfacesByBDF
+	// readNetCarrierFile returns the trimmed sysfs "carrier" value for iface; it is a variable so tests can stub it.
+	readNetCarrierFile = func(iface string) (string, error) {
+		data, err := os.ReadFile(filepath.Join("/sys/class/net", iface, "carrier"))
+		if err != nil {
+			return "", err
+		}
+		return strings.TrimSpace(string(data)), nil
+	}
 )

 // enrichPCIeWithMellanox enriches Mellanox/NVIDIA Networking devices with
@@ -162,3 +173,9 @@ func listNetIfacesByBDF(bdf string) []string {
 	}
 	return ifaces
 }
+
+func commandOutputWithTimeout(timeout time.Duration, name string, args ...string) ([]byte, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), timeout) // the command is bound to this context, which expires after timeout
+	defer cancel()
+	return exec.CommandContext(ctx, name, args...).Output() // Output runs the command and returns its stdout
+}
--- a/audit/internal/collector/nic_telemetry.go
+++ b/audit/internal/collector/nic_telemetry.go
@@ -12,23 +12,19 @@ import (

 var (
 	ethtoolModuleQuery = func(iface string) (string, error) {
-		out, err := raidToolQuery("ethtool", "-m", iface)
+		out, err := commandOutputWithTimeout(nicProbeTimeout, "ethtool", "-m", iface)
 		if err != nil {
 			return "", err
 		}
 		return string(out), nil
 	}
-	readNetStatFile = func(iface, key string) (int64, error) {
-		path := filepath.Join("/sys/class/net", iface, "statistics", key)
+	readNetAddressFile = func(iface string) (string, error) {
+		path := filepath.Join("/sys/class/net", iface, "address")
 		raw, err := os.ReadFile(path)
 		if err != nil {
-			return 0, err
+			return "", err
 		}
-		v, err := strconv.ParseInt(strings.TrimSpace(string(raw)), 10, 64)
-		if err != nil {
-			return 0, err
-		}
-		return v, nil
+		return strings.TrimSpace(string(raw)), nil
 	}
 )

@@ -47,6 +43,12 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
 			continue
 		}
 		iface := ifaces[0]
+		devs[i].MacAddresses = collectInterfaceMACs(ifaces)
+		if devs[i].SerialNumber == nil {
+			if serial := queryPCIDeviceSerial(bdf); serial != "" {
+				devs[i].SerialNumber = &serial
+			}
+		}

 		if devs[i].Firmware == nil {
 			if out, err := ethtoolInfoQuery(iface); err == nil {
@@ -56,16 +58,13 @@ func enrichPCIeWithNICTelemetry(devs []schema.HardwarePCIeDevice) []schema.Hardw
 			}
 		}

-		if devs[i].Telemetry == nil {
-			devs[i].Telemetry = map[string]any{}
-		}
-		injectNICPacketStats(devs[i].Telemetry, iface)
 		if out, err := ethtoolModuleQuery(iface); err == nil {
-			injectSFPDOMTelemetry(devs[i].Telemetry, out)
+			if injectSFPDOMTelemetry(&devs[i], out) {
+				enriched++
+				continue
+			}
 		}
-		if len(devs[i].Telemetry) == 0 {
-			devs[i].Telemetry = nil
-		} else {
+		if len(devs[i].MacAddresses) > 0 || devs[i].Firmware != nil {
 			enriched++
 		}
 	}
@@ -77,31 +76,32 @@ func isNICDevice(dev schema.HardwarePCIeDevice) bool {
 	if dev.DeviceClass == nil {
 		return false
 	}
-	c := strings.ToLower(strings.TrimSpace(*dev.DeviceClass))
-	return strings.Contains(c, "ethernet controller") ||
-		strings.Contains(c, "network controller") ||
-		strings.Contains(c, "infiniband controller")
+	c := strings.TrimSpace(*dev.DeviceClass)
+	return isNICClass(c) || strings.EqualFold(c, "FibreChannelController")
 }

-func injectNICPacketStats(dst map[string]any, iface string) {
-	for _, key := range []string{"rx_packets", "tx_packets", "rx_errors", "tx_errors"} {
-		if v, err := readNetStatFile(iface, key); err == nil {
-			dst[key] = v
+func collectInterfaceMACs(ifaces []string) []string {
+	seen := map[string]struct{}{}
+	var out []string
+	for _, iface := range ifaces {
+		mac, err := readNetAddressFile(iface)
+		if err != nil || mac == "" {
+			continue
 		}
+		mac = strings.ToLower(strings.TrimSpace(mac))
+		if _, ok := seen[mac]; ok {
+			continue
+		}
+		seen[mac] = struct{}{}
+		out = append(out, mac)
 	}
-}
-
-func injectSFPDOMTelemetry(dst map[string]any, raw string) {
-	parsed := parseSFPDOM(raw)
-	for k, v := range parsed {
-		dst[k] = v
-	}
+	return out
 }

 var floatRe = regexp.MustCompile(`[-+]?[0-9]*\.?[0-9]+`)

-func parseSFPDOM(raw string) map[string]any {
-	out := map[string]any{}
+func injectSFPDOMTelemetry(dev *schema.HardwarePCIeDevice, raw string) bool {
+	var changed bool
 	for _, line := range strings.Split(raw, "\n") {
 		trimmed := strings.TrimSpace(line)
 		if trimmed == "" {
@@ -113,30 +113,138 @@ func parseSFPDOM(raw string) map[string]any {
 		}
 		key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
 		val := strings.TrimSpace(trimmed[idx+1:])
+		if val == "" || strings.EqualFold(val, "not supported") || strings.EqualFold(val, "unknown") {
+			continue
+		}

 		switch {
+		case key == "identifier":
+			s := parseSFPIdentifier(val)
+			dev.SFPIdentifier = &s
+			t := true
+			dev.SFPPresent = &t
+			changed = true
+		case key == "connector":
+			s := parseSFPConnector(val)
+			dev.SFPConnector = &s
+			changed = true
+		case key == "vendor name":
+			s := strings.TrimSpace(val)
+			dev.SFPVendor = &s
+			changed = true
+		case key == "vendor pn":
+			s := strings.TrimSpace(val)
+			dev.SFPPartNumber = &s
+			changed = true
+		case key == "vendor sn":
+			s := strings.TrimSpace(val)
+			dev.SFPSerialNumber = &s
+			changed = true
+		case strings.Contains(key, "laser wavelength"):
+			if f, ok := firstFloat(val); ok {
+				dev.SFPWavelengthNM = &f
+				changed = true
+			}
 		case strings.Contains(key, "module temperature"):
 			if f, ok := firstFloat(val); ok {
-				out["sfp_temperature_c"] = f
+				dev.SFPTemperatureC = &f
+				changed = true
 			}
 		case strings.Contains(key, "laser output power"):
 			if f, ok := dbmValue(val); ok {
-				out["sfp_tx_power_dbm"] = f
+				dev.SFPTXPowerDBM = &f
+				changed = true
 			}
 		case strings.Contains(key, "receiver signal"):
 			if f, ok := dbmValue(val); ok {
-				out["sfp_rx_power_dbm"] = f
+				dev.SFPRXPowerDBM = &f
+				changed = true
 			}
 		case strings.Contains(key, "module voltage"):
 			if f, ok := firstFloat(val); ok {
-				out["sfp_voltage_v"] = f
+				dev.SFPVoltageV = &f
+				changed = true
 			}
 		case strings.Contains(key, "laser bias current"):
 			if f, ok := firstFloat(val); ok {
-				out["sfp_bias_ma"] = f
+				dev.SFPBiasMA = &f
+				changed = true
 			}
 		}
 	}
+	return changed
+}
+
+// parseSFPIdentifier reduces an ethtool identifier value such as "0x03 (SFP)" to its parenthesised label ("SFP"); val is returned unchanged when it has none.
+func parseSFPIdentifier(val string) string {
+	label := extractParens(val)
+	if label == "" {
+		return val
+	}
+	return label
+}
+
+// parseSFPConnector reduces an ethtool connector value such as "0x07 (LC)" to its parenthesised label ("LC"); val is returned unchanged when it has none.
+func parseSFPConnector(val string) string {
+	connector := extractParens(val)
+	if connector != "" {
+		return connector
+	}
+	return val
+}
+
+var parenRe = regexp.MustCompile(`\(([^)]+)\)`) // first "(...)" group with non-empty content; capture 1 is the content
+
+// extractParens returns the trimmed text inside the first "(...)" of s, or "" when there is none.
+func extractParens(s string) string {
+	if groups := parenRe.FindStringSubmatch(s); len(groups) >= 2 {
+		return strings.TrimSpace(groups[1])
+	}
+	return ""
+}
+
+func parseSFPDOM(raw string) map[string]any { // map form of the SFP fields that injectSFPDOMTelemetry extracts from raw
+	dev := schema.HardwarePCIeDevice{}
+	if !injectSFPDOMTelemetry(&dev, raw) { // false: no field was set on the scratch device
+		return map[string]any{}
+	}
+	out := map[string]any{} // only the fields that were populated are copied, under their sfp_* keys
+	if dev.SFPPresent != nil {
+		out["sfp_present"] = *dev.SFPPresent
+	}
+	if dev.SFPIdentifier != nil {
+		out["sfp_identifier"] = *dev.SFPIdentifier
+	}
+	if dev.SFPConnector != nil {
+		out["sfp_connector"] = *dev.SFPConnector
+	}
+	if dev.SFPVendor != nil {
+		out["sfp_vendor"] = *dev.SFPVendor
+	}
+	if dev.SFPPartNumber != nil {
+		out["sfp_part_number"] = *dev.SFPPartNumber
+	}
+	if dev.SFPSerialNumber != nil {
+		out["sfp_serial_number"] = *dev.SFPSerialNumber
+	}
+	if dev.SFPWavelengthNM != nil {
+		out["sfp_wavelength_nm"] = *dev.SFPWavelengthNM
+	}
+	if dev.SFPTemperatureC != nil {
+		out["sfp_temperature_c"] = *dev.SFPTemperatureC
+	}
+	if dev.SFPTXPowerDBM != nil {
+		out["sfp_tx_power_dbm"] = *dev.SFPTXPowerDBM
+	}
+	if dev.SFPRXPowerDBM != nil {
+		out["sfp_rx_power_dbm"] = *dev.SFPRXPowerDBM
+	}
+	if dev.SFPVoltageV != nil {
+		out["sfp_voltage_v"] = *dev.SFPVoltageV
+	}
+	if dev.SFPBiasMA != nil {
+		out["sfp_bias_ma"] = *dev.SFPBiasMA
+	}
 	return out
 }

--- a/audit/internal/collector/nic_telemetry_test.go
+++ b/audit/internal/collector/nic_telemetry_test.go
@@ -1,6 +1,10 @@
 package collector

-import "testing"
+import (
+	"bee/audit/internal/schema"
+	"fmt"
+	"testing"
+)

 func TestParseSFPDOM(t *testing.T) {
 	raw := `
@@ -29,6 +33,110 @@ func TestParseSFPDOM(t *testing.T) {
 	}
 }

+func TestParseLSPCIDetailSerial(t *testing.T) { // fixture: a device header line followed by an indented "Serial number:" line
+	raw := `
+05:00.0 Ethernet controller: Mellanox Technologies MT28908 Family [ConnectX-6]
+	Serial number: NIC-SN-12345
+`
+	if got := parseLSPCIDetailSerial(raw); got != "NIC-SN-12345" { // only the value after the colon is expected back
+		t.Fatalf("serial=%q want %q", got, "NIC-SN-12345")
+	}
+}
+
+func TestParsePCIVPDSerial(t *testing.T) {
+	raw := []byte{0x82, 0x05, 0x00, 'M', 'L', 'X', '5', 0x90, 0x08, 0x00, 'S', 'N', 0x08, 'M', 'T', '1', '2', '3', '4', '5', '6'} // NOTE(review): if 0x82/0x90 are length-prefixed VPD tags, 0x82 declares 5 bytes but only "MLX5" (4) follows, and 0x90 declares 8 while the SN field spans 11 — confirm the parser is meant to tolerate inexact lengths
+	if got := parsePCIVPDSerial(raw); got != "MT123456" {
+		t.Fatalf("serial=%q want %q", got, "MT123456")
+	}
+}
+
+func TestEnrichPCIeWithNICTelemetryAddsSerialFallback(t *testing.T) { // serial and MAC must be filled in even though the firmware and optics probes fail
+	origDetail := queryPCILSPCIDetail
+	origVPD := readPCIVPDFile
+	origIfaces := netIfacesByBDF
+	origReadMAC := readNetAddressFile
+	origEth := ethtoolInfoQuery
+	origModule := ethtoolModuleQuery
+	origCarrier := readNetCarrierFile
+	t.Cleanup(func() { // put the real package-level hooks back once the test ends
+		queryPCILSPCIDetail = origDetail
+		readPCIVPDFile = origVPD
+		netIfacesByBDF = origIfaces
+		readNetAddressFile = origReadMAC
+		ethtoolInfoQuery = origEth
+		ethtoolModuleQuery = origModule
+		readNetCarrierFile = origCarrier
+	})
+
+	queryPCILSPCIDetail = func(bdf string) (string, error) { // lspci detail stub: the only stub here that yields a serial
+		if bdf != "0000:18:00.0" {
+			t.Fatalf("unexpected bdf: %s", bdf)
+		}
+		return "Serial number: NIC-SN-98765\n", nil
+	}
+	readPCIVPDFile = func(string) ([]byte, error) { // the VPD read always fails in this test
+		return nil, fmt.Errorf("no vpd needed")
+	}
+	netIfacesByBDF = func(string) []string { return []string{"eth0"} }
+	readNetAddressFile = func(iface string) (string, error) {
+		if iface != "eth0" {
+			t.Fatalf("unexpected iface: %s", iface)
+		}
+		return "aa:bb:cc:dd:ee:ff", nil
+	}
+	readNetCarrierFile = func(string) (string, error) { return "1", nil }
+	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
+	ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("skip optics") }
+
+	class := "EthernetController" // NOTE(review): presumably a class name accepted by isNICClass — confirm
+	bdf := "0000:18:00.0"
+	devs := []schema.HardwarePCIeDevice{{
+		DeviceClass: &class,
+		BDF:         &bdf,
+	}}
+
+	out := enrichPCIeWithNICTelemetry(devs)
+	if out[0].SerialNumber == nil || *out[0].SerialNumber != "NIC-SN-98765" { // value served by the lspci detail stub
+		t.Fatalf("serial=%v want NIC-SN-98765", out[0].SerialNumber)
+	}
+	if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" {
+		t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
+	}
+}
+
+func TestEnrichPCIeWithNICTelemetrySkipsModuleQueryWithoutCarrier(t *testing.T) { // MAC enrichment must still happen when the carrier stub reports "0"
+	origIfaces := netIfacesByBDF
+	origReadMAC := readNetAddressFile
+	origEth := ethtoolInfoQuery
+	origModule := ethtoolModuleQuery
+	origCarrier := readNetCarrierFile
+	t.Cleanup(func() { // put the real package-level hooks back once the test ends
+		netIfacesByBDF = origIfaces
+		readNetAddressFile = origReadMAC
+		ethtoolInfoQuery = origEth
+		ethtoolModuleQuery = origModule
+		readNetCarrierFile = origCarrier
+	})
+
+	netIfacesByBDF = func(string) []string { return []string{"eth0"} }
+	readNetAddressFile = func(string) (string, error) { return "aa:bb:cc:dd:ee:ff", nil }
+	readNetCarrierFile = func(string) (string, error) { return "0", nil }
+	ethtoolInfoQuery = func(string) (string, error) { return "", fmt.Errorf("skip firmware") }
+	ethtoolModuleQuery = func(string) (string, error) { return "", fmt.Errorf("no module") } // NOTE(review): this stub returns an error rather than failing the test, so nothing here proves the module query is skipped — confirm intent
+
+	class := "EthernetController"
+	bdf := "0000:18:00.0"
+	devs := []schema.HardwarePCIeDevice{{
+		DeviceClass: &class,
+		BDF:         &bdf,
+	}}
+
+	out := enrichPCIeWithNICTelemetry(devs)
+	if len(out[0].MacAddresses) != 1 || out[0].MacAddresses[0] != "aa:bb:cc:dd:ee:ff" { // the MAC list is the only thing asserted
+		t.Fatalf("mac_addresses=%v", out[0].MacAddresses)
+	}
+}
+
 func TestDBMValue(t *testing.T) {
 	tests := []struct {
 		in   string
--- a/audit/internal/collector/nvidia.go
+++ b/audit/internal/collector/nvidia.go
@@ -13,29 +13,34 @@ import (
 const nvidiaVendorID = 0x10de

 type nvidiaGPUInfo struct {
-	BDF            string
-	Serial         string
-	VBIOS          string
-	TemperatureC   *float64
-	PowerW         *float64
-	ECCUncorrected *int64
-	ECCCorrected   *int64
-	HWSlowdown     *bool
+	Index              int
+	BDF                string
+	Name               string
+	Serial             string
+	VBIOS              string
+	TemperatureC       *float64
+	PowerW             *float64
+	ECCUncorrected     *int64
+	ECCCorrected       *int64
+	HWSlowdown         *bool
+	PCIeLinkGenCurrent *int
+	PCIeLinkGenMax     *int
+	PCIeLinkWidthCur   *int
+	PCIeLinkWidthMax   *int
 }

 // enrichPCIeWithNVIDIA enriches NVIDIA PCIe devices with data from nvidia-smi.
-// If the driver/tool is unavailable, NVIDIA devices get UNKNOWN status and
-// a stable serial fallback based on board serial + slot.
-func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice, boardSerial string) []schema.HardwarePCIeDevice {
+// If the driver/tool is unavailable, NVIDIA devices get Unknown status.
+func enrichPCIeWithNVIDIA(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
 	if !hasNVIDIADevices(devs) {
 		return devs
 	}
 	gpuByBDF, err := queryNVIDIAGPUs()
 	if err != nil {
 		slog.Info("nvidia: enrichment skipped", "err", err)
-		return enrichPCIeWithNVIDIAData(devs, nil, boardSerial, false)
+		return enrichPCIeWithNVIDIAData(devs, nil, false)
 	}
-	return enrichPCIeWithNVIDIAData(devs, gpuByBDF, boardSerial, true)
+	return enrichPCIeWithNVIDIAData(devs, gpuByBDF, true)
 }

 func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
@@ -47,7 +52,7 @@ func hasNVIDIADevices(devs []schema.HardwarePCIeDevice) bool {
 	return false
 }

-func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, boardSerial string, driverLoaded bool) []schema.HardwarePCIeDevice {
+func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[string]nvidiaGPUInfo, driverLoaded bool) []schema.HardwarePCIeDevice {
 	enriched := 0
 	for i := range devs {
 		if !isNVIDIADevice(devs[i]) {
@@ -55,7 +60,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 		}

 		if !driverLoaded {
-			setPCIeFallback(&devs[i], boardSerial)
+			setPCIeFallback(&devs[i])
 			continue
 		}

@@ -65,22 +70,24 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 		}
 		info, ok := gpuByBDF[bdf]
 		if !ok {
-			setPCIeFallback(&devs[i], boardSerial)
+			setPCIeFallback(&devs[i])
 			continue
 		}

+		if v := strings.TrimSpace(info.Name); v != "" {
+			devs[i].Model = &v
+		}
 		if v := strings.TrimSpace(info.Serial); v != "" {
 			devs[i].SerialNumber = &v
-		} else {
-			setPCIeFallbackSerial(&devs[i], boardSerial)
 		}
 		if v := strings.TrimSpace(info.VBIOS); v != "" {
 			devs[i].Firmware = &v
 		}

-		status := "OK"
+		status := statusOK
 		if info.ECCUncorrected != nil && *info.ECCUncorrected > 0 {
-			status = "WARNING"
+			status = statusWarning
+			devs[i].ErrorDescription = stringPtr("GPU reports uncorrected ECC errors")
 		}
 		devs[i].Status = &status
 		injectNVIDIATelemetry(&devs[i], info)
@@ -96,7 +103,7 @@ func enrichPCIeWithNVIDIAData(devs []schema.HardwarePCIeDevice, gpuByBDF map[str
 func queryNVIDIAGPUs() (map[string]nvidiaGPUInfo, error) {
 	out, err := exec.Command(
 		"nvidia-smi",
-		"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown",
+		"--query-gpu=index,pci.bus_id,name,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown,pcie.link.gen.current,pcie.link.gen.max,pcie.link.width.current,pcie.link.width.max",
 		"--format=csv,noheader,nounits",
 	).Output()
 	if err != nil {
@@ -120,8 +127,8 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		if len(rec) == 0 {
 			continue
 		}
-		if len(rec) < 9 {
-			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 9", len(rec))
+		if len(rec) < 14 {
+			return nil, fmt.Errorf("unexpected nvidia-smi columns: got %d, want 14", len(rec))
 		}

 		bdf := normalizePCIeBDF(rec[1])
@@ -130,14 +137,20 @@ func parseNVIDIASMIQuery(raw string) (map[string]nvidiaGPUInfo, error) {
 		}

 		info := nvidiaGPUInfo{
-			BDF:            bdf,
-			Serial:         strings.TrimSpace(rec[2]),
-			VBIOS:          strings.TrimSpace(rec[3]),
-			TemperatureC:   parseMaybeFloat(rec[4]),
-			PowerW:         parseMaybeFloat(rec[5]),
-			ECCUncorrected: parseMaybeInt64(rec[6]),
-			ECCCorrected:   parseMaybeInt64(rec[7]),
-			HWSlowdown:     parseMaybeBool(rec[8]),
+			Index:              parseRequiredInt(rec[0]),
+			BDF:                bdf,
+			Name:               strings.TrimSpace(rec[2]),
+			Serial:             strings.TrimSpace(rec[3]),
+			VBIOS:              strings.TrimSpace(rec[4]),
+			TemperatureC:       parseMaybeFloat(rec[5]),
+			PowerW:             parseMaybeFloat(rec[6]),
+			ECCUncorrected:     parseMaybeInt64(rec[7]),
+			ECCCorrected:       parseMaybeInt64(rec[8]),
+			HWSlowdown:         parseMaybeBool(rec[9]),
+			PCIeLinkGenCurrent: parseMaybeInt(rec[10]),
+			PCIeLinkGenMax:     parseMaybeInt(rec[11]),
+			PCIeLinkWidthCur:   parseMaybeInt(rec[12]),
+			PCIeLinkWidthMax:   parseMaybeInt(rec[13]),
 		}
 		result[bdf] = info
 	}
@@ -169,6 +182,30 @@ func parseMaybeInt64(v string) *int64 {
 	return &n
 }

+// parseMaybeInt parses an optional integer field; blank input, the "n/a" / "not supported" / "[not supported]" placeholders (any case) and non-numeric text all give nil.
+func parseMaybeInt(v string) *int {
+	text := strings.TrimSpace(v)
+	switch strings.ToLower(text) {
+	case "", "n/a", "not supported", "[not supported]":
+		return nil
+	}
+	if parsed, err := strconv.Atoi(text); err == nil {
+		return &parsed
+	}
+	return nil
+}
+
+// parseRequiredInt parses v as a base-10 int after trimming whitespace and falls back to 0 when it is not one.
+func parseRequiredInt(v string) int {
+	if parsed, err := strconv.Atoi(strings.TrimSpace(v)); err == nil {
+		return parsed
+	}
+	return 0
+}
+
+func pcieLinkGenLabel(gen int) string {
+	return "Gen" + strconv.Itoa(gen) // e.g. 4 becomes "Gen4"
+}
+
 func parseMaybeBool(v string) *bool {
 	v = strings.TrimSpace(strings.ToLower(v))
 	switch v {
@@ -212,46 +249,47 @@ func isNVIDIADevice(dev schema.HardwarePCIeDevice) bool {
 	return false
 }

-func setPCIeFallback(dev *schema.HardwarePCIeDevice, boardSerial string) {
-	setPCIeFallbackSerial(dev, boardSerial)
-	status := "UNKNOWN"
+func setPCIeFallback(dev *schema.HardwarePCIeDevice) {
+	status := statusUnknown
 	dev.Status = &status
 }

-func setPCIeFallbackSerial(dev *schema.HardwarePCIeDevice, boardSerial string) {
-	if strings.TrimSpace(boardSerial) == "" || dev.SerialNumber != nil {
-		return
-	}
-	slot := "unknown"
-	if dev.BDF != nil && strings.TrimSpace(*dev.BDF) != "" {
-		slot = strings.TrimSpace(*dev.BDF)
-	} else if dev.Slot != nil && strings.TrimSpace(*dev.Slot) != "" {
-		slot = strings.TrimSpace(*dev.Slot)
-	}
-	fb := fmt.Sprintf("%s-PCIE-%s", boardSerial, slot)
-	dev.SerialNumber = &fb
-}
-
 func injectNVIDIATelemetry(dev *schema.HardwarePCIeDevice, info nvidiaGPUInfo) {
 	if dev.Telemetry == nil {
 		dev.Telemetry = map[string]any{}
 	}
+	dev.Telemetry["nvidia_gpu_index"] = info.Index
 	if info.TemperatureC != nil {
-		dev.Telemetry["temperature_c"] = *info.TemperatureC
+		dev.TemperatureC = info.TemperatureC
 	}
 	if info.PowerW != nil {
-		dev.Telemetry["power_w"] = *info.PowerW
+		dev.PowerW = info.PowerW
 	}
 	if info.ECCUncorrected != nil {
-		dev.Telemetry["ecc_uncorrected_total"] = *info.ECCUncorrected
+		dev.ECCUncorrectedTotal = info.ECCUncorrected
 	}
 	if info.ECCCorrected != nil {
-		dev.Telemetry["ecc_corrected_total"] = *info.ECCCorrected
+		dev.ECCCorrectedTotal = info.ECCCorrected
 	}
 	if info.HWSlowdown != nil {
-		dev.Telemetry["hw_slowdown_active"] = *info.HWSlowdown
+		dev.HWSlowdown = info.HWSlowdown
 	}
-	if len(dev.Telemetry) == 0 {
-		dev.Telemetry = nil
+	// Override PCIe link speed/width with nvidia-smi driver values.
+	// sysfs current_link_speed reflects the instantaneous physical link state and
+	// can show Gen1 when the GPU is idle due to ASPM power management. The driver
+	// knows the negotiated speed regardless of the current power state.
+	if info.PCIeLinkGenCurrent != nil {
+		s := pcieLinkGenLabel(*info.PCIeLinkGenCurrent)
+		dev.LinkSpeed = &s
+	}
+	if info.PCIeLinkGenMax != nil {
+		s := pcieLinkGenLabel(*info.PCIeLinkGenMax)
+		dev.MaxLinkSpeed = &s
+	}
+	if info.PCIeLinkWidthCur != nil {
+		dev.LinkWidth = info.PCIeLinkWidthCur
+	}
+	if info.PCIeLinkWidthMax != nil {
+		dev.MaxLinkWidth = info.PCIeLinkWidthMax
 	}
 }
--- a/audit/internal/collector/nvidia_test.go
+++ b/audit/internal/collector/nvidia_test.go
@@ -6,7 +6,7 @@ import (
 )

 func TestParseNVIDIASMIQuery(t *testing.T) {
-	raw := "0, 00000000:65:00.0, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active\n"
+	raw := "0, 00000000:65:00.0, NVIDIA H100 80GB HBM3, GPU-SERIAL-1, 96.00.1F.00.02, 54, 210.33, 0, 5, Not Active, 4, 4, 16, 16\n"
 	byBDF, err := parseNVIDIASMIQuery(raw)
 	if err != nil {
 		t.Fatalf("parse failed: %v", err)
@@ -16,6 +16,9 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
 	if !ok {
 		t.Fatalf("gpu by normalized bdf not found")
 	}
+	if gpu.Name != "NVIDIA H100 80GB HBM3" {
+		t.Fatalf("name: got %q", gpu.Name)
+	}
 	if gpu.Serial != "GPU-SERIAL-1" {
 		t.Fatalf("serial: got %q", gpu.Serial)
 	}
@@ -28,6 +31,12 @@ func TestParseNVIDIASMIQuery(t *testing.T) {
 	if gpu.HWSlowdown == nil || *gpu.HWSlowdown {
 		t.Fatalf("hw slowdown: got %v, want false", gpu.HWSlowdown)
 	}
+	if gpu.PCIeLinkGenCurrent == nil || *gpu.PCIeLinkGenCurrent != 4 {
+		t.Fatalf("pcie link gen current: got %v, want 4", gpu.PCIeLinkGenCurrent)
+	}
+	if gpu.PCIeLinkGenMax == nil || *gpu.PCIeLinkGenMax != 4 {
+		t.Fatalf("pcie link gen max: got %v, want 4", gpu.PCIeLinkGenMax)
+	}
 }

 func TestNormalizePCIeBDF(t *testing.T) {
@@ -54,10 +63,10 @@ func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
 	status := "OK"
 	devices := []schema.HardwarePCIeDevice{
 		{
-			VendorID:     &vendorID,
-			BDF:          &bdf,
-			Manufacturer: &manufacturer,
-			Status:       &status,
+			HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+			VendorID:                &vendorID,
+			BDF:                     &bdf,
+			Manufacturer:            &manufacturer,
 		},
 	}

@@ -73,21 +82,24 @@ func TestEnrichPCIeWithNVIDIAData_driverLoaded(t *testing.T) {
 		},
 	}

-	out := enrichPCIeWithNVIDIAData(devices, byBDF, "BOARD-001", true)
+	out := enrichPCIeWithNVIDIAData(devices, byBDF, true)
 	if out[0].SerialNumber == nil || *out[0].SerialNumber != "GPU-ABC" {
 		t.Fatalf("serial: got %v", out[0].SerialNumber)
 	}
 	if out[0].Firmware == nil || *out[0].Firmware != "96.00.1F.00.02" {
 		t.Fatalf("firmware: got %v", out[0].Firmware)
 	}
-	if out[0].Status == nil || *out[0].Status != "WARNING" {
+	if out[0].Telemetry == nil || out[0].Telemetry["nvidia_gpu_index"] != 0 {
+		t.Fatalf("telemetry nvidia_gpu_index: got %#v", out[0].Telemetry)
+	}
+	if out[0].Status == nil || *out[0].Status != statusWarning {
 		t.Fatalf("status: got %v", out[0].Status)
 	}
-	if out[0].Telemetry == nil {
-		t.Fatal("expected telemetry")
+	if out[0].ECCUncorrectedTotal == nil || *out[0].ECCUncorrectedTotal != 2 {
+		t.Fatalf("ecc_uncorrected_total: got %#v", out[0].ECCUncorrectedTotal)
 	}
-	if got, ok := out[0].Telemetry["ecc_uncorrected_total"].(int64); !ok || got != 2 {
-		t.Fatalf("ecc_uncorrected_total: got %#v", out[0].Telemetry["ecc_uncorrected_total"])
+	if out[0].TemperatureC == nil || *out[0].TemperatureC != 55.5 {
+		t.Fatalf("temperature_c: got %#v", out[0].TemperatureC)
 	}
 }

@@ -103,11 +115,11 @@ func TestEnrichPCIeWithNVIDIAData_driverMissingFallback(t *testing.T) {
 		},
 	}

-	out := enrichPCIeWithNVIDIAData(devices, nil, "BOARD-123", false)
-	if out[0].SerialNumber == nil || *out[0].SerialNumber != "BOARD-123-PCIE-0000:17:00.0" {
-		t.Fatalf("fallback serial: got %v", out[0].SerialNumber)
+	out := enrichPCIeWithNVIDIAData(devices, nil, false)
+	if out[0].SerialNumber != nil {
+		t.Fatalf("serial should stay nil without source data, got %v", out[0].SerialNumber)
 	}
-	if out[0].Status == nil || *out[0].Status != "UNKNOWN" {
+	if out[0].Status == nil || *out[0].Status != statusUnknown {
 		t.Fatalf("fallback status: got %v", out[0].Status)
 	}
 }
--- a/audit/internal/collector/pcie.go
+++ b/audit/internal/collector/pcie.go
@@ -2,6 +2,7 @@ package collector

 import (
 	"bee/audit/internal/schema"
+	"fmt"
 	"log/slog"
 	"os/exec"
 	"strconv"
@@ -37,7 +38,7 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
 			val := strings.TrimSpace(line[idx+2:])
 			fields[key] = val
 		}
-		if !shouldIncludePCIeDevice(fields["Class"]) {
+		if !shouldIncludePCIeDevice(fields["Class"], fields["Vendor"], fields["Device"]) {
 			continue
 		}
 		dev := parseLspciDevice(fields)
@@ -46,8 +47,10 @@ func parseLspci(output string) []schema.HardwarePCIeDevice {
 	return devs
 }

-func shouldIncludePCIeDevice(class string) bool {
+func shouldIncludePCIeDevice(class, vendor, device string) bool {
 	c := strings.ToLower(strings.TrimSpace(class))
+	v := strings.ToLower(strings.TrimSpace(vendor))
+	d := strings.ToLower(strings.TrimSpace(device))
 	if c == "" {
 		return true
 	}
@@ -57,6 +60,9 @@ func shouldIncludePCIeDevice(class string) bool {
 		"host bridge",
 		"isa bridge",
 		"pci bridge",
+		"co-processor",
+		"performance counter",
+		"performance counters",
 		"ram memory",
 		"system peripheral",
 		"communication controller",
@@ -66,12 +72,47 @@ func shouldIncludePCIeDevice(class string) bool {
 		"audio device",
 		"serial bus controller",
 		"unassigned class",
+		"non-essential instrumentation",
 	}
 	for _, bad := range excluded {
 		if strings.Contains(c, bad) {
 			return false
 		}
 	}
+
+	// Exclude BMC/management virtual VGA adapters — these are firmware video chips,
+	// not real GPUs, and pollute the GPU inventory (e.g. iBMC, iDRAC, iLO VGA).
+	if strings.Contains(c, "vga") || strings.Contains(c, "display") || strings.Contains(c, "3d") {
+		bmcPatterns := []string{
+			"management system chip",
+			"management controller",
+			"ibmc",
+			"idrac",
+			"ilo vga",
+			"aspeed",
+			"matrox",
+		}
+		for _, bad := range bmcPatterns {
+			if strings.Contains(d, bad) {
+				return false
+			}
+		}
+	}
+
+	if strings.Contains(v, "advanced micro devices") || strings.Contains(v, "[amd]") {
+		internalAMDPatterns := []string{
+			"dummy function",
+			"reserved spp",
+			"ptdma",
+			"cryptographic coprocessor pspcpp",
+			"pspcpp",
+		}
+		for _, bad := range internalAMDPatterns {
+			if strings.Contains(d, bad) {
+				return false
+			}
+		}
+	}
 	return true
 }

@@ -79,11 +120,12 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
 	dev := schema.HardwarePCIeDevice{}
 	present := true
 	dev.Present = &present
-	status := "OK"
+	status := statusOK
 	dev.Status = &status

 	// Slot is the BDF: "0000:00:02.0"
 	if bdf := fields["Slot"]; bdf != "" {
+		dev.Slot = &bdf
 		dev.BDF = &bdf
 		// parse vendor_id and device_id from sysfs
 		vendorID, deviceID := readPCIIDs(bdf)
@@ -93,10 +135,34 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {
 		if deviceID != 0 {
 			dev.DeviceID = &deviceID
 		}
+		if numaNode, ok := readPCINumaNode(bdf); ok {
+			dev.NUMANode = &numaNode
+		} else if numaNode, ok := parsePCINumaNode(fields["NUMANode"]); ok {
+			dev.NUMANode = &numaNode
+		}
+		if width, ok := readPCIIntAttribute(bdf, "current_link_width"); ok {
+			dev.LinkWidth = &width
+		}
+		if width, ok := readPCIIntAttribute(bdf, "max_link_width"); ok {
+			dev.MaxLinkWidth = &width
+		}
+		if speed, ok := readPCIStringAttribute(bdf, "current_link_speed"); ok {
+			linkSpeed := normalizePCILinkSpeed(speed)
+			if linkSpeed != "" {
+				dev.LinkSpeed = &linkSpeed
+			}
+		}
+		if speed, ok := readPCIStringAttribute(bdf, "max_link_speed"); ok {
+			linkSpeed := normalizePCILinkSpeed(speed)
+			if linkSpeed != "" {
+				dev.MaxLinkSpeed = &linkSpeed
+			}
+		}
 	}

 	if v := fields["Class"]; v != "" {
-		dev.DeviceClass = &v
+		class := mapPCIeDeviceClass(v)
+		dev.DeviceClass = &class
 	}
 	if v := fields["Vendor"]; v != "" {
 		dev.Manufacturer = &v
@@ -107,6 +173,9 @@ func parseLspciDevice(fields map[string]string) schema.HardwarePCIeDevice {

 	// SVendor/SDevice available but not in schema — skip

+	// Warn if PCIe link is running below its maximum negotiated speed.
+	applyPCIeLinkSpeedWarning(&dev)
+
 	return dev
 }

@@ -131,3 +200,102 @@ func readHexFile(path string) (int, error) {
 	n, err := strconv.ParseInt(s, 16, 64)
 	return int(n), err
 }
+
+// readPCINumaNode returns the device's "numa_node" attribute; ok is false when it cannot be read or is negative.
+func readPCINumaNode(bdf string) (int, bool) {
+	if node, ok := readPCIIntAttribute(bdf, "numa_node"); ok && node >= 0 {
+		return node, true
+	}
+	return 0, false
+}
+
+// parsePCINumaNode parses a NUMA node number from text, ignoring surrounding whitespace.
+// ok is false for empty, non-numeric or negative input.
+func parsePCINumaNode(raw string) (int, bool) {
+	switch node, err := strconv.Atoi(strings.TrimSpace(raw)); {
+	case err != nil, node < 0:
+		// Atoi rejects the empty string, so blank input lands here as well.
+		return 0, false
+	default:
+		return node, true
+	}
+}
+
+// readPCIIntAttribute reads /sys/bus/pci/devices/<bdf>/<attribute> with cat and parses it as a
+// decimal int; ok is false when the command fails, the text is not an int, or the value is negative.
+func readPCIIntAttribute(bdf, attribute string) (int, bool) {
+	sysfsPath := "/sys/bus/pci/devices/" + bdf + "/" + attribute
+	if raw, err := exec.Command("cat", sysfsPath).Output(); err == nil {
+		if n, convErr := strconv.Atoi(strings.TrimSpace(string(raw))); convErr == nil && n >= 0 {
+			return n, true
+		}
+	}
+	return 0, false
+}
+
+// readPCIStringAttribute reads /sys/bus/pci/devices/<bdf>/<attribute> with cat and returns its
+// whitespace-trimmed content; ok is false when the command fails or the content is empty.
+func readPCIStringAttribute(bdf, attribute string) (string, bool) {
+	sysfsPath := "/sys/bus/pci/devices/" + bdf + "/" + attribute
+	raw, err := exec.Command("cat", sysfsPath).Output()
+	if err != nil {
+		return "", false
+	}
+	text := strings.TrimSpace(string(raw))
+	return text, text != ""
+}
+
+// applyPCIeLinkSpeedWarning marks dev as Warning, with an ErrorDescription, when its current link speed ranks below its maximum link speed.
+func applyPCIeLinkSpeedWarning(dev *schema.HardwarePCIeDevice) {
+	if dev.LinkSpeed == nil || dev.MaxLinkSpeed == nil {
+		return
+	}
+	// Rank 0 means an unrecognised label; an unknown current speed must not be reported as a degraded link.
+	if cur := pcieLinkSpeedRank(*dev.LinkSpeed); cur == 0 || cur >= pcieLinkSpeedRank(*dev.MaxLinkSpeed) {
+		return
+	}
+	warn := statusWarning
+	desc := fmt.Sprintf("PCIe link speed degraded: running at %s, capable of %s", *dev.LinkSpeed, *dev.MaxLinkSpeed)
+	dev.Status, dev.ErrorDescription = &warn, &desc
+}
+
+// pcieLinkSpeedRank converts a normalised generation label into a number that can be
+// compared: "Gen1" gives 1, up to "Gen6" which gives 6. The match is exact and
+// case-sensitive; any other string, including the empty string, ranks as 0.
+func pcieLinkSpeedRank(gen string) int {
+	// The table is ordered by generation, so a label's position plus one is its rank.
+	labels := [...]string{
+		"Gen1",
+		"Gen2",
+		"Gen3",
+		"Gen4",
+		"Gen5",
+		"Gen6",
+	}
+	for idx, label := range labels {
+		if gen == label {
+			return idx + 1
+		}
+	}
+	return 0
+}
+
+// normalizePCILinkSpeed maps a PCI link-speed string (as read from the sysfs
+// current_link_speed / max_link_speed attributes) to a "GenN" label.
+// The transfer rate is taken from the leading token, so both kernel spellings
+// are understood: "8.0 GT/s PCIe" and the older "8 GT/s". Input that does not
+// start with a known PCIe transfer rate (for example "Unknown") yields "".
+func normalizePCILinkSpeed(raw string) string {
+	fields := strings.Fields(strings.ToLower(raw))
+	if len(fields) == 0 {
+		return ""
+	}
+	// Tolerate a unit glued to the number ("8.0gt/s").
+	rate, err := strconv.ParseFloat(strings.TrimSuffix(fields[0], "gt/s"), 64)
+	if err != nil {
+		return ""
+	}
+	// Transfer rate in GT/s to generation; a rate missing from the table maps to "".
+	gens := map[float64]string{2.5: "Gen1", 5: "Gen2", 8: "Gen3", 16: "Gen4", 32: "Gen5", 64: "Gen6"}
+	return gens[rate]
+}
--- a/audit/internal/collector/pcie_filter_test.go
+++ b/audit/internal/collector/pcie_filter_test.go
@@ -1,41 +1,218 @@
 package collector

-import "testing"
+import (
+	"bee/audit/internal/schema"
+	"encoding/json"
+	"strings"
+	"testing"
+)

 func TestShouldIncludePCIeDevice(t *testing.T) {
 	tests := []struct {
-		class string
-		want  bool
+		name   string
+		class  string
+		vendor string
+		device string
+		want   bool
 	}{
-		{"USB controller", false},
-		{"System peripheral", false},
-		{"Audio device", false},
-		{"Host bridge", false},
-		{"PCI bridge", false},
-		{"SMBus", false},
-		{"Ethernet controller", true},
-		{"RAID bus controller", true},
-		{"Non-Volatile memory controller", true},
-		{"VGA compatible controller", true},
+		{name: "usb", class: "USB controller", want: false},
+		{name: "system peripheral", class: "System peripheral", want: false},
+		{name: "audio", class: "Audio device", want: false},
+		{name: "host bridge", class: "Host bridge", want: false},
+		{name: "pci bridge", class: "PCI bridge", want: false},
+		{name: "co-processor", class: "Co-processor", want: false},
+		{name: "smbus", class: "SMBus", want: false},
+		{name: "perf", class: "Performance counters", want: false},
+		{name: "non essential instrumentation", class: "Non-Essential Instrumentation", want: false},
+		{name: "amd dummy function", class: "Encryption controller", vendor: "Advanced Micro Devices, Inc. [AMD]", device: "Starship/Matisse PTDMA", want: false},
+		{name: "amd pspcpp", class: "Encryption controller", vendor: "Advanced Micro Devices, Inc. [AMD]", device: "Starship/Matisse Cryptographic Coprocessor PSPCPP", want: false},
+		{name: "ethernet", class: "Ethernet controller", want: true},
+		{name: "raid", class: "RAID bus controller", want: true},
+		{name: "nvme", class: "Non-Volatile memory controller", want: true},
+		{name: "vga", class: "VGA compatible controller", want: true},
+		{name: "ibmc vga", class: "VGA compatible controller", vendor: "Huawei Technologies Co., Ltd.", device: "Hi171x Series [iBMC Intelligent Management system chip w/VGA support]", want: false},
+		{name: "aspeed vga", class: "VGA compatible controller", vendor: "ASPEED Technology, Inc.", device: "ASPEED Graphics Family", want: false},
+		{name: "other encryption controller", class: "Encryption controller", vendor: "Intel Corporation", device: "QuickAssist", want: true},
 	}

 	for _, tt := range tests {
-		got := shouldIncludePCIeDevice(tt.class)
-		if got != tt.want {
-			t.Fatalf("class %q include=%v want %v", tt.class, got, tt.want)
-		}
+		t.Run(tt.name, func(t *testing.T) {
+			got := shouldIncludePCIeDevice(tt.class, tt.vendor, tt.device)
+			if got != tt.want {
+				t.Fatalf("class=%q vendor=%q device=%q include=%v want %v", tt.class, tt.vendor, tt.device, got, tt.want)
+			}
+		})
 	}
 }

 func TestParseLspci_filtersExcludedClasses(t *testing.T) {
 	input := "Slot:\t0000:00:14.0\nClass:\tUSB controller\nVendor:\tIntel Corporation\nDevice:\tUSB 3.0\n\n" +
+		"Slot:\t0000:00:18.0\nClass:\tNon-Essential Instrumentation\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PCIe Dummy Function\n\n" +
 		"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"

 	devs := parseLspci(input)
 	if len(devs) != 1 {
 		t.Fatalf("expected 1 filtered device, got %d", len(devs))
 	}
-	if devs[0].DeviceClass == nil || *devs[0].DeviceClass != "VGA compatible controller" {
+	if devs[0].DeviceClass == nil || *devs[0].DeviceClass != "VideoController" {
 		t.Fatalf("unexpected remaining class: %v", devs[0].DeviceClass)
 	}
+	if devs[0].Slot == nil || *devs[0].Slot != "0000:65:00.0" {
+		t.Fatalf("slot: got %v", devs[0].Slot)
+	}
+	if devs[0].BDF == nil || *devs[0].BDF != "0000:65:00.0" {
+		t.Fatalf("bdf: got %v", devs[0].BDF)
+	}
+}
+
+func TestParseLspci_filtersAMDChipsetNoise(t *testing.T) {
+	input := "" +
+		"Slot:\t0000:1a:00.0\nClass:\tNon-Essential Instrumentation\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PCIe Dummy Function\n\n" +
+		"Slot:\t0000:1a:00.2\nClass:\tEncryption controller\nVendor:\tAdvanced Micro Devices, Inc. [AMD]\nDevice:\tStarship/Matisse PTDMA\n\n" +
+		"Slot:\t0000:05:00.0\nClass:\tEthernet controller\nVendor:\tMellanox Technologies\nDevice:\tMT28908 Family [ConnectX-6]\n\n"
+
+	devs := parseLspci(input)
+	if len(devs) != 1 {
+		t.Fatalf("expected 1 remaining device, got %d", len(devs))
+	}
+	if devs[0].Model == nil || *devs[0].Model != "MT28908 Family [ConnectX-6]" {
+		t.Fatalf("unexpected remaining device: %+v", devs[0])
+	}
+}
+
+func TestParseLspci_filtersCoProcessors(t *testing.T) {
+	input := "" +
+		"Slot:\t0000:01:00.0\nClass:\tCo-processor\nVendor:\tIntel Corporation\nDevice:\t402xx Series QAT\n\n" +
+		"Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n"
+
+	devs := parseLspci(input)
+	if len(devs) != 1 {
+		t.Fatalf("expected 1 remaining device, got %d", len(devs))
+	}
+	if devs[0].Model == nil || *devs[0].Model != "H100" {
+		t.Fatalf("unexpected remaining device: %+v", devs[0])
+	}
+}
+
+// TestPCIeJSONUsesSlotNotBDF checks that a parsed device marshals with "slot" and no "bdf" key.
+func TestPCIeJSONUsesSlotNotBDF(t *testing.T) {
+	devs := parseLspci("Slot:\t0000:65:00.0\nClass:\tVGA compatible controller\nVendor:\tNVIDIA Corporation\nDevice:\tH100\n\n")
+	if len(devs) != 1 { // fail cleanly instead of panicking on devs[0]
+		t.Fatalf("expected 1 device, got %d", len(devs))
+	}
+	data, err := json.Marshal(devs[0])
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	if text := string(data); !strings.Contains(text, `"slot":"0000:65:00.0"`) {
+		t.Fatalf("json missing slot: %s", text)
+	} else if strings.Contains(text, `"bdf"`) {
+		t.Fatalf("json should not emit bdf: %s", text)
+	}
+}
+
+func TestParseLspciUsesNUMANodeFieldWhenSysfsUnavailable(t *testing.T) {
+	input := "Slot:\t0000:65:00.0\nClass:\tEthernet controller\nVendor:\tIntel Corporation\nDevice:\tX710\nNUMANode:\t1\n\n"
+
+	devs := parseLspci(input)
+	if len(devs) != 1 {
+		t.Fatalf("expected 1 device, got %d", len(devs))
+	}
+	if devs[0].NUMANode == nil || *devs[0].NUMANode != 1 {
+		t.Fatalf("numa_node=%v want 1", devs[0].NUMANode)
+	}
+}
+
+func TestNormalizePCILinkSpeed(t *testing.T) {
+	tests := []struct {
+		raw, want string
+	}{
+		{"2.5 GT/s PCIe", "Gen1"},
+		{"5.0 GT/s PCIe", "Gen2"},
+		{"8.0 GT/s PCIe", "Gen3"},
+		{"16.0 GT/s PCIe", "Gen4"},
+		{"32.0 GT/s PCIe", "Gen5"},
+		{"64.0 GT/s PCIe", "Gen6"},
+		{"unknown", ""},
+	}
+	for _, tt := range tests {
+		if got := normalizePCILinkSpeed(tt.raw); got != tt.want {
+			// Errorf (not Fatalf) so one bad row does not hide the others.
+			t.Errorf("normalizePCILinkSpeed(%q)=%q want %q", tt.raw, got, tt.want)
+		}
+	}
+}
+
+func TestApplyPCIeLinkSpeedWarning(t *testing.T) {
+	ptr := func(s string) *string { return &s }
+
+	tests := []struct {
+		name        string
+		linkSpeed   *string
+		maxSpeed    *string
+		wantWarning bool
+		wantGenIn   string // substring expected in ErrorDescription when warning
+	}{
+		{
+			name:        "degraded Gen1 vs Gen5",
+			linkSpeed:   ptr("Gen1"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: true,
+			wantGenIn:   "Gen1",
+		},
+		{
+			name:        "at max Gen5",
+			linkSpeed:   ptr("Gen5"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: false,
+		},
+		{
+			name:        "degraded Gen4 vs Gen5",
+			linkSpeed:   ptr("Gen4"),
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: true,
+			wantGenIn:   "Gen4",
+		},
+		{
+			name:        "missing current speed — no warning",
+			linkSpeed:   nil,
+			maxSpeed:    ptr("Gen5"),
+			wantWarning: false,
+		},
+		{
+			name:        "missing max speed — no warning",
+			linkSpeed:   ptr("Gen1"),
+			maxSpeed:    nil,
+			wantWarning: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			dev := schema.HardwarePCIeDevice{}
+			ok := statusOK
+			dev.Status = &ok
+			dev.LinkSpeed = tt.linkSpeed
+			dev.MaxLinkSpeed = tt.maxSpeed
+
+			applyPCIeLinkSpeedWarning(&dev)
+
+			gotWarn := dev.Status != nil && *dev.Status == statusWarning
+			if gotWarn != tt.wantWarning {
+				t.Fatalf("wantWarning=%v gotWarning=%v (status=%v)", tt.wantWarning, gotWarn, dev.Status)
+			}
+			if tt.wantWarning {
+				if dev.ErrorDescription == nil {
+					t.Fatal("expected ErrorDescription to be set")
+				}
+				if !strings.Contains(*dev.ErrorDescription, tt.wantGenIn) {
+					t.Fatalf("ErrorDescription %q does not contain %q", *dev.ErrorDescription, tt.wantGenIn)
+				}
+			} else {
+				if dev.ErrorDescription != nil {
+					t.Fatalf("unexpected ErrorDescription: %s", *dev.ErrorDescription)
+				}
+			}
+		})
+	}
 }
--- a/audit/internal/collector/pcie_identity.go
+++ b/audit/internal/collector/pcie_identity.go
@@ -0,0 +1,123 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+var (
+	// queryPCILSPCIDetail returns the output of `lspci -vv -s <bdf>`; a variable so it can be replaced.
+	queryPCILSPCIDetail = func(bdf string) (string, error) {
+		out, err := exec.Command("lspci", "-vv", "-s", bdf).Output()
+		if err != nil {
+			return "", err
+		}
+		return string(out), nil
+	}
+	// readPCIVPDFile reads /sys/bus/pci/devices/<bdf>/vpd; a variable so it can be replaced.
+	readPCIVPDFile = func(bdf string) ([]byte, error) { return os.ReadFile(filepath.Join("/sys/bus/pci/devices", bdf, "vpd")) }
+)
+
+// enrichPCIeWithPCISerials sets SerialNumber, in place, on each device that
+// shouldProbePCIeSerial accepts and queryPCIDeviceSerial resolves; it returns devs.
+func enrichPCIeWithPCISerials(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
+	found := 0
+	for idx := range devs {
+		if !shouldProbePCIeSerial(devs[idx]) {
+			continue
+		}
+		if address := normalizePCIeBDF(*devs[idx].BDF); address != "" {
+			if serial := queryPCIDeviceSerial(address); serial != "" {
+				devs[idx].SerialNumber = &serial
+				found++
+			}
+		}
+	}
+	if found > 0 {
+		slog.Info("pcie: serials enriched", "count", found)
+	}
+	return devs
+}
+
+// shouldProbePCIeSerial reports whether dev has a BDF, no serial number yet, and a
+// device class (whitespace-trimmed) that isNICClass or isGPUClass accepts.
+func shouldProbePCIeSerial(dev schema.HardwarePCIeDevice) bool {
+	switch {
+	case dev.BDF == nil, dev.SerialNumber != nil, dev.DeviceClass == nil:
+		return false
+	}
+	class := strings.TrimSpace(*dev.DeviceClass)
+	return isNICClass(class) || isGPUClass(class)
+}
+
+// queryPCIDeviceSerial prefers a serial parsed from lspci detail output, then the VPD file; "" if none.
+func queryPCIDeviceSerial(bdf string) string {
+	detail, err := queryPCILSPCIDetail(bdf)
+	if serial := parseLSPCIDetailSerial(detail); err == nil && serial != "" {
+		return serial
+	}
+	if vpd, err := readPCIVPDFile(bdf); err == nil {
+		return parsePCIVPDSerial(vpd)
+	}
+	return ""
+}
+
+// parseLSPCIDetailSerial returns the trimmed text after the first ':' on the first line of
+// raw that contains "serial number:" (case-insensitive) and has a non-empty value there.
+// It returns "" when no line qualifies.
+func parseLSPCIDetailSerial(raw string) string {
+	const marker = "serial number:"
+	for _, entry := range strings.Split(raw, "\n") {
+		entry = strings.TrimSpace(entry)
+		if !strings.Contains(strings.ToLower(entry), marker) {
+			continue
+		}
+		_, rest, found := strings.Cut(entry, ":")
+		if !found {
+			continue
+		}
+		if serial := strings.TrimSpace(rest); serial != "" {
+			return serial
+		}
+	}
+	return ""
+}
+
+// parsePCIVPDSerial scans raw for the bytes 'S','N' followed by a length byte (1..64) and
+// that many payload bytes lying fully inside raw. The payload has leading/trailing NULs
+// removed, is then whitespace-trimmed, and is returned if looksLikeSerial accepts it.
+// It returns "" when no position yields an acceptable value.
+func parsePCIVPDSerial(raw []byte) string {
+	for pos := 0; pos+3 < len(raw); pos++ {
+		size := int(raw[pos+2])
+		if raw[pos] != 'S' || raw[pos+1] != 'N' || size == 0 || size > 64 || pos+3+size > len(raw) {
+			continue
+		}
+		candidate := strings.TrimSpace(strings.Trim(string(raw[pos+3:pos+3+size]), "\x00"))
+		if looksLikeSerial(candidate) {
+			return candidate
+		}
+	}
+	return ""
+}
+
+// looksLikeSerial reports whether value is at least 4 bytes long, consists only of ASCII
+// letters, ASCII digits and the characters " -_./:", and contains at least one letter or digit.
+func looksLikeSerial(value string) bool {
+	if len(value) < 4 {
+		return false
+	}
+	sawAlnum := false
+	for _, ch := range value {
+		isAlnum := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9')
+		if !isAlnum && !strings.ContainsRune(" -_./:", ch) {
+			return false
+		}
+		sawAlnum = sawAlnum || isAlnum
+	}
+	return sawAlnum
+}
--- a/audit/internal/collector/pcie_identity_test.go
+++ b/audit/internal/collector/pcie_identity_test.go
@@ -0,0 +1,47 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"fmt"
+	"testing"
+)
+
+func TestEnrichPCIeWithPCISerialsAddsGPUFallback(t *testing.T) {
+	origDetail := queryPCILSPCIDetail
+	origVPD := readPCIVPDFile
+	t.Cleanup(func() {
+		queryPCILSPCIDetail = origDetail
+		readPCIVPDFile = origVPD
+	})
+
+	queryPCILSPCIDetail = func(bdf string) (string, error) {
+		if bdf != "0000:11:00.0" {
+			t.Fatalf("unexpected bdf: %s", bdf)
+		}
+		return "Serial number: GPU-SN-12345\n", nil
+	}
+	readPCIVPDFile = func(string) ([]byte, error) {
+		return nil, fmt.Errorf("no vpd needed")
+	}
+
+	class := "DisplayController"
+	bdf := "0000:11:00.0"
+	devs := []schema.HardwarePCIeDevice{{
+		DeviceClass: &class,
+		BDF:         &bdf,
+	}}
+
+	out := enrichPCIeWithPCISerials(devs)
+	if out[0].SerialNumber == nil || *out[0].SerialNumber != "GPU-SN-12345" {
+		t.Fatalf("serial=%v want GPU-SN-12345", out[0].SerialNumber)
+	}
+}
+
+func TestShouldProbePCIeSerialSkipsNonGPUOrNIC(t *testing.T) {
+	class := "StorageController"
+	bdf := "0000:19:00.0"
+	dev := schema.HardwarePCIeDevice{DeviceClass: &class, BDF: &bdf}
+	if shouldProbePCIeSerial(dev) {
+		t.Fatal("unexpected probe for storage controller")
+	}
+}
--- a/audit/internal/collector/psu.go
+++ b/audit/internal/collector/psu.go
@@ -4,18 +4,32 @@ import (
 	"bee/audit/internal/schema"
 	"log/slog"
 	"os/exec"
+	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 )

 func collectPSUs() []schema.HardwarePowerSupply {
-	// ipmitool requires /dev/ipmi0 — not available on non-server hardware
-	out, err := exec.Command("ipmitool", "fru", "print").Output()
-	if err != nil {
+	var psus []schema.HardwarePowerSupply
+	if out, err := exec.Command("ipmitool", "fru", "print").Output(); err == nil {
+		psus = parseFRU(string(out))
+	} else {
+		slog.Info("psu: fru unavailable", "err", err)
+	}
+
+	sdrData := map[int]psuSDR{}
+	if sdrOut, err := exec.Command("ipmitool", "sdr").Output(); err == nil {
+		sdrData = parsePSUSDR(string(sdrOut))
+		if len(psus) == 0 {
+			psus = synthesizePSUsFromSDR(sdrData)
+		} else {
+			mergePSUSDR(psus, sdrData)
+		}
+	} else if len(psus) == 0 {
 		slog.Info("psu: ipmitool unavailable, skipping", "err", err)
 		return nil
 	}
-	psus := parseFRU(string(out))
 	slog.Info("psu: collected", "count", len(psus))
 	return psus
 }
@@ -75,9 +89,7 @@ func parseFRUBlock(block string, slotIdx int) (schema.HardwarePowerSupply, bool)

 	// Only process PSU FRU records
 	headerLower := strings.ToLower(header)
-	if !strings.Contains(headerLower, "psu") &&
-		!strings.Contains(headerLower, "power supply") &&
-		!strings.Contains(headerLower, "power_supply") {
+	if !isPSUHeader(headerLower) {
 		return schema.HardwarePowerSupply{}, false
 	}

@@ -85,21 +97,24 @@ func parseFRUBlock(block string, slotIdx int) (schema.HardwarePowerSupply, bool)
 	psu := schema.HardwarePowerSupply{Present: &present}

 	slotStr := strconv.Itoa(slotIdx)
+	if slot, ok := parsePSUSlot(header); ok && slot > 0 {
+		slotStr = strconv.Itoa(slot - 1)
+	}
 	psu.Slot = &slotStr

-	if v := cleanDMIValue(fields["Board Product"]); v != "" {
+	if v := firstNonEmptyField(fields, "Board Product", "Product Name", "Product Part Number"); v != "" {
 		psu.Model = &v
 	}
-	if v := cleanDMIValue(fields["Board Mfg"]); v != "" {
+	if v := firstNonEmptyField(fields, "Board Mfg", "Product Manufacturer", "Product Manufacturer Name"); v != "" {
 		psu.Vendor = &v
 	}
-	if v := cleanDMIValue(fields["Board Serial"]); v != "" {
+	if v := firstNonEmptyField(fields, "Board Serial", "Product Serial", "Product Serial Number"); v != "" {
 		psu.SerialNumber = &v
 	}
-	if v := cleanDMIValue(fields["Board Part Number"]); v != "" {
+	if v := firstNonEmptyField(fields, "Board Part Number", "Product Part Number", "Part Number"); v != "" {
 		psu.PartNumber = &v
 	}
-	if v := cleanDMIValue(fields["Board Extra"]); v != "" {
+	if v := firstNonEmptyField(fields, "Board Extra", "Product Version", "Board Version"); v != "" {
 		psu.Firmware = &v
 	}

@@ -110,12 +125,230 @@ func parseFRUBlock(block string, slotIdx int) (schema.HardwarePowerSupply, bool)
 		}
 	}

-	status := "OK"
+	status := statusOK
 	psu.Status = &status

 	return psu, true
 }

+// isPSUHeader reports whether headerLower contains any of the PSU header markers
+// "psu", "pws", "power supply", "power_supply" or "power module". The comparison is
+// case-sensitive, so the caller must pass an already lower-cased header.
+func isPSUHeader(headerLower string) bool {
+	has := func(marker string) bool { return strings.Contains(headerLower, marker) }
+	return has("psu") || has("pws") || has("power supply") || has("power_supply") || has("power module")
+}
+
+// firstNonEmptyField returns cleanDMIValue(fields[key]) for the first key, in argument
+// order, whose cleaned value is non-empty; it returns "" when there is none.
+func firstNonEmptyField(fields map[string]string, keys ...string) string {
+	value := ""
+	for i := 0; i < len(keys) && value == ""; i++ {
+		value = cleanDMIValue(fields[keys[i]])
+	}
+	return value
+}
+
+type psuSDR struct {
+	slot         int      // number parsePSUSlot extracted from the sensor name (set in parsePSUSDR)
+	status       string   // statusOK, or statusCritical once any sensor state is not "ok"/"ns"
+	reason       string   // description of the most recent non-OK sensor state
+	inputPowerW  *float64 // reading of an "input power" sensor
+	outputPowerW *float64 // reading of an "output power", "power supply bay" or "psu bay" sensor
+	inputVoltage *float64 // reading of an "input voltage" or "ac input" sensor
+	temperatureC *float64 // reading of a sensor whose name contains "temp"
+	healthPct    *float64 // parsePercentPtr of a "health"/"remaining life"/"life remaining" sensor
+}
+
+var psuSlotPatterns = []*regexp.Regexp{
+	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),                      // "PS<n>" or "PSU<n>", optional whitespace before the number
+	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),                        // "PS<n>" (already covered by the pattern above)
+	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),                       // "PWS<n>"
+	regexp.MustCompile(`(?i)\bpower\s*supply(?:\s*bay)?\s*([0-9]+)\b`), // "Power Supply <n>" or "Power Supply Bay <n>"
+	regexp.MustCompile(`(?i)\bbay\s*([0-9]+)\b`),                       // NOTE(review): any "bay <n>" name matches, not only PSU bays
+}
+
+// parsePSUSDR parses pipe-separated sensor lines ("name | value | state") and groups the
+// readings by the slot number that parsePSUSlot extracts from the sensor name. Lines with
+// fewer than three non-empty fields, or without a slot number, are ignored. A slot starts
+// as statusOK; any state other than "ok"/"ns" turns it into statusCritical with a reason.
+func parsePSUSDR(raw string) map[int]psuSDR {
+	bySlot := map[int]psuSDR{}
+	for _, line := range strings.Split(raw, "\n") {
+		cols := splitSDRFields(line)
+		if len(cols) < 3 {
+			continue
+		}
+		sensor, reading, state := cols[0], cols[1], strings.ToLower(cols[2])
+		slot, found := parsePSUSlot(sensor)
+		if !found {
+			continue
+		}
+
+		rec := bySlot[slot]
+		rec.slot = slot
+		if rec.status == "" {
+			rec.status = statusOK
+		}
+		if state != "" && state != "ok" && state != "ns" {
+			rec.status = statusCritical
+			rec.reason = "PSU sensor reported non-OK state: " + state
+		}
+		lowered := strings.ToLower(sensor)
+		has := func(sub string) bool { return strings.Contains(lowered, sub) }
+		switch {
+		case has("input power"):
+			rec.inputPowerW = parseFloatPtr(reading)
+		case has("output power"), has("power supply bay"), has("psu bay"):
+			rec.outputPowerW = parseFloatPtr(reading)
+		case has("input voltage"), has("ac input"):
+			rec.inputVoltage = parseFloatPtr(reading)
+		case has("temp"):
+			rec.temperatureC = parseFloatPtr(reading)
+		case has("health"), has("remaining life"), has("life remaining"):
+			rec.healthPct = parsePercentPtr(reading)
+		}
+		bySlot[slot] = rec
+	}
+	return bySlot
+}
+
+// synthesizePSUsFromSDR builds one power-supply record per SDR slot, ordered by ascending
+// slot number. Each record is marked present, gets the placeholder model "PSU" and a
+// Slot string of slot-1; an empty status becomes statusUnknown. A health percentage also
+// yields LifeUsedPct = 100 - health. It returns nil for an empty map.
+func synthesizePSUsFromSDR(sdr map[int]psuSDR) []schema.HardwarePowerSupply {
+	if len(sdr) == 0 {
+		return nil
+	}
+	order := make([]int, 0, len(sdr))
+	for slot := range sdr {
+		order = append(order, slot)
+	}
+	sort.Ints(order)
+
+	result := make([]schema.HardwarePowerSupply, 0, len(order))
+	for _, slot := range order {
+		reading := sdr[slot]
+		status, present, model := reading.status, true, "PSU"
+		if status == "" {
+			status = statusUnknown
+		}
+		slotLabel := strconv.Itoa(slot - 1)
+		var psu schema.HardwarePowerSupply
+		psu.Status = &status
+		psu.Slot = &slotLabel
+		psu.Present = &present
+		psu.Model = &model
+		psu.InputPowerW = reading.inputPowerW
+		psu.OutputPowerW = reading.outputPowerW
+		psu.InputVoltage = reading.inputVoltage
+		psu.TemperatureC = reading.temperatureC
+		if reading.healthPct != nil {
+			used := 100 - *reading.healthPct
+			psu.LifeRemainingPct, psu.LifeUsedPct = reading.healthPct, &used
+		}
+		if reading.reason != "" {
+			psu.ErrorDescription = &reading.reason
+		}
+		result = append(result, psu)
+	}
+	return result
+}
+
+// mergePSUSDR overlays SDR readings onto psus in place. A PSU whose Slot parses as the
+// integer n is matched to sdr[n+1]; PSUs without a match are left untouched. Only values
+// present in the SDR entry overwrite existing ones. A PSU still at statusOK becomes
+// statusUnknown when the entry has neither a status nor any power/voltage reading.
+func mergePSUSDR(psus []schema.HardwarePowerSupply, sdr map[int]psuSDR) {
+	for i := range psus {
+		psu := &psus[i]
+		zeroBased, err := strconv.Atoi(derefPSUSlot(psu.Slot))
+		reading, found := sdr[zeroBased+1]
+		if err != nil || !found {
+			continue
+		}
+		if reading.inputPowerW != nil {
+			psu.InputPowerW = reading.inputPowerW
+		}
+		if reading.outputPowerW != nil {
+			psu.OutputPowerW = reading.outputPowerW
+		}
+		if reading.inputVoltage != nil {
+			psu.InputVoltage = reading.inputVoltage
+		}
+		if reading.temperatureC != nil {
+			psu.TemperatureC = reading.temperatureC
+		}
+		if reading.healthPct != nil {
+			used := 100 - *reading.healthPct
+			psu.LifeRemainingPct, psu.LifeUsedPct = reading.healthPct, &used
+		}
+		if reading.status != "" {
+			psu.Status = &reading.status
+		}
+		if reading.reason != "" {
+			psu.ErrorDescription = &reading.reason
+		}
+		noElectrical := reading.inputPowerW == nil && reading.outputPowerW == nil && reading.inputVoltage == nil
+		if psu.Status != nil && *psu.Status == statusOK && noElectrical && reading.status == "" {
+			unknown := statusUnknown
+			psu.Status = &unknown
+		}
+	}
+}
+
+// splitSDRFields splits line on '|' and returns the trimmed, non-empty columns in order.
+func splitSDRFields(line string) []string {
+	columns := strings.Split(line, "|")
+	kept := columns[:0]
+	for _, column := range columns {
+		if column = strings.TrimSpace(column); column != "" {
+			kept = append(kept, column)
+		}
+	}
+	return kept
+}
+
+// parsePSUSlot extracts a PSU slot number from a sensor or FRU name by trying each entry
+// of psuSlotPatterns in order against the lower-cased name. It returns the first captured
+// group that parses as a positive integer, or (0, false) when no pattern yields one.
+// The lower-casing is done once up front rather than once per pattern.
+func parsePSUSlot(name string) (int, bool) {
+	lowered := strings.ToLower(name)
+	for _, re := range psuSlotPatterns {
+		// FindStringSubmatch returns nil on no match, so the inner loop is then skipped.
+		m := re.FindStringSubmatch(lowered)
+		for i := 1; i < len(m); i++ {
+			// Empty or non-numeric captures fail Atoi and fall through to the next one.
+			if n, err := strconv.Atoi(m[i]); err == nil && n > 0 {
+				return n, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// parseFloatPtr returns the first whitespace-separated token of raw that parses as a
+// finite float (e.g. "215 Watts" → 215). It returns nil when no token qualifies, which
+// covers "" and the "na" placeholder.
+func parseFloatPtr(raw string) *float64 {
+	for _, field := range strings.Fields(raw) {
+		n, err := strconv.ParseFloat(field, 64)
+		// n-n != 0 for NaN/±Inf, which encoding/json cannot marshal.
+		if err == nil && n-n == 0 {
+			return &n
+		}
+	}
+	return nil
+}
+
+func derefPSUSlot(slot *string) string {
+	if slot != nil {
+		return *slot
+	}
+	return "" // nil slot → empty string
+}
+
 // parseWattage extracts wattage from strings like "PSU 800W", "1200W PLATINUM".
 func parseWattage(s string) int {
 	s = strings.ToUpper(s)
--- a/audit/internal/collector/psu_sdr_test.go
+++ b/audit/internal/collector/psu_sdr_test.go
@@ -0,0 +1,91 @@
+package collector
+
+import "testing"
+
+func TestParsePSUSDR(t *testing.T) {
+	raw := `
+PS1 Input Power   | 215 Watts | ok
+PS1 Output Power  | 198 Watts | ok
+PS1 Input Voltage | 229 Volts | ok
+PS1 Temp         | 39 C      | ok
+PS1 Health       | 97 %      | ok
+PS2 Input Power   | 0 Watts   | cr
+`
+
+	got := parsePSUSDR(raw)
+	if len(got) != 2 {
+		t.Fatalf("len(got)=%d want 2", len(got))
+	}
+	if got[1].status != statusOK {
+		t.Fatalf("ps1 status=%q", got[1].status)
+	}
+	if got[1].inputPowerW == nil || *got[1].inputPowerW != 215 {
+		t.Fatalf("ps1 input power=%v", got[1].inputPowerW)
+	}
+	if got[1].outputPowerW == nil || *got[1].outputPowerW != 198 {
+		t.Fatalf("ps1 output power=%v", got[1].outputPowerW)
+	}
+	if got[1].inputVoltage == nil || *got[1].inputVoltage != 229 {
+		t.Fatalf("ps1 input voltage=%v", got[1].inputVoltage)
+	}
+	if got[1].temperatureC == nil || *got[1].temperatureC != 39 {
+		t.Fatalf("ps1 temperature=%v", got[1].temperatureC)
+	}
+	if got[1].healthPct == nil || *got[1].healthPct != 97 {
+		t.Fatalf("ps1 health=%v", got[1].healthPct)
+	}
+	if got[2].status != statusCritical {
+		t.Fatalf("ps2 status=%q", got[2].status)
+	}
+}
+
+func TestParsePSUSlotVendorVariants(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name string
+		want int
+	}{
+		{name: "PWS1 Status", want: 1},
+		{name: "Power Supply Bay 8", want: 8},
+		{name: "PS 6 Input Power", want: 6},
+	}
+	// Errorf keeps checking the remaining variants after a mismatch.
+	for _, tt := range tests {
+		got, ok := parsePSUSlot(tt.name)
+		if !ok || got != tt.want {
+			t.Errorf("parsePSUSlot(%q)=(%d,%v) want (%d,true)", tt.name, got, ok, tt.want)
+		}
+	}
+}
+
+func TestSynthesizePSUsFromSDR(t *testing.T) {
+	t.Parallel()
+
+	health := 97.0
+	outputPower := 915.0
+	got := synthesizePSUsFromSDR(map[int]psuSDR{
+		1: {
+			slot:         1,
+			status:       statusOK,
+			outputPowerW: &outputPower,
+			healthPct:    &health,
+		},
+	})
+
+	if len(got) != 1 {
+		t.Fatalf("len(got)=%d want 1", len(got))
+	}
+	if got[0].Slot == nil || *got[0].Slot != "0" {
+		t.Fatalf("slot=%v want 0", got[0].Slot)
+	}
+	if got[0].OutputPowerW == nil || *got[0].OutputPowerW != 915 {
+		t.Fatalf("output power=%v", got[0].OutputPowerW)
+	}
+	if got[0].LifeRemainingPct == nil || *got[0].LifeRemainingPct != 97 {
+		t.Fatalf("life remaining=%v", got[0].LifeRemainingPct)
+	}
+	if got[0].LifeUsedPct == nil || *got[0].LifeUsedPct != 3 {
+		t.Fatalf("life used=%v", got[0].LifeUsedPct)
+	}
+}
--- a/audit/internal/collector/psu_telemetry.go
+++ b/audit/internal/collector/psu_telemetry.go
@@ -0,0 +1,121 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"strconv"
+	"strings"
+)
+
+// enrichPSUsWithTelemetry fills TemperatureC and LifeRemainingPct/LifeUsedPct from sensor
+// data for PSUs that lack them, matching on the PSU's Slot string. PSUs without a slot
+// are skipped. The slice is modified in place and returned.
+func enrichPSUsWithTelemetry(psus []schema.HardwarePowerSupply, doc sensorsDoc) []schema.HardwarePowerSupply {
+	if len(psus) == 0 || len(doc) == 0 {
+		return psus
+	}
+
+	temps := psuTempsFromSensors(doc)
+	health := psuHealthFromSensors(doc)
+	for idx := range psus {
+		psu := &psus[idx]
+		key := derefPSUSlot(psu.Slot)
+		if key == "" {
+			continue
+		}
+		// Only gaps are filled: values already set on the PSU are never overwritten.
+		if temp, found := temps[key]; found && psu.TemperatureC == nil {
+			psu.TemperatureC = &temp
+		}
+		if remaining, found := health[key]; found && psu.LifeRemainingPct == nil {
+			used := 100 - remaining
+			psu.LifeRemainingPct, psu.LifeUsedPct = &remaining, &used
+		}
+	}
+	return psus
+}
+
+// psuHealthFromSensors collects one health value per PSU slot from doc. A feature counts
+// when isLikelyPSUHealth accepts its chip/feature names, firstFeaturePercent finds a value
+// and detectPSUSlot resolves a slot; the first value seen for a slot is kept.
+// NOTE(review): doc is ranged over as a map, so ties follow unspecified iteration order.
+func psuHealthFromSensors(doc sensorsDoc) map[string]float64 {
+	bySlot := map[string]float64{}
+	for chip, features := range doc {
+		for name, raw := range features {
+			feature, isMap := raw.(map[string]any)
+			if !isMap || !isLikelyPSUHealth(chip, name) {
+				continue
+			}
+			pct, found := firstFeaturePercent(feature)
+			if !found {
+				continue
+			}
+			slot, found := detectPSUSlot(chip, name)
+			if _, seen := bySlot[slot]; found && !seen {
+				bySlot[slot] = pct
+			}
+		}
+	}
+	return bySlot
+}
+
+// firstFeaturePercent returns the first floatFromAny-convertible value, in sortedFeatureKeys
+// order, whose lower-cased key contains "health", "life" or "remain" and lacks an "_alarm" suffix.
+func firstFeaturePercent(feature map[string]any) (float64, bool) {
+	for _, key := range sortedFeatureKeys(feature) {
+		name := strings.ToLower(key)
+		wanted := strings.Contains(name, "health") || strings.Contains(name, "life") || strings.Contains(name, "remain")
+		if !wanted || strings.HasSuffix(name, "_alarm") {
+			continue
+		}
+		if pct, ok := floatFromAny(feature[key]); ok {
+			return pct, true
+		}
+	}
+	return 0, false
+}
+
+func isLikelyPSUHealth(chip, feature string) bool {
+	label := strings.ToLower(chip + " " + feature) // match case-insensitively over both names
+	has := func(sub string) bool { return strings.Contains(label, sub) }
+	return (has("psu") || has("power supply")) && (has("health") || has("life") || has("remain"))
+}
+
+// psuTempsFromSensors collects one temperature per PSU slot from doc. Only features that
+// classifySensorFeature labels "temp" and isLikelyPSUTemp accepts are considered; the value
+// comes from firstFeatureFloat(feature, "_input") and the first one seen per slot is kept.
+// NOTE(review): doc is ranged over as a map, so ties follow unspecified iteration order.
+func psuTempsFromSensors(doc sensorsDoc) map[string]float64 {
+	bySlot := map[string]float64{}
+	for chip, features := range doc {
+		for name, raw := range features {
+			feature, isMap := raw.(map[string]any)
+			if !isMap || classifySensorFeature(feature) != "temp" || !isLikelyPSUTemp(chip, name) {
+				continue
+			}
+			temp, found := firstFeatureFloat(feature, "_input")
+			if !found {
+				continue
+			}
+			slot, found := detectPSUSlot(chip, name)
+			if _, seen := bySlot[slot]; found && !seen {
+				bySlot[slot] = temp
+			}
+		}
+	}
+	return bySlot
+}
+
+func isLikelyPSUTemp(chip, feature string) bool {
+	value := strings.ToLower(chip + " " + feature) // match case-insensitively over both names
+	return strings.Contains(value, "psu") || strings.Contains(value, "power supply")
+}
+
+func detectPSUSlot(parts ...string) (string, bool) {
+	for _, part := range parts { // the first part that yields a slot number wins
+		if value, ok := parsePSUSlot(part); ok && value > 0 {
+			return strconv.Itoa(value - 1), true // parsed number minus one, as a decimal string
+		}
+	}
+	return "", false
+}
--- a/audit/internal/collector/psu_telemetry_test.go
+++ b/audit/internal/collector/psu_telemetry_test.go
@@ -0,0 +1,42 @@
+package collector
+
+import (
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+func TestEnrichPSUsWithTelemetry(t *testing.T) {
+	slot0 := "0"
+	slot1 := "1"
+	psus := []schema.HardwarePowerSupply{
+		{Slot: &slot0},
+		{Slot: &slot1},
+	}
+
+	doc := sensorsDoc{
+		"psu-hwmon-0": {
+			"PSU1 Temp":           map[string]any{"temp1_input": 39.5},
+			"PSU2 Temp":           map[string]any{"temp2_input": 41.0},
+			"PSU1 Health":         map[string]any{"health1_input": 98.0},
+			"PSU2 Remaining Life": map[string]any{"life2_input": 95.0},
+		},
+	}
+
+	got := enrichPSUsWithTelemetry(psus, doc)
+	if got[0].TemperatureC == nil || *got[0].TemperatureC != 39.5 {
+		t.Fatalf("psu0 temperature mismatch: %#v", got[0].TemperatureC)
+	}
+	if got[1].TemperatureC == nil || *got[1].TemperatureC != 41.0 {
+		t.Fatalf("psu1 temperature mismatch: %#v", got[1].TemperatureC)
+	}
+	if got[0].LifeRemainingPct == nil || *got[0].LifeRemainingPct != 98.0 {
+		t.Fatalf("psu0 life remaining mismatch: %#v", got[0].LifeRemainingPct)
+	}
+	if got[0].LifeUsedPct == nil || *got[0].LifeUsedPct != 2.0 {
+		t.Fatalf("psu0 life used mismatch: %#v", got[0].LifeUsedPct)
+	}
+	if got[1].LifeRemainingPct == nil || *got[1].LifeRemainingPct != 95.0 {
+		t.Fatalf("psu1 life remaining mismatch: %#v", got[1].LifeRemainingPct)
+	}
+}
--- a/audit/internal/collector/raid.go
+++ b/audit/internal/collector/raid.go
@@ -83,11 +83,7 @@ func isLikelyRAIDController(dev schema.HardwarePCIeDevice) bool {
 	if dev.DeviceClass == nil {
 		return false
 	}
-	c := strings.ToLower(*dev.DeviceClass)
-	return strings.Contains(c, "raid") ||
-		strings.Contains(c, "sas") ||
-		strings.Contains(c, "mass storage") ||
-		strings.Contains(c, "serial attached scsi")
+	return isRAIDClass(*dev.DeviceClass)
 }

 func collectStorcliDrives() []schema.HardwareStorage {
@@ -182,7 +178,10 @@ func parseSASIrcuDisplay(raw string) []schema.HardwareStorage {

 		present := true
 		status := mapRAIDDriveStatus(b["State"])
-		s := schema.HardwareStorage{Present: &present, Status: &status}
+		s := schema.HardwareStorage{
+			HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+			Present:                 &present,
+		}

 		enclosure := strings.TrimSpace(b["Enclosure #"])
 		slot := strings.TrimSpace(b["Slot #"])
@@ -281,7 +280,10 @@ func parseArcconfPhysicalDrives(raw string) []schema.HardwareStorage {
 	for _, b := range blocks {
 		present := true
 		status := mapRAIDDriveStatus(b["State"])
-		s := schema.HardwareStorage{Present: &present, Status: &status}
+		s := schema.HardwareStorage{
+			HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+			Present:                 &present,
+		}

 		if v := strings.TrimSpace(b["Reported Location"]); v != "" {
 			s.Slot = &v
@@ -362,8 +364,11 @@ func parseSSACLIPhysicalDrives(raw string) []schema.HardwareStorage {
 		if m := ssacliPhysicalDriveLine.FindStringSubmatch(trimmed); len(m) == 3 {
 			flush()
 			present := true
-			status := "UNKNOWN"
-			s := schema.HardwareStorage{Present: &present, Status: &status}
+			status := statusUnknown
+			s := schema.HardwareStorage{
+				HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+				Present:                 &present,
+			}
 			slot := m[1]
 			s.Slot = &slot

@@ -475,8 +480,8 @@ func storcliDriveToStorage(d struct {
 	present := true
 	status := mapRAIDDriveStatus(d.State)
 	s := schema.HardwareStorage{
-		Present: &present,
-		Status:  &status,
+		HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+		Present:                 &present,
 	}

 	if v := strings.TrimSpace(d.EIDSlt); v != "" {
@@ -527,15 +532,15 @@ func mapRAIDDriveStatus(raw string) string {
 	u := strings.ToUpper(strings.TrimSpace(raw))
 	switch {
 	case strings.Contains(u, "OK"), strings.Contains(u, "OPTIMAL"), strings.Contains(u, "READY"):
-		return "OK"
+		return statusOK
 	case strings.Contains(u, "ONLN"), strings.Contains(u, "ONLINE"):
-		return "OK"
+		return statusOK
 	case strings.Contains(u, "RBLD"), strings.Contains(u, "REBUILD"):
-		return "WARNING"
+		return statusWarning
 	case strings.Contains(u, "FAIL"), strings.Contains(u, "OFFLINE"):
-		return "CRITICAL"
+		return statusCritical
 	default:
-		return "UNKNOWN"
+		return statusUnknown
 	}
 }

@@ -641,8 +646,9 @@ func enrichStorageWithVROC(storage []schema.HardwareStorage, pcie []schema.Hardw
 		storage[i].Telemetry["vroc_array"] = arr.Name
 		storage[i].Telemetry["vroc_degraded"] = arr.Degraded
 		if arr.Degraded {
-			status := "WARNING"
+			status := statusWarning
 			storage[i].Status = &status
+			storage[i].ErrorDescription = stringPtr("VROC array is degraded")
 		}
 		updated++
 	}
@@ -659,14 +665,14 @@ func hasVROCController(pcie []schema.HardwarePCIeDevice) bool {

 		class := ""
 		if dev.DeviceClass != nil {
-			class = strings.ToLower(*dev.DeviceClass)
+			class = strings.TrimSpace(*dev.DeviceClass)
 		}
 		model := ""
 		if dev.Model != nil {
 			model = strings.ToLower(*dev.Model)
 		}

-		if strings.Contains(class, "raid") ||
+		if isRAIDClass(class) ||
 			strings.Contains(model, "vroc") ||
 			strings.Contains(model, "volume management device") ||
 			strings.Contains(model, "vmd") {
--- a/audit/internal/collector/raid_controller_telemetry.go
+++ b/audit/internal/collector/raid_controller_telemetry.go
@@ -0,0 +1,334 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"encoding/json"
+	"log/slog"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// raidControllerTelemetry holds the battery/capacitor readings parsed from a
+// RAID vendor CLI for a single controller. Every field is a pointer; a nil
+// field means the value was not found (or could not be parsed) in the tool
+// output.
+type raidControllerTelemetry struct {
+	BatteryChargePct       *float64
+	BatteryHealthPct       *float64
+	BatteryTemperatureC    *float64
+	BatteryVoltageV        *float64
+	BatteryReplaceRequired *bool
+	ErrorDescription       *string // raw state text when it looks unhealthy (see batteryStateDescription)
+}
+
+// enrichPCIeWithRAIDTelemetry attaches battery/capacitor telemetry gathered
+// from the RAID vendor tools to the matching PCIe devices and returns devs.
+//
+// Telemetry entries are grouped by PCI vendor ID and handed out in order: the
+// n-th likely RAID controller of a vendor receives the n-th telemetry entry
+// collected for that vendor. Devices without a vendor ID, devices that do not
+// look like RAID controllers, and devices beyond the number of collected
+// entries are left untouched.
+//
+// NOTE(review): the parsers drop controllers that reported no telemetry, so
+// the positional pairing presumably assumes every controller reports some.
+func enrichPCIeWithRAIDTelemetry(devs []schema.HardwarePCIeDevice) []schema.HardwarePCIeDevice {
+	telemetryByVendor := collectRAIDControllerTelemetry()
+	if len(telemetryByVendor) == 0 {
+		return devs
+	}
+
+	// consumed[vendor] counts the telemetry entries already assigned.
+	consumed := make(map[int]int)
+	for i := range devs {
+		dev := &devs[i]
+		if dev.VendorID == nil || !isLikelyRAIDController(*dev) {
+			continue
+		}
+		vendorID := *dev.VendorID
+		entries := telemetryByVendor[vendorID]
+		next := consumed[vendorID]
+		if next >= len(entries) {
+			continue
+		}
+		consumed[vendorID] = next + 1
+		applyRAIDControllerTelemetry(dev, entries[next])
+	}
+	return devs
+}
+
+// applyRAIDControllerTelemetry copies every reading present in tel onto dev.
+// Readings that are nil in tel leave the device field as it was. When tel
+// carries an error description, the device status is raised to warning unless
+// the device already has a status other than OK.
+func applyRAIDControllerTelemetry(dev *schema.HardwarePCIeDevice, tel raidControllerTelemetry) {
+	if v := tel.BatteryChargePct; v != nil {
+		dev.BatteryChargePct = v
+	}
+	if v := tel.BatteryHealthPct; v != nil {
+		dev.BatteryHealthPct = v
+	}
+	if v := tel.BatteryTemperatureC; v != nil {
+		dev.BatteryTemperatureC = v
+	}
+	if v := tel.BatteryVoltageV; v != nil {
+		dev.BatteryVoltageV = v
+	}
+	if v := tel.BatteryReplaceRequired; v != nil {
+		dev.BatteryReplaceRequired = v
+	}
+
+	if tel.ErrorDescription == nil {
+		return
+	}
+	dev.ErrorDescription = tel.ErrorDescription
+	// Never downgrade a status that is already worse than OK.
+	if dev.Status != nil && *dev.Status != statusOK {
+		return
+	}
+	warning := statusWarning
+	dev.Status = &warning
+}
+
+// collectRAIDControllerTelemetry runs the supported RAID vendor tools
+// (storcli64, ssacli, arcconf) and returns the parsed controller telemetry
+// keyed by PCI vendor ID. A tool whose query fails or yields no telemetry
+// contributes nothing; the returned map is never nil.
+func collectRAIDControllerTelemetry() map[int][]raidControllerTelemetry {
+	sources := []struct {
+		vendor int
+		tool   string
+		args   []string
+		parse  func([]byte) []raidControllerTelemetry
+		logMsg string
+	}{
+		{
+			vendor: vendorBroadcomLSI,
+			tool:   "storcli64",
+			args:   []string{"/call", "show", "all", "J"},
+			parse:  parseStorcliControllerTelemetry,
+			logMsg: "raid: storcli controller telemetry",
+		},
+		{
+			vendor: vendorHPE,
+			tool:   "ssacli",
+			args:   []string{"ctrl", "all", "show", "config", "detail"},
+			parse: func(raw []byte) []raidControllerTelemetry {
+				return parseSSACLIControllerTelemetry(string(raw))
+			},
+			logMsg: "raid: ssacli controller telemetry",
+		},
+		{
+			vendor: vendorAdaptec,
+			tool:   "arcconf",
+			args:   []string{"getconfig", "1", "ad"},
+			parse: func(raw []byte) []raidControllerTelemetry {
+				return parseArcconfControllerTelemetry(string(raw))
+			},
+			logMsg: "raid: arcconf controller telemetry",
+		},
+	}
+
+	out := map[int][]raidControllerTelemetry{}
+	for _, src := range sources {
+		raw, err := raidToolQuery(src.tool, src.args...)
+		if err != nil {
+			// Tool missing or failed: best effort, move on to the next one.
+			continue
+		}
+		list := src.parse(raw)
+		if len(list) == 0 {
+			continue
+		}
+		out[src.vendor] = append(out[src.vendor], list...)
+		slog.Info(src.logMsg, "count", len(list))
+	}
+	return out
+}
+
+// parseStorcliControllerTelemetry decodes storcli JSON output and returns one
+// telemetry entry per controller whose "Response Data" held at least one
+// battery/capacitor reading. The BBU_Info, BBU_Info_Details, CV_Info and
+// CV_Info_Details sections are merged in that order. Invalid JSON is logged
+// and yields nil.
+func parseStorcliControllerTelemetry(raw []byte) []raidControllerTelemetry {
+	type controller struct {
+		ResponseData map[string]any `json:"Response Data"`
+	}
+	var payload struct {
+		Controllers []controller `json:"Controllers"`
+	}
+	if err := json.Unmarshal(raw, &payload); err != nil {
+		slog.Warn("raid: parse storcli controller telemetry failed", "err", err)
+		return nil
+	}
+
+	sections := [...]string{"BBU_Info", "BBU_Info_Details", "CV_Info", "CV_Info_Details"}
+	var result []raidControllerTelemetry
+	for _, c := range payload.Controllers {
+		var tel raidControllerTelemetry
+		for _, section := range sections {
+			mergeStorcliBatteryMap(&tel, nestedStringMap(c.ResponseData[section]))
+		}
+		if hasRAIDControllerTelemetry(tel) {
+			result = append(result, tel)
+		}
+	}
+	return result
+}
+
+// nestedStringMap flattens a decoded JSON value into a key -> string map.
+// A JSON object is flattened directly; for a JSON array every object element
+// is flattened into the same map (non-object elements are ignored). Any other
+// value yields nil.
+func nestedStringMap(raw any) map[string]string {
+	var objects []map[string]any
+	switch v := raw.(type) {
+	case map[string]any:
+		objects = append(objects, v)
+	case []any:
+		for _, elem := range v {
+			if obj, ok := elem.(map[string]any); ok {
+				objects = append(objects, obj)
+			}
+		}
+	default:
+		return nil
+	}
+
+	flat := map[string]string{}
+	for _, obj := range objects {
+		flattenStringMap("", obj, flat)
+	}
+	return flat
+}
+
+// flattenStringMap writes the scalar leaves of in into out. Each output key is
+// the lower-cased, space-joined path of JSON keys leading to the leaf, with
+// prefix as the path of the enclosing object. Strings are stored verbatim,
+// numbers and booleans in their textual form; nested objects and objects
+// inside arrays are flattened recursively under the current path. Values of
+// any other kind (null, scalars inside arrays) are skipped.
+func flattenStringMap(prefix string, in map[string]any, out map[string]string) {
+	for name, val := range in {
+		path := strings.Trim(prefix+" "+name, " ")
+		path = strings.TrimSpace(strings.ToLower(path))
+
+		switch v := val.(type) {
+		case string:
+			out[path] = v
+		case bool:
+			if v {
+				out[path] = "true"
+			} else {
+				out[path] = "false"
+			}
+		case float64:
+			out[path] = strconv.FormatFloat(v, 'f', -1, 64)
+		case json.Number:
+			out[path] = v.String()
+		case map[string]any:
+			flattenStringMap(path, v, out)
+		case []any:
+			for _, elem := range v {
+				if child, ok := elem.(map[string]any); ok {
+					flattenStringMap(path, child, out)
+				}
+			}
+		}
+	}
+}
+
+// mergeStorcliBatteryMap copies battery/capacitor readings from one flattened
+// storcli section (as produced by nestedStringMap) into tel. A field that is
+// already set in tel is never overwritten.
+//
+// Keys are visited in sorted order and in two passes: keys that name a reading
+// precisely ("relative state of charge", "state of health", "temperature",
+// "voltage") are considered before keys that only contain a broad substring
+// such as "charge" or "health". Go randomises map iteration order, so without
+// this ordering the chosen value would change between runs whenever several
+// keys feed the same field (for example "Full Charge Capacity" versus
+// "Relative State of Charge").
+func mergeStorcliBatteryMap(tel *raidControllerTelemetry, fields map[string]string) {
+	if len(fields) == 0 {
+		return
+	}
+	keys := make([]string, 0, len(fields))
+	for key := range fields {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	for _, wantPrecise := range []bool{true, false} {
+		for _, key := range keys {
+			field, precise := classifyStorcliBatteryKey(key)
+			if field == "" || precise != wantPrecise {
+				continue
+			}
+			raw := fields[key]
+			switch field {
+			case "charge":
+				if tel.BatteryChargePct == nil {
+					tel.BatteryChargePct = parsePercentPtr(raw)
+				}
+			case "health":
+				if tel.BatteryHealthPct == nil {
+					tel.BatteryHealthPct = parsePercentPtr(raw)
+				}
+			case "temperature":
+				if tel.BatteryTemperatureC == nil {
+					tel.BatteryTemperatureC = parseFloatPtr(raw)
+				}
+			case "voltage":
+				if tel.BatteryVoltageV == nil {
+					tel.BatteryVoltageV = parseFloatPtr(raw)
+				}
+			case "replace":
+				if tel.BatteryReplaceRequired == nil {
+					tel.BatteryReplaceRequired = parseReplaceRequired(raw)
+				}
+			case "state":
+				if desc := batteryStateDescription(raw); desc != nil && tel.ErrorDescription == nil {
+					tel.ErrorDescription = desc
+				}
+			}
+		}
+	}
+}
+
+// classifyStorcliBatteryKey maps a flattened storcli key to the telemetry
+// field it feeds ("charge", "health", "temperature", "voltage", "replace",
+// "state", or "" when the key is irrelevant). precise reports whether the key
+// names the reading exactly rather than matching a broad substring.
+func classifyStorcliBatteryKey(key string) (field string, precise bool) {
+	lower := strings.ToLower(strings.TrimSpace(key))
+	switch {
+	case strings.Contains(lower, "relative state of charge"):
+		return "charge", true
+	case strings.Contains(lower, "remaining capacity"), strings.Contains(lower, "charge"):
+		return "charge", false
+	case strings.Contains(lower, "state of health"):
+		return "health", true
+	case strings.Contains(lower, "health"):
+		return "health", false
+	case strings.Contains(lower, "temperature"):
+		return "temperature", lower == "temperature"
+	case strings.Contains(lower, "voltage"):
+		return "voltage", lower == "voltage"
+	case strings.Contains(lower, "replace"):
+		return "replace", true
+	case strings.Contains(lower, "learn cycle requested"),
+		strings.Contains(lower, "battery state"),
+		strings.Contains(lower, "capacitance state"):
+		return "state", true
+	}
+	return "", false
+}
+
+// parseSSACLIControllerTelemetry extracts battery/capacitor telemetry from the
+// text output of `ssacli ctrl all show config detail`. It returns one entry
+// per controller section that contained at least one reading.
+//
+// A section starts at a controller heading (see isSSACLIControllerHeading).
+// "key: value" lines before the first heading are ignored. For temperature,
+// voltage, charge and health the last matching line of a section wins; the
+// replace/failed flag and the error description keep the first value found.
+func parseSSACLIControllerTelemetry(raw string) []raidControllerTelemetry {
+	lines := strings.Split(raw, "\n")
+	var out []raidControllerTelemetry
+	var current *raidControllerTelemetry
+
+	// flush stores the section collected so far, if it has any reading.
+	flush := func() {
+		if current != nil && hasRAIDControllerTelemetry(*current) {
+			out = append(out, *current)
+		}
+		current = nil
+	}
+
+	for _, line := range lines {
+		trimmed := strings.TrimSpace(line)
+		if trimmed == "" {
+			continue
+		}
+		if isSSACLIControllerHeading(trimmed) {
+			flush()
+			current = &raidControllerTelemetry{}
+			continue
+		}
+		if current == nil {
+			continue
+		}
+		if idx := strings.Index(trimmed, ":"); idx > 0 {
+			key := strings.ToLower(strings.TrimSpace(trimmed[:idx]))
+			val := strings.TrimSpace(trimmed[idx+1:])
+			switch {
+			case strings.Contains(key, "capacitor temperature"), strings.Contains(key, "battery temperature"):
+				current.BatteryTemperatureC = parseFloatPtr(val)
+			case strings.Contains(key, "capacitor voltage"), strings.Contains(key, "battery voltage"):
+				current.BatteryVoltageV = parseFloatPtr(val)
+			case strings.Contains(key, "capacitor charge"), strings.Contains(key, "battery charge"):
+				current.BatteryChargePct = parsePercentPtr(val)
+			case strings.Contains(key, "capacitor health"), strings.Contains(key, "battery health"):
+				current.BatteryHealthPct = parsePercentPtr(val)
+			case strings.Contains(key, "replace") || strings.Contains(key, "failed"):
+				if current.BatteryReplaceRequired == nil {
+					current.BatteryReplaceRequired = parseReplaceRequired(val)
+				}
+				if desc := batteryStateDescription(val); desc != nil && current.ErrorDescription == nil {
+					current.ErrorDescription = desc
+				}
+			}
+		}
+	}
+	flush()
+	return out
+}
+
+// isSSACLIControllerHeading reports whether a trimmed ssacli output line opens
+// a new controller section. Lines starting with "Smart Array" or "HPE Smart
+// Array" always do. Lines starting with "Controller " only do when they carry
+// no colon: "Controller Status: OK" or "Controller Temperature (C): 45" are
+// properties of the current controller, and treating them as headings would
+// split one controller's readings across several entries.
+func isSSACLIControllerHeading(line string) bool {
+	lower := strings.ToLower(line)
+	if strings.HasPrefix(lower, "smart array") || strings.HasPrefix(lower, "hpe smart array") {
+		return true
+	}
+	return strings.HasPrefix(lower, "controller ") && !strings.Contains(lower, ":")
+}
+
+// parseArcconfControllerTelemetry scans arcconf "key : value" output for
+// battery/capacitor readings and folds them into a single telemetry entry.
+// It returns a one-element slice when anything was found and nil otherwise.
+// For temperature, voltage, charge and health the last matching line wins;
+// the replace/failed flag and the error description keep the first value.
+func parseArcconfControllerTelemetry(raw string) []raidControllerTelemetry {
+	containsAny := func(s string, needles ...string) bool {
+		for _, needle := range needles {
+			if strings.Contains(s, needle) {
+				return true
+			}
+		}
+		return false
+	}
+
+	var tel raidControllerTelemetry
+	for _, line := range strings.Split(raw, "\n") {
+		entry := strings.TrimSpace(line)
+		sep := strings.Index(entry, ":")
+		if sep <= 0 {
+			continue
+		}
+		key := strings.ToLower(strings.TrimSpace(entry[:sep]))
+		val := strings.TrimSpace(entry[sep+1:])
+
+		switch {
+		case containsAny(key, "battery temperature", "capacitor temperature"):
+			tel.BatteryTemperatureC = parseFloatPtr(val)
+		case containsAny(key, "battery voltage", "capacitor voltage"):
+			tel.BatteryVoltageV = parseFloatPtr(val)
+		case containsAny(key, "battery charge", "capacitor charge"):
+			tel.BatteryChargePct = parsePercentPtr(val)
+		case containsAny(key, "battery health", "capacitor health"):
+			tel.BatteryHealthPct = parsePercentPtr(val)
+		case containsAny(key, "replace", "failed"):
+			if tel.BatteryReplaceRequired == nil {
+				tel.BatteryReplaceRequired = parseReplaceRequired(val)
+			}
+			if desc := batteryStateDescription(val); desc != nil && tel.ErrorDescription == nil {
+				tel.ErrorDescription = desc
+			}
+		}
+	}
+
+	if !hasRAIDControllerTelemetry(tel) {
+		return nil
+	}
+	return []raidControllerTelemetry{tel}
+}
+
+// hasRAIDControllerTelemetry reports whether at least one reading in tel is
+// set, i.e. whether any of its pointer fields is non-nil.
+func hasRAIDControllerTelemetry(tel raidControllerTelemetry) bool {
+	readings := []*float64{
+		tel.BatteryChargePct,
+		tel.BatteryHealthPct,
+		tel.BatteryTemperatureC,
+		tel.BatteryVoltageV,
+	}
+	for _, reading := range readings {
+		if reading != nil {
+			return true
+		}
+	}
+	return tel.BatteryReplaceRequired != nil || tel.ErrorDescription != nil
+}
+
+// parsePercentPtr trims raw, removes every "%" sign and hands the remainder
+// to parseFloatPtr.
+func parsePercentPtr(raw string) *float64 {
+	trimmed := strings.TrimSpace(raw)
+	withoutSign := strings.ReplaceAll(trimmed, "%", "")
+	return parseFloatPtr(withoutSign)
+}
+
+// parseReplaceRequired interprets a vendor-tool value as "does the battery or
+// capacitor need replacing?". It returns a pointer to true or false, or nil
+// when the text carries no recognisable answer.
+//
+// The value is split into lower-case words so that short keywords are only
+// matched as whole words: "Unknown" no longer counts as "no". A negation
+// ("no", "not", "none") flips a positive keyword, so "Not required" and
+// "No replacement required" yield false instead of true.
+func parseReplaceRequired(raw string) *bool {
+	words := strings.FieldsFunc(strings.ToLower(raw), func(r rune) bool {
+		return r < 'a' || r > 'z'
+	})
+
+	var negated, needsReplace, healthy bool
+	for _, word := range words {
+		switch {
+		case word == "no", word == "not", word == "none":
+			negated = true
+		case word == "yes", word == "required",
+			strings.HasPrefix(word, "replace"), strings.HasPrefix(word, "failed"):
+			needsReplace = true
+		case word == "ok", word == "good", word == "optimal", word == "normal":
+			healthy = true
+		}
+	}
+
+	switch {
+	case needsReplace:
+		value := !negated
+		return &value
+	case negated, healthy:
+		value := false
+		return &value
+	default:
+		return nil
+	}
+}
+
+// batteryStateDescription returns raw, unmodified, when it describes an
+// unhealthy state: the match is case-insensitive on "failed", "fault",
+// "replace", "warning" or "degraded". Empty, blank and all other values
+// yield nil.
+func batteryStateDescription(raw string) *string {
+	normalized := strings.ToLower(strings.TrimSpace(raw))
+	for _, marker := range []string{"failed", "fault", "replace", "warning", "degraded"} {
+		if strings.Contains(normalized, marker) {
+			return &raw
+		}
+	}
+	return nil
+}
--- a/audit/internal/collector/raid_parsers_test.go
+++ b/audit/internal/collector/raid_parsers_test.go
@@ -1,6 +1,10 @@
 package collector

-import "testing"
+import (
+	"bee/audit/internal/schema"
+	"errors"
+	"testing"
+)

 func TestParseSASIrcuControllerIDs(t *testing.T) {
 	raw := `LSI Corporation SAS2 IR Configuration Utility.
@@ -90,7 +94,111 @@ physicaldrive 1I:1:2 (894 GB, SAS HDD, Failed)
 	if drives[0].Status == nil || *drives[0].Status != "OK" {
 		t.Fatalf("drive0 status: %v", drives[0].Status)
 	}
-	if drives[1].Status == nil || *drives[1].Status != "CRITICAL" {
+	if drives[1].Status == nil || *drives[1].Status != statusCritical {
 		t.Fatalf("drive1 status: %v", drives[1].Status)
 	}
 }
+
+// TestParseStorcliControllerTelemetry feeds a single-controller storcli JSON
+// document with a BBU_Info section and checks every parsed battery reading.
+func TestParseStorcliControllerTelemetry(t *testing.T) {
+	raw := []byte(`{
+  "Controllers": [
+    {
+      "Response Data": {
+        "BBU_Info": {
+          "State of Health": "98 %",
+          "Relative State of Charge": "76 %",
+          "Temperature": "41 C",
+          "Voltage": "12.3 V",
+          "Replacement required": "No"
+        }
+      }
+    }
+  ]
+}`)
+	got := parseStorcliControllerTelemetry(raw)
+	if len(got) != 1 {
+		t.Fatalf("len(got)=%d want 1", len(got))
+	}
+	if got[0].BatteryHealthPct == nil || *got[0].BatteryHealthPct != 98 {
+		t.Fatalf("battery health=%v", got[0].BatteryHealthPct)
+	}
+	if got[0].BatteryChargePct == nil || *got[0].BatteryChargePct != 76 {
+		t.Fatalf("battery charge=%v", got[0].BatteryChargePct)
+	}
+	if got[0].BatteryTemperatureC == nil || *got[0].BatteryTemperatureC != 41 {
+		t.Fatalf("battery temperature=%v", got[0].BatteryTemperatureC)
+	}
+	if got[0].BatteryVoltageV == nil || *got[0].BatteryVoltageV != 12.3 {
+		t.Fatalf("battery voltage=%v", got[0].BatteryVoltageV)
+	}
+	// "Replacement required": "No" must parse to an explicit false, not nil.
+	if got[0].BatteryReplaceRequired == nil || *got[0].BatteryReplaceRequired {
+		t.Fatalf("battery replace=%v", got[0].BatteryReplaceRequired)
+	}
+}
+
+// TestParseSSACLIControllerTelemetry parses one "Smart Array" controller
+// section of ssacli text output and checks the capacitor temperature and
+// charge readings.
+func TestParseSSACLIControllerTelemetry(t *testing.T) {
+	raw := `Smart Array P440ar in Slot 0
+   Battery/Capacitor Count: 1
+   Capacitor Temperature  (C): 37
+   Capacitor Charge (%): 94
+   Capacitor Health (%): 96
+   Capacitor Voltage (V): 9.8
+   Capacitor Failed: No
+`
+	got := parseSSACLIControllerTelemetry(raw)
+	if len(got) != 1 {
+		t.Fatalf("len(got)=%d want 1", len(got))
+	}
+	if got[0].BatteryTemperatureC == nil || *got[0].BatteryTemperatureC != 37 {
+		t.Fatalf("battery temperature=%v", got[0].BatteryTemperatureC)
+	}
+	if got[0].BatteryChargePct == nil || *got[0].BatteryChargePct != 94 {
+		t.Fatalf("battery charge=%v", got[0].BatteryChargePct)
+	}
+}
+
+// TestEnrichPCIeWithRAIDTelemetry stubs raidToolQuery so that only storcli64
+// answers, then checks that the CV_Info readings end up on a Broadcom/LSI
+// mass-storage PCIe device.
+func TestEnrichPCIeWithRAIDTelemetry(t *testing.T) {
+	// Swap the tool runner for a stub and restore it when the test ends.
+	orig := raidToolQuery
+	t.Cleanup(func() { raidToolQuery = orig })
+	raidToolQuery = func(name string, args ...string) ([]byte, error) {
+		switch name {
+		case "storcli64":
+			return []byte(`{
+  "Controllers": [
+    {
+      "Response Data": {
+        "CV_Info": {
+          "State of Health": "99 %",
+          "Relative State of Charge": "81 %",
+          "Temperature": "38 C",
+          "Voltage": "12.1 V",
+          "Replacement required": "No"
+        }
+      }
+    }
+  ]
+}`), nil
+		default:
+			return nil, errors.New("skip")
+		}
+	}
+
+	vendor := vendorBroadcomLSI
+	class := "MassStorageController"
+	status := statusOK
+	devs := []schema.HardwarePCIeDevice{{
+		VendorID:                &vendor,
+		DeviceClass:             &class,
+		HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+	}}
+	out := enrichPCIeWithRAIDTelemetry(devs)
+	if out[0].BatteryHealthPct == nil || *out[0].BatteryHealthPct != 99 {
+		t.Fatalf("battery health=%v", out[0].BatteryHealthPct)
+	}
+	if out[0].BatteryChargePct == nil || *out[0].BatteryChargePct != 81 {
+		t.Fatalf("battery charge=%v", out[0].BatteryChargePct)
+	}
+	if out[0].BatteryVoltageV == nil || *out[0].BatteryVoltageV != 12.1 {
+		t.Fatalf("battery voltage=%v", out[0].BatteryVoltageV)
+	}
+}
--- a/audit/internal/collector/sensors.go
+++ b/audit/internal/collector/sensors.go
@@ -0,0 +1,373 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"encoding/json"
+	"log/slog"
+	"os/exec"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// sensorsDoc is the decoded output of `sensors -j`: chip name -> entry name ->
+// value. buildSensorsFromDoc treats entries that decode to a JSON object as
+// sensor features and skips the "Adapter" entry.
+type sensorsDoc map[string]map[string]any
+
+// collectSensors reads `sensors -j` and converts it into schema sensors.
+// It returns nil when the tool is unavailable, its output cannot be decoded,
+// or no sensor of any kind was recognised.
+func collectSensors() *schema.HardwareSensors {
+	doc, err := readSensorsJSONDoc()
+	if err != nil {
+		slog.Info("sensors: unavailable, skipping", "err", err)
+		return nil
+	}
+
+	sensors := buildSensorsFromDoc(doc)
+	if sensors == nil {
+		return nil
+	}
+	fans, power := len(sensors.Fans), len(sensors.Power)
+	temps, other := len(sensors.Temperatures), len(sensors.Other)
+	if fans == 0 && power == 0 && temps == 0 && other == 0 {
+		return nil
+	}
+
+	slog.Info("sensors: collected", "fans", fans, "power", power, "temperatures", temps, "other", other)
+	return sensors
+}
+
+// readSensorsJSONDoc runs `sensors -j` and decodes its standard output.
+// It returns the command error or the JSON decoding error unchanged.
+func readSensorsJSONDoc() (sensorsDoc, error) {
+	cmd := exec.Command("sensors", "-j")
+	stdout, err := cmd.Output()
+	if err != nil {
+		return nil, err
+	}
+
+	var parsed sensorsDoc
+	if err = json.Unmarshal(stdout, &parsed); err != nil {
+		return nil, err
+	}
+	return parsed, nil
+}
+
+// buildSensorsFromDoc converts decoded `sensors -j` output into schema
+// sensors. Chips and their entries are visited in sorted order so the result
+// is stable. The "Adapter" entry, entries that are not JSON objects and
+// entries with a blank name are skipped. Each remaining feature is classified
+// as fan, temperature, power or other; the chip name becomes the location.
+//
+// Within one category only the first sensor with a given name is kept, even
+// when later ones belong to a different chip. An empty doc yields nil.
+func buildSensorsFromDoc(doc sensorsDoc) *schema.HardwareSensors {
+	if len(doc) == 0 {
+		return nil
+	}
+
+	chipNames := make([]string, 0, len(doc))
+	for chipName := range doc {
+		chipNames = append(chipNames, chipName)
+	}
+	sort.Strings(chipNames)
+
+	sensors := &schema.HardwareSensors{}
+	seen := make(map[string]struct{})
+	for _, chipName := range chipNames {
+		entries := doc[chipName]
+		location := sensorLocation(chipName)
+
+		for _, entryName := range sortedFeatureKeys(entries) {
+			if strings.EqualFold(entryName, "Adapter") {
+				continue
+			}
+			feature, isFeature := entries[entryName].(map[string]any)
+			name := strings.TrimSpace(entryName)
+			if !isFeature || name == "" {
+				continue
+			}
+
+			switch classifySensorFeature(feature) {
+			case "fan":
+				if item := buildFanSensor(name, location, feature); item != nil && !duplicateSensor(seen, "fan", item.Name) {
+					sensors.Fans = append(sensors.Fans, *item)
+				}
+			case "temp":
+				if item := buildTempSensor(name, location, feature); item != nil && !duplicateSensor(seen, "temp", item.Name) {
+					sensors.Temperatures = append(sensors.Temperatures, *item)
+				}
+			case "power":
+				if item := buildPowerSensor(name, location, feature); item != nil && !duplicateSensor(seen, "power", item.Name) {
+					sensors.Power = append(sensors.Power, *item)
+				}
+			default:
+				if item := buildOtherSensor(name, location, feature); item != nil && !duplicateSensor(seen, "other", item.Name) {
+					sensors.Other = append(sensors.Other, *item)
+				}
+			}
+		}
+	}
+	return sensors
+}
+
+// parseSensorsJSON decodes raw `sensors -j` output and converts it with
+// buildSensorsFromDoc. A JSON decoding error is returned unchanged.
+func parseSensorsJSON(raw []byte) (*schema.HardwareSensors, error) {
+	var parsed sensorsDoc
+	if err := json.Unmarshal(raw, &parsed); err != nil {
+		return nil, err
+	}
+	return buildSensorsFromDoc(parsed), nil
+}
+
+// duplicateSensor reports whether a sensor with this type and name was
+// already recorded in seen. A first occurrence is recorded and reported as
+// false.
+func duplicateSensor(seen map[string]struct{}, sensorType, name string) bool {
+	// NUL separates type and name inside the composite key.
+	id := sensorType + "\x00" + name
+	_, known := seen[id]
+	if !known {
+		seen[id] = struct{}{}
+	}
+	return known
+}
+
+// sensorLocation returns the whitespace-trimmed chip name as a location
+// pointer, or nil when the name is blank.
+func sensorLocation(chip string) *string {
+	if trimmed := strings.TrimSpace(chip); trimmed != "" {
+		return &trimmed
+	}
+	return nil
+}
+
+// classifySensorFeature derives the sensor category of a feature from the
+// names of its sub-readings: "fan" and "temp" need a matching "*_input" key,
+// "power" is chosen for a power "*_input"/"*_average" key, a current
+// ("curr") input or a voltage ("in"-prefixed) input. Anything else is
+// "other".
+func classifySensorFeature(feature map[string]any) string {
+	for key := range feature {
+		isInput := strings.HasSuffix(key, "_input")
+		if isInput && strings.Contains(key, "fan") {
+			return "fan"
+		}
+		if isInput && strings.Contains(key, "temp") {
+			return "temp"
+		}
+		if strings.Contains(key, "power") && (isInput || strings.HasSuffix(key, "_average")) {
+			return "power"
+		}
+		if isInput && (strings.Contains(key, "curr") || strings.HasPrefix(key, "in")) {
+			return "power"
+		}
+	}
+	return "other"
+}
+
+// buildFanSensor builds a fan sensor from a feature. The RPM is the first
+// numeric "*_input" value truncated to an integer; without one the result is
+// nil. A raised alarm flag sets the status.
+func buildFanSensor(name string, location *string, feature map[string]any) *schema.HardwareFanSensor {
+	rpm, found := firstFeatureInt(feature, "_input")
+	if !found {
+		return nil
+	}
+	fan := &schema.HardwareFanSensor{Name: name, Location: location, RPM: &rpm}
+	if alarm := sensorStatusFromFeature(feature); alarm != nil {
+		fan.Status = alarm
+	}
+	return fan
+}
+
+// buildTempSensor builds a temperature sensor from a feature. The reading is
+// the first numeric "*_input" value; without one the result is nil. The
+// warning threshold comes from a "*_max"/"*_high" value and the critical
+// threshold from a "*_crit"/"*_emergency" value. A raised alarm flag decides
+// the status; otherwise the status is derived from the reading and the
+// thresholds.
+func buildTempSensor(name string, location *string, feature map[string]any) *schema.HardwareTemperatureSensor {
+	reading, found := firstFeatureFloat(feature, "_input")
+	if !found {
+		return nil
+	}
+	sensor := &schema.HardwareTemperatureSensor{Name: name, Location: location, Celsius: &reading}
+
+	if limit, ok := firstFeatureFloatWithSuffixes(feature, []string{"_max", "_high"}); ok {
+		sensor.ThresholdWarningCelsius = &limit
+	}
+	if limit, ok := firstFeatureFloatWithSuffixes(feature, []string{"_crit", "_emergency"}); ok {
+		sensor.ThresholdCriticalCelsius = &limit
+	}
+
+	status := sensorStatusFromFeature(feature)
+	if status == nil {
+		status = deriveTemperatureStatus(sensor.Celsius, sensor.ThresholdWarningCelsius, sensor.ThresholdCriticalCelsius)
+	}
+	sensor.Status = status
+	return sensor
+}
+
+// buildPowerSensor builds a power sensor from a feature. It fills the power
+// reading (see powerReadingFromFeature), the current from a "curr*_input"
+// value and the voltage from an "in*_input" value. When none of the three is
+// available the result is nil. A raised alarm flag sets the status.
+func buildPowerSensor(name string, location *string, feature map[string]any) *schema.HardwarePowerSensor {
+	item := &schema.HardwarePowerSensor{Name: name, Location: location}
+	if v, ok := powerReadingFromFeature(feature); ok {
+		item.PowerW = &v
+	}
+	if v, ok := firstFeatureFloatWithPrefix(feature, "curr"); ok {
+		item.CurrentA = &v
+	}
+	if v, ok := firstFeatureFloatWithPrefix(feature, "in"); ok {
+		item.VoltageV = &v
+	}
+	if item.PowerW == nil && item.CurrentA == nil && item.VoltageV == nil {
+		return nil
+	}
+	if status := sensorStatusFromFeature(feature); status != nil {
+		item.Status = status
+	}
+	return item
+}
+
+// powerReadingFromFeature returns the measured power of a feature: the first
+// numeric "power*_input" value in sorted key order, or failing that the first
+// numeric "power*_average" value. Other power keys are ignored on purpose:
+// "power1_alarm", "power1_cap" and "power1_crit" all sort before
+// "power1_input" and are flags or limits, not measurements.
+func powerReadingFromFeature(feature map[string]any) (float64, bool) {
+	keys := sortedFeatureKeys(feature)
+	for _, suffix := range []string{"_input", "_average"} {
+		for _, key := range keys {
+			if !strings.Contains(key, "power") || !strings.HasSuffix(key, suffix) {
+				continue
+			}
+			if value, ok := floatFromAny(feature[key]); ok {
+				return value, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// buildOtherSensor builds a generic sensor from a feature that is neither a
+// fan, a temperature nor a power reading. The value is the first numeric
+// non-alarm reading; without one the result is nil. The unit is set only when
+// one could be inferred, and a raised alarm flag sets the status.
+func buildOtherSensor(name string, location *string, feature map[string]any) *schema.HardwareOtherSensor {
+	reading, unit, found := firstGenericSensorValue(feature)
+	if !found {
+		return nil
+	}
+	sensor := &schema.HardwareOtherSensor{Name: name, Location: location, Value: &reading}
+	if len(unit) > 0 {
+		sensor.Unit = &unit
+	}
+	if alarm := sensorStatusFromFeature(feature); alarm != nil {
+		sensor.Status = alarm
+	}
+	return sensor
+}
+
+// sensorStatusFromFeature returns a warning status when any "*_alarm" reading
+// of the feature is numeric and greater than zero, and nil otherwise.
+func sensorStatusFromFeature(feature map[string]any) *string {
+	for name, value := range feature {
+		if strings.HasSuffix(name, "_alarm") {
+			if level, ok := floatFromAny(value); ok && level > 0 {
+				warning := statusWarning
+				return &warning
+			}
+		}
+	}
+	return nil
+}
+
+// deriveTemperatureStatus compares a temperature with its optional
+// thresholds: critical when it reaches the critical threshold, warning when
+// it reaches the warning threshold, OK otherwise. A nil threshold is not
+// checked; a nil reading yields nil.
+func deriveTemperatureStatus(current, warning, critical *float64) *string {
+	if current == nil {
+		return nil
+	}
+	status := statusOK
+	if critical != nil && *current >= *critical {
+		status = statusCritical
+	} else if warning != nil && *current >= *warning {
+		status = statusWarning
+	}
+	return &status
+}
+
+// firstFeatureInt returns the first numeric reading whose key ends in suffix,
+// truncated to an int. Keys are examined in sorted order, like the other
+// firstFeature* helpers, so the result does not depend on Go's randomised map
+// iteration when several keys share the suffix. The boolean is false when no
+// such reading exists.
+func firstFeatureInt(feature map[string]any, suffix string) (int, bool) {
+	value, ok := firstFeatureFloat(feature, suffix)
+	if !ok {
+		return 0, false
+	}
+	return int(value), true
+}
+
+// firstFeatureFloat is the single-suffix form of
+// firstFeatureFloatWithSuffixes: it returns the first numeric reading whose
+// key ends in suffix, and false when there is none.
+func firstFeatureFloat(feature map[string]any, suffix string) (float64, bool) {
+	return firstFeatureFloatWithSuffixes(feature, []string{suffix})
+}
+
+// firstFeatureFloatWithSuffixes walks the feature keys in sorted order and
+// returns the first numeric reading whose key ends in any of suffixes. The
+// boolean is false when no key matches or no matching value is numeric.
+func firstFeatureFloatWithSuffixes(feature map[string]any, suffixes []string) (float64, bool) {
+	for _, key := range sortedFeatureKeys(feature) {
+		matches := false
+		for _, suffix := range suffixes {
+			if strings.HasSuffix(key, suffix) {
+				matches = true
+				break
+			}
+		}
+		if !matches {
+			continue
+		}
+		if value, ok := floatFromAny(feature[key]); ok {
+			return value, true
+		}
+	}
+	return 0, false
+}
+
+// firstFeatureFloatWithContains walks the feature keys in sorted order and
+// returns the first numeric reading whose key contains every string in parts.
+// The boolean is false when no such reading exists.
+func firstFeatureFloatWithContains(feature map[string]any, parts []string) (float64, bool) {
+nextKey:
+	for _, key := range sortedFeatureKeys(feature) {
+		for _, part := range parts {
+			if !strings.Contains(key, part) {
+				continue nextKey
+			}
+		}
+		if value, ok := floatFromAny(feature[key]); ok {
+			return value, true
+		}
+	}
+	return 0, false
+}
+
+// firstFeatureFloatWithPrefix walks the feature keys in sorted order and
+// returns the first numeric reading whose key starts with prefix and ends in
+// "_input". The boolean is false when no such reading exists.
+func firstFeatureFloatWithPrefix(feature map[string]any, prefix string) (float64, bool) {
+	for _, key := range sortedFeatureKeys(feature) {
+		if !strings.HasPrefix(key, prefix) || !strings.HasSuffix(key, "_input") {
+			continue
+		}
+		value, ok := floatFromAny(feature[key])
+		if ok {
+			return value, true
+		}
+	}
+	return 0, false
+}
+
+// firstGenericSensorValue walks the feature keys in sorted order and returns
+// the first numeric reading that is not an "*_alarm" flag, together with the
+// unit inferred from its key. The boolean is false when no such reading
+// exists.
+func firstGenericSensorValue(feature map[string]any) (float64, string, bool) {
+	for _, key := range sortedFeatureKeys(feature) {
+		if strings.HasSuffix(key, "_alarm") {
+			continue
+		}
+		if value, ok := floatFromAny(feature[key]); ok {
+			return value, inferSensorUnit(key), true
+		}
+	}
+	return 0, "", false
+}
+
+// inferSensorUnit guesses the unit of a generic reading from its key:
+// humidity readings are percentages, every other key (intrusion flags
+// included) has no unit.
+func inferSensorUnit(key string) string {
+	if strings.Contains(key, "humidity") {
+		return "%"
+	}
+	return ""
+}
+
+// sortedFeatureKeys returns the keys of feature in ascending order. The
+// result is an empty, non-nil slice for an empty or nil map.
+func sortedFeatureKeys(feature map[string]any) []string {
+	names := make([]string, len(feature))
+	next := 0
+	for name := range feature {
+		names[next] = name
+		next++
+	}
+	sort.Strings(names)
+	return names
+}
+
+// floatFromAny converts a decoded JSON value to float64. Numeric Go types are
+// converted directly; json.Number and non-empty strings are parsed as
+// floating-point text. The boolean is false for empty or unparsable text and
+// for every other type, nil included.
+func floatFromAny(raw any) (float64, bool) {
+	var (
+		parsed float64
+		err    error
+	)
+	switch v := raw.(type) {
+	case float64:
+		return v, true
+	case float32:
+		return float64(v), true
+	case int:
+		return float64(v), true
+	case int64:
+		return float64(v), true
+	case json.Number:
+		parsed, err = v.Float64()
+	case string:
+		if v == "" {
+			return 0, false
+		}
+		parsed, err = strconv.ParseFloat(v, 64)
+	default:
+		return 0, false
+	}
+	if err != nil {
+		return 0, false
+	}
+	return parsed, true
+}
--- a/audit/internal/collector/sensors_test.go
+++ b/audit/internal/collector/sensors_test.go
@@ -0,0 +1,54 @@
+package collector
+
+import "testing"
+
+// TestParseSensorsJSON parses a two-chip `sensors -j` sample and checks that
+// its features land in the expected categories: one temperature, one fan,
+// three power entries (voltage, current, power) and one "other" sensor
+// (humidity, reported in percent).
+func TestParseSensorsJSON(t *testing.T) {
+	raw := []byte(`{
+  "coretemp-isa-0000": {
+    "Adapter": "ISA adapter",
+    "Package id 0": {
+      "temp1_input": 61.5,
+      "temp1_max": 80.0,
+      "temp1_crit": 95.0
+    },
+    "fan1": {
+      "fan1_input": 4200
+    }
+  },
+  "acpitz-acpi-0": {
+    "Adapter": "ACPI interface",
+    "in0": {
+      "in0_input": 12.06
+    },
+    "curr1": {
+      "curr1_input": 0.64
+    },
+    "power1": {
+      "power1_average": 137.0
+    },
+    "humidity1": {
+      "humidity1_input": 38.5
+    }
+  }
+}`)
+
+	got, err := parseSensorsJSON(raw)
+	if err != nil {
+		t.Fatalf("parseSensorsJSON error: %v", err)
+	}
+	if got == nil {
+		t.Fatal("expected sensors")
+	}
+	if len(got.Temperatures) != 1 || got.Temperatures[0].Celsius == nil || *got.Temperatures[0].Celsius != 61.5 {
+		t.Fatalf("temperatures mismatch: %#v", got.Temperatures)
+	}
+	if len(got.Fans) != 1 || got.Fans[0].RPM == nil || *got.Fans[0].RPM != 4200 {
+		t.Fatalf("fans mismatch: %#v", got.Fans)
+	}
+	if len(got.Power) != 3 {
+		t.Fatalf("power sensors mismatch: %#v", got.Power)
+	}
+	if len(got.Other) != 1 || got.Other[0].Unit == nil || *got.Other[0].Unit != "%" {
+		t.Fatalf("other sensors mismatch: %#v", got.Other)
+	}
+}
--- a/audit/internal/collector/storage.go
+++ b/audit/internal/collector/storage.go
@@ -5,11 +5,13 @@ import (
 	"encoding/json"
 	"log/slog"
 	"os/exec"
+	"path/filepath"
+	"strconv"
 	"strings"
 )

 func collectStorage() []schema.HardwareStorage {
-	devs := lsblkDevices()
+	devs := discoverStorageDevices()
 	result := make([]schema.HardwareStorage, 0, len(devs))
 	for _, dev := range devs {
 		var s schema.HardwareStorage
@@ -26,19 +28,77 @@ func collectStorage() []schema.HardwareStorage {

 // lsblkDevice is a minimal lsblk JSON record.
 type lsblkDevice struct {
-	Name     string `json:"name"`
-	Type     string `json:"type"`
-	Size     string `json:"size"`
-	Serial   string `json:"serial"`
-	Model    string `json:"model"`
-	Tran     string `json:"tran"`
-	Hctl     string `json:"hctl"`
+	Name   string `json:"name"`
+	Type   string `json:"type"`
+	Size   string `json:"size"`
+	Serial string `json:"serial"`
+	Model  string `json:"model"`
+	Tran   string `json:"tran"`
+	Hctl   string `json:"hctl"`
 }

 type lsblkRoot struct {
 	Blockdevices []lsblkDevice `json:"blockdevices"`
 }

+// nvmeListRoot is the top-level object decoded from `nvme list -o json`.
+type nvmeListRoot struct {
+	Devices []nvmeListDevice `json:"Devices"`
+}
+
+// nvmeListDevice is one entry of the "Devices" array printed by
+// `nvme list -o json`; only the fields listed here are decoded.
+type nvmeListDevice struct {
+	DevicePath   string `json:"DevicePath"` // nvmeListDevices uses the base name as the device name
+	ModelNumber  string `json:"ModelNumber"`
+	SerialNumber string `json:"SerialNumber"`
+	Firmware     string `json:"Firmware"`
+	PhysicalSize int64  `json:"PhysicalSize"` // NOTE(review): presumably bytes — confirm against nvme-cli
+}
+
+// discoverStorageDevices returns the physical disks of the host by combining
+// lsblk with `nvme list`. lsblk entries come first; an NVMe device that lsblk
+// also reported only fills in fields lsblk left empty, and NVMe devices
+// unknown to lsblk are appended. Entries without a name, entries whose type
+// is not "disk" and BMC virtual disks are dropped.
+//
+// Devices are returned in first-seen order (lsblk order, then NVMe-only
+// devices) so the result is the same on every run. Ranging over the merge map
+// directly would shuffle the disks because Go randomises map iteration.
+func discoverStorageDevices() []lsblkDevice {
+	merged := map[string]lsblkDevice{}
+	var order []string // device names in first-seen order
+
+	for _, dev := range lsblkDevices() {
+		if dev.Name == "" {
+			continue
+		}
+		if _, known := merged[dev.Name]; !known {
+			order = append(order, dev.Name)
+		}
+		merged[dev.Name] = dev
+	}
+	for _, dev := range nvmeListDevices() {
+		if dev.Name == "" {
+			continue
+		}
+		current, known := merged[dev.Name]
+		if !known {
+			order = append(order, dev.Name)
+		}
+		merged[dev.Name] = mergeStorageDevice(current, dev)
+	}
+
+	disks := make([]lsblkDevice, 0, len(order))
+	for _, name := range order {
+		dev := merged[name]
+		if dev.Type == "" {
+			dev.Type = "disk"
+		}
+		if dev.Type != "disk" {
+			continue
+		}
+		if isVirtualBMCDisk(dev) {
+			slog.Debug("storage: skipping BMC virtual disk", "name", dev.Name, "model", dev.Model)
+			continue
+		}
+		disks = append(disks, dev)
+	}
+	return disks
+}
+
+// isVirtualBMCDisk returns true for BMC/IPMI virtual USB mass storage devices
+// that appear as disks but are not real hardware (e.g. iDRAC Virtual HDisk*).
+// These have zero reported size, a generic fake serial, and a model name that
+// starts with "Virtual HDisk". Only the model name is checked here; size and
+// serial are not inspected.
+func isVirtualBMCDisk(dev lsblkDevice) bool {
+	return isVirtualHDiskModel(dev.Model)
+}
+
+// isVirtualHDiskModel reports whether model, ignoring case and surrounding
+// whitespace, starts with "virtual hdisk".
+func isVirtualHDiskModel(model string) bool {
+	const virtualPrefix = "virtual hdisk"
+	normalized := strings.ToLower(strings.TrimSpace(model))
+	return strings.HasPrefix(normalized, virtualPrefix)
+}
+
 func lsblkDevices() []lsblkDevice {
 	out, err := exec.Command("lsblk", "-J", "-d",
 		"-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL").Output()
@@ -60,6 +120,59 @@ func lsblkDevices() []lsblkDevice {
 	return disks
 }

+// nvmeListDevices runs `nvme list -o json` and returns its devices in lsblk
+// record form, with type "disk" and transport "nvme". It returns nil when the
+// command fails (for example when nvme-cli is not installed) or its output
+// cannot be decoded; a decoding failure is logged.
+//
+// Entries without a usable device path are skipped. The path is checked
+// before taking its base name because filepath.Base("") is ".", not "", so
+// testing only the base name for emptiness would let such entries through as
+// a device named ".".
+func nvmeListDevices() []lsblkDevice {
+	out, err := exec.Command("nvme", "list", "-o", "json").Output()
+	if err != nil {
+		return nil
+	}
+	var root nvmeListRoot
+	if err := json.Unmarshal(out, &root); err != nil {
+		slog.Warn("storage: nvme list parse failed", "err", err)
+		return nil
+	}
+	devices := make([]lsblkDevice, 0, len(root.Devices))
+	for _, dev := range root.Devices {
+		path := strings.TrimSpace(dev.DevicePath)
+		if path == "" {
+			continue
+		}
+		name := filepath.Base(path)
+		if name == "." || name == string(filepath.Separator) {
+			continue
+		}
+		devices = append(devices, lsblkDevice{
+			Name:   name,
+			Type:   "disk",
+			Size:   strconv.FormatInt(dev.PhysicalSize, 10),
+			Serial: strings.TrimSpace(dev.SerialNumber),
+			Model:  strings.TrimSpace(dev.ModelNumber),
+			Tran:   "nvme",
+		})
+	}
+	return devices
+}
+
+// mergeStorageDevice combines two records describing the same device. When
+// existing has no name, incoming is returned as is. Otherwise existing wins
+// and incoming only supplies the fields existing left empty (blank for
+// everything except Type, which must be exactly empty).
+func mergeStorageDevice(existing, incoming lsblkDevice) lsblkDevice {
+	if existing.Name == "" {
+		return incoming
+	}
+
+	// fillBlank copies src into dst when dst holds only whitespace.
+	fillBlank := func(dst *string, src string) {
+		if strings.TrimSpace(*dst) == "" {
+			*dst = src
+		}
+	}
+
+	if existing.Type == "" {
+		existing.Type = incoming.Type
+	}
+	fillBlank(&existing.Size, incoming.Size)
+	fillBlank(&existing.Serial, incoming.Serial)
+	fillBlank(&existing.Model, incoming.Model)
+	fillBlank(&existing.Tran, incoming.Tran)
+	fillBlank(&existing.Hctl, incoming.Hctl)
+	return existing
+}
+
 // smartctlInfo is the subset of smartctl -j -a output we care about.
 type smartctlInfo struct {
 	ModelFamily  string `json:"model_family"`
@@ -67,14 +180,22 @@ type smartctlInfo struct {
 	SerialNumber string `json:"serial_number"`
 	FirmwareVer  string `json:"firmware_version"`
 	RotationRate int    `json:"rotation_rate"`
+	Temperature  struct {
+		Current int `json:"current"`
+	} `json:"temperature"`
+	SmartStatus struct {
+		Passed bool `json:"passed"`
+	} `json:"smart_status"`
 	UserCapacity struct {
 		Bytes int64 `json:"bytes"`
 	} `json:"user_capacity"`
 	AtaSmartAttributes struct {
 		Table []struct {
-			ID    int    `json:"id"`
-			Name  string `json:"name"`
-			Raw   struct{ Value int64 `json:"value"` } `json:"raw"`
+			ID   int    `json:"id"`
+			Name string `json:"name"`
+			Raw  struct {
+				Value int64 `json:"value"`
+			} `json:"raw"`
 		} `json:"table"`
 	} `json:"ata_smart_attributes"`
 	PowerOnTime struct {
@@ -86,6 +207,7 @@ type smartctlInfo struct {
 func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
 	present := true
 	s := schema.HardwareStorage{Present: &present}
+	s.Telemetry = map[string]any{"linux_device": "/dev/" + dev.Name}

 	tran := strings.ToLower(dev.Tran)
 	devPath := "/dev/" + dev.Name
@@ -149,69 +271,117 @@ func enrichWithSmartctl(dev lsblkDevice) schema.HardwareStorage {
 		} else if info.RotationRate > 0 {
 			devType = "HDD"
 		}
+		s.Type = &devType

-		// telemetry
-		tel := map[string]any{}
+		if info.Temperature.Current > 0 {
+			t := float64(info.Temperature.Current)
+			s.TemperatureC = &t
+		}
 		if info.PowerOnTime.Hours > 0 {
-			tel["power_on_hours"] = info.PowerOnTime.Hours
+			v := int64(info.PowerOnTime.Hours)
+			s.PowerOnHours = &v
 		}
 		if info.PowerCycleCount > 0 {
-			tel["power_cycles"] = info.PowerCycleCount
+			v := int64(info.PowerCycleCount)
+			s.PowerCycles = &v
 		}
+		reallocated := int64(0)
+		pending := int64(0)
+		uncorrectable := int64(0)
+		lifeRemaining := int64(0)
 		for _, attr := range info.AtaSmartAttributes.Table {
 			switch attr.ID {
 			case 5:
-				tel["reallocated_sectors"] = attr.Raw.Value
+				reallocated = attr.Raw.Value
+				s.ReallocatedSectors = &reallocated
 			case 177:
-				tel["wear_leveling_pct"] = attr.Raw.Value
+				value := float64(attr.Raw.Value)
+				s.LifeUsedPct = &value
 			case 231:
-				tel["life_remaining_pct"] = attr.Raw.Value
+				lifeRemaining = attr.Raw.Value
+				value := float64(attr.Raw.Value)
+				s.LifeRemainingPct = &value
 			case 241:
-				tel["total_lba_written"] = attr.Raw.Value
+				value := attr.Raw.Value
+				s.WrittenBytes = &value
+			case 197:
+				pending = attr.Raw.Value
+				s.CurrentPendingSectors = &pending
+			case 198:
+				uncorrectable = attr.Raw.Value
+				s.OfflineUncorrectable = &uncorrectable
 			}
 		}
-		if len(tel) > 0 {
-			s.Telemetry = tel
+
+		status := storageHealthStatus{
+			overallPassed:        info.SmartStatus.Passed,
+			hasOverall:           true,
+			reallocatedSectors:   reallocated,
+			pendingSectors:       pending,
+			offlineUncorrectable: uncorrectable,
+			lifeRemainingPct:     lifeRemaining,
 		}
+		setStorageHealthStatus(&s, status)
+		return s
 	}

 	s.Type = &devType
-	status := "OK"
+	status := statusUnknown
 	s.Status = &status
 	return s
 }

 // nvmeSmartLog is the subset of `nvme smart-log -o json` output we care about.
 type nvmeSmartLog struct {
+	CriticalWarning  int   `json:"critical_warning"`
 	PercentageUsed   int   `json:"percentage_used"`
+	AvailableSpare   int   `json:"available_spare"`
+	SpareThreshold   int   `json:"spare_thresh"`
+	Temperature      int64 `json:"temperature"`
 	PowerOnHours     int64 `json:"power_on_hours"`
 	PowerCycles      int64 `json:"power_cycles"`
 	UnsafeShutdowns  int64 `json:"unsafe_shutdowns"`
+	DataUnitsRead    int64 `json:"data_units_read"`
 	DataUnitsWritten int64 `json:"data_units_written"`
 	ControllerBusy   int64 `json:"controller_busy_time"`
+	MediaErrors      int64 `json:"media_errors"`
+	NumErrLogEntries int64 `json:"num_err_log_entries"`
 }

 // nvmeIDCtrl is the subset of `nvme id-ctrl -o json` output.
 type nvmeIDCtrl struct {
-	ModelNumber    string `json:"mn"`
-	SerialNumber   string `json:"sn"`
-	FirmwareRev    string `json:"fr"`
-	TotalCapacity  int64  `json:"tnvmcap"`
+	ModelNumber   string `json:"mn"`
+	SerialNumber  string `json:"sn"`
+	FirmwareRev   string `json:"fr"`
+	TotalCapacity int64  `json:"tnvmcap"`
 }

 func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
 	present := true
 	devType := "NVMe"
 	iface := "NVMe"
-	status := "OK"
+	status := statusOK
 	s := schema.HardwareStorage{
-		Present:   &present,
-		Type:      &devType,
-		Interface: &iface,
-		Status:    &status,
+		HardwareComponentStatus: schema.HardwareComponentStatus{Status: &status},
+		Present:                 &present,
+		Type:                    &devType,
+		Interface:               &iface,
+		Telemetry:               map[string]any{"linux_device": "/dev/" + dev.Name},
 	}

 	devPath := "/dev/" + dev.Name
+	if v := cleanDMIValue(strings.TrimSpace(dev.Model)); v != "" {
+		s.Model = &v
+	}
+	if v := cleanDMIValue(strings.TrimSpace(dev.Serial)); v != "" {
+		s.SerialNumber = &v
+	}
+	if size := parseStorageBytes(dev.Size); size > 0 {
+		gb := int(size / 1_000_000_000)
+		if gb > 0 {
+			s.SizeGB = &gb
+		}
+	}

 	// id-ctrl: model, serial, firmware, capacity
 	if out, err := exec.Command("nvme", "id-ctrl", devPath, "-o", "json").Output(); err == nil {
@@ -237,30 +407,131 @@ func enrichWithNVMe(dev lsblkDevice) schema.HardwareStorage {
 	if out, err := exec.Command("nvme", "smart-log", devPath, "-o", "json").Output(); err == nil {
 		var log nvmeSmartLog
 		if json.Unmarshal(out, &log) == nil {
-			tel := map[string]any{}
 			if log.PowerOnHours > 0 {
-				tel["power_on_hours"] = log.PowerOnHours
+				s.PowerOnHours = &log.PowerOnHours
 			}
 			if log.PowerCycles > 0 {
-				tel["power_cycles"] = log.PowerCycles
+				s.PowerCycles = &log.PowerCycles
 			}
 			if log.UnsafeShutdowns > 0 {
-				tel["unsafe_shutdowns"] = log.UnsafeShutdowns
+				s.UnsafeShutdowns = &log.UnsafeShutdowns
 			}
 			if log.PercentageUsed > 0 {
-				tel["percentage_used"] = log.PercentageUsed
+				v := float64(log.PercentageUsed)
+				s.LifeUsedPct = &v
+				remaining := 100 - v
+				s.LifeRemainingPct = &remaining
 			}
 			if log.DataUnitsWritten > 0 {
-				tel["data_units_written"] = log.DataUnitsWritten
+				v := nvmeDataUnitsToBytes(log.DataUnitsWritten)
+				s.WrittenBytes = &v
 			}
-			if log.ControllerBusy > 0 {
-				tel["controller_busy_time"] = log.ControllerBusy
+			if log.DataUnitsRead > 0 {
+				v := nvmeDataUnitsToBytes(log.DataUnitsRead)
+				s.ReadBytes = &v
 			}
-			if len(tel) > 0 {
-				s.Telemetry = tel
+			if log.AvailableSpare > 0 {
+				v := float64(log.AvailableSpare)
+				s.AvailableSparePct = &v
 			}
+			if log.MediaErrors > 0 {
+				s.MediaErrors = &log.MediaErrors
+			}
+			if log.NumErrLogEntries > 0 {
+				s.ErrorLogEntries = &log.NumErrLogEntries
+			}
+			if log.Temperature > 0 {
+				v := float64(log.Temperature - 273)
+				s.TemperatureC = &v
+			}
+			setStorageHealthStatus(&s, storageHealthStatus{
+				criticalWarning: log.CriticalWarning,
+				percentageUsed:  int64(log.PercentageUsed),
+				availableSpare:  int64(log.AvailableSpare),
+				spareThreshold:  int64(log.SpareThreshold),
+				unsafeShutdowns: log.UnsafeShutdowns,
+				mediaErrors:     log.MediaErrors,
+				errorLogEntries: log.NumErrLogEntries,
+			})
+			return s
 		}
 	}

+	status = statusUnknown
+	s.Status = &status
 	return s
 }
+
+// parseStorageBytes interprets raw as a base-10 byte count, ignoring
+// surrounding whitespace. Anything that does not parse as a strictly positive
+// integer (for example a value with a unit suffix) yields 0.
+func parseStorageBytes(raw string) int64 {
+	n, err := strconv.ParseInt(strings.TrimSpace(raw), 10, 64)
+	if err != nil || n <= 0 {
+		return 0
+	}
+	return n
+}
+
+// nvmeDataUnitsToBytes converts an NVMe SMART "data units" counter to bytes
+// at 512000 bytes (1000 × 512) per unit. Non-positive counts yield 0.
+func nvmeDataUnitsToBytes(units int64) int64 {
+	const bytesPerDataUnit = 1000 * 512
+	if units > 0 {
+		return units * bytesPerDataUnit
+	}
+	return 0
+}
+
+// storageHealthStatus gathers the health indicators that
+// setStorageHealthStatus evaluates to derive a drive's status and error
+// description. Zero values mean "nothing to report" for every counter.
+type storageHealthStatus struct {
+	// hasOverall gates overallPassed: a failed overall result is only acted
+	// on when hasOverall is true.
+	hasOverall           bool
+	overallPassed        bool
+	// Filled by the smartctl path from the ATA attribute table.
+	reallocatedSectors   int64
+	pendingSectors       int64
+	offlineUncorrectable int64
+	lifeRemainingPct     int64
+	// Filled by the NVMe path from the smart-log output.
+	criticalWarning      int
+	percentageUsed       int64
+	availableSpare       int64
+	spareThreshold       int64
+	unsafeShutdowns      int64
+	mediaErrors          int64
+	errorLogEntries      int64
+}
+
+// setStorageHealthStatus derives a status and optional error description for
+// s from the indicators in health. Rules are checked in order (critical
+// conditions first, then warnings) and the first one that applies decides the
+// outcome. When no rule applies the status is statusOK and the description is
+// cleared.
+func setStorageHealthStatus(s *schema.HardwareStorage, health storageHealthStatus) {
+	rules := []struct {
+		applies bool
+		status  string
+		reason  string
+	}{
+		{health.hasOverall && !health.overallPassed, statusCritical, "SMART overall self-assessment failed"},
+		{health.criticalWarning > 0, statusCritical, "NVMe critical warning is set"},
+		{health.pendingSectors > 0 || health.offlineUncorrectable > 0, statusCritical, "Pending or offline uncorrectable sectors detected"},
+		{health.mediaErrors > 0, statusWarning, "Media errors reported"},
+		{health.reallocatedSectors > 0, statusWarning, "Reallocated sectors detected"},
+		{health.errorLogEntries > 0, statusWarning, "Device error log contains entries"},
+		// A zero life-remaining value is treated as "not reported", not as worn out.
+		{health.lifeRemainingPct > 0 && health.lifeRemainingPct <= 10, statusWarning, "Life remaining is low"},
+		{health.percentageUsed >= 95, statusWarning, "Drive wear level is high"},
+		{health.availableSpare > 0 && health.spareThreshold > 0 && health.availableSpare <= health.spareThreshold, statusWarning, "Available spare is at or below threshold"},
+		{health.unsafeShutdowns > 100, statusWarning, "Unsafe shutdown count is high"},
+	}
+
+	verdict := statusOK
+	var reason *string
+	for _, rule := range rules {
+		if !rule.applies {
+			continue
+		}
+		verdict = rule.status
+		reason = stringPtr(rule.reason)
+		break
+	}
+
+	s.Status = &verdict
+	s.ErrorDescription = reason
+}
+
+// stringPtr returns a pointer to a fresh copy of value.
+func stringPtr(value string) *string {
+	p := new(string)
+	*p = value
+	return p
+}
--- a/audit/internal/collector/storage_discovery_test.go
+++ b/audit/internal/collector/storage_discovery_test.go
@@ -0,0 +1,33 @@
+package collector
+
+import "testing"
+
+// TestMergeStorageDevicePrefersNonEmptyFields checks that fields left blank on
+// the existing record are taken from the incoming one.
+func TestMergeStorageDevicePrefersNonEmptyFields(t *testing.T) {
+	t.Parallel()
+
+	existing := lsblkDevice{Name: "nvme0n1", Type: "disk", Tran: "nvme"}
+	incoming := lsblkDevice{Name: "nvme0n1", Type: "disk", Size: "1024", Serial: "SN123", Model: "Kioxia"}
+	merged := mergeStorageDevice(existing, incoming)
+
+	if merged.Serial != "SN123" {
+		t.Fatalf("serial=%q want SN123", merged.Serial)
+	}
+	if merged.Model != "Kioxia" {
+		t.Fatalf("model=%q want Kioxia", merged.Model)
+	}
+	if merged.Size != "1024" {
+		t.Fatalf("size=%q want 1024", merged.Size)
+	}
+}
+
+// TestParseStorageBytes covers a padded numeric value and a value with a unit
+// suffix, which must be rejected.
+func TestParseStorageBytes(t *testing.T) {
+	t.Parallel()
+
+	valid := parseStorageBytes(" 2048 ")
+	if valid != 2048 {
+		t.Fatalf("parseStorageBytes=%d want 2048", valid)
+	}
+	invalid := parseStorageBytes("1.92 TB")
+	if invalid != 0 {
+		t.Fatalf("parseStorageBytes invalid=%d want 0", invalid)
+	}
+}
--- a/audit/internal/collector/storage_health_test.go
+++ b/audit/internal/collector/storage_health_test.go
@@ -0,0 +1,63 @@
+package collector
+
+import (
+	"testing"
+
+	"bee/audit/internal/schema"
+)
+
+// TestSetStorageHealthStatus verifies the status that setStorageHealthStatus
+// derives for one representative indicator per case.
+func TestSetStorageHealthStatus(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name   string
+		health storageHealthStatus
+		want   string
+	}{
+		{
+			name:   "smart overall failed",
+			health: storageHealthStatus{hasOverall: true, overallPassed: false},
+			want:   statusCritical,
+		},
+		{
+			name:   "nvme critical warning",
+			health: storageHealthStatus{criticalWarning: 1},
+			want:   statusCritical,
+		},
+		{
+			name:   "pending sectors",
+			health: storageHealthStatus{pendingSectors: 1},
+			want:   statusCritical,
+		},
+		{
+			name:   "media errors warning",
+			health: storageHealthStatus{mediaErrors: 2},
+			want:   statusWarning,
+		},
+		{
+			name:   "reallocated warning",
+			health: storageHealthStatus{reallocatedSectors: 1},
+			want:   statusWarning,
+		},
+		{
+			name:   "life remaining low",
+			health: storageHealthStatus{lifeRemainingPct: 8},
+			want:   statusWarning,
+		},
+		{
+			name:   "healthy",
+			health: storageHealthStatus{},
+			want:   statusOK,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var disk schema.HardwareStorage
+			setStorageHealthStatus(&disk, tt.health)
+			// Report the dereferenced status: formatting the *string itself
+			// would print a pointer address in the failure message.
+			if disk.Status == nil {
+				t.Fatalf("status=<nil> want %q", tt.want)
+			}
+			if *disk.Status != tt.want {
+				t.Fatalf("status=%q want %q", *disk.Status, tt.want)
+			}
+		})
+	}
+}
--- a/audit/internal/collector/summary.go
+++ b/audit/internal/collector/summary.go
@@ -0,0 +1,114 @@
+package collector
+
+import (
+	"bee/audit/internal/schema"
+	"fmt"
+	"time"
+)
+
+// BuildHealthSummary aggregates the per-component statuses in snap into one
+// summary. Memory, storage, PCIe devices and power supplies are visited in
+// that order; each warning or critical component bumps the matching counter
+// and adds a line to Warnings or Failures. Empty DIMM slots and power supplies
+// reported as not present are counted separately. The overall status is
+// critical if anything failed, warning if anything warned, and OK otherwise.
+// CollectedAt is the current UTC time in RFC 3339 format.
+func BuildHealthSummary(snap schema.HardwareSnapshot) *schema.HardwareHealthSummary {
+	out := &schema.HardwareHealthSummary{
+		Status:      statusOK,
+		CollectedAt: time.Now().UTC().Format(time.RFC3339),
+	}
+
+	for _, m := range snap.Memory {
+		state := derefString(m.Status)
+		if state == statusWarning {
+			out.MemoryWarn++
+			out.Warnings = append(out.Warnings, formatMemorySummary(m))
+		} else if state == statusCritical {
+			out.MemoryFail++
+			out.Failures = append(out.Failures, formatMemorySummary(m))
+		} else if state == statusEmpty {
+			out.EmptyDIMMs++
+		}
+	}
+
+	for _, d := range snap.Storage {
+		state := derefString(d.Status)
+		if state == statusWarning {
+			out.StorageWarn++
+			out.Warnings = append(out.Warnings, formatStorageSummary(d))
+		} else if state == statusCritical {
+			out.StorageFail++
+			out.Failures = append(out.Failures, formatStorageSummary(d))
+		}
+	}
+
+	for _, p := range snap.PCIeDevices {
+		state := derefString(p.Status)
+		if state == statusWarning {
+			out.PCIeWarn++
+			out.Warnings = append(out.Warnings, formatPCIeSummary(p))
+		} else if state == statusCritical {
+			out.PCIeFail++
+			out.Failures = append(out.Failures, formatPCIeSummary(p))
+		}
+	}
+
+	for _, supply := range snap.PowerSupplies {
+		// A supply explicitly reported as absent is counted regardless of status.
+		if supply.Present != nil && !*supply.Present {
+			out.MissingPSUs++
+		}
+		state := derefString(supply.Status)
+		if state == statusWarning {
+			out.PSUWarn++
+			out.Warnings = append(out.Warnings, formatPSUSummary(supply))
+		} else if state == statusCritical {
+			out.PSUFail++
+			out.Failures = append(out.Failures, formatPSUSummary(supply))
+		}
+	}
+
+	anyFailure := len(out.Failures) > 0 || out.StorageFail > 0 || out.PCIeFail > 0 || out.PSUFail > 0 || out.MemoryFail > 0
+	anyWarning := len(out.Warnings) > 0 || out.StorageWarn > 0 || out.PCIeWarn > 0 || out.PSUWarn > 0 || out.MemoryWarn > 0
+	switch {
+	case anyFailure:
+		out.Status = statusCritical
+	case anyWarning:
+		out.Status = statusWarning
+	}
+
+	// Normalise empty lists to nil.
+	if len(out.Warnings) == 0 {
+		out.Warnings = nil
+	}
+	if len(out.Failures) == 0 {
+		out.Failures = nil
+	}
+
+	return out
+}
+
+// derefString returns the string value points to, or "" for a nil pointer.
+func derefString(value *string) string {
+	if value != nil {
+		return *value
+	}
+	return ""
+}
+
+// preferredName picks the first non-nil, non-empty value among model, serial
+// and slot, in that order, and falls back to "unknown" when none qualifies.
+func preferredName(model, serial, slot *string) string {
+	for _, candidate := range []*string{model, serial, slot} {
+		if candidate != nil && *candidate != "" {
+			return *candidate
+		}
+	}
+	return "unknown"
+}
+
+// formatStorageSummary renders a one-line description of a drive: its
+// preferred name (model, then serial, then slot) followed by its status.
+func formatStorageSummary(disk schema.HardwareStorage) string {
+	label := preferredName(disk.Model, disk.SerialNumber, disk.Slot)
+	state := derefString(disk.Status)
+	return fmt.Sprintf("storage %s status=%s", label, state)
+}
+
+// formatPCIeSummary renders a one-line description of a PCIe device: its
+// preferred name (model, then serial, then BDF) followed by its status.
+func formatPCIeSummary(dev schema.HardwarePCIeDevice) string {
+	label := preferredName(dev.Model, dev.SerialNumber, dev.BDF)
+	state := derefString(dev.Status)
+	return fmt.Sprintf("pcie %s status=%s", label, state)
+}
+
+// formatPSUSummary renders a one-line description of a power supply: its
+// preferred name (model, then serial, then slot) followed by its status.
+func formatPSUSummary(psu schema.HardwarePowerSupply) string {
+	label := preferredName(psu.Model, psu.SerialNumber, psu.Slot)
+	state := derefString(psu.Status)
+	return fmt.Sprintf("psu %s status=%s", label, state)
+}
+
+// formatMemorySummary renders a one-line description of a DIMM: its preferred
+// name (part number, then serial, then slot) followed by its status.
+func formatMemorySummary(dimm schema.HardwareMemory) string {
+	label := preferredName(dimm.PartNumber, dimm.SerialNumber, dimm.Slot)
+	state := derefString(dimm.Status)
+	return fmt.Sprintf("memory %s status=%s", label, state)
+}
--- a/audit/internal/collector/vroc_test.go
+++ b/audit/internal/collector/vroc_test.go
@@ -31,7 +31,7 @@ md125 : active raid1 nvme2n1[0] nvme3n1[1]
 func TestHasVROCController(t *testing.T) {
 	intel := vendorIntel
 	model := "Volume Management Device NVMe RAID Controller"
-	class := "RAID bus controller"
+	class := "MassStorageController"
 	tests := []struct {
 		name string
 		pcie []schema.HardwarePCIeDevice
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -0,0 +1,492 @@
+package platform
+
+import (
+	"fmt"
+	"strings"
+	"time"
+)
+
+// renderBenchmarkReport returns the report text for result. It delegates
+// unchanged to renderBenchmarkReportWithCharts.
+func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
+	return renderBenchmarkReportWithCharts(result)
+}
+
+// renderBenchmarkReportWithCharts renders result as a Markdown document.
+//
+// Sections are emitted in a fixed order: header (system identity, profile,
+// generation time, run mode, scores, overall status), executive summary and
+// warnings when present, a five-perspective balanced scorecard
+// (compatibility, thermal headroom, power delivery, performance, anomalies),
+// per-GPU details, then the optional interconnect, server power, cooling and
+// platform scalability sections, and finally the list of raw artefact files.
+// Optional sections are skipped when the corresponding data is nil or empty.
+func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
+	var b strings.Builder
+
+	// ── Header ────────────────────────────────────────────────────────────────
+	b.WriteString("# Bee NVIDIA Benchmark Report\n\n")
+
+	// System identity block
+	if result.ServerModel != "" {
+		fmt.Fprintf(&b, "**Server:** %s  \n", result.ServerModel)
+	}
+	if result.Hostname != "" {
+		fmt.Fprintf(&b, "**Host:** %s  \n", result.Hostname)
+	}
+	// GPU models summary: identical models are collapsed into "N× model",
+	// keeping first-seen order.
+	if len(result.GPUs) > 0 {
+		modelCount := make(map[string]int)
+		var modelOrder []string
+		for _, g := range result.GPUs {
+			m := strings.TrimSpace(g.Name)
+			if m == "" {
+				m = "Unknown GPU"
+			}
+			if modelCount[m] == 0 {
+				modelOrder = append(modelOrder, m)
+			}
+			modelCount[m]++
+		}
+		var parts []string
+		for _, m := range modelOrder {
+			if modelCount[m] == 1 {
+				parts = append(parts, m)
+			} else {
+				parts = append(parts, fmt.Sprintf("%d× %s", modelCount[m], m))
+			}
+		}
+		fmt.Fprintf(&b, "**GPU(s):** %s  \n", strings.Join(parts, ", "))
+	}
+	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
+	fmt.Fprintf(&b, "**Benchmark version:** %s  \n", result.BenchmarkVersion)
+	// The layout ends in a literal "UTC", so convert first; otherwise a
+	// timestamp carrying another zone would be printed with a wrong label.
+	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.UTC().Format("2006-01-02 15:04:05 UTC"))
+	if result.RampStep > 0 && result.RampTotal > 0 {
+		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
+		if result.RampRunID != "" {
+			fmt.Fprintf(&b, "**Ramp-up run ID:** %s  \n", result.RampRunID)
+		}
+	} else if result.ParallelGPUs {
+		fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously)  \n")
+	}
+	if result.ScalabilityScore > 0 {
+		fmt.Fprintf(&b, "**Scalability score:** %.1f%%  \n", result.ScalabilityScore)
+	}
+	if result.PlatformPowerScore > 0 {
+		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n", result.PlatformPowerScore)
+	}
+	fmt.Fprintf(&b, "**Overall status:** %s  \n", result.OverallStatus)
+	b.WriteString("\n")
+
+	// ── Executive Summary ─────────────────────────────────────────────────────
+	if len(result.Findings) > 0 {
+		b.WriteString("## Executive Summary\n\n")
+		for _, finding := range result.Findings {
+			fmt.Fprintf(&b, "- %s\n", finding)
+		}
+		b.WriteString("\n")
+	}
+
+	if len(result.Warnings) > 0 {
+		b.WriteString("## Warnings\n\n")
+		for _, warning := range result.Warnings {
+			fmt.Fprintf(&b, "- %s\n", warning)
+		}
+		b.WriteString("\n")
+	}
+
+	// ── Balanced Scorecard ────────────────────────────────────────────────────
+	b.WriteString("## Balanced Scorecard\n\n")
+
+	// Perspective 1: Compatibility — hard stops
+	b.WriteString("### 1. Compatibility\n\n")
+	b.WriteString("| GPU | Thermal throttle | Fan duty at throttle | ECC uncorr | Status |\n")
+	b.WriteString("|-----|------------------|----------------------|------------|--------|\n")
+	for _, gpu := range result.GPUs {
+		thermalThrottle := "-"
+		if gpu.Scores.ThermalThrottlePct > 0 {
+			thermalThrottle = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
+		}
+		fanAtThrottle := "-"
+		if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && gpu.Scores.ThermalThrottlePct > 0 {
+			fanAtThrottle = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
+		}
+		ecc := "-"
+		if gpu.ECC.Uncorrected > 0 {
+			ecc = fmt.Sprintf("⛔ %d", gpu.ECC.Uncorrected)
+		}
+		// Hard stop: any uncorrected ECC error, or thermal throttling while
+		// the p95 fan duty cycle is known and below 95%.
+		compatStatus := "✓ OK"
+		if gpu.ECC.Uncorrected > 0 || (gpu.Scores.ThermalThrottlePct > 0 && result.Cooling != nil && result.Cooling.FanDutyCycleAvailable && result.Cooling.P95FanDutyCyclePct < 95) {
+			compatStatus = "⛔ HARD STOP"
+		}
+		fmt.Fprintf(&b, "| GPU %d | %s | %s | %s | %s |\n",
+			gpu.Index, thermalThrottle, fanAtThrottle, ecc, compatStatus)
+	}
+	b.WriteString("\n")
+
+	// Perspective 2: Thermal headroom
+	b.WriteString("### 2. Thermal Headroom\n\n")
+	b.WriteString("| GPU | p95 temp | Slowdown limit | Shutdown limit | Headroom | Thermal throttle | Status |\n")
+	b.WriteString("|-----|----------|----------------|----------------|----------|------------------|--------|\n")
+	for _, gpu := range result.GPUs {
+		// Fall back to 90 °C / 80 °C when the GPU reports no limits.
+		shutdownTemp := gpu.ShutdownTempC
+		if shutdownTemp <= 0 {
+			shutdownTemp = 90
+		}
+		slowdownTemp := gpu.SlowdownTempC
+		if slowdownTemp <= 0 {
+			slowdownTemp = 80
+		}
+		headroom := gpu.Scores.TempHeadroomC
+		thermalStatus := "✓ OK"
+		switch {
+		case headroom < 10:
+			thermalStatus = "⛔ CRITICAL"
+		case gpu.Steady.P95TempC >= slowdownTemp:
+			thermalStatus = "⚠ WARNING"
+		}
+		throttlePct := "-"
+		if gpu.Scores.ThermalThrottlePct > 0 {
+			throttlePct = fmt.Sprintf("%.1f%%", gpu.Scores.ThermalThrottlePct)
+		}
+		fmt.Fprintf(&b, "| GPU %d | %.1f°C | %.0f°C | %.0f°C | %.1f°C | %s | %s |\n",
+			gpu.Index, gpu.Steady.P95TempC, slowdownTemp, shutdownTemp, headroom, throttlePct, thermalStatus)
+	}
+	b.WriteString("\n")
+
+	// Perspective 3: Power delivery
+	b.WriteString("### 3. Power Delivery\n\n")
+	b.WriteString("| GPU | Power cap throttle | Power stability | Fan duty (p95) | Status |\n")
+	b.WriteString("|-----|-------------------|-----------------|----------------|--------|\n")
+	for _, gpu := range result.GPUs {
+		powerCap := "-"
+		if gpu.Scores.PowerCapThrottlePct > 0 {
+			powerCap = fmt.Sprintf("%.1f%%", gpu.Scores.PowerCapThrottlePct)
+		}
+		fanDuty := "-"
+		if result.Cooling != nil && result.Cooling.FanDutyCycleAvailable {
+			fanDuty = fmt.Sprintf("%.0f%%", result.Cooling.P95FanDutyCyclePct)
+		}
+		powerStatus := "✓ OK"
+		if gpu.Scores.PowerCapThrottlePct > 5 {
+			powerStatus = "⚠ POWER LIMITED"
+		}
+		fmt.Fprintf(&b, "| GPU %d | %s | %.1f | %s | %s |\n",
+			gpu.Index, powerCap, gpu.Scores.PowerSustainScore, fanDuty, powerStatus)
+	}
+	b.WriteString("\n")
+
+	// Perspective 4: Performance
+	b.WriteString("### 4. Performance\n\n")
+	b.WriteString("| GPU | Compute TOPS | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz |\n")
+	b.WriteString("|-----|--------------|-----------|-------|------------|-------------|\n")
+	for _, gpu := range result.GPUs {
+		synthetic := "-"
+		if gpu.Scores.SyntheticScore > 0 {
+			synthetic = fmt.Sprintf("%.2f", gpu.Scores.SyntheticScore)
+		}
+		mixed := "-"
+		if gpu.Scores.MixedScore > 0 {
+			mixed = fmt.Sprintf("%.2f", gpu.Scores.MixedScore)
+		}
+		mixedEff := "-"
+		if gpu.Scores.MixedEfficiency > 0 {
+			mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
+		}
+		topsPerSM := "-"
+		if gpu.Scores.TOPSPerSMPerGHz > 0 {
+			topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
+		}
+		fmt.Fprintf(&b, "| GPU %d | **%.2f** | %s | %s | %s | %s |\n",
+			gpu.Index, gpu.Scores.CompositeScore, synthetic, mixed, mixedEff, topsPerSM)
+	}
+	if len(result.PerformanceRampSteps) > 0 {
+		fmt.Fprintf(&b, "\n**Platform power score (scalability):** %.1f%%\n", result.PlatformPowerScore)
+	}
+	b.WriteString("\n")
+
+	// Perspective 5: Anomaly flags
+	b.WriteString("### 5. Anomalies\n\n")
+	b.WriteString("| GPU | ECC corrected | Sync boost throttle | Power instability | Thermal instability |\n")
+	b.WriteString("|-----|---------------|---------------------|-------------------|---------------------|\n")
+	for _, gpu := range result.GPUs {
+		eccCorr := "-"
+		if gpu.ECC.Corrected > 0 {
+			eccCorr = fmt.Sprintf("⚠ %d", gpu.ECC.Corrected)
+		}
+		syncBoost := "-"
+		if gpu.Scores.SyncBoostThrottlePct > 0 {
+			syncBoost = fmt.Sprintf("%.1f%%", gpu.Scores.SyncBoostThrottlePct)
+		}
+		// Sustain scores below 70 are flagged as unstable.
+		powerVar := "OK"
+		if gpu.Scores.PowerSustainScore < 70 {
+			powerVar = "⚠ unstable"
+		}
+		thermalVar := "OK"
+		if gpu.Scores.ThermalSustainScore < 70 {
+			thermalVar = "⚠ unstable"
+		}
+		fmt.Fprintf(&b, "| GPU %d | %s | %s | %s | %s |\n",
+			gpu.Index, eccCorr, syncBoost, powerVar, thermalVar)
+	}
+	b.WriteString("\n")
+
+	// ── Per GPU detail ────────────────────────────────────────────────────────
+	b.WriteString("## Per-GPU Details\n\n")
+	for _, gpu := range result.GPUs {
+		name := strings.TrimSpace(gpu.Name)
+		if name == "" {
+			name = "Unknown GPU"
+		}
+		fmt.Fprintf(&b, "### GPU %d — %s\n\n", gpu.Index, name)
+
+		// Identity
+		if gpu.BusID != "" {
+			fmt.Fprintf(&b, "- **Bus ID:** %s\n", gpu.BusID)
+		}
+		if gpu.VBIOS != "" {
+			fmt.Fprintf(&b, "- **vBIOS:** %s\n", gpu.VBIOS)
+		}
+		if gpu.ComputeCapability != "" {
+			fmt.Fprintf(&b, "- **Compute capability:** %s\n", gpu.ComputeCapability)
+		}
+		if gpu.MultiprocessorCount > 0 {
+			fmt.Fprintf(&b, "- **SMs:** %d\n", gpu.MultiprocessorCount)
+		}
+		if gpu.PowerLimitW > 0 {
+			fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
+		}
+		if gpu.PowerLimitDerated {
+			fmt.Fprintf(&b, "- **Power limit derating:** active (reduced limit %.0f W)\n", gpu.PowerLimitW)
+		}
+		if gpu.CalibratedPeakPowerW > 0 {
+			if gpu.CalibratedPeakTempC > 0 {
+				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95 at %.1f °C p95\n", gpu.CalibratedPeakPowerW, gpu.CalibratedPeakTempC)
+			} else {
+				fmt.Fprintf(&b, "- **Calibrated peak power:** %.0f W p95\n", gpu.CalibratedPeakPowerW)
+			}
+		}
+		if gpu.LockedGraphicsClockMHz > 0 {
+			fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
+		}
+		b.WriteString("\n")
+
+		// Steady-state telemetry
+		if benchmarkTelemetryAvailable(gpu.Steady) {
+			fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
+			b.WriteString("| | Avg | P95 |\n|---|---|---|\n")
+			fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW)
+			fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC)
+			fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz)
+			fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz)
+			fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
+			b.WriteString("\n")
+		} else {
+			b.WriteString("**Steady-state telemetry:** unavailable\n\n")
+		}
+
+		// Per-precision stability phases.
+		if len(gpu.PrecisionSteady) > 0 {
+			b.WriteString("**Per-precision stability:**\n\n")
+			b.WriteString("| Precision | Status | Clock CV | Power CV | Clock Drift | ECC corr | ECC uncorr |\n|-----------|--------|----------|----------|-------------|----------|------------|\n")
+			for _, p := range gpu.PrecisionSteady {
+				eccCorr := "—"
+				eccUncorr := "—"
+				if !p.ECC.IsZero() {
+					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
+					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
+				}
+				// A blank phase status is shown as "OK".
+				status := p.Status
+				if strings.TrimSpace(status) == "" {
+					status = "OK"
+				}
+				fmt.Fprintf(&b, "| %s | %s | %.1f%% | %.1f%% | %.1f%% | %s | %s |\n",
+					p.Precision, status, p.Steady.ClockCVPct, p.Steady.PowerCVPct, p.Steady.ClockDriftPct,
+					eccCorr, eccUncorr)
+			}
+			b.WriteString("\n")
+		} else {
+			// Legacy: show combined-window variance.
+			fmt.Fprintf(&b, "**Clock/power variance (combined window):** clock CV %.1f%% · power CV %.1f%% · clock drift %.1f%%\n\n",
+				gpu.Steady.ClockCVPct, gpu.Steady.PowerCVPct, gpu.Steady.ClockDriftPct)
+		}
+
+		// ECC summary
+		if !gpu.ECC.IsZero() {
+			fmt.Fprintf(&b, "**ECC errors (total):** corrected=%d uncorrected=%d\n\n",
+				gpu.ECC.Corrected, gpu.ECC.Uncorrected)
+		}
+
+		// Throttle
+		throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
+		if throttle != "none" {
+			fmt.Fprintf(&b, "**Throttle:** %s\n\n", throttle)
+		}
+
+		// Precision results
+		if len(gpu.PrecisionResults) > 0 {
+			b.WriteString("**Precision results:**\n\n")
+			b.WriteString("| Precision | TOPS (raw) | Weight | TOPS (fp32-eq) | Lanes | Iterations |\n|-----------|------------|--------|----------------|-------|------------|\n")
+			for _, p := range gpu.PrecisionResults {
+				if p.Supported {
+					weightStr := fmt.Sprintf("×%.3g", p.Weight)
+					fmt.Fprintf(&b, "| %s | %.2f | %s | %.2f | %d | %d |\n",
+						p.Name, p.TeraOpsPerSec, weightStr, p.WeightedTeraOpsPerSec, p.Lanes, p.Iterations)
+				} else {
+					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — | — | — |\n", p.Name)
+				}
+			}
+			b.WriteString("\n")
+		}
+
+		// Degradation / Notes
+		if len(gpu.DegradationReasons) > 0 {
+			fmt.Fprintf(&b, "**Degradation reasons:** %s\n\n", strings.Join(gpu.DegradationReasons, ", "))
+		}
+		if len(gpu.Notes) > 0 {
+			b.WriteString("**Notes:**\n\n")
+			for _, note := range gpu.Notes {
+				fmt.Fprintf(&b, "- %s\n", note)
+			}
+			b.WriteString("\n")
+		}
+	}
+
+	// ── Interconnect ──────────────────────────────────────────────────────────
+	if result.Interconnect != nil {
+		b.WriteString("## Interconnect (NCCL)\n\n")
+		fmt.Fprintf(&b, "**Status:** %s\n\n", result.Interconnect.Status)
+		if result.Interconnect.Supported {
+			b.WriteString("| Metric | Avg | Max |\n|--------|-----|-----|\n")
+			fmt.Fprintf(&b, "| Alg BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.MaxAlgBWGBps)
+			fmt.Fprintf(&b, "| Bus BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgBusBWGBps, result.Interconnect.MaxBusBWGBps)
+			b.WriteString("\n")
+		}
+		for _, note := range result.Interconnect.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(result.Interconnect.Notes) > 0 {
+			b.WriteString("\n")
+		}
+	}
+
+	// ── Server Power (IPMI) ───────────────────────────────────────────────────
+	if sp := result.ServerPower; sp != nil {
+		b.WriteString("## Server Power (IPMI)\n\n")
+		if !sp.Available {
+			b.WriteString("IPMI power measurement unavailable.\n\n")
+		} else {
+			b.WriteString("| | Value |\n|---|---|\n")
+			fmt.Fprintf(&b, "| Server idle | %.0f W |\n", sp.IdleW)
+			fmt.Fprintf(&b, "| Server under load | %.0f W |\n", sp.LoadedW)
+			fmt.Fprintf(&b, "| Server delta (load − idle) | %.0f W |\n", sp.DeltaW)
+			fmt.Fprintf(&b, "| GPU-reported sum | %.0f W |\n", sp.GPUReportedSumW)
+			if sp.ReportingRatio > 0 {
+				fmt.Fprintf(&b, "| Reporting ratio | %.2f (1.0 = accurate, <0.75 = GPU over-reports) |\n", sp.ReportingRatio)
+			}
+			b.WriteString("\n")
+		}
+		for _, note := range sp.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(sp.Notes) > 0 {
+			b.WriteString("\n")
+		}
+	}
+
+	// ── Cooling ───────────────────────────────────────────────────────────────
+	if cooling := result.Cooling; cooling != nil {
+		b.WriteString("## Cooling\n\n")
+		if cooling.Available {
+			b.WriteString("| Metric | Value |\n|--------|-------|\n")
+			fmt.Fprintf(&b, "| Average fan speed | %.0f RPM |\n", cooling.AvgFanRPM)
+			if cooling.FanDutyCycleAvailable {
+				fmt.Fprintf(&b, "| Average fan duty cycle | %.1f%% |\n", cooling.AvgFanDutyCyclePct)
+				fmt.Fprintf(&b, "| P95 fan duty cycle | %.1f%% |\n", cooling.P95FanDutyCyclePct)
+			} else {
+				b.WriteString("| Average fan duty cycle | N/A |\n")
+				b.WriteString("| P95 fan duty cycle | N/A |\n")
+			}
+			b.WriteString("\n")
+		} else {
+			b.WriteString("Cooling telemetry unavailable.\n\n")
+		}
+		for _, note := range cooling.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(cooling.Notes) > 0 {
+			b.WriteString("\n")
+		}
+	}
+
+	// ── Platform Scalability ──────────────────────────────────────────────────
+	if len(result.PerformanceRampSteps) > 0 {
+		b.WriteString("## Platform Scalability (Performance Ramp)\n\n")
+		fmt.Fprintf(&b, "**Platform power score:** %.1f%%  \n\n", result.PlatformPowerScore)
+		b.WriteString("| k GPUs | GPU Indices | Total Synthetic TOPS | Scalability |\n")
+		b.WriteString("|--------|-------------|----------------------|-------------|\n")
+		for _, step := range result.PerformanceRampSteps {
+			fmt.Fprintf(&b, "| %d | %s | %.2f | %.1f%% |\n",
+				step.StepIndex, joinIndexList(step.GPUIndices), step.TotalSyntheticTOPS, step.ScalabilityPct)
+		}
+		b.WriteString("\n")
+	}
+
+	// ── Raw files ─────────────────────────────────────────────────────────────
+	b.WriteString("## Raw Files\n\n")
+	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
+	b.WriteString("- `gpu-metrics.csv`\n- `gpu-metrics.html`\n- `gpu-burn.log`\n")
+	if result.Interconnect != nil {
+		b.WriteString("- `nccl-all-reduce.log`\n")
+	}
+	return b.String()
+}
+
+// formatThrottleLine summarises the throttle counters in t. Counters that are
+// zero are omitted. When steadyDurationSec is positive each remaining counter
+// is shown as a percentage of that window plus its duration in seconds;
+// otherwise the bare duration is shown, in milliseconds below one second and
+// in seconds from there on. Entries are joined with two spaces, and "none" is
+// returned when every counter is zero.
+func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64) string {
+	labels := [...]string{"sw_power", "sw_thermal", "sync_boost", "hw_thermal", "hw_power_brake"}
+	micros := [...]uint64{
+		t.SWPowerCapUS,
+		t.SWThermalSlowdownUS,
+		t.SyncBoostUS,
+		t.HWThermalSlowdownUS,
+		t.HWPowerBrakeSlowdownUS,
+	}
+
+	var rendered []string
+	for i, us := range micros {
+		if us == 0 {
+			continue
+		}
+		// Counters are in microseconds.
+		seconds := float64(us) / 1e6
+		var entry string
+		switch {
+		case steadyDurationSec > 0:
+			share := seconds / steadyDurationSec * 100
+			entry = fmt.Sprintf("%s=%.1f%% (%.0fs)", labels[i], share, seconds)
+		case seconds < 1:
+			entry = fmt.Sprintf("%s=%.0fms", labels[i], seconds*1000)
+		default:
+			entry = fmt.Sprintf("%s=%.1fs", labels[i], seconds)
+		}
+		rendered = append(rendered, entry)
+	}
+
+	if len(rendered) == 0 {
+		return "none"
+	}
+	return strings.Join(rendered, "  ")
+}
+
+// renderBenchmarkSummary renders result as key=value lines, one per line:
+// run metadata, per-GPU status and composite score, the best composite score
+// across all GPUs (0 when there are none), and the interconnect status and
+// maximum bus bandwidth when interconnect data is present.
+func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
+	var b strings.Builder
+	// The key promises UTC, so normalise the timestamp before formatting it.
+	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.UTC().Format(time.RFC3339))
+	fmt.Fprintf(&b, "benchmark_version=%s\n", result.BenchmarkVersion)
+	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
+	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
+	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
+	fmt.Fprintf(&b, "normalization_status=%s\n", result.Normalization.Status)
+	var best float64
+	for i, gpu := range result.GPUs {
+		fmt.Fprintf(&b, "gpu_%d_status=%s\n", gpu.Index, gpu.Status)
+		fmt.Fprintf(&b, "gpu_%d_composite_score=%.2f\n", gpu.Index, gpu.Scores.CompositeScore)
+		// Seed with the first GPU so that negative scores are handled too.
+		if i == 0 || gpu.Scores.CompositeScore > best {
+			best = gpu.Scores.CompositeScore
+		}
+	}
+	fmt.Fprintf(&b, "best_composite_score=%.2f\n", best)
+	if result.Interconnect != nil {
+		fmt.Fprintf(&b, "interconnect_status=%s\n", result.Interconnect.Status)
+		fmt.Fprintf(&b, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps)
+	}
+	return b.String()
+}
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -0,0 +1,403 @@
+package platform
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestResolveBenchmarkProfile(t *testing.T) { // table test: each profile name must resolve to the exact spec listed
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		profile string
+		want    benchmarkProfileSpec
+	}{
+		{
+			name:    "default",
+			profile: "", // an empty name is expected to resolve to the standard profile
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 45, SteadySec: 480, NCCLSec: 180, CooldownSec: 0},
+		},
+		{
+			name:    "stability",
+			profile: "stability",
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 120, SteadySec: 3600, NCCLSec: 300, CooldownSec: 0},
+		},
+		{
+			name:    "overnight",
+			profile: "overnight",
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 180, SteadySec: 27000, NCCLSec: 600, CooldownSec: 0},
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got := resolveBenchmarkProfile(tc.profile)
+			if got != tc.want { // whole-struct comparison: every field of the spec must match
+				t.Fatalf("profile=%q got %+v want %+v", tc.profile, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) { // standard profile, SteadySec 480: expects four 60 s phases followed by a 300 s "mixed" phase
+	t.Parallel()
+
+	labels, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, SteadySec: 480},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label }, // identity label mapping keeps the plan labels unchanged
+	)
+	if len(labels) != 5 || len(phases) != 5 {
+		t.Fatalf("labels=%d phases=%d want 5", len(labels), len(phases))
+	}
+	if basePhaseSec != 60 {
+		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
+	}
+	if mixedPhaseSec != 300 {
+		t.Fatalf("mixedPhaseSec=%d want 300", mixedPhaseSec)
+	}
+	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 { // the mixed phase must come last
+		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
+	}
+	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,300" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanStability(t *testing.T) { // stability profile, SteadySec 3600: expects four 300 s phases plus a 3600 s final phase
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, SteadySec: 3600},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 300 {
+		t.Fatalf("basePhaseSec=%d want 300", basePhaseSec)
+	}
+	if mixedPhaseSec != 3600 {
+		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,3600" { // per-phase durations in plan order
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) { // overnight profile, SteadySec 27000: expects four 3600 s phases plus a 14400 s final phase
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, SteadySec: 27000},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 3600 {
+		t.Fatalf("basePhaseSec=%d want 3600", basePhaseSec)
+	}
+	if mixedPhaseSec != 14400 {
+		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,14400" { // per-phase durations in plan order
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) { // rows must be bucketed by the phases' own durations, which differ per phase here
+	t.Parallel()
+
+	phases := []benchmarkPlannedPhase{
+		{PlanLabel: "fp8", MetricStage: "fp8", DurationSec: 10},
+		{PlanLabel: "fp16", MetricStage: "fp16", DurationSec: 10},
+		{PlanLabel: "mixed", MetricStage: "mixed", DurationSec: 50},
+	}
+	rows := []GPUMetricRow{ // the asserts below expect 1 row under "fp8", 1 under "fp16" and 2 under "mixed"
+		{ElapsedSec: 5},
+		{ElapsedSec: 15},
+		{ElapsedSec: 25},
+		{ElapsedSec: 65},
+	}
+	got := splitBenchmarkRowsByPlannedPhase(rows, phases) // result is indexed by the same strings used as phase labels above
+	if len(got["fp8"]) != 1 {
+		t.Fatalf("fp8 rows=%d want 1", len(got["fp8"]))
+	}
+	if len(got["fp16"]) != 1 {
+		t.Fatalf("fp16 rows=%d want 1", len(got["fp16"]))
+	}
+	if len(got["mixed"]) != 2 {
+		t.Fatalf("mixed rows=%d want 2", len(got["mixed"]))
+	}
+}
+
+func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) { // pins the supported-precision list for compute capabilities 9.0 and 10.0
+	t.Parallel()
+
+	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" {
+		t.Fatalf("supported=%v", got)
+	}
+	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" { // despite the test name, 10.0 is also expected to exclude fp4 (and fp64)
+		t.Fatalf("supported=%v", got)
+	}
+}
+
+func TestBenchmarkPlannedPhaseStatus(t *testing.T) { // table test: raw phase output must map to OK, FAILED or UNSUPPORTED
+	t.Parallel()
+
+	cases := []struct {
+		name       string
+		raw        string
+		wantStatus string
+	}{
+		{name: "ok", raw: "status=OK\n", wantStatus: "OK"},
+		{name: "failed", raw: "phase_error=fp16\n", wantStatus: "FAILED"},
+		{name: "unsupported", raw: "cublasLt_profiles=unsupported\nphase_error=fp4\n", wantStatus: "UNSUPPORTED"}, // the unsupported marker must win over phase_error
+	}
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := benchmarkPlannedPhaseStatus([]byte(tc.raw)) // the second return value is not checked by this test
+			if got != tc.wantStatus {
+				t.Fatalf("status=%q want %q", got, tc.wantStatus)
+			}
+		})
+	}
+}
+
+func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) { // normalization must keep RunNCCL=false and resolve the profile string
+	t.Parallel()
+
+	opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
+		Profile: "stability",
+		RunNCCL: false, // spelled out although false is the zero value: this is the choice under test
+	})
+	if opts.Profile != NvidiaBenchmarkProfileStability {
+		t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
+	}
+	if opts.RunNCCL {
+		t.Fatalf("RunNCCL should stay false when explicitly disabled")
+	}
+}
+
+func TestParseBenchmarkBurnLog(t *testing.T) { // parses a sample single-GPU burn log and checks backend, capability and per-precision profiles
+	t.Parallel()
+
+	raw := strings.Join([]string{
+		"loader=bee-gpu-burn",
+		"[gpu 0] device=NVIDIA H100",
+		"[gpu 0] compute_capability=9.0",
+		"[gpu 0] backend=cublasLt",
+		"[gpu 0] duration_s=10",
+		"[gpu 0] int8_tensor[0]=READY dim=16384x16384x8192 block=128 stream=0",
+		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
+		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
+		"[gpu 0] int8_tensor_iterations=80",
+		"[gpu 0] fp16_tensor_iterations=200",
+		"[gpu 0] fp8_e4m3_iterations=50",
+		"[gpu 0] status=OK",
+	}, "\n")
+
+	got := parseBenchmarkBurnLog(raw)
+	if got.Backend != "cublasLt" {
+		t.Fatalf("backend=%q want cublasLt", got.Backend)
+	}
+	if got.ComputeCapability != "9.0" {
+		t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
+	}
+	if len(got.Profiles) != 3 { // the log above describes three kernels: int8_tensor, fp16_tensor, fp8_e4m3
+		t.Fatalf("profiles=%d want 3", len(got.Profiles))
+	}
+	if got.Profiles[0].TeraOpsPerSec <= 0 {
+		t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
+	}
+	if got.Profiles[0].Category != "fp16_bf16" { // expected order is fp16, fp8, int8 — not the order of the log lines
+		t.Fatalf("profile[0] category=%q want fp16_bf16", got.Profiles[0].Category)
+	}
+	if got.Profiles[1].Category != "fp8" {
+		t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
+	}
+	if got.Profiles[2].Category != "int8" {
+		t.Fatalf("profile[2] category=%q want int8", got.Profiles[2].Category)
+	}
+	if got.Profiles[2].Weight != 0.25 { // the int8 profile is expected to carry weight 0.25
+		t.Fatalf("profile[2] weight=%f want 0.25", got.Profiles[2].Weight)
+	}
+}
+
+func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) { // renders a one-GPU result and checks that key strings reach the report
+	t.Parallel()
+
+	result := NvidiaBenchmarkResult{
+		BenchmarkVersion:   benchmarkVersion,
+		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
+		OverallStatus:      "PARTIAL",
+		SelectedGPUIndices: []int{0},
+		Normalization: BenchmarkNormalization{
+			Status: "partial",
+		},
+		Findings: []string{"GPU 0 spent measurable time under SW power cap."},
+		GPUs: []BenchmarkGPUResult{
+			{
+				Index:  0,
+				Name:   "NVIDIA H100",
+				Status: "OK",
+				Steady: BenchmarkTelemetrySummary{
+					AvgPowerW:           680,
+					AvgTempC:            79,
+					AvgGraphicsClockMHz: 1725,
+					P95PowerW:           700,
+					P95TempC:            82,
+					P95GraphicsClockMHz: 1800,
+				},
+				Scores: BenchmarkScorecard{
+					ComputeScore:        1200,
+					PowerSustainScore:   96,
+					ThermalSustainScore: 88,
+					StabilityScore:      92,
+					CompositeScore:      1176,
+				},
+				PrecisionResults: []BenchmarkPrecisionResult{
+					{Name: "fp16_tensor", Supported: true, TeraOpsPerSec: 700},
+				},
+				Throttle: BenchmarkThrottleCounters{
+					SWPowerCapUS: 1000000,
+				},
+				DegradationReasons: []string{"power_capped"},
+			},
+		},
+		Cooling: &BenchmarkCoolingSummary{
+			Available:             true,
+			AvgFanRPM:             9200,
+			FanDutyCycleAvailable: true,
+			AvgFanDutyCyclePct:    47.5,
+			P95FanDutyCyclePct:    62.0,
+		},
+	}
+
+	report := renderBenchmarkReport(result)
+	for _, needle := range []string{ // substrings the rendered report must contain
+		"Executive Summary",
+		"GPU 0 spent measurable time under SW power cap.",
+		"1176.00",
+		"fp16_tensor",
+		"700.00",
+		"Cooling",
+		"Average fan duty cycle",
+		"47.5%",
+	} {
+		if !strings.Contains(report, needle) { // the full report is dumped on failure to ease debugging
+			t.Fatalf("report missing %q\n%s", needle, report)
+		}
+	}
+}
+
+func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) { // even a result without GPU entries must mention the three artifact file names
+	t.Parallel()
+
+	report := renderBenchmarkReport(NvidiaBenchmarkResult{
+		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
+		OverallStatus:      "OK",
+		SelectedGPUIndices: []int{0},
+		Normalization: BenchmarkNormalization{
+			Status: "full",
+		},
+	})
+
+	for _, needle := range []string{ // artifact file names expected in the report text
+		"gpu-metrics.csv",
+		"gpu-metrics.html",
+		"gpu-burn.log",
+	} {
+		if !strings.Contains(report, needle) {
+			t.Fatalf("report missing %q\n%s", needle, report)
+		}
+	}
+}
+
+func TestScoreBenchmarkGPUIgnoresDisabledPrecisions(t *testing.T) { // fp64/fp4 entries carry 999 so any leak into the scores would be obvious
+	t.Parallel()
+
+	score := scoreBenchmarkGPUResult(BenchmarkGPUResult{
+		PrecisionSteady: []BenchmarkPrecisionSteadyPhase{
+			{Precision: "fp16", WeightedTeraOpsPerSec: 100},
+			{Precision: "fp64", WeightedTeraOpsPerSec: 999},
+			{Precision: "fp4", WeightedTeraOpsPerSec: 999},
+		},
+		PrecisionResults: []BenchmarkPrecisionResult{
+			{Category: "fp32_tf32", Supported: true, WeightedTeraOpsPerSec: 50},
+			{Category: "fp64", Supported: true, WeightedTeraOpsPerSec: 999},
+			{Category: "fp4", Supported: true, WeightedTeraOpsPerSec: 999},
+		},
+	})
+
+	if score.SyntheticScore != 100 { // only the fp16 PrecisionSteady entry may count
+		t.Fatalf("SyntheticScore=%f want 100", score.SyntheticScore)
+	}
+	if score.MixedScore != 50 { // only the fp32_tf32 PrecisionResults entry may count
+		t.Fatalf("MixedScore=%f want 50", score.MixedScore)
+	}
+}
+
+func TestEnrichGPUInfoWithMaxClocks(t *testing.T) { // two GPU sections in the sample text must fill the max graphics/memory clocks of both map entries
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Clocks
+        Graphics                          : 2422 MHz
+        Memory                            : 12481 MHz
+    Max Clocks
+        Graphics                          : 2430 MHz
+        SM                                : 2430 MHz
+        Memory                            : 12481 MHz
+        Video                             : 2107 MHz
+
+GPU 00000000:4F:00.0
+    Product Name                          : NVIDIA RTX PRO 6000 Blackwell Server Edition
+    Max Clocks
+        Graphics                          : 2430 MHz
+        Memory                            : 12481 MHz
+`)
+
+	infoByIndex := map[int]benchmarkGPUInfo{ // BusID values equal the "GPU <address>" headers in the sample; presumably that is the join key — confirm
+		0: {Index: 0, BusID: "00000000:4E:00.0"},
+		1: {Index: 1, BusID: "00000000:4F:00.0"},
+	}
+
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ) // no return value is used: the map entries are expected to be updated in place
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 { // 2430 is listed under "Max Clocks"; the 2422 under plain "Clocks" must not be picked up
+		t.Errorf("GPU 0 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[0].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 0 MaxMemoryClockMHz = %v, want 12481", infoByIndex[0].MaxMemoryClockMHz)
+	}
+	if infoByIndex[1].MaxGraphicsClockMHz != 2430 {
+		t.Errorf("GPU 1 MaxGraphicsClockMHz = %v, want 2430", infoByIndex[1].MaxGraphicsClockMHz)
+	}
+	if infoByIndex[1].MaxMemoryClockMHz != 12481 {
+		t.Errorf("GPU 1 MaxMemoryClockMHz = %v, want 12481", infoByIndex[1].MaxMemoryClockMHz)
+	}
+}
+
+func TestEnrichGPUInfoWithMaxClocksSkipsPopulated(t *testing.T) { // an entry that already has max clocks must keep them
+	t.Parallel()
+
+	nvsmiQ := []byte(`
+GPU 00000000:4E:00.0
+    Max Clocks
+        Graphics                          : 9999 MHz
+        Memory                            : 9999 MHz
+`)
+	// Both max-clock fields are pre-populated; the 9999 values in the sample must not replace them.
+	infoByIndex := map[int]benchmarkGPUInfo{
+		0: {Index: 0, BusID: "00000000:4E:00.0", MaxGraphicsClockMHz: 2430, MaxMemoryClockMHz: 12481},
+	}
+
+	enrichGPUInfoWithMaxClocks(infoByIndex, nvsmiQ)
+
+	if infoByIndex[0].MaxGraphicsClockMHz != 2430 { // only the graphics clock is asserted here
+		t.Errorf("expected existing value to be preserved, got %v", infoByIndex[0].MaxGraphicsClockMHz)
+	}
+}
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -0,0 +1,363 @@
+package platform
+
+import "time"
+
+// BenchmarkHostConfig holds static CPU and memory configuration captured at benchmark
+// start, for correlating runs across hardware. Zero-valued fields are omitted from JSON.
+type BenchmarkHostConfig struct {
+	CPUModel    string  `json:"cpu_model,omitempty"`
+	CPUSockets  int     `json:"cpu_sockets,omitempty"`
+	CPUCores    int     `json:"cpu_cores,omitempty"`
+	CPUThreads  int     `json:"cpu_threads,omitempty"`
+	MemTotalGiB float64 `json:"mem_total_gib,omitempty"`
+}
+
+// BenchmarkCPULoad summarises host CPU utilisation sampled during the GPU
+// steady-state phase. High or unstable CPU load during a GPU benchmark may
+// indicate a competing workload or a CPU-bound driver bottleneck.
+type BenchmarkCPULoad struct {
+	AvgPct  float64 `json:"avg_pct"`
+	MaxPct  float64 `json:"max_pct"`
+	P95Pct  float64 `json:"p95_pct"`
+	Samples int     `json:"samples"` // presumably the number of CPU samples behind the figures above — confirm
+	// Status is one of "ok", "high", or "unstable".
+	Status string `json:"status"`
+	Note   string `json:"note,omitempty"`
+}
+
+// BenchmarkCoolingSummary captures fan telemetry (average RPM plus average and
+// p95 duty cycle) across the full benchmark run.
+type BenchmarkCoolingSummary struct {
+	Available             bool     `json:"available"`
+	AvgFanRPM             float64  `json:"avg_fan_rpm,omitempty"`
+	FanDutyCycleAvailable bool     `json:"fan_duty_cycle_available,omitempty"`
+	AvgFanDutyCyclePct    float64  `json:"avg_fan_duty_cycle_pct,omitempty"`
+	P95FanDutyCyclePct    float64  `json:"p95_fan_duty_cycle_pct,omitempty"`
+	Notes                 []string `json:"notes,omitempty"`
+}
+
+const ( // benchmark profile names, used for NvidiaBenchmarkOptions.Profile and NvidiaBenchmarkResult.BenchmarkProfile
+	NvidiaBenchmarkProfileStandard  = "standard"
+	NvidiaBenchmarkProfileStability = "stability"
+	NvidiaBenchmarkProfileOvernight = "overnight"
+)
+
+type NvidiaBenchmarkOptions struct { // caller-supplied settings for one benchmark invocation (no JSON tags)
+	Profile           string
+	SizeMB            int
+	GPUIndices        []int
+	ExcludeGPUIndices []int
+	RunNCCL           bool
+	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
+	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
+	RampTotal         int    // total number of ramp-up steps in this run
+	RampRunID         string // shared identifier across all steps of the same ramp-up run
+}
+
+type NvidiaBenchmarkResult struct { // aggregate result of one benchmark run; every field carries a JSON tag
+	BenchmarkVersion   string                       `json:"benchmark_version"`
+	GeneratedAt        time.Time                    `json:"generated_at"`
+	Hostname           string                       `json:"hostname,omitempty"`
+	ServerModel        string                       `json:"server_model,omitempty"`
+	BenchmarkProfile   string                       `json:"benchmark_profile"`
+	ParallelGPUs       bool                         `json:"parallel_gpus,omitempty"`
+	RampStep           int                          `json:"ramp_step,omitempty"`
+	RampTotal          int                          `json:"ramp_total,omitempty"`
+	RampRunID          string                       `json:"ramp_run_id,omitempty"`
+	ScalabilityScore   float64                      `json:"scalability_score,omitempty"`
+	// PlatformPowerScore is the mean compute scalability across ramp steps 2..N.
+	// 100% = each added GPU contributes exactly its single-card throughput.
+	// Below 100% = throughput lost to thermal throttle, power limits, or contention.
+	PlatformPowerScore   float64                    `json:"platform_power_score,omitempty"`
+	PerformanceRampSteps []NvidiaPerformanceRampStep `json:"performance_ramp_steps,omitempty"`
+	OverallStatus      string                       `json:"overall_status"`
+	SelectedGPUIndices []int                        `json:"selected_gpu_indices"`
+	Findings           []string                     `json:"findings,omitempty"`
+	Warnings           []string                     `json:"warnings,omitempty"`
+	Normalization      BenchmarkNormalization       `json:"normalization"`
+	HostConfig         *BenchmarkHostConfig         `json:"host_config,omitempty"`
+	CPULoad            *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
+	Cooling            *BenchmarkCoolingSummary     `json:"cooling,omitempty"`
+	GPUs               []BenchmarkGPUResult         `json:"gpus"`
+	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
+	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
+}
+
+type BenchmarkNormalization struct { // overall normalization status plus optional notes and per-GPU entries
+	Status string                      `json:"status"`
+	Notes  []string                    `json:"notes,omitempty"`
+	GPUs   []BenchmarkNormalizationGPU `json:"gpus,omitempty"`
+}
+
+type BenchmarkNormalizationGPU struct { // per-GPU normalization record: persistence mode and GPU/memory clock-lock value and status
+	Index                 int      `json:"index"`
+	PersistenceMode       string   `json:"persistence_mode,omitempty"`
+	GPUClockLockMHz       float64  `json:"gpu_clock_lock_mhz,omitempty"`
+	GPUClockLockStatus    string   `json:"gpu_clock_lock_status,omitempty"`
+	MemoryClockLockMHz    float64  `json:"memory_clock_lock_mhz,omitempty"`
+	MemoryClockLockStatus string   `json:"memory_clock_lock_status,omitempty"`
+	Notes                 []string `json:"notes,omitempty"`
+}
+
+type BenchmarkGPUResult struct { // everything recorded for one GPU: identity, limits, per-phase telemetry, counters and scores
+	Index               int     `json:"index"`
+	UUID                string  `json:"uuid,omitempty"`
+	Name                string  `json:"name,omitempty"`
+	BusID               string  `json:"bus_id,omitempty"`
+	VBIOS               string  `json:"vbios,omitempty"`
+	ComputeCapability   string  `json:"compute_capability,omitempty"`
+	Backend             string  `json:"backend,omitempty"`
+	Status              string  `json:"status"`
+	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
+	PowerLimitDerated   bool    `json:"power_limit_derated,omitempty"`
+	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
+	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
+	// ShutdownTempC is the hardware thermal shutdown threshold for this GPU,
+	// sourced from nvidia-smi -q ("GPU Shutdown Temp"). Fallback: 90°C.
+	ShutdownTempC float64 `json:"shutdown_temp_c,omitempty"`
+	// SlowdownTempC is the software throttle onset threshold ("GPU Slowdown Temp").
+	// Fallback: 80°C.
+	SlowdownTempC float64 `json:"slowdown_temp_c,omitempty"`
+	// CalibratedPeakPowerW is the p95 power measured during a short
+	// dcgmi targeted_power calibration run before the main benchmark.
+	// Used as the reference denominator for PowerSustainScore instead of
+	// the hardware default limit, which bee-gpu-burn cannot reach.
+	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
+	CalibratedPeakTempC    float64                         `json:"calibrated_peak_temp_c,omitempty"`
+	PowerCalibrationTries  int                             `json:"power_calibration_tries,omitempty"`
+	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
+	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
+	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
+	LockedGraphicsClockMHz float64                         `json:"locked_graphics_clock_mhz,omitempty"`
+	LockedMemoryClockMHz   float64                         `json:"locked_memory_clock_mhz,omitempty"`
+	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
+	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
+	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
+	PrecisionFailures      []string                        `json:"precision_failures,omitempty"`
+	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
+	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
+	// ECC error delta over the full benchmark (all phases). NOTE(review): omitempty never omits a struct value.
+	ECC                BenchmarkECCCounters       `json:"ecc,omitempty"`
+	PrecisionResults   []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
+	Scores             BenchmarkScorecard         `json:"scores"`
+	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
+	Notes              []string                   `json:"notes,omitempty"`
+	// CoolingWarning is non-empty when a thermal throttle event occurred with
+	// a clock drop ≥20% while server fans were not at 100% duty cycle.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
+}
+
+type BenchmarkTelemetrySummary struct { // avg/p95/CV statistics for one phase; BenchmarkGPUResult uses it for baseline, steady and cooldown
+	DurationSec         float64 `json:"duration_sec"`
+	Samples             int     `json:"samples"`
+	AvgTempC            float64 `json:"avg_temp_c"`
+	P95TempC            float64 `json:"p95_temp_c"`
+	AvgPowerW           float64 `json:"avg_power_w"`
+	P95PowerW           float64 `json:"p95_power_w"`
+	AvgGraphicsClockMHz float64 `json:"avg_graphics_clock_mhz"`
+	P95GraphicsClockMHz float64 `json:"p95_graphics_clock_mhz"`
+	AvgMemoryClockMHz   float64 `json:"avg_memory_clock_mhz"`
+	P95MemoryClockMHz   float64 `json:"p95_memory_clock_mhz"`
+	AvgUsagePct         float64 `json:"avg_usage_pct"`
+	AvgMemUsagePct      float64 `json:"avg_mem_usage_pct"`
+	ClockCVPct          float64 `json:"clock_cv_pct"`
+	PowerCVPct          float64 `json:"power_cv_pct"`
+	TempCVPct           float64 `json:"temp_cv_pct"`
+	ClockDriftPct       float64 `json:"clock_drift_pct"`
+}
+
+type BenchmarkThrottleCounters struct { // per-reason throttle durations in microseconds (US suffix, "_us" JSON keys)
+	SWPowerCapUS           uint64 `json:"sw_power_cap_us"`
+	SWThermalSlowdownUS    uint64 `json:"sw_thermal_slowdown_us"`
+	SyncBoostUS            uint64 `json:"sync_boost_us"`
+	HWThermalSlowdownUS    uint64 `json:"hw_thermal_slowdown_us"`
+	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
+}
+
+// BenchmarkECCCounters holds ECC error counts: a point-in-time sample or a delta between samples.
+// Corrected = single-bit errors fixed by ECC (DRAM degradation).
+// Uncorrected = double-bit errors that could not be corrected (serious fault).
+// Both are volatile (since last driver reset), not persistent.
+type BenchmarkECCCounters struct {
+	Corrected   uint64 `json:"corrected"`
+	Uncorrected uint64 `json:"uncorrected"`
+}
+
+func (e BenchmarkECCCounters) Total() uint64 { return e.Corrected + e.Uncorrected }            // Total returns corrected plus uncorrected errors.
+func (e BenchmarkECCCounters) IsZero() bool  { return e.Corrected == 0 && e.Uncorrected == 0 } // IsZero reports whether both counters are zero.
+
+type BenchmarkPrecisionResult struct { // throughput record for one named kernel profile (e.g. "fp16_tensor") and its precision category
+	Name          string  `json:"name"`
+	Category      string  `json:"category"`
+	Supported     bool    `json:"supported"`
+	Lanes         int     `json:"lanes,omitempty"`
+	M             uint64  `json:"m,omitempty"`
+	N             uint64  `json:"n,omitempty"`
+	K             uint64  `json:"k,omitempty"`
+	Iterations    uint64  `json:"iterations,omitempty"`
+	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
+	// Weight is the fp32-equivalence factor for this precision category.
+	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, int8/fp8 = 0.25, fp4 = 0.125.
+	// WeightedTeraOpsPerSec = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
+	Weight                float64 `json:"weight,omitempty"`
+	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
+	Notes                 string  `json:"notes,omitempty"`
+}
+
+type BenchmarkScorecard struct { // per-GPU scores and throttle diagnostics
+	ComputeScore float64 `json:"compute_score"`
+	// SyntheticScore is the sum of fp32-equivalent TOPS from per-precision
+	// steady phases (each precision ran alone, full GPU dedicated).
+	SyntheticScore float64 `json:"synthetic_score,omitempty"`
+	// MixedScore is the sum of fp32-equivalent TOPS from the combined phase
+	// (all precisions competing simultaneously — closer to real workloads).
+	MixedScore float64 `json:"mixed_score,omitempty"`
+	// MixedEfficiency = MixedScore / SyntheticScore. Measures how well the GPU
+	// sustains throughput under concurrent mixed-precision load.
+	MixedEfficiency     float64 `json:"mixed_efficiency,omitempty"`
+	PowerSustainScore   float64 `json:"power_sustain_score"`
+	ThermalSustainScore float64 `json:"thermal_sustain_score"`
+	// StabilityScore is derived from the share of steady-state time spent throttling
+	// (thermal + power cap combined): 0% throttle = 100; 100% throttle = 0.
+	StabilityScore float64 `json:"stability_score"`
+
+	// Throttle breakdown — percentage of steady-state time in each throttle type.
+	// Used for diagnosis: tells WHY the GPU throttled, not just whether it did.
+	ThermalThrottlePct  float64 `json:"thermal_throttle_pct"`  // HW+SW thermal slowdown
+	PowerCapThrottlePct float64 `json:"power_cap_throttle_pct"` // SW power cap
+	SyncBoostThrottlePct float64 `json:"sync_boost_throttle_pct,omitempty"`
+
+	// Temperature headroom: distance to the 100°C destruction threshold.
+	// TempHeadroomC = 100 - P95TempC. < 20°C = warning; < 10°C = critical.
+	// Independent of throttle — a GPU at 86°C without throttle is still in the red zone.
+	TempHeadroomC float64 `json:"temp_headroom_c"`
+
+	InterconnectScore float64 `json:"interconnect_score"`
+	// ServerQualityScore (0–100) reflects server infrastructure quality independent
+	// of GPU model. Combines throttle time, power variance, and temp variance.
+	// Use this to compare servers with the same GPU, or to flag a bad server
+	// that throttles an otherwise fast GPU.
+	ServerQualityScore float64 `json:"server_quality_score"`
+	// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
+	// A throttling GPU will score lower here automatically — no quality multiplier.
+	CompositeScore float64 `json:"composite_score"`
+	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
+	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
+}
+
+// BenchmarkServerPower captures server-side power via IPMI alongside GPU-reported
+// power. A ReportingRatio (DeltaW / GPUReportedSumW) near 1.0 means GPU power
+// telemetry is accurate; a ratio well below 1.0 (e.g. 0.5) means the GPUs are
+// over-reporting their power consumption.
+type BenchmarkServerPower struct {
+	Available       bool     `json:"available"`
+	IdleW           float64  `json:"idle_w,omitempty"`
+	LoadedW         float64  `json:"loaded_w,omitempty"`
+	DeltaW          float64  `json:"delta_w,omitempty"`
+	GPUReportedSumW float64  `json:"gpu_reported_sum_w,omitempty"`
+	ReportingRatio  float64  `json:"reporting_ratio,omitempty"`
+	Notes           []string `json:"notes,omitempty"`
+}
+
+// BenchmarkPrecisionSteadyPhase holds per-precision-category telemetry collected
+// during a dedicated single-precision steady window. Because only one kernel
+// type runs at a time, the PowerCVPct here is a genuine stability signal.
+type BenchmarkPrecisionSteadyPhase struct {
+	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
+	Status                string                    `json:"status,omitempty"`
+	Steady                BenchmarkTelemetrySummary `json:"steady"`
+	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
+	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
+	// ECC errors accumulated during this precision phase only.
+	// Non-zero corrected = stress-induced DRAM errors for this kernel type.
+	// Any uncorrected = serious fault triggered by this precision workload.
+	ECC   BenchmarkECCCounters `json:"ecc,omitempty"`
+	Notes string               `json:"notes,omitempty"`
+}
+
+type BenchmarkInterconnectResult struct { // outcome of the interconnect test: status flags plus avg/max algorithm and bus bandwidth
+	Status             string   `json:"status"`
+	Attempted          bool     `json:"attempted"`
+	Supported          bool     `json:"supported"`
+	SelectedGPUIndices []int    `json:"selected_gpu_indices,omitempty"`
+	AvgAlgBWGBps       float64  `json:"avg_algbw_gbps,omitempty"`
+	MaxAlgBWGBps       float64  `json:"max_algbw_gbps,omitempty"`
+	AvgBusBWGBps       float64  `json:"avg_busbw_gbps,omitempty"`
+	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
+	Notes              []string `json:"notes,omitempty"`
+}
+
+type NvidiaPowerBenchResult struct { // aggregate result of the power benchmark: per-GPU limits, ramp steps and server power
+	BenchmarkVersion     string                 `json:"benchmark_version"`
+	GeneratedAt          time.Time              `json:"generated_at"`
+	Hostname             string                 `json:"hostname,omitempty"`
+	ServerModel          string                 `json:"server_model,omitempty"`
+	BenchmarkProfile     string                 `json:"benchmark_profile"`
+	SelectedGPUIndices   []int                  `json:"selected_gpu_indices"`
+	RecommendedSlotOrder []int                  `json:"recommended_slot_order,omitempty"`
+	RampSteps            []NvidiaPowerBenchStep `json:"ramp_steps,omitempty"`
+	OverallStatus        string                 `json:"overall_status"`
+	// PlatformMaxTDPW is the sum of per-GPU stable power limits found during the
+	// cumulative thermal ramp. Represents the actual sustained power budget of
+	// this server under full GPU load. Use for rack power planning.
+	PlatformMaxTDPW float64               `json:"platform_max_tdp_w"`
+	// ServerPower captures the IPMI server power delta (idle→loaded) measured in
+	// parallel with the thermal ramp. Use to compare GPU-reported TDP against
+	// actual wall-power draw as seen by the server's power supply.
+	ServerPower     *BenchmarkServerPower `json:"server_power,omitempty"`
+	Findings        []string              `json:"findings,omitempty"`
+	GPUs            []NvidiaPowerBenchGPU `json:"gpus"`
+}
+
+type NvidiaPowerBenchGPU struct { // per-GPU outcome of the power benchmark
+	Index               int      `json:"index"`
+	Name                string   `json:"name,omitempty"`
+	BusID               string   `json:"bus_id,omitempty"`
+	DefaultPowerLimitW  float64  `json:"default_power_limit_w,omitempty"`
+	// AppliedPowerLimitW is the stable limit found during single-card calibration.
+	AppliedPowerLimitW  float64  `json:"applied_power_limit_w,omitempty"`
+	// StablePowerLimitW is the final fixed limit for this GPU after the
+	// cumulative thermal ramp. This is the limit at which the GPU operated
+	// stably with all other GPUs running simultaneously at their own limits.
+	// May be lower than AppliedPowerLimitW if multi-GPU thermal load required
+	// additional derating.
+	StablePowerLimitW   float64  `json:"stable_power_limit_w,omitempty"`
+	MaxObservedPowerW   float64  `json:"max_observed_power_w,omitempty"`
+	MaxObservedTempC    float64  `json:"max_observed_temp_c,omitempty"`
+	CalibrationAttempts int      `json:"calibration_attempts,omitempty"`
+	Derated             bool     `json:"derated,omitempty"`
+	Status              string   `json:"status"`
+	Notes               []string `json:"notes,omitempty"`
+	// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
+}
+
+type NvidiaPowerBenchStep struct { // one entry of NvidiaPowerBenchResult.RampSteps
+	StepIndex           int      `json:"step_index"`
+	GPUIndices          []int    `json:"gpu_indices"`
+	// NewGPUIndex is the GPU whose stable limit was searched in this step.
+	NewGPUIndex         int      `json:"new_gpu_index"`
+	// NewGPUStableLimitW is the stable power limit found for the new GPU.
+	NewGPUStableLimitW  float64  `json:"new_gpu_stable_limit_w,omitempty"`
+	TotalObservedPowerW float64  `json:"total_observed_power_w,omitempty"`
+	AvgObservedPowerW   float64  `json:"avg_observed_power_w,omitempty"`
+	Derated             bool     `json:"derated,omitempty"`
+	Status              string   `json:"status"`
+	Notes               []string `json:"notes,omitempty"`
+}
+
+// NvidiaPerformanceRampStep holds per-step performance data for the
+// scalability ramp-up phase of the performance benchmark.
+type NvidiaPerformanceRampStep struct {
+	StepIndex          int      `json:"step_index"`
+	GPUIndices         []int    `json:"gpu_indices"`
+	// TotalSyntheticTOPS is the sum of per-GPU SyntheticScore (fp32-equivalent
+	// TOPS from dedicated single-precision phases) across all GPUs in this step.
+	TotalSyntheticTOPS float64  `json:"total_synthetic_tops"`
+	TotalMixedTOPS     float64  `json:"total_mixed_tops,omitempty"`
+	// ScalabilityPct = TotalSyntheticTOPS / (k × best_single_gpu_tops) × 100; k is presumably this step's GPU count.
+	// 100% = perfect linear scaling. < 100% = thermal/power/interconnect loss.
+	ScalabilityPct     float64  `json:"scalability_pct"`
+	Status             string   `json:"status"`
+	Notes              []string `json:"notes,omitempty"`
+}
--- a/audit/internal/platform/error_patterns.go
+++ b/audit/internal/platform/error_patterns.go
@@ -0,0 +1,139 @@
+package platform
+
+import "regexp"
+
+// ErrorPattern describes one kernel log pattern that indicates a hardware error.
+// Add new patterns by appending to HardwareErrorPatterns — no other code changes needed.
+type ErrorPattern struct {
+	// Name is a short machine-readable label for logging and deduplication.
+	Name string
+	// Re is the compiled regular expression, matched against one kmsg line at a time.
+	Re *regexp.Regexp
+	// Category groups related errors: "gpu", "pcie", "storage", "mce", "memory", "cpu".
+	Category string
+	// Severity is "warning" for recoverable/uncertain faults, "critical" for definitive failures.
+	Severity string
+	// BDFGroup is the 1-based index of the capture group in Re that holds a PCIe BDF
+	// address (e.g. "0000:c8:00.0"). 0 means no BDF is captured by this pattern.
+	BDFGroup int
+	// DevGroup is the 1-based index of the capture group in Re that holds a device
+	// name (e.g. "sda", "nvme0"). 0 means no device name is captured by this pattern.
+	DevGroup int
+}
+
+// HardwareErrorPatterns is the global list of kernel log patterns that indicate hardware faults.
+// Every pattern is case-insensitive ((?i)). To add one, append an ErrorPattern to this slice.
+var HardwareErrorPatterns = []ErrorPattern{
+	// ── GPU / NVIDIA ────────────────────────────────────────────────────────────
+	{
+		Name:     "nvidia-rminitadapter",
+		Re:       mustPat(`(?i)NVRM:.*GPU\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d)`),
+		Category: "gpu",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+	{
+		Name:     "nvidia-msi-fail",
+		Re:       mustPat(`(?i)NVRM:.*Failed to enable MSI`),
+		Category: "gpu",
+		Severity: "warning",
+	},
+	{
+		Name:     "nvidia-aer",
+		Re:       mustPat(`(?i)nvidia\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
+		Category: "gpu",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+	{
+		Name:     "nvidia-xid",
+		Re:       mustPat(`(?i)NVRM:.*Xid.*\b([\da-f]{4}:[\da-f]{2}:[\da-f]{2}(?:\.\d)?)`), // Xid lines print "PCI:dddd:bb:dd" without the ".f" function
+		Category: "gpu",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+
+	// ── PCIe AER (generic) ──────────────────────────────────────────────────────
+	{
+		Name:     "pcie-aer",
+		Re:       mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*AER`),
+		Category: "pcie",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+	{
+		Name:     "pcie-uncorrectable",
+		Re:       mustPat(`(?i)([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*uncorrectable`),
+		Category: "pcie",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+	{
+		Name:     "pcie-link-down",
+		Re:       mustPat(`(?i)pcieport\s+([\da-f]{4}:[\da-f]{2}:[\da-f]{2}\.\d).*link.*down`),
+		Category: "pcie",
+		Severity: "warning",
+		BDFGroup: 1,
+	},
+
+	// ── Storage ─────────────────────────────────────────────────────────────────
+	{
+		Name:     "blk-io-error",
+		Re:       mustPat(`(?i)blk_update_request.*I/O error.*dev\s+(\w+)`),
+		Category: "storage",
+		Severity: "warning",
+		DevGroup: 1,
+	},
+	{
+		Name:     "nvme-timeout",
+		Re:       mustPat(`(?i)nvme\s+(\w+):.*timeout`),
+		Category: "storage",
+		Severity: "warning",
+		DevGroup: 1,
+	},
+	{
+		Name:     "scsi-failed",
+		Re:       mustPat(`(?i)sd\s+[\da-f:]+:.*FAILED`),
+		Category: "storage",
+		Severity: "warning",
+	},
+	{
+		Name:     "nvme-reset",
+		Re:       mustPat(`(?i)nvme\s+(\w+):.*reset`),
+		Category: "storage",
+		Severity: "warning",
+		DevGroup: 1,
+	},
+
+	// ── Machine Check Exceptions ────────────────────────────────────────────────
+	{
+		Name:     "mce-hardware-error",
+		Re:       mustPat(`(?i)mce:.*hardware error`),
+		Category: "mce",
+		Severity: "warning",
+	},
+	{
+		Name:     "mce-corrected",
+		Re:       mustPat(`(?i)mce:.*\bcorrected`), // \b keeps "Uncorrected" from matching
+		Category: "mce",
+		Severity: "warning",
+	},
+
+	// ── Memory ─────────────────────────────────────────────────────────────────
+	{
+		Name:     "edac-ue",
+		Re:       mustPat(`(?i)EDAC.*uncorrectable`),
+		Category: "memory",
+		Severity: "warning",
+	},
+	{
+		Name:     "edac-ce",
+		Re:       mustPat(`(?i)EDAC.*\bcorrectable`), // \b keeps "Uncorrectable" (edac-ue) from matching
+		Category: "memory",
+		Severity: "warning",
+	},
+}
+
+func mustPat(s string) *regexp.Regexp { // compiles s; an invalid pattern panics while HardwareErrorPatterns is initialised
+	return regexp.MustCompile(s)
+}
--- a/audit/internal/platform/export.go
+++ b/audit/internal/platform/export.go
@@ -9,8 +9,50 @@ import (
 	"strings"
 )

+var exportExecCommand = exec.Command // test seam: tests swap this to fake lsblk/mount/sync/umount
+
+// formatMountTargetError adds the mount output (or an exFAT-support hint) to err.
+func formatMountTargetError(target RemovableTarget, raw string, err error) error {
+	msg := strings.TrimSpace(raw)
+	switch fstype := strings.ToLower(strings.TrimSpace(target.FSType)); {
+	case fstype == "exfat" && strings.Contains(strings.ToLower(msg), "unknown filesystem type 'exfat'"):
+		return fmt.Errorf("mount %s: exFAT support is missing in this ISO build: %w", target.Device, err)
+	case msg == "":
+		return err
+	}
+	return fmt.Errorf("%s: %w", msg, err)
+}
+
+// removableTargetReadOnly reports whether a parsed lsblk row describes
+// read-only media: either the RO column is "1", or the filesystem type
+// (trimmed, case-insensitive) is iso9660 or squashfs.
+func removableTargetReadOnly(fields map[string]string) bool {
+	if fields["RO"] == "1" {
+		return true
+	}
+	// These filesystem types are treated as read-only whatever RO says.
+	fstype := strings.ToLower(strings.TrimSpace(fields["FSTYPE"]))
+	return fstype == "iso9660" || fstype == "squashfs"
+}
+
+// ensureWritableMountpoint verifies that files can be created under mountpoint
+// by creating, closing and removing a temporary ".bee-write-test-*" probe file.
+// A creation failure is reported as "target filesystem is not writable".
+func ensureWritableMountpoint(mountpoint string) error {
+	probe, err := os.CreateTemp(mountpoint, ".bee-write-test-*")
+	if err != nil {
+		return fmt.Errorf("target filesystem is not writable: %w", err)
+	}
+	probePath := probe.Name()
+	if closeErr := probe.Close(); closeErr != nil {
+		_ = os.Remove(probePath)
+		return closeErr
+	}
+	return os.Remove(probePath)
+}
+
 func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
-	raw, err := exec.Command("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
+	raw, err := exportExecCommand("lsblk", "-P", "-o", "NAME,TYPE,PKNAME,RM,RO,FSTYPE,MOUNTPOINT,SIZE,LABEL,MODEL").Output()
 	if err != nil {
 		return nil, err
 	}
@@ -34,7 +76,7 @@ func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
 				}
 			}
 		}
-		if !removable || fields["FSTYPE"] == "" {
+		if !removable || fields["FSTYPE"] == "" || removableTargetReadOnly(fields) {
 			continue
 		}

@@ -52,7 +94,7 @@ func (s *System) ListRemovableTargets() ([]RemovableTarget, error) {
 	return out, nil
 }

-func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string, error) {
+func (s *System) ExportFileToTarget(src string, target RemovableTarget) (dst string, retErr error) {
 	if src == "" || target.Device == "" {
 		return "", fmt.Errorf("source and target are required")
 	}
@@ -62,20 +104,43 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string,

 	mountpoint := strings.TrimSpace(target.Mountpoint)
 	mountedHere := false
+	mounted := mountpoint != ""
 	if mountpoint == "" {
 		mountpoint = filepath.Join("/tmp", "bee-export-"+filepath.Base(target.Device))
 		if err := os.MkdirAll(mountpoint, 0755); err != nil {
 			return "", err
 		}
-		if raw, err := exec.Command("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
+		if raw, err := exportExecCommand("mount", target.Device, mountpoint).CombinedOutput(); err != nil {
 			_ = os.Remove(mountpoint)
-			return string(raw), err
+			return "", formatMountTargetError(target, string(raw), err)
 		}
 		mountedHere = true
+		mounted = true
+	}
+	defer func() {
+		if !mounted {
+			return
+		}
+		_ = exportExecCommand("sync").Run()
+		if raw, err := exportExecCommand("umount", mountpoint).CombinedOutput(); err != nil && retErr == nil {
+			msg := strings.TrimSpace(string(raw))
+			if msg == "" {
+				retErr = err
+			} else {
+				retErr = fmt.Errorf("%s: %w", msg, err)
+			}
+		}
+		if mountedHere {
+			_ = os.Remove(mountpoint)
+		}
+	}()
+
+	if err := ensureWritableMountpoint(mountpoint); err != nil {
+		return "", err
 	}

 	filename := filepath.Base(src)
-	dst := filepath.Join(mountpoint, filename)
+	dst = filepath.Join(mountpoint, filename)
 	data, err := os.ReadFile(src)
 	if err != nil {
 		return "", err
@@ -83,12 +148,6 @@ func (s *System) ExportFileToTarget(src string, target RemovableTarget) (string,
 	if err := os.WriteFile(dst, data, 0644); err != nil {
 		return "", err
 	}
-	_ = exec.Command("sync").Run()
-
-	if mountedHere {
-		_ = exec.Command("umount", mountpoint).Run()
-		_ = os.Remove(mountpoint)
-	}

 	return dst, nil
 }
--- a/audit/internal/platform/export_test.go
+++ b/audit/internal/platform/export_test.go
@@ -0,0 +1,112 @@
+package platform
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestExportFileToTargetUnmountsExistingMountpoint(t *testing.T) {
+	tmp := t.TempDir()
+	src := filepath.Join(tmp, "bundle.tar.gz")
+	mountpoint := filepath.Join(tmp, "mnt")
+	if err := os.MkdirAll(mountpoint, 0755); err != nil {
+		t.Fatalf("mkdir mountpoint: %v", err)
+	}
+	if err := os.WriteFile(src, []byte("bundle"), 0644); err != nil {
+		t.Fatalf("write src: %v", err)
+	}
+
+	var calls [][]string // argv of every command the code under test tries to run
+	oldExec := exportExecCommand
+	exportExecCommand = func(name string, args ...string) *exec.Cmd {
+		calls = append(calls, append([]string{name}, args...))
+		return exec.Command("sh", "-c", "exit 0") // record the call, then run a no-op that succeeds
+	}
+	t.Cleanup(func() { exportExecCommand = oldExec })
+
+	s := &System{}
+	dst, err := s.ExportFileToTarget(src, RemovableTarget{
+		Device:     "/dev/sdb1",
+		Mountpoint: mountpoint, // already "mounted": a plain directory stands in for the target
+	})
+	if err != nil {
+		t.Fatalf("ExportFileToTarget error: %v", err)
+	}
+	if got, want := dst, filepath.Join(mountpoint, "bundle.tar.gz"); got != want {
+		t.Fatalf("dst=%q want %q", got, want)
+	}
+	if _, err := os.Stat(filepath.Join(mountpoint, "bundle.tar.gz")); err != nil {
+		t.Fatalf("exported file missing: %v", err)
+	}
+
+	foundUmount := false
+	for _, call := range calls {
+		if len(call) == 2 && call[0] == "umount" && call[1] == mountpoint {
+			foundUmount = true
+			break
+		}
+	}
+	if !foundUmount {
+		t.Fatalf("expected umount %q call, got %#v", mountpoint, calls)
+	}
+}
+
+func TestExportFileToTargetRejectsNonWritableMountpoint(t *testing.T) {
+	if os.Geteuid() == 0 {
+		t.Skip("root ignores directory permission bits; cannot simulate a read-only target")
+	}
+	tmp := t.TempDir()
+	src := filepath.Join(tmp, "bundle.tar.gz")
+	mountpoint := filepath.Join(tmp, "mnt")
+	if err := os.MkdirAll(mountpoint, 0755); err != nil {
+		t.Fatalf("mkdir mountpoint: %v", err)
+	}
+	if err := os.WriteFile(src, []byte("bundle"), 0644); err != nil {
+		t.Fatalf("write src: %v", err)
+	}
+	if err := os.Chmod(mountpoint, 0555); err != nil {
+		t.Fatalf("chmod mountpoint: %v", err)
+	}
+
+	oldExec := exportExecCommand
+	exportExecCommand = func(name string, args ...string) *exec.Cmd {
+		return exec.Command("sh", "-c", "exit 0")
+	}
+	t.Cleanup(func() { exportExecCommand = oldExec })
+
+	s := &System{}
+	_, err := s.ExportFileToTarget(src, RemovableTarget{Device: "/dev/sdb1", Mountpoint: mountpoint})
+	if err == nil {
+		t.Fatal("expected error for non-writable mountpoint")
+	}
+	if !strings.Contains(err.Error(), "target filesystem is not writable") {
+		t.Fatalf("err=%q want writable message", err)
+	}
+}
+
+func TestListRemovableTargetsSkipsReadOnlyMedia(t *testing.T) {
+	oldExec := exportExecCommand
+	lsblkOut := `NAME="sda1" TYPE="part" PKNAME="sda" RM="1" RO="1" FSTYPE="iso9660" MOUNTPOINT="/run/live/medium" SIZE="3.7G" LABEL="BEE" MODEL=""
+NAME="sdb1" TYPE="part" PKNAME="sdb" RM="1" RO="0" FSTYPE="vfat" MOUNTPOINT="/media/bee/USB" SIZE="29.8G" LABEL="USB" MODEL=""`
+	exportExecCommand = func(name string, args ...string) *exec.Cmd {
+		cmd := exec.Command("sh", "-c", "printf '%s\n' \"$LSBLK_OUT\"") // fake lsblk: echo the canned rows
+		cmd.Env = append(os.Environ(), "LSBLK_OUT="+lsblkOut)
+		return cmd
+	}
+	t.Cleanup(func() { exportExecCommand = oldExec })
+
+	s := &System{}
+	targets, err := s.ListRemovableTargets()
+	if err != nil {
+		t.Fatalf("ListRemovableTargets error: %v", err)
+	}
+	if len(targets) != 1 { // the RO="1" iso9660 row must be filtered out
+		t.Fatalf("len(targets)=%d want 1 (%+v)", len(targets), targets)
+	}
+	if got := targets[0].Device; got != "/dev/sdb1" {
+		t.Fatalf("device=%q want /dev/sdb1", got)
+	}
+}
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -0,0 +1,549 @@
+package platform
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// GPUMetricRow is one per-GPU telemetry sample (from nvidia-smi or rocm-smi) taken during a stress test.
+type GPUMetricRow struct {
+	Stage                 string  `json:"stage,omitempty"`
+	StageStartSec         float64 `json:"stage_start_sec,omitempty"`
+	StageEndSec           float64 `json:"stage_end_sec,omitempty"`
+	ElapsedSec            float64 `json:"elapsed_sec"`
+	GPUIndex              int     `json:"index"`
+	TempC                 float64 `json:"temp_c"`
+	UsagePct              float64 `json:"usage_pct"`
+	MemUsagePct           float64 `json:"mem_usage_pct"`
+	PowerW                float64 `json:"power_w"`
+	ClockMHz              float64 `json:"clock_mhz"`
+	MemClockMHz           float64 `json:"mem_clock_mhz"`
+	FanAvgRPM             float64 `json:"fan_avg_rpm,omitempty"`
+	FanDutyCyclePct       float64 `json:"fan_duty_cycle_pct,omitempty"`
+	FanDutyCycleAvailable bool    `json:"fan_duty_cycle_available,omitempty"`
+}
+
+// sampleGPUMetrics runs nvidia-smi once (all GPUs when gpuIndices is empty) and returns one row per parsed GPU line.
+func sampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
+	args := []string{
+		"--query-gpu=index,temperature.gpu,utilization.gpu,utilization.memory,power.draw,clocks.current.graphics,clocks.current.memory",
+		"--format=csv,noheader,nounits",
+	}
+	if len(gpuIndices) > 0 {
+		ids := make([]string, len(gpuIndices))
+		for i, idx := range gpuIndices {
+			ids[i] = strconv.Itoa(idx)
+		}
+		args = append([]string{"--id=" + strings.Join(ids, ",")}, args...)
+	}
+	out, err := exec.Command("nvidia-smi", args...).Output()
+	if err != nil {
+		return nil, err
+	}
+	var rows []GPUMetricRow
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		parts := strings.Split(strings.TrimSpace(line), ", ")
+		if len(parts) < 7 {
+			continue
+		}
+		// A row whose index is not numeric must not be attributed to GPU 0.
+		idx, convErr := strconv.Atoi(strings.TrimSpace(parts[0]))
+		if convErr != nil {
+			continue
+		}
+		rows = append(rows, GPUMetricRow{
+			GPUIndex:    idx,
+			TempC:       parseGPUFloat(parts[1]),
+			UsagePct:    parseGPUFloat(parts[2]),
+			MemUsagePct: parseGPUFloat(parts[3]),
+			PowerW:      parseGPUFloat(parts[4]),
+			ClockMHz:    parseGPUFloat(parts[5]),
+			MemClockMHz: parseGPUFloat(parts[6]),
+		})
+	}
+	return rows, nil
+}
+
+// parseGPUFloat parses a metric field; placeholders ("N/A"), blanks and non-finite values yield 0.
+func parseGPUFloat(s string) float64 {
+	v, err := strconv.ParseFloat(strings.TrimSpace(s), 64)
+	if err != nil || math.IsNaN(v) || math.IsInf(v, 0) {
+		return 0
+	}
+	return v
+}
+
+// SampleGPUMetrics is the exported form of sampleGPUMetrics; an empty gpuIndices queries nvidia-smi without an --id filter.
+func SampleGPUMetrics(gpuIndices []int) ([]GPUMetricRow, error) {
+	return sampleGPUMetrics(gpuIndices)
+}
+
+// sampleAMDGPUMetrics runs rocm-smi in CSV mode and returns one row per data line, numbered in output order.
+func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
+	out, err := runROCmSMI("--showtemp", "--showuse", "--showpower", "--showmemuse", "--csv")
+	if err != nil {
+		return nil, err
+	}
+	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
+	if len(lines) < 2 {
+		return nil, fmt.Errorf("rocm-smi: insufficient output")
+	}
+
+	// Locate columns by header text: the first header containing any keyword wins.
+	headers := strings.Split(lines[0], ",")
+	findColumn := func(keywords ...string) int {
+		for col, header := range headers {
+			lowered := strings.ToLower(strings.TrimSpace(header))
+			for _, keyword := range keywords {
+				if strings.Contains(lowered, keyword) {
+					return col
+				}
+			}
+		}
+		return -1
+	}
+	tempCol := findColumn("sensor edge", "temperature (c)", "temp")
+	useCol := findColumn("gpu use (%)")
+	memCol := findColumn("vram%", "memory allocated")
+	powerCol := findColumn("average graphics package power", "power (w)")
+
+	var rows []GPUMetricRow
+	for _, line := range lines[1:] {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		cells := strings.Split(line, ",")
+		value := func(col int) float64 {
+			if col < 0 || col >= len(cells) {
+				return 0
+			}
+			cell := strings.TrimSpace(cells[col])
+			if strings.EqualFold(cell, "n/a") {
+				return 0
+			}
+			return parseGPUFloat(cell)
+		}
+		rows = append(rows, GPUMetricRow{
+			GPUIndex:    len(rows),
+			TempC:       value(tempCol),
+			UsagePct:    value(useCol),
+			MemUsagePct: value(memCol),
+			PowerW:      value(powerCol),
+		})
+	}
+	if len(rows) == 0 {
+		return nil, fmt.Errorf("rocm-smi: no GPU rows parsed")
+	}
+	return rows, nil
+}
+
+// WriteGPUMetricsCSV writes rows to path as CSV; the stage field is always quoted, with embedded quotes doubled.
+func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
+	var b bytes.Buffer
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available\n")
+	for _, r := range rows {
+		dutyAvail := 0
+		if r.FanDutyCycleAvailable {
+			dutyAvail = 1
+		}
+		fmt.Fprintf(&b, "\"%s\",%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f,%.0f,%.1f,%d\n",
+			strings.ReplaceAll(strings.TrimSpace(r.Stage), `"`, `""`), r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz, r.FanAvgRPM, r.FanDutyCyclePct, dutyAvail)
+	}
+	return os.WriteFile(path, b.Bytes(), 0644)
+}
+
+type gpuMetricStageSpan struct { // one contiguous run of rows sharing a stage name
+	Name  string // stage label; "run" when the rows carry no stage
+	Start float64 // span start on the elapsed-seconds axis
+	End   float64 // span end on the elapsed-seconds axis
+}
+
+// WriteGPUMetricsHTML writes a standalone HTML file with a stage legend and one SVG chart per GPU.
+func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
+	// Group rows by GPU index, preserving first-appearance order.
+	seen := make(map[int]bool)
+	var order []int
+	gpuMap := make(map[int][]GPUMetricRow)
+	for _, r := range rows {
+		if !seen[r.GPUIndex] {
+			seen[r.GPUIndex] = true
+			order = append(order, r.GPUIndex)
+		}
+		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
+	}
+
+	stageSpans := buildGPUMetricStageSpans(rows)
+	stageColorByName := make(map[string]string, len(stageSpans))
+	for i, span := range stageSpans {
+		stageColorByName[span.Name] = gpuMetricStagePalette[i%len(gpuMetricStagePalette)] // colours cycle; a repeated name keeps its last span's colour
+	}
+
+	var legend strings.Builder
+	if len(stageSpans) > 0 {
+		legend.WriteString(`<div class="stage-legend">`)
+		for _, span := range stageSpans {
+			fmt.Fprintf(&legend, `<span class="stage-chip"><span class="stage-swatch" style="background:%s"></span>%s</span>`,
+				stageColorByName[span.Name], gpuHTMLEscape(span.Name))
+		}
+		legend.WriteString(`</div>`)
+	}
+
+	var svgs strings.Builder
+	for _, gpuIdx := range order {
+		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx, stageSpans, stageColorByName))
+		svgs.WriteString("\n")
+	}
+
+	ts := time.Now().UTC().Format("2006-01-02 15:04:05 UTC")
+	html := fmt.Sprintf(`<!DOCTYPE html>
+<html><head>
+<meta charset="utf-8">
+<title>GPU Stress Test Metrics</title>
+<style>
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6)}
+*{box-sizing:border-box}
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);margin:0}
+.page{padding:24px}
+.card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);overflow:hidden}
+.card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px}
+.card-body{padding:16px}
+h1{font-size:22px;margin:0 0 6px}
+p{color:var(--muted);font-size:13px;margin:0 0 16px}
+.stage-legend{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 16px}
+.stage-chip{display:inline-flex;align-items:center;gap:8px;padding:4px 10px;border-radius:999px;background:var(--surface-2);border:1px solid var(--border-lite);font-size:12px}
+.stage-swatch{display:inline-block;width:12px;height:12px;border-radius:999px}
+.chart-block{margin-top:16px}
+</style>
+</head><body>
+<div class="page">
+<div class="card">
+<div class="card-head">GPU Stress Test Metrics</div>
+<div class="card-body">
+<h1>GPU Stress Test Metrics</h1>
+<p>Generated %s</p>
+%s
+<div class="chart-block">%s</div>
+</div>
+</div>
+</div>
+</body></html>`, ts, legend.String(), svgs.String())
+
+	return os.WriteFile(path, []byte(html), 0644)
+}
+
+// drawGPUChartSVG renders one GPU's temperature, usage, power and clock series as a self-contained <svg> element.
+func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int, stageSpans []gpuMetricStageSpan, stageColorByName map[string]string) string {
+	// Layout
+	const W, H = 960, 520
+	const plotX1 = 120 // usage axis / chart left border
+	const plotX2 = 840 // power axis / chart right border
+	const plotY1 = 70  // top
+	const plotY2 = 465 // bottom  (PH = 395)
+	const PW = plotX2 - plotX1
+	const PH = plotY2 - plotY1
+	// Outer axes
+	const tempAxisX = 60   // temp axis line
+	const clockAxisX = 900 // clock axis line
+
+	colors := [4]string{"#e74c3c", "#3498db", "#2ecc71", "#f39c12"}
+	seriesLabel := [4]string{
+		fmt.Sprintf("GPU %d Temp (°C)", gpuIdx),
+		fmt.Sprintf("GPU %d Usage (%%)", gpuIdx),
+		fmt.Sprintf("GPU %d Power (W)", gpuIdx),
+		fmt.Sprintf("GPU %d Clock (MHz)", gpuIdx),
+	}
+	axisLabel := [4]string{"Temperature (°C)", "GPU Usage (%)", "Power (W)", "Clock (MHz)"}
+
+	// Extract series: t holds elapsed seconds; vals[k] uses the same order as colors/seriesLabel/axisLabel.
+	t := make([]float64, len(rows))
+	vals := [4][]float64{}
+	for i := range vals {
+		vals[i] = make([]float64, len(rows))
+	}
+	for i, r := range rows {
+		t[i] = r.ElapsedSec
+		vals[0][i] = r.TempC
+		vals[1][i] = r.UsagePct
+		vals[2][i] = r.PowerW
+		vals[3][i] = r.ClockMHz
+	}
+
+	tMin, tMax := gpuMinMax(t)
+	type axisScale struct {
+		ticks    []float64
+		min, max float64
+	}
+	var axes [4]axisScale
+	for i := 0; i < 4; i++ {
+		mn, mx := gpuMinMax(vals[i])
+		tks := gpuNiceTicks(mn, mx, 8)
+		axes[i] = axisScale{ticks: tks, min: tks[0], max: tks[len(tks)-1]} // each series is scaled to its own tick range
+	}
+
+	xv := func(tv float64) float64 {
+		if tMax == tMin {
+			return float64(plotX1)
+		}
+		return float64(plotX1) + (tv-tMin)/(tMax-tMin)*float64(PW)
+	}
+	yv := func(v float64, ai int) float64 {
+		a := axes[ai]
+		if a.max == a.min {
+			return float64(plotY1 + PH/2)
+		}
+		return float64(plotY2) - (v-a.min)/(a.max-a.min)*float64(PH)
+	}
+
+	var b strings.Builder
+
+	fmt.Fprintf(&b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d"`+
+		` style="background:#fff;border-radius:8px;display:block;margin:0 auto 24px;`+
+		`box-shadow:0 2px 12px rgba(0,0,0,.12)">`+"\n", W, H)
+
+	// Title
+	fmt.Fprintf(&b, `<text x="%d" y="22" text-anchor="middle" font-family="sans-serif"`+
+		` font-size="14" font-weight="bold" fill="#333">GPU Stress Test Metrics — GPU %d</text>`+"\n",
+		plotX1+PW/2, gpuIdx)
+
+	// Horizontal grid (align to temp axis ticks)
+	b.WriteString(`<g stroke="#e0e0e0" stroke-width="0.5">` + "\n")
+	for _, tick := range axes[0].ticks {
+		y := yv(tick, 0)
+		if y < float64(plotY1) || y > float64(plotY2) {
+			continue
+		}
+		fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
+			plotX1, y, plotX2, y)
+	}
+	// Vertical grid
+	xTicks := gpuNiceTicks(tMin, tMax, 10)
+	for _, tv := range xTicks {
+		x := xv(tv)
+		if x < float64(plotX1) || x > float64(plotX2) {
+			continue
+		}
+		fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
+			x, plotY1, x, plotY2)
+	}
+	b.WriteString("</g>\n")
+
+	// Stage backgrounds: one translucent band per span (at least 1px wide) with the stage name centred at the top.
+	for _, span := range stageSpans {
+		x1 := xv(span.Start)
+		x2 := xv(span.End)
+		if x2 < x1 {
+			x1, x2 = x2, x1
+		}
+		if x2-x1 < 1 {
+			x2 = x1 + 1
+		}
+		color := stageColorByName[span.Name]
+		fmt.Fprintf(&b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="%s" fill-opacity="0.18"/>`+"\n",
+			x1, plotY1, x2-x1, PH, color)
+		fmt.Fprintf(&b, `<text x="%.1f" y="%d" font-family="sans-serif" font-size="10" fill="#444" text-anchor="middle">%s</text>`+"\n",
+			x1+(x2-x1)/2, plotY1+12, gpuHTMLEscape(span.Name))
+	}
+
+	// Chart border
+	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
+		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
+		plotX1, plotY1, PW, PH)
+
+	// X axis ticks and labels
+	b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#333" text-anchor="middle">` + "\n")
+	for _, tv := range xTicks {
+		x := xv(tv)
+		if x < float64(plotX1) || x > float64(plotX2) {
+			continue
+		}
+		fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotY2+18, gpuFormatTick(tv))
+		fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d" stroke="#333" stroke-width="1"/>`+"\n",
+			x, plotY2, x, plotY2+4)
+	}
+	b.WriteString("</g>\n")
+	fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="13"`+
+		` fill="#333" text-anchor="middle">Time (seconds)</text>`+"\n",
+		plotX1+PW/2, plotY2+38)
+
+	// Y axes: [tempAxisX, plotX1, plotX2, clockAxisX]
+	axisLineX := [4]int{tempAxisX, plotX1, plotX2, clockAxisX}
+	axisRight := [4]bool{false, false, true, true}
+	// Label x positions (for rotated vertical text)
+	axisLabelX := [4]int{10, 68, 868, 950}
+
+	for i := 0; i < 4; i++ {
+		ax := axisLineX[i]
+		right := axisRight[i]
+		color := colors[i]
+
+		// Axis line
+		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d"`+
+			` stroke="%s" stroke-width="1"/>`+"\n",
+			ax, plotY1, ax, plotY2, color)
+
+		// Ticks and tick labels
+		fmt.Fprintf(&b, `<g font-family="sans-serif" font-size="10" fill="%s">`+"\n", color)
+		for _, tick := range axes[i].ticks {
+			y := yv(tick, i)
+			if y < float64(plotY1) || y > float64(plotY2) {
+				continue
+			}
+			dx := -5
+			textX := ax - 8
+			anchor := "end"
+			if right {
+				dx = 5
+				textX = ax + 8
+				anchor = "start"
+			}
+			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"`+
+				` stroke="%s" stroke-width="1"/>`+"\n",
+				ax, y, ax+dx, y, color)
+			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="%s" dy="4">%s</text>`+"\n",
+				textX, y, anchor, gpuFormatTick(tick))
+		}
+		b.WriteString("</g>\n")
+
+		// Axis label (rotated)
+		lx := axisLabelX[i]
+		fmt.Fprintf(&b, `<text transform="translate(%d,%d) rotate(-90)"`+
+			` font-family="sans-serif" font-size="12" fill="%s" text-anchor="middle">%s</text>`+"\n",
+			lx, plotY1+PH/2, color, axisLabel[i])
+	}
+
+	// Data lines
+	for i := 0; i < 4; i++ {
+		var pts strings.Builder
+		for j := range rows {
+			x := xv(t[j])
+			y := yv(vals[i][j], i)
+			if j == 0 {
+				fmt.Fprintf(&pts, "%.1f,%.1f", x, y)
+			} else {
+				fmt.Fprintf(&pts, " %.1f,%.1f", x, y)
+			}
+		}
+		fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="1.5"/>`+"\n",
+			pts.String(), colors[i])
+	}
+
+	// Legend
+	const legendY = 42
+	for i := 0; i < 4; i++ {
+		lx := plotX1 + i*(PW/4) + 10
+		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d"`+
+			` stroke="%s" stroke-width="2"/>`+"\n",
+			lx, legendY, lx+20, legendY, colors[i])
+		fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#333">%s</text>`+"\n",
+			lx+25, legendY+4, seriesLabel[i])
+	}
+
+	b.WriteString("</svg>\n")
+	return b.String()
+}
+
+// gpuMinMax returns the smallest and largest value in vals, or (0, 1) when vals is empty.
+func gpuMinMax(vals []float64) (float64, float64) {
+	if len(vals) == 0 {
+		return 0, 1
+	}
+	lo, hi := vals[0], vals[0]
+	for _, v := range vals[1:] {
+		if v < lo {
+			lo = v
+		} else if v > hi {
+			hi = v
+		}
+	}
+	return lo, hi
+}
+
+func gpuNiceTicks(mn, mx float64, targetCount int) []float64 { // ticks on a 1/2/5×10^k grid covering [mn, mx]; never empty
+	if mn == mx {
+		mn, mx = mn-1, mx+1 // widen a flat series so the range is non-zero
+	}
+	r := mx - mn
+	step := math.Pow(10, math.Floor(math.Log10(r/float64(targetCount))))
+	for _, f := range []float64{1, 2, 5, 10} {
+		if r/(f*step) <= float64(targetCount)*1.5 {
+			step = f * step
+			break
+		}
+	}
+	var ticks []float64
+	for v, hi := math.Floor(mn/step)*step, math.Ceil(mx/step)*step; v <= hi+step*0.001; v += step {
+		ticks = append(ticks, math.Round(v*1e9)/1e9)
+	}
+	if len(ticks) == 0 {
+		return []float64{0, 1} // NaN/Inf input or a zero step produced no ticks; callers index ticks[0]
+	}
+	return ticks
+}
+
+func gpuFormatTick(v float64) string { // whole numbers print as integers, anything else with one decimal
+	if v != math.Trunc(v) {
+		return strconv.FormatFloat(v, 'f', 1, 64)
+	}
+	return strconv.Itoa(int(v))
+}
+
+var gpuMetricStagePalette = []string{ // stage band/legend colours, assigned by span index and reused cyclically
+	"#d95c5c",
+	"#2185d0",
+	"#21ba45",
+	"#f2c037",
+	"#6435c9",
+	"#00b5ad",
+	"#a5673f",
+}
+
+// buildGPUMetricStageSpans merges consecutive rows with the same stage name into spans at least 1 wide.
+func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
+	var spans []gpuMetricStageSpan
+	for _, row := range rows {
+		name := strings.TrimSpace(row.Stage)
+		if name == "" {
+			name = "run"
+		}
+		start, end := row.StageStartSec, row.StageEndSec
+		if end <= start {
+			start, end = row.ElapsedSec, row.ElapsedSec
+		}
+		if n := len(spans); n == 0 || spans[n-1].Name != name {
+			spans = append(spans, gpuMetricStageSpan{Name: name, Start: start, End: end})
+			continue
+		}
+		last := &spans[len(spans)-1]
+		if start < last.Start {
+			last.Start = start
+		}
+		if end > last.End {
+			last.End = end
+		}
+	}
+	for i := range spans {
+		if span := &spans[i]; span.End <= span.Start {
+			span.End = span.Start + 1
+		}
+	}
+	return spans
+}
+
+// gpuHTMLReplacer escapes the five characters that are significant in HTML/SVG markup.
+var gpuHTMLReplacer = strings.NewReplacer(
+	"&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;", "'", "&#39;",
+)
+
+// gpuHTMLEscape returns s with &, <, >, " and ' replaced by character
+// references, so it can be embedded in element content or quoted attributes.
+// The report code applies it to stage names before writing them out.
+func gpuHTMLEscape(s string) string {
+	return gpuHTMLReplacer.Replace(s)
+}
--- a/audit/internal/platform/gpu_metrics_test.go
+++ b/audit/internal/platform/gpu_metrics_test.go
@@ -0,0 +1,65 @@
+package platform
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestWriteGPUMetricsCSVIncludesStageColumn(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.csv")
+	rows := []GPUMetricRow{
+		{Stage: "warmup", ElapsedSec: 1, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 80, PowerW: 420, ClockMHz: 1800, MemClockMHz: 1200},
+	}
+	if err := WriteGPUMetricsCSV(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsCSV: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{
+		"stage,elapsed_sec,gpu_index", // header starts with the stage column
+		`"warmup",1.0,0,71.0,99.0,80.0,420.0,1800,1200`, // stage is quoted; clocks print without decimals
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("csv missing %q\n%s", needle, text)
+		}
+	}
+}
+
+func TestWriteGPUMetricsHTMLShowsStageLegendAndLabels(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.html")
+	rows := []GPUMetricRow{ // one GPU, two consecutive stages
+		{Stage: "baseline", ElapsedSec: 1, GPUIndex: 0, TempC: 50, UsagePct: 10, MemUsagePct: 5, PowerW: 100, ClockMHz: 500, MemClockMHz: 400},
+		{Stage: "baseline", ElapsedSec: 2, GPUIndex: 0, TempC: 51, UsagePct: 11, MemUsagePct: 5, PowerW: 101, ClockMHz: 510, MemClockMHz: 400},
+		{Stage: "steady-fp16", ElapsedSec: 3, GPUIndex: 0, TempC: 70, UsagePct: 98, MemUsagePct: 75, PowerW: 390, ClockMHz: 1700, MemClockMHz: 1100},
+		{Stage: "steady-fp16", ElapsedSec: 4, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 76, PowerW: 395, ClockMHz: 1710, MemClockMHz: 1110},
+	}
+	if err := WriteGPUMetricsHTML(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsHTML: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{ // substrings the rendered page must contain
+		"stage-legend",
+		"baseline",
+		"steady-fp16",
+		"GPU Stress Test Metrics",
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("html missing %q\n%s", needle, text)
+		}
+	}
+}
--- a/audit/internal/platform/install.go
+++ b/audit/internal/platform/install.go
@@ -0,0 +1,269 @@
+package platform
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+)
+
+// InstallDisk describes a candidate disk for installation. Values are
+// populated by ListInstallDisks from lsblk output.
+type InstallDisk struct {
+	Device       string // e.g. /dev/sda
+	Model        string
+	Size         string   // human-readable, e.g. "500G"
+	SizeBytes    int64    // raw byte count from lsblk
+	MountedParts []string // "<part> at <mountpoint>" entries for currently mounted partitions
+}
+
+// squashfsPath is the filesystem image on the mounted live medium. Its size
+// is the basis for MinInstallBytes and for the low-RAM warning in DiskWarnings.
+const squashfsPath = "/run/live/medium/live/filesystem.squashfs"
+
+// ListInstallDisks returns block devices suitable for installation.
+// Excludes the current live boot medium but includes USB drives.
+//
+// Devices are enumerated with `lsblk -dn -o NAME,MODEL,SIZE,TYPE,TRAN`; only
+// rows whose TYPE is "disk" are returned. Size in bytes and mounted partitions
+// are looked up per device. An error is returned only when the initial lsblk
+// invocation fails.
+func (s *System) ListInstallDisks() ([]InstallDisk, error) {
+	out, err := exec.Command("lsblk", "-dn", "-o", "NAME,MODEL,SIZE,TYPE,TRAN").Output()
+	if err != nil {
+		return nil, fmt.Errorf("lsblk: %w", err)
+	}
+
+	bootDev := findLiveBootDevice()
+
+	var disks []InstallDisk
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		name, model, size, ok := parseInstallDiskLine(line)
+		if !ok {
+			continue
+		}
+
+		device := "/dev/" + name
+		if device == bootDev {
+			continue
+		}
+
+		disks = append(disks, InstallDisk{
+			Device:       device,
+			Model:        model,
+			Size:         size,
+			SizeBytes:    diskSizeBytes(device),
+			MountedParts: mountedParts(device),
+		})
+	}
+	return disks, nil
+}
+
+// parseInstallDiskLine splits one `lsblk -dn -o NAME,MODEL,SIZE,TYPE,TRAN` row.
+// ok is false for rows that are not of TYPE "disk" or cannot be parsed.
+//
+// lsblk prints empty columns as blanks, so both MODEL (which may also contain
+// spaces) and TRAN can be missing from the whitespace-split fields. The TYPE
+// column is therefore located by value: it is the last field when TRAN is
+// empty, otherwise the second-to-last. SIZE is the field right before TYPE and
+// MODEL is everything between NAME and SIZE.
+func parseInstallDiskLine(line string) (name, model, size string, ok bool) {
+	fields := strings.Fields(line)
+	n := len(fields)
+
+	typeIdx := -1
+	switch {
+	case n >= 3 && fields[n-1] == "disk":
+		typeIdx = n - 1 // TRAN column is empty (e.g. virtio or some RAID volumes)
+	case n >= 4 && fields[n-2] == "disk":
+		typeIdx = n - 2
+	}
+	if typeIdx < 2 {
+		return "", "", "", false
+	}
+
+	name = fields[0]
+	size = fields[typeIdx-1]
+	model = strings.TrimSpace(strings.Join(fields[1:typeIdx-1], " "))
+	return name, model, size, true
+}
+
+// diskSizeBytes asks lsblk for the size of device in bytes. It yields 0 when
+// the lsblk invocation fails; parse errors on its output are ignored.
+func diskSizeBytes(device string) int64 {
+	raw, err := exec.Command("lsblk", "-bdn", "-o", "SIZE", device).Output()
+	if err != nil {
+		return 0
+	}
+	text := strings.TrimSpace(string(raw))
+	size, _ := strconv.ParseInt(text, 10, 64) // best effort: error deliberately dropped
+	return size
+}
+
+// mountedParts returns a list of "<part> at <mountpoint>" strings for any
+// mounted partitions on the given device. Swap areas are skipped. It returns
+// nil when lsblk fails.
+//
+// lsblk prints the device tree with drawing characters in front of child
+// names. These are UTF-8 box glyphs ("├─", "└─", "│") or, in non-UTF-8
+// locales, ASCII substitutes ("|-", "`-", "|"). Nested children carry an extra
+// "│ " / "| " prefix that would otherwise be split off as a bogus first
+// column. The whole prefix is stripped before the row is split into columns.
+func mountedParts(device string) []string {
+	out, err := exec.Command("lsblk", "-n", "-o", "NAME,MOUNTPOINT", device).Output()
+	if err != nil {
+		return nil
+	}
+	const treePrefix = " \t│├└─|`-"
+	var result []string
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		fields := strings.Fields(strings.TrimLeft(line, treePrefix))
+		if len(fields) < 2 {
+			// Row without a mount point (or an empty row).
+			continue
+		}
+		mp := fields[1]
+		if mp == "[SWAP]" {
+			continue
+		}
+		result = append(result, "/dev/"+fields[0]+" at "+mp)
+	}
+	return result
+}
+
+// findLiveBootDevice reports the whole-disk device behind /run/live/medium,
+// or "" when findmnt fails or reports no source.
+func findLiveBootDevice() string {
+	raw, err := exec.Command("findmnt", "-n", "-o", "SOURCE", "/run/live/medium").Output()
+	if err != nil {
+		return ""
+	}
+	mountSrc := strings.TrimSpace(string(raw))
+	if mountSrc == "" {
+		return ""
+	}
+
+	// Ask lsblk for the parent kernel device so a partition maps to its disk,
+	// e.g. /dev/sdb1 → /dev/sdb,  /dev/nvme0n1p1 → /dev/nvme0n1.
+	// When there is no parent (or lsblk fails) the mount source is used as-is.
+	parentRaw, err := exec.Command("lsblk", "-no", "PKNAME", mountSrc).Output()
+	if err != nil {
+		return mountSrc
+	}
+	if parent := strings.TrimSpace(string(parentRaw)); parent != "" {
+		return "/dev/" + parent
+	}
+	return mountSrc
+}
+
+// mountSource returns the SOURCE column that findmnt reports for target,
+// trimmed of whitespace, or "" when findmnt fails.
+func mountSource(target string) string {
+	if raw, err := exec.Command("findmnt", "-n", "-o", "SOURCE", target).Output(); err == nil {
+		return strings.TrimSpace(string(raw))
+	}
+	return ""
+}
+
+// mountFSType returns the FSTYPE column that findmnt reports for target,
+// trimmed of whitespace, or "" when findmnt fails.
+func mountFSType(target string) string {
+	if raw, err := exec.Command("findmnt", "-n", "-o", "FSTYPE", target).Output(); err == nil {
+		return strings.TrimSpace(string(raw))
+	}
+	return ""
+}
+
+// blockDeviceType returns the lsblk TYPE column for device. A blank device
+// name or a failing lsblk call yields "".
+func blockDeviceType(device string) string {
+	if strings.TrimSpace(device) != "" {
+		if raw, err := exec.Command("lsblk", "-dn", "-o", "TYPE", device).Output(); err == nil {
+			return strings.TrimSpace(string(raw))
+		}
+	}
+	return ""
+}
+
+// blockDeviceTransport returns the lsblk TRAN column for device. A blank
+// device name or a failing lsblk call yields "".
+func blockDeviceTransport(device string) string {
+	if strings.TrimSpace(device) != "" {
+		if raw, err := exec.Command("lsblk", "-dn", "-o", "TRAN", device).Output(); err == nil {
+			return strings.TrimSpace(string(raw))
+		}
+	}
+	return ""
+}
+
+// inferLiveBootKind classifies the live boot medium as "ram", "cdrom", "usb",
+// "disk" or "unknown". Checks run in priority order: tmpfs filesystem, "rom"
+// device type, "usb" transport, then the /dev/sr and /dev/ source prefixes.
+// Inputs are whitespace-trimmed; the first three comparisons ignore case.
+func inferLiveBootKind(fsType, source, deviceType, transport string) string {
+	fs := strings.TrimSpace(fsType)
+	src := strings.TrimSpace(source)
+	devType := strings.TrimSpace(deviceType)
+	tran := strings.TrimSpace(transport)
+
+	if strings.EqualFold(fs, "tmpfs") {
+		return "ram"
+	}
+	if strings.EqualFold(devType, "rom") {
+		return "cdrom"
+	}
+	if strings.EqualFold(tran, "usb") {
+		return "usb"
+	}
+	if strings.HasPrefix(src, "/dev/sr") {
+		return "cdrom"
+	}
+	if strings.HasPrefix(src, "/dev/") {
+		return "disk"
+	}
+	return "unknown"
+}
+
+// MinInstallBytes reports the minimum recommended target disk size: the size
+// of the live squashfs image multiplied by 1.5, leaving room for the
+// extracted filesystem and bootloader.
+// It returns 0 when the squashfs cannot be stat'ed (non-live environment).
+func MinInstallBytes() int64 {
+	info, err := os.Stat(squashfsPath)
+	if err != nil {
+		return 0
+	}
+	imageSize := info.Size()
+	return imageSize * 3 / 2
+}
+
+// toramActive reports whether the kernel command line in /proc/cmdline
+// contains the substring "toram". An unreadable command line counts as false.
+func toramActive() bool {
+	cmdline, err := os.ReadFile("/proc/cmdline")
+	return err == nil && strings.Contains(string(cmdline), "toram")
+}
+
+// freeMemBytes reports the MemAvailable figure from /proc/meminfo converted
+// from kB to bytes. It returns 0 when the file cannot be read or carries no
+// usable MemAvailable line.
+func freeMemBytes() int64 {
+	raw, err := os.ReadFile("/proc/meminfo")
+	if err != nil {
+		return 0
+	}
+	for _, entry := range strings.Split(string(raw), "\n") {
+		if !strings.HasPrefix(entry, "MemAvailable:") {
+			continue
+		}
+		cols := strings.Fields(entry)
+		if len(cols) < 2 {
+			continue
+		}
+		kb, _ := strconv.ParseInt(cols[1], 10, 64) // parse error deliberately ignored
+		return kb * 1024
+	}
+	return 0
+}
+
+// DiskWarnings collects advisory warnings for a disk candidate:
+//   - the disk has mounted partitions,
+//   - the disk is smaller than MinInstallBytes (only when both sizes are known),
+//   - the system was booted with toram and available RAM is below twice the
+//     squashfs size.
+//
+// A nil slice means there is nothing to warn about.
+func DiskWarnings(d InstallDisk) []string {
+	var warnings []string
+
+	if len(d.MountedParts) > 0 {
+		warnings = append(warnings, "has mounted partitions: "+strings.Join(d.MountedParts, ", "))
+	}
+
+	if required := MinInstallBytes(); required > 0 && d.SizeBytes > 0 && d.SizeBytes < required {
+		msg := fmt.Sprintf("disk may be too small (need ≥ %s, have %s)",
+			humanBytes(required), humanBytes(d.SizeBytes))
+		warnings = append(warnings, msg)
+	}
+
+	if !toramActive() {
+		return warnings
+	}
+	image, err := os.Stat(squashfsPath)
+	if err != nil {
+		return warnings
+	}
+	if avail := freeMemBytes(); avail > 0 && avail < image.Size()*2 {
+		warnings = append(warnings, "toram mode — low RAM, extraction may be slow or fail")
+	}
+	return warnings
+}
+
+// humanBytes formats a byte count using binary (1024-based) steps and the
+// suffixes B, KB, MB, GB, TB, PB, EB. Values below 1024 are printed as whole
+// bytes; larger values are printed with one decimal place.
+func humanBytes(b int64) string {
+	const unit = 1024
+	if b < unit {
+		return fmt.Sprintf("%d B", b)
+	}
+	const prefixes = "KMGTPE"
+	divisor := int64(unit)
+	idx := 0
+	for rest := b / unit; rest >= unit; rest /= unit {
+		divisor *= unit
+		idx++
+	}
+	return fmt.Sprintf("%.1f %cB", float64(b)/float64(divisor), prefixes[idx])
+}
+
+// InstallToDisk invokes the external bee-install command with device and
+// logFile as its two arguments and waits for it to finish. The returned error
+// is whatever running the command produced.
+// The command is started with ctx, so cancelling ctx aborts it.
+// NOTE(review): presumably bee-install itself writes its output to logFile —
+// confirm against the script.
+func (s *System) InstallToDisk(ctx context.Context, device string, logFile string) error {
+	return exec.CommandContext(ctx, "bee-install", device, logFile).Run()
+}
+
+// InstallLogPath builds the default install log path for device: slashes and
+// spaces in the device name become underscores and the result is placed
+// between "/tmp/bee-install" and ".log".
+func InstallLogPath(device string) string {
+	sanitized := strings.ReplaceAll(device, "/", "_")
+	sanitized = strings.ReplaceAll(sanitized, " ", "_")
+	return "/tmp/bee-install" + sanitized + ".log"
+}
+
+// Label formats the disk for display as "<device>  <size>  <model>", using
+// "Unknown" when no model string is available.
+func (d InstallDisk) Label() string {
+	if d.Model == "" {
+		return fmt.Sprintf("%s  %s  %s", d.Device, d.Size, "Unknown")
+	}
+	return fmt.Sprintf("%s  %s  %s", d.Device, d.Size, d.Model)
+}
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -0,0 +1,391 @@
+package platform
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+)
+
+// installToRAMDir is the directory that receives the RAM copy of the live
+// medium in RunInstallToRAM and is scanned for *.squashfs files when
+// evaluating the copy state.
+const installToRAMDir = "/dev/shm/bee-live"
+
+// IsLiveMediaInRAM reports the InRAM flag of the current LiveMediaRAMState.
+func (s *System) IsLiveMediaInRAM() bool {
+	state := s.LiveMediaRAMState()
+	return state.InRAM
+}
+
+func (s *System) LiveBootSource() LiveBootSource {
+	fsType := mountFSType("/run/live/medium")
+	source := mountSource("/run/live/medium")
+	device := findLiveBootDevice()
+	status := LiveBootSource{
+		InRAM:  strings.EqualFold(fsType, "tmpfs"),
+		Source: source,
+		Device: device,
+	}
+	if fsType == "" && source == "" && device == "" {
+		if toramActive() {
+			status.InRAM = true
+			status.Kind = "ram"
+			status.Source = "tmpfs"
+			return status
+		}
+		status.Kind = "unknown"
+		return status
+	}
+	status.Kind = inferLiveBootKind(fsType, source, blockDeviceType(device), blockDeviceTransport(device))
+	if status.Kind == "" {
+		status.Kind = "unknown"
+	}
+	if status.InRAM && strings.TrimSpace(status.Source) == "" {
+		status.Source = "tmpfs"
+	}
+	return status
+}
+
+// LiveMediaRAMState gathers the live boot source, the toram flag, the
+// squashfs files on the medium and those already copied to installToRAMDir,
+// and hands them to evaluateLiveMediaRAMState.
+func (s *System) LiveMediaRAMState() LiveMediaRAMState {
+	bootSource := s.LiveBootSource()
+	toram := toramActive()
+	onMedium := globPaths("/run/live/medium/live/*.squashfs")
+	inRAMDir := globPaths(filepath.Join(installToRAMDir, "*.squashfs"))
+	return evaluateLiveMediaRAMState(bootSource, toram, onMedium, inRAMDir)
+}
+
+// evaluateLiveMediaRAMState derives the copy-to-RAM state from already
+// collected facts, without touching the system.
+//
+// A medium that is in RAM is "in_ram"/"ok". Otherwise the result is, in order
+// of precedence: "partial" when all source squashfs names are present in the
+// copy, "partial" when any copy exists, "toram_failed" when toram was
+// requested, and "not_in_ram". Every non-RAM outcome sets CanStartCopy.
+func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
+	result := LiveMediaRAMState{
+		LiveBootSource: status,
+		ToramActive:    toram,
+		CopyPresent:    len(copiedSquashfs) > 0,
+	}
+
+	if status.InRAM {
+		result.State, result.Status = "in_ram", "ok"
+		result.CopyComplete = true
+		result.Message = "Running from RAM — installation media can be safely disconnected."
+		return result
+	}
+
+	wanted := pathBaseSet(sourceSquashfs)
+	present := pathBaseSet(copiedSquashfs)
+	result.CopyComplete = len(wanted) > 0 && setContainsAll(present, wanted)
+	result.CanStartCopy = true
+
+	if result.CopyComplete {
+		result.State, result.Status = "partial", "partial"
+		result.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source."
+	} else if result.CopyPresent {
+		result.State, result.Status = "partial", "partial"
+		result.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled."
+	} else if toram {
+		result.State, result.Status = "toram_failed", "failed"
+		result.Message = "toram boot parameter is set but the live medium is not mounted from RAM."
+	} else {
+		result.State, result.Status = "not_in_ram", "warning"
+		result.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance."
+	}
+	return result
+}
+
+// globPaths returns the paths matching pattern. A malformed pattern is
+// treated like "no matches".
+func globPaths(pattern string) []string {
+	found, _ := filepath.Glob(pattern) // glob error deliberately dropped
+	return found
+}
+
+// pathBaseSet returns the set of base names of paths. Names that are empty
+// after trimming whitespace are left out. The result is never nil.
+func pathBaseSet(paths []string) map[string]struct{} {
+	set := make(map[string]struct{}, len(paths))
+	for i := range paths {
+		name := strings.TrimSpace(filepath.Base(paths[i]))
+		if name == "" {
+			continue
+		}
+		set[name] = struct{}{}
+	}
+	return set
+}
+
+// setContainsAll reports whether every key of want is also a key of have.
+// An empty want set yields false.
+func setContainsAll(have, want map[string]struct{}) bool {
+	for key := range want {
+		if _, present := have[key]; !present {
+			return false
+		}
+	}
+	return len(want) > 0
+}
+
+// RunInstallToRAM copies the live medium into installToRAMDir and tries to
+// redirect the running system to that copy.
+//
+// Steps:
+//  1. Return immediately when the medium is already served from RAM.
+//  2. Locate the squashfs images on the medium and add up their sizes.
+//  3. Remove any stale copy, then check that available RAM covers the images
+//     plus 256 MiB of headroom (skipped when available RAM is unknown).
+//  4. Copy each image and try to re-point its loop device at the copy;
+//     re-association failures are logged, not fatal.
+//  5. Copy the remaining medium files (failures are logged as warnings) and
+//     bind-mount the copy over /run/live/medium (failure is a warning).
+//  6. Verify the outcome with verifyInstallToRAMStatus.
+//
+// logFunc receives progress messages and may be nil. On any error the
+// incomplete copy is removed again. Returned errors wrap their cause, so a
+// cancelled ctx can be detected with errors.Is.
+func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
+	log := func(msg string) {
+		if logFunc != nil {
+			logFunc(msg)
+		}
+	}
+
+	state := s.LiveMediaRAMState()
+	if state.InRAM {
+		log("Already running from RAM — installation media can be safely disconnected.")
+		return nil
+	}
+
+	squashfsFiles, err := filepath.Glob("/run/live/medium/live/*.squashfs")
+	if err != nil || len(squashfsFiles) == 0 {
+		return fmt.Errorf("no squashfs files found in /run/live/medium/live/")
+	}
+
+	var needed int64
+	for _, sf := range squashfsFiles {
+		fi, err2 := os.Stat(sf)
+		if err2 != nil {
+			return fmt.Errorf("stat %s: %w", sf, err2)
+		}
+		needed += fi.Size()
+	}
+
+	// Remove a stale copy BEFORE measuring free memory: the destination lives
+	// under /dev/shm, so an interrupted earlier run still occupies RAM and
+	// would make a retry fail the check even though the space is about to be
+	// released.
+	dstDir := installToRAMDir
+	if state.CopyPresent {
+		log("Removing stale partial RAM copy before retry...")
+	}
+	_ = os.RemoveAll(dstDir) // best effort; MkdirAll below reports real problems
+
+	const headroom = 256 * 1024 * 1024
+	if free := freeMemBytes(); free > 0 && needed+headroom > free {
+		return fmt.Errorf("insufficient RAM: need %s, available %s",
+			humanBytes(needed+headroom), humanBytes(free))
+	}
+
+	if err := os.MkdirAll(dstDir, 0755); err != nil {
+		return fmt.Errorf("create tmpfs dir: %w", err)
+	}
+	defer func() {
+		if retErr == nil {
+			return
+		}
+		_ = os.RemoveAll(dstDir)
+		log("Removed incomplete RAM copy.")
+	}()
+
+	for _, sf := range squashfsFiles {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		base := filepath.Base(sf)
+		dst := filepath.Join(dstDir, base)
+		log(fmt.Sprintf("Copying %s to RAM...", base))
+		if err := copyFileLarge(ctx, sf, dst, log); err != nil {
+			return fmt.Errorf("copy %s: %w", base, err)
+		}
+		log(fmt.Sprintf("Copied %s.", base))
+
+		loopDev, err := findLoopForFile(sf)
+		if err != nil {
+			log(fmt.Sprintf("Loop device for %s not found (%v) — skipping re-association.", base, err))
+			continue
+		}
+		if err := reassociateLoopDevice(loopDev, dst); err != nil {
+			log(fmt.Sprintf("Warning: could not re-associate %s → %s: %v", loopDev, dst, err))
+		} else {
+			log(fmt.Sprintf("Loop device %s now backed by RAM copy.", loopDev))
+		}
+	}
+
+	log("Copying remaining medium files...")
+	if err := cpDir(ctx, "/run/live/medium", dstDir, log); err != nil {
+		log(fmt.Sprintf("Warning: partial copy: %v", err))
+	}
+	if err := ctx.Err(); err != nil {
+		return err
+	}
+
+	mediumRebound := false
+	if err := bindMount(dstDir, "/run/live/medium"); err != nil {
+		log(fmt.Sprintf("Warning: rebind /run/live/medium → %s failed: %v", dstDir, err))
+	} else {
+		mediumRebound = true
+	}
+
+	log("Verifying live medium now served from RAM...")
+	status := s.LiveBootSource()
+	if err := verifyInstallToRAMStatus(status, dstDir, mediumRebound, log); err != nil {
+		return err
+	}
+	if status.InRAM {
+		log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
+	}
+	log("Done. Squashfs files are in RAM. Installation media can be safely disconnected.")
+	return nil
+}
+
+// verifyInstallToRAMStatus decides whether a copy-to-RAM run can be reported
+// as successful.
+//
+// It returns nil when status says the medium is in RAM, or when at least one
+// *.squashfs file exists in dstDir. In the latter case, if the medium could
+// not be rebound, two explanatory notes are sent to log. Otherwise an error
+// naming the current boot source and dstDir is returned.
+//
+// log may be nil, in which case the notes are dropped.
+func verifyInstallToRAMStatus(status LiveBootSource, dstDir string, mediumRebound bool, log func(string)) error {
+	if status.InRAM {
+		return nil
+	}
+
+	// The live medium mount was not redirected to RAM. This is expected when
+	// booting from an ISO/CD-ROM: the squashfs loop device has a non-zero
+	// offset and LOOP_CHANGE_FD cannot be used; the bind mount also fails
+	// because the CD-ROM mount is in use. Check whether files were at least
+	// copied to the tmpfs directory — that is sufficient for safe disconnection
+	// once the kernel has paged in all actively-used data.
+	files, _ := filepath.Glob(filepath.Join(dstDir, "*.squashfs"))
+	if len(files) > 0 {
+		if !mediumRebound && log != nil {
+			log(fmt.Sprintf("Note: squashfs copied to RAM (%s) but /run/live/medium still shows the original source.", dstDir))
+			log("This is normal for CD-ROM boots. For a fully transparent RAM boot, add 'toram' to the kernel parameters.")
+		}
+		return nil
+	}
+
+	return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s and no squashfs found in %s", describeLiveBootSource(status), dstDir)
+}
+
+// describeLiveBootSource renders a boot source for log and error messages.
+// "ram" becomes "RAM"; usb, cdrom and disk are rendered as a label followed by
+// the device (or, failing that, the mount source) in parentheses; any other
+// kind yields just that device/source string, or "unknown source".
+func describeLiveBootSource(status LiveBootSource) string {
+	kind := strings.TrimSpace(status.Kind)
+	if kind == "ram" {
+		return "RAM"
+	}
+
+	origin := "unknown source"
+	if dev := strings.TrimSpace(status.Device); dev != "" {
+		origin = dev
+	} else if src := strings.TrimSpace(status.Source); src != "" {
+		origin = src
+	}
+
+	switch kind {
+	case "usb":
+		return "USB (" + origin + ")"
+	case "cdrom":
+		return "CD-ROM (" + origin + ")"
+	case "disk":
+		return "disk (" + origin + ")"
+	}
+	return origin
+}
+
+// copyFileLarge copies src to dst in 4 MiB chunks, checking ctx before every
+// chunk so that a long copy can be cancelled. dst is created or truncated and
+// synced before returning.
+//
+// When logFunc is non-nil and the source size is known, a progress line is
+// emitted each time the whole-percent figure changes (not per chunk, which
+// would produce hundreds of lines per gigabyte).
+//
+// The returned error is the first failure among open/stat/create, read,
+// write, sync and closing dst, or ctx.Err() on cancellation.
+func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) (retErr error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+	fi, err := in.Stat()
+	if err != nil {
+		return err
+	}
+	out, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// Report a failed close only when nothing else has gone wrong.
+		if cerr := out.Close(); cerr != nil && retErr == nil {
+			retErr = cerr
+		}
+	}()
+
+	total := fi.Size()
+	var copied int64
+	lastPct := -1
+	buf := make([]byte, 4*1024*1024)
+	for {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		n, rerr := in.Read(buf)
+		if n > 0 {
+			if _, werr := out.Write(buf[:n]); werr != nil {
+				return werr
+			}
+			copied += int64(n)
+			if logFunc != nil && total > 0 {
+				if pct := int(float64(copied) / float64(total) * 100); pct != lastPct {
+					lastPct = pct
+					logFunc(fmt.Sprintf("  %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
+				}
+			}
+		}
+		if rerr == io.EOF {
+			break
+		}
+		if rerr != nil {
+			return rerr
+		}
+	}
+	return out.Sync()
+}
+
+// cpDir mirrors the tree under src into dst. Directories are created with the
+// source mode, *.squashfs files are skipped, files that already exist at the
+// destination are left alone and everything else is copied with
+// copyFileLarge. Entries the walk cannot read are ignored. The walk stops with
+// ctx.Err() once ctx is cancelled. logFunc is currently not used.
+func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
+	visit := func(path string, fi os.FileInfo, walkErr error) error {
+		if cancelErr := ctx.Err(); cancelErr != nil {
+			return cancelErr
+		}
+		if walkErr != nil {
+			// Unreadable entry: skip it and keep walking.
+			return nil
+		}
+		rel, _ := filepath.Rel(src, path)
+		target := filepath.Join(dst, rel)
+		switch {
+		case fi.IsDir():
+			return os.MkdirAll(target, fi.Mode())
+		case strings.HasSuffix(path, ".squashfs"):
+			return nil
+		}
+		if _, statErr := os.Stat(target); statErr == nil {
+			return nil
+		}
+		return copyFileLarge(ctx, path, target, nil)
+	}
+	return filepath.Walk(src, visit)
+}
+
+// findLoopForFile returns the name of the loop device whose backing file, as
+// listed by `losetup --list --json`, equals backingFile exactly. An error is
+// returned when losetup fails, its output is not valid JSON, or no device
+// matches.
+func findLoopForFile(backingFile string) (string, error) {
+	raw, err := exec.Command("losetup", "--list", "--json").Output()
+	if err != nil {
+		return "", err
+	}
+
+	type loopEntry struct {
+		Name     string `json:"name"`
+		BackFile string `json:"back-file"`
+	}
+	var listing struct {
+		Loopdevices []loopEntry `json:"loopdevices"`
+	}
+	if err := json.Unmarshal(raw, &listing); err != nil {
+		return "", err
+	}
+
+	for i := range listing.Loopdevices {
+		if entry := listing.Loopdevices[i]; entry.BackFile == backingFile {
+			return entry.Name, nil
+		}
+	}
+	return "", fmt.Errorf("no loop device found for %s", backingFile)
+}
+
+// loopDeviceOffset reports the byte offset that `losetup --json` shows for
+// loopDev. It returns -1 when losetup fails, the output cannot be decoded or
+// no device is listed.
+func loopDeviceOffset(loopDev string) int64 {
+	raw, err := exec.Command("losetup", "--json", loopDev).Output()
+	if err != nil {
+		return -1
+	}
+
+	type loopEntry struct {
+		Offset int64 `json:"offset"`
+	}
+	var listing struct {
+		Loopdevices []loopEntry `json:"loopdevices"`
+	}
+	if json.Unmarshal(raw, &listing) != nil {
+		return -1
+	}
+	if len(listing.Loopdevices) == 0 {
+		return -1
+	}
+	return listing.Loopdevices[0].Offset
+}
+
+// reassociateLoopDevice switches loopDev over to newFile as its backing file.
+// Devices with a positive offset are rejected up front. Otherwise
+// `losetup --replace` is tried first and loopChangeFD is the fallback when
+// that command fails.
+func reassociateLoopDevice(loopDev, newFile string) error {
+	// LOOP_CHANGE_FD requires lo_offset == 0. ISO/CD-ROM loop devices are
+	// typically set up with a non-zero offset (squashfs lives inside the ISO),
+	// so the ioctl returns EINVAL. Detect this early for a clear error message.
+	offset := loopDeviceOffset(loopDev)
+	if offset > 0 {
+		return fmt.Errorf("loop device has non-zero offset (%d bytes, typical for ISO/CD-ROM) — LOOP_CHANGE_FD not supported; use 'toram' kernel parameter for RAM boot", offset)
+	}
+
+	replaceErr := exec.Command("losetup", "--replace", loopDev, newFile).Run()
+	if replaceErr != nil {
+		return loopChangeFD(loopDev, newFile)
+	}
+	return nil
+}
--- a/audit/internal/platform/install_to_ram_linux.go
+++ b/audit/internal/platform/install_to_ram_linux.go
@@ -0,0 +1,33 @@
+//go:build linux
+
+package platform
+
+import (
+	"os"
+	"syscall"
+)
+
+// ioctlLoopChangeFD is the LOOP_CHANGE_FD request number from <linux/loop.h>.
+// The loop ioctls are numbered consecutively from LOOP_SET_FD (0x4C00);
+// LOOP_CHANGE_FD is 0x4C06. 0x4C08 is LOOP_SET_DIRECT_IO, which would toggle
+// direct I/O instead of swapping the backing file.
+const ioctlLoopChangeFD = 0x4C06
+
+// loopChangeFD opens loopDev read-write and newFile read-only, then issues
+// the ioctlLoopChangeFD request on the loop device with the new file's
+// descriptor as argument. Open failures and a non-zero errno from the ioctl
+// are returned as the error.
+func loopChangeFD(loopDev, newFile string) error {
+	loopFile, err := os.OpenFile(loopDev, os.O_RDWR, 0)
+	if err != nil {
+		return err
+	}
+	defer loopFile.Close()
+
+	backing, err := os.OpenFile(newFile, os.O_RDONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer backing.Close()
+
+	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, loopFile.Fd(), ioctlLoopChangeFD, backing.Fd()); errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+// bindMount binds src over dst using the syscall directly (avoids exec PATH issues).
+func bindMount(src, dst string) error {
+	return syscall.Mount(src, dst, "", syscall.MS_BIND, "")
+}
--- a/audit/internal/platform/install_to_ram_other.go
+++ b/audit/internal/platform/install_to_ram_other.go
@@ -0,0 +1,13 @@
+//go:build !linux
+
+package platform
+
+import "errors"
+
+// loopChangeFD is the non-Linux stub: it ignores its arguments and always
+// returns an error.
+func loopChangeFD(loopDev, newFile string) error {
+	return errors.New("LOOP_CHANGE_FD not available on this platform")
+}
+
+// bindMount is the non-Linux stub: it ignores its arguments and always
+// returns an error.
+func bindMount(src, dst string) error {
+	return errors.New("bind mount not available on this platform")
+}
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -0,0 +1,103 @@
+package platform
+
+import "testing"
+
+func TestInferLiveBootKind(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name       string
+		fsType     string
+		source     string
+		deviceType string
+		transport  string
+		want       string
+	}{
+		{name: "ram tmpfs", fsType: "tmpfs", source: "/dev/shm/bee-live", want: "ram"},
+		{name: "usb disk", source: "/dev/sdb1", deviceType: "disk", transport: "usb", want: "usb"},
+		{name: "cdrom rom", source: "/dev/sr0", deviceType: "rom", want: "cdrom"},
+		{name: "disk sata", source: "/dev/nvme0n1p1", deviceType: "disk", transport: "nvme", want: "disk"},
+		{name: "unknown", source: "overlay", want: "unknown"},
+	}
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
+			if got != tc.want {
+				t.Fatalf("inferLiveBootKind(%q,%q,%q,%q)=%q want %q", tc.fsType, tc.source, tc.deviceType, tc.transport, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestVerifyInstallToRAMStatus covers the two outcomes reachable with an
+// empty destination directory: success for a RAM-backed medium and the exact
+// error text for a medium still on USB.
+func TestVerifyInstallToRAMStatus(t *testing.T) {
+	t.Parallel()
+
+	emptyDir := t.TempDir()
+
+	inRAM := LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}
+	if err := verifyInstallToRAMStatus(inRAM, emptyDir, false, nil); err != nil {
+		t.Fatalf("expected success for RAM-backed status, got %v", err)
+	}
+
+	onUSB := LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}
+	err := verifyInstallToRAMStatus(onUSB, emptyDir, false, nil)
+	if err == nil {
+		t.Fatal("expected verification failure when media is still on USB")
+	}
+	want := "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1) and no squashfs found in " + emptyDir
+	if got := err.Error(); got != want {
+		t.Fatalf("error=%q", got)
+	}
+}
+
+// TestDescribeLiveBootSource checks the rendering of a RAM source and of an
+// unknown-kind source that only has a mount source string.
+func TestDescribeLiveBootSource(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		status LiveBootSource
+		want   string
+	}{
+		{status: LiveBootSource{InRAM: true, Kind: "ram"}, want: "RAM"},
+		{status: LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}, want: "/run/live/medium"},
+	}
+	for _, c := range cases {
+		if got := describeLiveBootSource(c.status); got != c.want {
+			t.Fatalf("got %q want %s", got, c.want)
+		}
+	}
+}
+
+// TestEvaluateLiveMediaRAMState exercises three states of the copy-to-RAM
+// evaluation: already in RAM, an incomplete copy, and a toram boot that did
+// not end up in RAM.
+func TestEvaluateLiveMediaRAMState(t *testing.T) {
+	t.Parallel()
+
+	usbBoot := LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}
+
+	t.Run("in_ram", func(t *testing.T) {
+		ramBoot := LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}
+		got := evaluateLiveMediaRAMState(ramBoot, false, nil, nil)
+		if got.State != "in_ram" || got.Status != "ok" || got.CanStartCopy {
+			t.Fatalf("state=%+v", got)
+		}
+	})
+
+	t.Run("partial_copy_after_cancel", func(t *testing.T) {
+		onMedium := []string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"}
+		copied := []string{"/dev/shm/bee-live/filesystem.squashfs"}
+		got := evaluateLiveMediaRAMState(usbBoot, false, onMedium, copied)
+		if got.State != "partial" || got.Status != "partial" || !got.CanStartCopy {
+			t.Fatalf("state=%+v", got)
+		}
+		if got.CopyComplete {
+			t.Fatalf("CopyComplete=%v want false", got.CopyComplete)
+		}
+	})
+
+	t.Run("toram_failed", func(t *testing.T) {
+		got := evaluateLiveMediaRAMState(usbBoot, true, nil, nil)
+		if got.State != "toram_failed" || got.Status != "failed" || !got.CanStartCopy {
+			t.Fatalf("state=%+v", got)
+		}
+	})
+}
--- a/audit/internal/platform/kill_workers.go
+++ b/audit/internal/platform/kill_workers.go
@@ -0,0 +1,68 @@
+package platform
+
+import (
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+	"syscall"
+)
+
+// workerPatterns are substrings matched against the first argument of
+// /proc/<pid>/cmdline to identify bee test worker processes that should be
+// killed by KillTestWorkers.
+var workerPatterns = []string{
+	"bee-gpu-burn",
+	"stress-ng",
+	"stressapptest",
+	"memtester",
+	// DCGM diagnostic workers — nvvs is spawned by dcgmi diag and survives
+	// if dcgmi is killed mid-run, leaving the GPU occupied (DCGM_ST_IN_USE).
+	"nvvs",
+	"dcgmi",
+}
+
+// KilledProcess describes a process that was sent SIGKILL by KillTestWorkers.
+// PID is the process ID; Name is the base name of its executable path.
+type KilledProcess struct {
+	PID  int    `json:"pid"`
+	Name string `json:"name"`
+}
+
+// KillTestWorkers scans /proc for running test worker processes and sends
+// SIGKILL to each one found. A process is a worker when the first argument of
+// its command line contains one of workerPatterns.
+//
+// It returns the processes the signal was actually delivered to. Processes
+// whose kill call failed (for example because they already exited) are
+// silently skipped and not reported. The calling process itself is never
+// signalled. It returns nil when /proc cannot be read.
+func KillTestWorkers() []KilledProcess {
+	entries, err := os.ReadDir("/proc")
+	if err != nil {
+		return nil
+	}
+
+	self := os.Getpid()
+	var killed []KilledProcess
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		pid, err := strconv.Atoi(e.Name())
+		if err != nil || pid == self {
+			continue
+		}
+		cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
+		if err != nil {
+			continue
+		}
+		// /proc/*/cmdline uses NUL bytes as argument separators.
+		args := strings.SplitN(strings.ReplaceAll(string(cmdline), "\x00", " "), " ", 2)
+		exe := strings.TrimSpace(args[0])
+		if exe == "" {
+			// Empty command line: nothing to match against.
+			continue
+		}
+		// LastIndexByte returns -1 when there is no slash, so base == exe then.
+		base := exe[strings.LastIndexByte(exe, '/')+1:]
+		for _, pat := range workerPatterns {
+			// base is a suffix of exe, so matching exe covers both.
+			if !strings.Contains(exe, pat) {
+				continue
+			}
+			if err := syscall.Kill(pid, syscall.SIGKILL); err == nil {
+				killed = append(killed, KilledProcess{PID: pid, Name: base})
+			}
+			break
+		}
+	}
+	return killed
+}
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -0,0 +1,328 @@
+package platform
+
+import (
+	"bufio"
+	"encoding/json"
+	"os"
+	"os/exec"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// LiveMetricSample is a single point-in-time snapshot of server metrics
+// collected for the web UI metrics page. It is produced by SampleLiveMetrics;
+// sources that are unavailable leave their field at the zero value.
+type LiveMetricSample struct {
+	Timestamp  time.Time      `json:"ts"`
+	Fans       []FanReading   `json:"fans"`
+	Temps      []TempReading  `json:"temps"`
+	PowerW     float64        `json:"power_w"`
+	CPULoadPct float64        `json:"cpu_load_pct"`
+	MemLoadPct float64        `json:"mem_load_pct"`
+	GPUs       []GPUMetricRow `json:"gpus"`
+}
+
+// TempReading is a named temperature sensor value. Group is the sensor class
+// assigned by classifyLiveTempGroup ("cpu", "gpu" or "ambient") and is
+// omitted from JSON when empty.
+type TempReading struct {
+	Name    string  `json:"name"`
+	Group   string  `json:"group,omitempty"`
+	Celsius float64 `json:"celsius"`
+}
+
+// SampleLiveMetrics takes one snapshot of GPU metrics, fan speeds,
+// temperatures, system power, CPU load and memory load, stamped with the
+// current UTC time. Sources that fail or report nothing leave their field at
+// its zero value.
+func SampleLiveMetrics() LiveMetricSample {
+	sample := LiveMetricSample{Timestamp: time.Now().UTC()}
+
+	// GPUs: NVIDIA is probed first; AMD is consulted only when that yields nothing.
+	nvidia, err := SampleGPUMetrics(nil)
+	if err == nil && len(nvidia) > 0 {
+		sample.GPUs = nvidia
+	} else if amd, amdErr := sampleAMDGPUMetrics(); amdErr == nil && len(amd) > 0 {
+		sample.GPUs = amd
+	}
+
+	// Fans: errors are dropped on purpose (ipmitool may be unavailable).
+	sample.Fans, _ = sampleFanSpeeds()
+
+	// Temperatures: when no sensor landed in the "cpu" group, fall back to a
+	// single "CPU Max" reading if one is available.
+	sample.Temps = append(sample.Temps, sampleLiveTemperatureReadings()...)
+	if !hasTempGroup(sample.Temps, "cpu") {
+		maxTemp := sampleCPUMaxTemp()
+		if maxTemp > 0 {
+			sample.Temps = append(sample.Temps, TempReading{Name: "CPU Max", Group: "cpu", Celsius: maxTemp})
+		}
+	}
+
+	sample.PowerW = sampleSystemPower()   // 0 if unavailable
+	sample.CPULoadPct = sampleCPULoadPct() // from /proc/stat
+	sample.MemLoadPct = sampleMemLoadPct() // from /proc/meminfo
+
+	return sample
+}
+
+// sampleCPULoadPct reads two /proc/stat snapshots 200ms apart and returns
+// the overall CPU utilisation percentage.
+func sampleCPULoadPct() float64 {
+	total0, idle0 := readCPUStat()
+	if total0 == 0 {
+		return 0
+	}
+	time.Sleep(200 * time.Millisecond)
+	total1, idle1 := readCPUStat()
+	if total1 == 0 {
+		return 0
+	}
+	return cpuLoadPctBetween(total0, idle0, total1, idle1)
+}
+
+func cpuLoadPctBetween(prevTotal, prevIdle, total, idle uint64) float64 {
+	dt := float64(total - prevTotal)
+	di := float64(idle - prevIdle)
+	if dt <= 0 {
+		return 0
+	}
+	pct := (1 - di/dt) * 100
+	if pct < 0 {
+		return 0
+	}
+	if pct > 100 {
+		return 100
+	}
+	return pct
+}
+
+// readCPUStat reads the aggregate "cpu " line of /proc/stat and returns the
+// cumulative total and idle tick counts. idle is the sum of the idle and
+// iowait columns. It returns (0, 0) when the file cannot be opened or has no
+// aggregate line.
+//
+// Only the first eight columns (user, nice, system, idle, iowait, irq,
+// softirq, steal) are summed into total. The trailing guest and guest_nice
+// columns are already accounted for in user and nice, so adding them again
+// would inflate total on hosts that run virtual machines.
+func readCPUStat() (total, idle uint64) {
+	f, err := os.Open("/proc/stat")
+	if err != nil {
+		return 0, 0
+	}
+	defer f.Close()
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		line := sc.Text()
+		if !strings.HasPrefix(line, "cpu ") {
+			continue
+		}
+		fields := strings.Fields(line)[1:] // skip "cpu"
+		var vals [8]uint64
+		for i := 0; i < len(fields) && i < len(vals); i++ {
+			// Unparsable columns are ignored on purpose.
+			vals[i], _ = strconv.ParseUint(fields[i], 10, 64)
+		}
+		// idle = idle + iowait
+		idle = vals[3] + vals[4]
+		for _, v := range vals {
+			total += v
+		}
+		return total, idle
+	}
+	return 0, 0
+}
+
+func sampleMemLoadPct() float64 {
+	f, err := os.Open("/proc/meminfo")
+	if err != nil {
+		return 0
+	}
+	defer f.Close()
+	vals := map[string]uint64{}
+	sc := bufio.NewScanner(f)
+	for sc.Scan() {
+		fields := strings.Fields(sc.Text())
+		if len(fields) >= 2 {
+			v, _ := strconv.ParseUint(fields[1], 10, 64)
+			vals[strings.TrimSuffix(fields[0], ":")] = v
+		}
+	}
+	total := vals["MemTotal"]
+	avail := vals["MemAvailable"]
+	if total == 0 {
+		return 0
+	}
+	used := total - avail
+	return float64(used) / float64(total) * 100
+}
+
+// hasTempGroup reports whether any reading in temps belongs to group
+// (exact string match).
+func hasTempGroup(temps []TempReading, group string) bool {
+	for i := range temps {
+		if temps[i].Group == group {
+			return true
+		}
+	}
+	return false
+}
+
+// sampleLiveTemperatureReadings returns temperatures from `sensors -j` and
+// falls back to IPMI when that source yields no readings.
+func sampleLiveTemperatureReadings() []TempReading {
+	readings := sampleLiveTempsViaSensorsJSON()
+	if len(readings) == 0 {
+		readings = sampleLiveTempsViaIPMI()
+	}
+	return readings
+}
+
+// sampleLiveTempsViaSensorsJSON collects temperature readings from the JSON
+// output of `sensors -j`. It returns nil when the command fails, prints
+// nothing, or prints something that does not decode as chip → feature → value.
+//
+// Chips and features are visited in sorted order so the result is stable
+// between calls. Readings classified as "gpu" are left out, values outside
+// (0, 150] are discarded, and only the first reading per (group, label) pair
+// is kept.
+func sampleLiveTempsViaSensorsJSON() []TempReading {
+	out, err := exec.Command("sensors", "-j").Output()
+	if err != nil || len(out) == 0 {
+		return nil
+	}
+
+	// Top level: chip name → feature name → feature payload.
+	var doc map[string]map[string]any
+	if err := json.Unmarshal(out, &doc); err != nil {
+		return nil
+	}
+
+	// Map iteration order is random; sort for deterministic output.
+	chips := make([]string, 0, len(doc))
+	for chip := range doc {
+		chips = append(chips, chip)
+	}
+	sort.Strings(chips)
+
+	temps := make([]TempReading, 0, len(chips))
+	seen := map[string]struct{}{}
+	for _, chip := range chips {
+		features := doc[chip]
+		featureNames := make([]string, 0, len(features))
+		for name := range features {
+			featureNames = append(featureNames, name)
+		}
+		sort.Strings(featureNames)
+		for _, name := range featureNames {
+			// "Adapter" is chip metadata, not a sensor feature.
+			if strings.EqualFold(name, "Adapter") {
+				continue
+			}
+			// Anything that is not a JSON object cannot hold sub-readings.
+			feature, ok := features[name].(map[string]any)
+			if !ok {
+				continue
+			}
+			// Non-temperature features yield ok == false; the range check
+			// drops absent or implausible values.
+			value, ok := firstTempInputValue(feature)
+			if !ok || value <= 0 || value > 150 {
+				continue
+			}
+			group := classifyLiveTempGroup(chip, name)
+			if group == "gpu" {
+				continue
+			}
+			label := strings.TrimSpace(name)
+			if label == "" {
+				continue
+			}
+			// Ambient feature names are generic, so they get the chip name
+			// attached to stay distinguishable.
+			if group == "ambient" {
+				label = compactAmbientTempName(chip, label)
+			}
+			// De-duplicate on group + label; the first occurrence wins.
+			key := group + "\x00" + label
+			if _, ok := seen[key]; ok {
+				continue
+			}
+			seen[key] = struct{}{}
+			temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
+		}
+	}
+	return temps
+}
+
+// sampleLiveTempsViaIPMI collects temperature readings from
+// `ipmitool sdr type Temperature`. It returns nil when the command fails or
+// prints nothing.
+//
+// Readings classified as "gpu" are left out, values outside (0, 150] are
+// discarded, and only the first reading per (group, label) pair is kept.
+func sampleLiveTempsViaIPMI() []TempReading {
+	out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
+	if err != nil || len(out) == 0 {
+		return nil
+	}
+	var temps []TempReading
+	seen := map[string]struct{}{}
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		name, value, ok := parseIPMITempLine(line)
+		if !ok || value <= 0 || value > 150 {
+			continue
+		}
+		group := classifyLiveTempGroup("", name)
+		if group == "gpu" {
+			continue
+		}
+		label := name
+		if group == "ambient" {
+			label = compactAmbientTempName("", label)
+		}
+		// De-duplicate on group + label; the first occurrence wins.
+		key := group + "\x00" + label
+		if _, ok := seen[key]; ok {
+			continue
+		}
+		seen[key] = struct{}{}
+		temps = append(temps, TempReading{Name: label, Group: group, Celsius: value})
+	}
+	return temps
+}
+
+// parseIPMITempLine extracts the sensor name and reading from one
+// pipe-separated ipmitool row. ok is false when the row has no name, no
+// "degrees" column or no numeric reading (e.g. "na", "No Reading").
+//
+// Two layouts are understood:
+//
+//	Inlet Temp | 04h | ok | 7.1 | 20 degrees C     (sdr type / sdr elist)
+//	Inlet Temp | 20.000 | degrees C | ok | ...     (sensor)
+//
+// The column containing "degrees" is located first; the number is taken from
+// the text in front of "degrees" in that column or, when that is empty, from
+// the preceding column.
+func parseIPMITempLine(line string) (name string, celsius float64, ok bool) {
+	parts := strings.Split(line, "|")
+	if len(parts) < 3 {
+		return "", 0, false
+	}
+	name = strings.TrimSpace(parts[0])
+	if name == "" {
+		return "", 0, false
+	}
+	for i := 1; i < len(parts); i++ {
+		col := strings.ToLower(strings.TrimSpace(parts[i]))
+		idx := strings.Index(col, "degrees")
+		if idx < 0 {
+			continue
+		}
+		raw := strings.TrimSpace(col[:idx])
+		if raw == "" && i > 1 {
+			raw = strings.TrimSpace(parts[i-1])
+		}
+		v, err := strconv.ParseFloat(raw, 64)
+		if err != nil {
+			return "", 0, false
+		}
+		return name, v, true
+	}
+	return "", 0, false
+}
+
+// firstTempInputValue picks the reading of a sensors feature: among the keys
+// that contain "temp" and end in "_input" (case-insensitive), taken in sorted
+// order, it returns the first value that is a number or a string that parses
+// as one. ok is false when no such key/value exists.
+func firstTempInputValue(feature map[string]any) (float64, bool) {
+	var candidates []string
+	for key := range feature {
+		lower := strings.ToLower(key)
+		if strings.Contains(lower, "temp") && strings.HasSuffix(lower, "_input") {
+			candidates = append(candidates, key)
+		}
+	}
+	sort.Strings(candidates)
+
+	for _, key := range candidates {
+		switch v := feature[key].(type) {
+		case float64:
+			return v, true
+		case string:
+			if parsed, err := strconv.ParseFloat(v, 64); err == nil {
+				return parsed, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// classifyLiveTempGroup assigns a sensor to "gpu", "cpu" or "ambient" based
+// on keywords found in the lower-cased "<chip> <name>" text. GPU keywords
+// take precedence over CPU keywords; everything else is "ambient".
+func classifyLiveTempGroup(chip, name string) string {
+	haystack := strings.ToLower(strings.TrimSpace(chip + " " + name))
+	containsAny := func(keywords ...string) bool {
+		for _, kw := range keywords {
+			if strings.Contains(haystack, kw) {
+				return true
+			}
+		}
+		return false
+	}
+
+	if containsAny("gpu", "amdgpu", "nvidia", "adeon") {
+		return "gpu"
+	}
+	if containsAny(
+		"coretemp",
+		"k10temp",
+		"zenpower",
+		"package id",
+		"x86_pkg_temp",
+		"tctl",
+		"tdie",
+		"tccd",
+		"cpu",
+		"peci",
+	) {
+		return "cpu"
+	}
+	return "ambient"
+}
+
+// compactAmbientTempName builds a display label for an ambient sensor. The
+// result is "<chip> / <name>" unless the chip is empty, equals the name
+// (ignoring case) or is already contained in the name (ignoring case), in
+// which case the trimmed name alone is returned.
+func compactAmbientTempName(chip, name string) string {
+	chipName := strings.TrimSpace(chip)
+	label := strings.TrimSpace(name)
+	switch {
+	case chipName == "",
+		strings.EqualFold(chipName, label),
+		strings.Contains(strings.ToLower(label), strings.ToLower(chipName)):
+		return label
+	}
+	return chipName + " / " + label
+}
--- a/audit/internal/platform/live_metrics_test.go
+++ b/audit/internal/platform/live_metrics_test.go
@@ -0,0 +1,94 @@
+package platform
+
+import "testing"
+
+// TestFirstTempInputValue checks that the "*_input" reading is picked over the
+// sibling "*_max" entry of the same feature.
+func TestFirstTempInputValue(t *testing.T) {
+	const want = 61.5
+	got, ok := firstTempInputValue(map[string]any{
+		"temp1_input": want,
+		"temp1_max":   80.0,
+	})
+	switch {
+	case !ok:
+		t.Fatal("expected value")
+	case got != want:
+		t.Fatalf("got %v want 61.5", got)
+	}
+}
+
+// TestClassifyLiveTempGroup covers one CPU chip, one GPU chip and two chips
+// that must fall through to the "ambient" group.
+func TestClassifyLiveTempGroup(t *testing.T) {
+	cases := []struct {
+		chip, name, want string
+	}{
+		{"coretemp-isa-0000", "Package id 0", "cpu"},
+		{"amdgpu-pci-4300", "edge", "gpu"},
+		{"nvme-pci-0100", "Composite", "ambient"},
+		{"acpitz-acpi-0", "temp1", "ambient"},
+	}
+	for _, c := range cases {
+		got := classifyLiveTempGroup(c.chip, c.name)
+		if got == c.want {
+			continue
+		}
+		t.Fatalf("classifyLiveTempGroup(%q,%q)=%q want %q", c.chip, c.name, got, c.want)
+	}
+}
+
+// TestCompactAmbientTempName checks the "<chip> / <name>" form and the
+// empty-chip shortcut.
+func TestCompactAmbientTempName(t *testing.T) {
+	cases := []struct {
+		chip, name, want string
+	}{
+		{"nvme-pci-0100", "Composite", "nvme-pci-0100 / Composite"},
+		{"", "Inlet Temp", "Inlet Temp"},
+	}
+	for _, c := range cases {
+		if got := compactAmbientTempName(c.chip, c.name); got != c.want {
+			t.Fatalf("got %q", got)
+		}
+	}
+}
+
+// TestCPULoadPctBetween feeds pairs of (total, idle) counters to
+// cpuLoadPctBetween and checks the resulting busy percentage, including the
+// no-progress case and the case where the idle delta exceeds the total delta.
+func TestCPULoadPctBetween(t *testing.T) {
+	cases := []struct {
+		name                string
+		prevTotal, prevIdle uint64
+		total, idle         uint64
+		want                float64
+	}{
+		{"busy half", 100, 40, 200, 90, 50},
+		{"fully busy", 100, 40, 200, 40, 100},
+		{"no progress", 100, 40, 100, 40, 0},
+		{"idle delta larger than total clamps to zero", 100, 40, 200, 150, 0},
+	}
+
+	for _, c := range cases {
+		got := cpuLoadPctBetween(c.prevTotal, c.prevIdle, c.total, c.idle)
+		if got != c.want {
+			t.Fatalf("%s: cpuLoadPctBetween(...)=%v want %v", c.name, got, c.want)
+		}
+	}
+}
--- a/audit/internal/platform/network.go
+++ b/audit/internal/platform/network.go
@@ -2,6 +2,7 @@ package platform

 import (
 	"bytes"
+	"errors"
 	"fmt"
 	"os"
 	"os/exec"
@@ -18,21 +19,17 @@ func (s *System) ListInterfaces() ([]InterfaceInfo, error) {
 	out := make([]InterfaceInfo, 0, len(names))
 	for _, name := range names {
 		state := "unknown"
-		if raw, err := exec.Command("ip", "-o", "link", "show", name).Output(); err == nil {
-			fields := strings.Fields(string(raw))
-			if len(fields) >= 9 {
-				state = fields[8]
+		if up, err := interfaceAdminState(name); err == nil {
+			if up {
+				state = "up"
+			} else {
+				state = "down"
 			}
 		}

-		var ipv4 []string
-		if raw, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", name).Output(); err == nil {
-			for _, line := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
-				fields := strings.Fields(line)
-				if len(fields) >= 4 {
-					ipv4 = append(ipv4, fields[3])
-				}
-			}
+		ipv4, err := interfaceIPv4Addrs(name)
+		if err != nil {
+			ipv4 = nil
 		}

 		out = append(out, InterfaceInfo{Name: name, State: state, IPv4: ipv4})
@@ -55,6 +52,119 @@ func (s *System) DefaultRoute() string {
 	return ""
 }

+// CaptureNetworkSnapshot records the current network configuration: for every
+// interface its admin state and IPv4 addresses, plus the default route lines
+// printed by `ip route show default` and the contents of /etc/resolv.conf.
+// Failing to list interfaces or to query any single interface aborts the
+// capture with that error; the route and resolv.conf reads are best-effort and
+// simply leave their fields empty on failure.
+func (s *System) CaptureNetworkSnapshot() (NetworkSnapshot, error) {
+	ifaceNames, err := listInterfaceNames()
+	if err != nil {
+		return NetworkSnapshot{}, err
+	}
+
+	ifaces := make([]NetworkInterfaceSnapshot, 0, len(ifaceNames))
+	for _, ifaceName := range ifaceNames {
+		adminUp, stateErr := interfaceAdminState(ifaceName)
+		if stateErr != nil {
+			return NetworkSnapshot{}, stateErr
+		}
+		addrs, addrErr := interfaceIPv4Addrs(ifaceName)
+		if addrErr != nil {
+			return NetworkSnapshot{}, addrErr
+		}
+		ifaces = append(ifaces, NetworkInterfaceSnapshot{Name: ifaceName, Up: adminUp, IPv4: addrs})
+	}
+	result := NetworkSnapshot{Interfaces: ifaces}
+
+	routeOut, routeErr := exec.Command("ip", "route", "show", "default").Output()
+	if routeErr == nil {
+		for _, entry := range strings.Split(strings.TrimSpace(string(routeOut)), "\n") {
+			if entry = strings.TrimSpace(entry); entry != "" {
+				result.DefaultRoutes = append(result.DefaultRoutes, entry)
+			}
+		}
+	}
+
+	if contents, readErr := os.ReadFile("/etc/resolv.conf"); readErr == nil {
+		result.ResolvConf = string(contents)
+	}
+
+	return result, nil
+}
+
+// RestoreNetworkSnapshot re-applies a snapshot taken by CaptureNetworkSnapshot.
+//
+// The restore is best-effort: every step is attempted, failures are collected,
+// and a single error joining all failure messages with "; " is returned at the
+// end (nil when everything succeeded). Steps, in order:
+//
+//  1. For each interface: bring the link up, flush its IPv4 addresses, re-add
+//     the captured IPv4 addresses, then set the captured admin state. If the
+//     link cannot be brought up, the remaining steps for that interface are
+//     skipped.
+//  2. Delete the current default route (a non-zero exit from `ip` is ignored,
+//     e.g. when there is no default route) and re-add the captured ones.
+//  3. Overwrite /etc/resolv.conf with the captured contents.
+func (s *System) RestoreNetworkSnapshot(snapshot NetworkSnapshot) error {
+	var errs []string
+
+	for _, iface := range snapshot.Interfaces {
+		if err := exec.Command("ip", "link", "set", "dev", iface.Name, "up").Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: bring up before restore: %v", iface.Name, err))
+			continue
+		}
+		// The snapshot only holds IPv4 addresses, so restrict the flush to IPv4.
+		// An unqualified flush would also remove IPv6 addresses that this
+		// function never puts back.
+		if err := exec.Command("ip", "-4", "addr", "flush", "dev", iface.Name).Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: flush addresses: %v", iface.Name, err))
+		}
+		for _, cidr := range iface.IPv4 {
+			if raw, err := exec.Command("ip", "addr", "add", cidr, "dev", iface.Name).CombinedOutput(); err != nil {
+				detail := strings.TrimSpace(string(raw))
+				if detail != "" {
+					errs = append(errs, fmt.Sprintf("%s: restore address %s: %v: %s", iface.Name, cidr, err, detail))
+				} else {
+					errs = append(errs, fmt.Sprintf("%s: restore address %s: %v", iface.Name, cidr, err))
+				}
+			}
+		}
+		state := "down"
+		if iface.Up {
+			state = "up"
+		}
+		if err := exec.Command("ip", "link", "set", "dev", iface.Name, state).Run(); err != nil {
+			errs = append(errs, fmt.Sprintf("%s: restore state %s: %v", iface.Name, state, err))
+		}
+	}
+
+	if err := exec.Command("ip", "route", "del", "default").Run(); err != nil {
+		// An *exec.ExitError means `ip` ran and refused (typically: no default
+		// route to delete); only failures to run `ip` at all are reported.
+		var exitErr *exec.ExitError
+		if !errors.As(err, &exitErr) {
+			errs = append(errs, fmt.Sprintf("clear default route: %v", err))
+		}
+	}
+	for _, route := range snapshot.DefaultRoutes {
+		fields := strings.Fields(route)
+		if len(fields) == 0 {
+			continue
+		}
+		// Drop flags that `ip route show` prints but that are not passed back
+		// to `ip route add`. "onlink" is deliberately kept: it is a valid
+		// `ip route add` argument, and a gateway outside the interface's
+		// subnets cannot be re-added without it.
+		args := make([]string, 0, len(fields)+2)
+		args = append(args, "route", "add")
+		for _, f := range fields {
+			switch f {
+			case "linkdown", "dead", "pervasive":
+				// skip
+			default:
+				args = append(args, f)
+			}
+		}
+		if raw, err := exec.Command("ip", args...).CombinedOutput(); err != nil {
+			detail := strings.TrimSpace(string(raw))
+			if detail != "" {
+				errs = append(errs, fmt.Sprintf("restore route %q: %v: %s", route, err, detail))
+			} else {
+				errs = append(errs, fmt.Sprintf("restore route %q: %v", route, err))
+			}
+		}
+	}
+
+	if err := os.WriteFile("/etc/resolv.conf", []byte(snapshot.ResolvConf), 0644); err != nil {
+		errs = append(errs, fmt.Sprintf("restore resolv.conf: %v", err))
+	}
+
+	if len(errs) > 0 {
+		return errors.New(strings.Join(errs, "; "))
+	}
+	return nil
+}
+
 func (s *System) DHCPOne(iface string) (string, error) {
 	var out bytes.Buffer
 	if err := exec.Command("ip", "link", "set", iface, "up").Run(); err != nil {
@@ -131,6 +241,65 @@ func (s *System) SetStaticIPv4(cfg StaticIPv4Config) (string, error) {
 	return out.String(), nil
 }

+// SetInterfaceState sets the administrative state of iface by running
+// `ip link set dev <iface> up` or `... down`, and returns the command's error.
+func (s *System) SetInterfaceState(iface string, up bool) error {
+	if up {
+		return exec.Command("ip", "link", "set", "dev", iface, "up").Run()
+	}
+	return exec.Command("ip", "link", "set", "dev", iface, "down").Run()
+}
+
+// GetInterfaceState reports whether iface carries the UP flag in the output of
+// `ip -o link show`.
+func (s *System) GetInterfaceState(iface string) (bool, error) {
+	up, err := interfaceAdminState(iface)
+	return up, err
+}
+
+// interfaceAdminState runs `ip -o link show dev <iface>` and reports whether
+// the UP flag is present. It returns the command error, or the parse error
+// from parseInterfaceAdminState.
+func interfaceAdminState(iface string) (bool, error) {
+	output, err := exec.Command("ip", "-o", "link", "show", "dev", iface).Output()
+	if err == nil {
+		return parseInterfaceAdminState(string(output))
+	}
+	return false, err
+}
+
+// parseInterfaceAdminState inspects the first "<FLAG,FLAG,...>" group in raw
+// (one line of `ip -o link show` output) and reports whether it contains the
+// UP flag. It returns an error when the opening '<' or the closing '>' is
+// missing.
+func parseInterfaceAdminState(raw string) (bool, error) {
+	_, afterOpen, opened := strings.Cut(raw, "<")
+	if !opened {
+		return false, fmt.Errorf("ip link output missing flags")
+	}
+	flagList, _, closed := strings.Cut(afterOpen, ">")
+	if !closed {
+		return false, fmt.Errorf("ip link output missing flag terminator")
+	}
+	for _, candidate := range strings.Split(flagList, ",") {
+		if strings.TrimSpace(candidate) == "UP" {
+			return true, nil
+		}
+	}
+	return false, nil
+}
+
+// interfaceIPv4Addrs returns the fourth whitespace-separated field of every
+// line printed by `ip -o -4 addr show dev <iface>` (the address in CIDR form).
+// A non-zero exit status from `ip` is treated as "no addresses" and yields
+// (nil, nil); any other failure to run the command is returned as an error.
+func interfaceIPv4Addrs(iface string) ([]string, error) {
+	output, err := exec.Command("ip", "-o", "-4", "addr", "show", "dev", iface).Output()
+	if err != nil {
+		var exitErr *exec.ExitError
+		if !errors.As(err, &exitErr) {
+			return nil, err
+		}
+		return nil, nil
+	}
+
+	var addrs []string
+	for _, record := range strings.Split(strings.TrimSpace(string(output)), "\n") {
+		if cols := strings.Fields(record); len(cols) >= 4 {
+			addrs = append(addrs, cols[3])
+		}
+	}
+	return addrs, nil
+}
+
 func listInterfaceNames() ([]string, error) {
 	raw, err := exec.Command("ip", "-o", "link", "show").Output()
 	if err != nil {
--- a/audit/internal/platform/network_test.go
+++ b/audit/internal/platform/network_test.go
@@ -0,0 +1,46 @@
+package platform
+
+import "testing"
+
+// TestParseInterfaceAdminState verifies that the admin state is taken from the
+// "<...>" flag list rather than from the "state" column, and that output
+// without a flag list is rejected.
+func TestParseInterfaceAdminState(t *testing.T) {
+	cases := []struct {
+		name    string
+		raw     string
+		want    bool
+		wantErr bool
+	}{
+		{
+			name: "admin up with no carrier",
+			raw:  "2: enp1s0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN mode DEFAULT group default qlen 1000\n",
+			want: true,
+		},
+		{
+			name: "admin down",
+			raw:  "2: enp1s0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000\n",
+			want: false,
+		},
+		{
+			name:    "malformed output",
+			raw:     "2: enp1s0: mtu 1500 state DOWN\n",
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			up, err := parseInterfaceAdminState(tc.raw)
+			switch {
+			case tc.wantErr && err == nil:
+				t.Fatal("expected error")
+			case tc.wantErr:
+				return
+			case err != nil:
+				t.Fatalf("unexpected error: %v", err)
+			case up != tc.want:
+				t.Fatalf("got %v want %v", up, tc.want)
+			}
+		})
+	}
+}
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -0,0 +1,209 @@
+package platform
+
+import (
+	"context"
+	"fmt"
+	"sort"
+	"strconv"
+	"strings"
+)
+
+// RunNvidiaStressPack normalizes opts, builds the loader-specific stress job
+// and runs it through runAcceptancePackCtx, bracketed by nvidia-smi
+// inventory jobs before and a nvidia-smi telemetry query after. The archive
+// prefix is derived from the loader. It returns whatever runAcceptancePackCtx
+// returns, or the error from building the stress job.
+func (s *System) RunNvidiaStressPack(ctx context.Context, baseDir string, opts NvidiaStressOptions, logFunc func(string)) (string, error) {
+	normalizeNvidiaStressOptions(&opts)
+
+	stressJob, err := buildNvidiaStressJob(opts)
+	if err != nil {
+		return "", err
+	}
+
+	jobs := withNvidiaPersistenceMode(
+		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		satJob{name: "02-nvidia-smi-list.log", cmd: []string{"nvidia-smi", "-L"}},
+		stressJob,
+		satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
+	)
+	return runAcceptancePackCtx(ctx, baseDir, nvidiaStressArchivePrefix(opts.Loader), jobs, logFunc)
+}
+
+// nvidiaStressArchivePrefix maps a loader name (trimmed, case-insensitive) to
+// the archive prefix used for its results. Any loader other than the John and
+// NCCL loaders gets the "gpu-nvidia-burn" prefix.
+func nvidiaStressArchivePrefix(loader string) string {
+	normalized := strings.TrimSpace(strings.ToLower(loader))
+	if normalized == NvidiaStressLoaderJohn {
+		return "gpu-nvidia-john"
+	}
+	if normalized == NvidiaStressLoaderNCCL {
+		return "gpu-nvidia-nccl"
+	}
+	return "gpu-nvidia-burn"
+}
+
+// buildNvidiaStressJob resolves the GPU selection and builds the satJob for
+// the requested loader (trimmed, case-insensitive; empty means builtin).
+//
+// All loaders receive "--seconds" and, when GPUs were selected, "--devices".
+// The builtin loader additionally receives "--size-mb". The builtin and John
+// loaders receive "--stagger-seconds" when StaggerSeconds > 0 and more than
+// one GPU is selected; the NCCL loader never does. An unrecognised loader
+// yields an error. GPU selection happens first, so its error takes precedence.
+func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
+	gpus, err := resolveNvidiaGPUSelection(opts.GPUIndices, opts.ExcludeGPUIndices)
+	if err != nil {
+		return satJob{}, err
+	}
+
+	withStagger := func(argv []string) []string {
+		if opts.StaggerSeconds > 0 && len(gpus) > 1 {
+			argv = append(argv, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+		}
+		return argv
+	}
+	withDevices := func(argv []string) []string {
+		if len(gpus) > 0 {
+			argv = append(argv, "--devices", joinIndexList(gpus))
+		}
+		return argv
+	}
+	makeJob := func(logName string, argv []string) (satJob, error) {
+		return satJob{
+			name:       logName,
+			cmd:        argv,
+			collectGPU: true,
+			gpuIndices: gpus,
+		}, nil
+	}
+
+	seconds := strconv.Itoa(opts.DurationSec)
+	switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
+	case "", NvidiaStressLoaderBuiltin:
+		argv := []string{"bee-gpu-burn", "--seconds", seconds, "--size-mb", strconv.Itoa(opts.SizeMB)}
+		return makeJob("03-bee-gpu-burn.log", withDevices(withStagger(argv)))
+	case NvidiaStressLoaderJohn:
+		argv := []string{"bee-john-gpu-stress", "--seconds", seconds}
+		return makeJob("03-john-gpu-stress.log", withDevices(withStagger(argv)))
+	case NvidiaStressLoaderNCCL:
+		argv := []string{"bee-nccl-gpu-stress", "--seconds", seconds}
+		return makeJob("03-bee-nccl-gpu-stress.log", withDevices(argv))
+	default:
+		return satJob{}, fmt.Errorf("unknown NVIDIA stress loader %q", opts.Loader)
+	}
+}
+
+// normalizeNvidiaStressOptions fills defaults and canonicalises opts in place:
+// a non-positive DurationSec becomes 300, Loader is replaced by its canonical
+// constant (anything that is not the John or NCCL loader becomes the builtin
+// loader), and both index lists are deduplicated and sorted.
+// SizeMB is left untouched; 0 means "auto" — bee-gpu-burn will query per-GPU
+// memory at runtime.
+func normalizeNvidiaStressOptions(opts *NvidiaStressOptions) {
+	if opts.DurationSec <= 0 {
+		opts.DurationSec = 300
+	}
+
+	canonical := NvidiaStressLoaderBuiltin
+	switch strings.TrimSpace(strings.ToLower(opts.Loader)) {
+	case NvidiaStressLoaderJohn:
+		canonical = NvidiaStressLoaderJohn
+	case NvidiaStressLoaderNCCL:
+		canonical = NvidiaStressLoaderNCCL
+	}
+	opts.Loader = canonical
+
+	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
+	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
+}
+
+// resolveNvidiaGPUSelection returns the sorted GPU indices to operate on.
+// It starts from the indices reported by listNvidiaGPUIndices, keeps only
+// those listed in include (when include is non-empty) and drops those listed
+// in exclude. Requested indices that are not present are silently ignored.
+// It returns an error when no GPUs are present or none survive the filters.
+func resolveNvidiaGPUSelection(include, exclude []int) ([]int, error) {
+	present, err := listNvidiaGPUIndices()
+	if err != nil {
+		return nil, err
+	}
+	if len(present) == 0 {
+		return nil, fmt.Errorf("nvidia-smi found no NVIDIA GPUs")
+	}
+
+	toSet := func(values []int) map[int]struct{} {
+		set := make(map[int]struct{}, len(values))
+		for _, v := range values {
+			set[v] = struct{}{}
+		}
+		return set
+	}
+	allowed := toSet(include)
+	denied := toSet(exclude)
+
+	chosen := make([]int, 0, len(present))
+	for _, idx := range present {
+		if len(include) > 0 {
+			if _, wanted := allowed[idx]; !wanted {
+				continue
+			}
+		}
+		if _, banned := denied[idx]; banned {
+			continue
+		}
+		chosen = append(chosen, idx)
+	}
+	if len(chosen) == 0 {
+		return nil, fmt.Errorf("no NVIDIA GPUs selected after applying filters")
+	}
+	sort.Ints(chosen)
+	return chosen, nil
+}
+
+// listNvidiaGPUIndices runs `nvidia-smi --query-gpu=index` and returns the
+// reported indices, deduplicated and sorted. Lines that are empty or do not
+// parse as an integer are skipped. A command failure is returned wrapped with
+// the "nvidia-smi: " prefix.
+func listNvidiaGPUIndices() ([]int, error) {
+	raw, err := satExecCommand("nvidia-smi", "--query-gpu=index", "--format=csv,noheader,nounits").Output()
+	if err != nil {
+		return nil, fmt.Errorf("nvidia-smi: %w", err)
+	}
+
+	lines := strings.Split(strings.TrimSpace(string(raw)), "\n")
+	parsed := make([]int, 0, len(lines))
+	for i := range lines {
+		// Atoi rejects the empty string, so blank lines are skipped here too.
+		n, convErr := strconv.Atoi(strings.TrimSpace(lines[i]))
+		if convErr != nil {
+			continue
+		}
+		parsed = append(parsed, n)
+	}
+	return dedupeSortedIndices(parsed), nil
+}
+
+// dedupeSortedIndices returns the distinct non-negative values of the input in
+// ascending order. An empty or nil input yields nil; an input that contains
+// only negative values yields an empty, non-nil slice. The input slice is not
+// modified.
+func dedupeSortedIndices(values []int) []int {
+	if len(values) == 0 {
+		return nil
+	}
+	unique := make([]int, 0, len(values))
+	visited := make(map[int]bool, len(values))
+	for i := 0; i < len(values); i++ {
+		v := values[i]
+		if v < 0 || visited[v] {
+			continue
+		}
+		visited[v] = true
+		unique = append(unique, v)
+	}
+	sort.Ints(unique)
+	return unique
+}
+
+// joinIndexList renders values as a comma-separated decimal list without
+// spaces, e.g. []int{0, 2} -> "0,2". An empty input yields "".
+func joinIndexList(values []int) string {
+	var sb strings.Builder
+	for i, v := range values {
+		if i > 0 {
+			sb.WriteByte(',')
+		}
+		sb.WriteString(strconv.Itoa(v))
+	}
+	return sb.String()
+}
--- a/audit/internal/platform/platform_stress.go
+++ b/audit/internal/platform/platform_stress.go
@@ -0,0 +1,563 @@
+package platform
+
+import (
+	"archive/tar"
+	"bytes"
+	"compress/gzip"
+	"context"
+	"encoding/csv"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strconv"
+	"strings"
+	"sync"
+	"syscall"
+	"time"
+)
+
+// PlatformStressCycle defines one load+idle cycle of the platform thermal
+// cycling test. RunPlatformStress uses LoadSec as the timeout of the load
+// phase and IdleSec as the timeout of the idle monitoring phase that follows.
+type PlatformStressCycle struct {
+	LoadSec int // seconds of simultaneous CPU+GPU stress
+	IdleSec int // seconds of idle monitoring after load cut
+}
+
+// PlatformStressOptions controls the thermal cycling test.
+// Cycles are executed in order; RunPlatformStress rejects an empty list.
+// Components selects which stressors run: an empty slice enables both CPU and
+// GPU stress, otherwise only the exact (case-sensitive) values "cpu" and "gpu"
+// are recognised.
+type PlatformStressOptions struct {
+	Cycles     []PlatformStressCycle
+	Components []string // if empty: run all; values: "cpu", "gpu"
+}
+
+// platformStressRow is one telemetry sample; collectPhase records one row per
+// tick of its one-second ticker.
+//
+// ElapsedSec is measured from the start of the whole test, not of the cycle.
+// MaxCPUTempC / MaxGPUTempC are the highest readings in the sample and stay 0
+// when no matching sensor was reported. FanMinRPM / FanMaxRPM span all fans in
+// the sample and stay 0 when no fan was reported.
+// NOTE(review): sampleToPlatformRow never sets GPUThrottled, so it is always
+// false unless another producer of rows fills it in — confirm.
+type platformStressRow struct {
+	ElapsedSec   float64
+	Cycle        int
+	Phase        string // "load" | "idle"
+	CPULoadPct   float64
+	MaxCPUTempC  float64
+	MaxGPUTempC  float64
+	SysPowerW    float64
+	FanMinRPM    float64
+	FanMaxRPM    float64
+	GPUThrottled bool
+}
+
+// RunPlatformStress runs repeated load+idle thermal cycling.
+//
+// Each cycle starts the enabled stressors (CPU via stressapptest, GPU via the
+// vendor-specific tool) in background goroutines, samples live metrics once a
+// second for LoadSec, cuts the load by cancelling the load context, waits for
+// the stressors to exit, and then keeps sampling for IdleSec. Every sample is
+// passed to logFunc as a formatted line.
+//
+// Results are written to a fresh "platform-stress-<UTC timestamp>" directory
+// under baseDir as metrics.csv and summary.txt; the directory path is
+// returned. A nil logFunc is replaced by a no-op. Cancelling ctx ends the
+// current phases early and stops further cycles; whatever was collected is
+// still written and the call still returns a nil error.
+//
+// Errors: an empty opts.Cycles or a failure to create the directories is
+// returned. A failure to write metrics.csv or summary.txt is reported through
+// logFunc only, so that the caller still receives the run directory.
+func (s *System) RunPlatformStress(
+	ctx context.Context,
+	baseDir string,
+	opts PlatformStressOptions,
+	logFunc func(string),
+) (string, error) {
+	if logFunc == nil {
+		logFunc = func(string) {}
+	}
+	if len(opts.Cycles) == 0 {
+		return "", fmt.Errorf("no cycles defined")
+	}
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
+	}
+
+	stamp := time.Now().UTC().Format("20060102-150405")
+	runDir := filepath.Join(baseDir, "platform-stress-"+stamp)
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		return "", fmt.Errorf("mkdir run dir: %w", err)
+	}
+
+	hasCPU := len(opts.Components) == 0 || containsComponent(opts.Components, "cpu")
+	hasGPU := len(opts.Components) == 0 || containsComponent(opts.Components, "gpu")
+
+	vendor := s.DetectGPUVendor()
+	logFunc(fmt.Sprintf("Platform Thermal Cycling — %d cycle(s), GPU vendor: %s, cpu=%v gpu=%v", len(opts.Cycles), vendor, hasCPU, hasGPU))
+
+	var rows []platformStressRow
+	start := time.Now()
+
+	var analyses []cycleAnalysis
+
+	for i, cycle := range opts.Cycles {
+		if ctx.Err() != nil {
+			break
+		}
+		cycleNum := i + 1
+		logFunc(fmt.Sprintf("--- Cycle %d/%d: load=%ds, idle=%ds ---", cycleNum, len(opts.Cycles), cycle.LoadSec, cycle.IdleSec))
+
+		// ── LOAD PHASE ───────────────────────────────────────────────────────
+		loadCtx, loadCancel := context.WithTimeout(ctx, time.Duration(cycle.LoadSec)*time.Second)
+		var wg sync.WaitGroup
+
+		// CPU stress
+		if hasCPU {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				cpuCmd, err := buildCPUStressCmd(loadCtx)
+				if err != nil {
+					logFunc("CPU stress: " + err.Error())
+					return
+				}
+				_ = cpuCmd.Wait() // exits when loadCtx times out (SIGKILL)
+			}()
+		}
+
+		// GPU stress
+		if hasGPU {
+			wg.Add(1)
+			go func() {
+				defer wg.Done()
+				gpuCmd := buildGPUStressCmd(loadCtx, vendor, cycle.LoadSec)
+				if gpuCmd == nil {
+					return
+				}
+				_ = gpuCmd.Wait()
+			}()
+		}
+
+		// Sample metrics on this goroutine until the load context expires; the
+		// stressors run concurrently in the goroutines started above.
+		loadRows := collectPhase(loadCtx, cycleNum, "load", start)
+		for _, r := range loadRows {
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, loadRows...)
+		loadCancel()
+		wg.Wait()
+
+		if len(loadRows) > 0 {
+			logFunc(fmt.Sprintf("Cycle %d load ended (%.0fs)", cycleNum, loadRows[len(loadRows)-1].ElapsedSec))
+		}
+
+		// ── IDLE PHASE ───────────────────────────────────────────────────────
+		idleCtx, idleCancel := context.WithTimeout(ctx, time.Duration(cycle.IdleSec)*time.Second)
+		idleRows := collectPhase(idleCtx, cycleNum, "idle", start)
+		for _, r := range idleRows {
+			logFunc(formatPlatformRow(r))
+		}
+		rows = append(rows, idleRows...)
+		idleCancel()
+
+		// Per-cycle analysis
+		an := analyzePlatformCycle(loadRows, idleRows)
+		analyses = append(analyses, an)
+		logFunc(fmt.Sprintf("Cycle %d: maxCPU=%.1f°C maxGPU=%.1f°C power=%.0fW throttled=%v fanDrop=%.0f%%",
+			cycleNum, an.maxCPUTemp, an.maxGPUTemp, an.maxPower, an.throttled, an.fanDropPct))
+	}
+
+	// Write CSV. A failed write is logged rather than returned so the caller
+	// still gets the run directory and the summary below.
+	csvPath := filepath.Join(runDir, "metrics.csv")
+	if err := os.WriteFile(csvPath, writePlatformCSV(rows), 0644); err != nil {
+		logFunc(fmt.Sprintf("write %s: %v", csvPath, err))
+	}
+
+	// Write summary
+	summary := writePlatformSummary(opts, analyses)
+	logFunc("--- Summary ---")
+	for _, line := range strings.Split(summary, "\n") {
+		if line != "" {
+			logFunc(line)
+		}
+	}
+	summaryPath := filepath.Join(runDir, "summary.txt")
+	if err := os.WriteFile(summaryPath, []byte(summary), 0644); err != nil {
+		logFunc(fmt.Sprintf("write %s: %v", summaryPath, err))
+	}
+
+	return runDir, nil
+}
+
+// collectPhase gathers one metrics row per tick of a one-second ticker,
+// tagging each with the given cycle and phase, and returns the rows collected
+// once ctx is done. The first row is taken on the first tick, not immediately.
+func collectPhase(ctx context.Context, cycle int, phase string, testStart time.Time) []platformStressRow {
+	tick := time.NewTicker(time.Second)
+	defer tick.Stop()
+
+	var collected []platformStressRow
+	done := ctx.Done()
+	for {
+		select {
+		case <-tick.C:
+			collected = append(collected, sampleToPlatformRow(SampleLiveMetrics(), cycle, phase, testStart))
+		case <-done:
+			return collected
+		}
+	}
+}
+
+// sampleToPlatformRow condenses a live metric sample into one telemetry row.
+// ElapsedSec is the time since testStart. The CPU and GPU maxima are taken
+// over temperature readings in the "cpu" and "gpu" groups, and the GPU maximum
+// also considers the per-GPU TempC values. Fan min/max span all reported fans
+// and stay 0 when there are none. GPUThrottled is left at its zero value.
+func sampleToPlatformRow(s LiveMetricSample, cycle int, phase string, testStart time.Time) platformStressRow {
+	var row platformStressRow
+	row.ElapsedSec = time.Since(testStart).Seconds()
+	row.Cycle = cycle
+	row.Phase = phase
+	row.CPULoadPct = s.CPULoadPct
+	row.SysPowerW = s.PowerW
+
+	for _, reading := range s.Temps {
+		if reading.Group == "cpu" && reading.Celsius > row.MaxCPUTempC {
+			row.MaxCPUTempC = reading.Celsius
+		}
+		if reading.Group == "gpu" && reading.Celsius > row.MaxGPUTempC {
+			row.MaxGPUTempC = reading.Celsius
+		}
+	}
+	for _, gpu := range s.GPUs {
+		if gpu.TempC > row.MaxGPUTempC {
+			row.MaxGPUTempC = gpu.TempC
+		}
+	}
+	for i, fan := range s.Fans {
+		// The first fan seeds both bounds; later fans only widen them.
+		if i == 0 || fan.RPM < row.FanMinRPM {
+			row.FanMinRPM = fan.RPM
+		}
+		if i == 0 || fan.RPM > row.FanMaxRPM {
+			row.FanMaxRPM = fan.RPM
+		}
+	}
+	return row
+}
+
+// formatPlatformRow renders a telemetry row as a single log line. The fan
+// range is appended only when FanMinRPM is positive, and " THROTTLE" only when
+// the row is flagged as throttled.
+func formatPlatformRow(r platformStressRow) string {
+	var suffix string
+	if r.FanMinRPM > 0 {
+		suffix = fmt.Sprintf(" fans=%.0f-%.0fRPM", r.FanMinRPM, r.FanMaxRPM)
+	}
+	if r.GPUThrottled {
+		suffix += " THROTTLE"
+	}
+	return fmt.Sprintf("[%5.0fs] cycle=%d phase=%-4s cpu=%.0f%% cpuT=%.1f°C gpuT=%.1f°C pwr=%.0fW%s",
+		r.ElapsedSec, r.Cycle, r.Phase, r.CPULoadPct, r.MaxCPUTempC, r.MaxGPUTempC, r.SysPowerW, suffix)
+}
+
+// analyzePlatformCycle summarises one cycle.
+//
+// From the load rows it takes the peak CPU temperature, GPU temperature and
+// system power, and whether any row was flagged as throttled. The fan speed
+// "at cut" is the mean of the (min+max)/2 fan midpoint over the last five load
+// rows that report a positive FanMinRPM. The idle rows within 15 seconds of
+// the last load row are then scanned for the lowest positive fan midpoint, and
+// the drop from the at-cut speed is expressed as a percentage. When no fan
+// data exists at the cut, the drop stays 0.
+func analyzePlatformCycle(loadRows, idleRows []platformStressRow) cycleAnalysis {
+	var result cycleAnalysis
+	for i := range loadRows {
+		row := loadRows[i]
+		if row.MaxCPUTempC > result.maxCPUTemp {
+			result.maxCPUTemp = row.MaxCPUTempC
+		}
+		if row.MaxGPUTempC > result.maxGPUTemp {
+			result.maxGPUTemp = row.MaxGPUTempC
+		}
+		if row.SysPowerW > result.maxPower {
+			result.maxPower = row.SysPowerW
+		}
+		result.throttled = result.throttled || row.GPUThrottled
+	}
+
+	// Fan speed at the load cut: average over the trailing (up to five) rows.
+	tail := loadRows
+	if len(tail) > 5 {
+		tail = tail[len(tail)-5:]
+	}
+	fanSum, fanSamples := 0.0, 0
+	for _, row := range tail {
+		if row.FanMinRPM > 0 {
+			fanSum += (row.FanMinRPM + row.FanMaxRPM) / 2
+			fanSamples++
+		}
+	}
+	if fanSamples > 0 {
+		result.fanAtCutAvg = fanSum / float64(fanSamples)
+	}
+
+	// Lowest fan midpoint during the first 15 seconds of idle, starting from
+	// the at-cut value so that a rising fan speed reports no drop.
+	result.fanMin15s = result.fanAtCutAvg
+	var cutAt float64
+	if n := len(loadRows); n > 0 {
+		cutAt = loadRows[n-1].ElapsedSec
+	}
+	for _, row := range idleRows {
+		if row.ElapsedSec > cutAt+15 {
+			break
+		}
+		midpoint := (row.FanMinRPM + row.FanMaxRPM) / 2
+		if midpoint > 0 && (result.fanMin15s == 0 || midpoint < result.fanMin15s) {
+			result.fanMin15s = midpoint
+		}
+	}
+
+	if result.fanAtCutAvg > 0 {
+		result.fanDropPct = (result.fanAtCutAvg - result.fanMin15s) / result.fanAtCutAvg * 100
+	}
+	return result
+}
+
+// cycleAnalysis holds the per-cycle figures computed by analyzePlatformCycle.
+type cycleAnalysis struct {
+	maxCPUTemp  float64 // highest MaxCPUTempC seen during the load phase
+	maxGPUTemp  float64 // highest MaxGPUTempC seen during the load phase
+	maxPower    float64 // highest SysPowerW seen during the load phase
+	throttled   bool    // true if any load row had GPUThrottled set
+	fanAtCutAvg float64 // mean fan midpoint over the last (up to 5) load rows; 0 if no fan data
+	fanMin15s   float64 // lowest fan midpoint in the first 15s of idle (starts at fanAtCutAvg)
+	fanDropPct  float64 // (fanAtCutAvg - fanMin15s) / fanAtCutAvg * 100; 0 if no fan data
+}
+
+// writePlatformSummary renders the human-readable report: one section per
+// analysed cycle followed by an overall verdict. The verdict is FAIL when any
+// cycle was throttled, otherwise WARN when any cycle's fan speed dropped by
+// more than 20% within the analysed idle window, otherwise PASS.
+// analyses[i] is reported against opts.Cycles[i], so analyses must not be
+// longer than opts.Cycles.
+func writePlatformSummary(opts PlatformStressOptions, analyses []cycleAnalysis) string {
+	var out strings.Builder
+	rule := strings.Repeat("=", 48)
+	cycleCount := len(opts.Cycles)
+
+	fmt.Fprintf(&out, "Platform Thermal Cycling — %d cycle(s)\n", cycleCount)
+	fmt.Fprintf(&out, "%s\n\n", rule)
+
+	var throttledCycles, fanWarnCycles int
+	for idx := range analyses {
+		res := analyses[idx]
+		spec := opts.Cycles[idx]
+
+		fmt.Fprintf(&out, "Cycle %d/%d (load=%ds, idle=%ds)\n", idx+1, cycleCount, spec.LoadSec, spec.IdleSec)
+		fmt.Fprintf(&out, "  Max CPU temp: %.1f°C\n", res.maxCPUTemp)
+		fmt.Fprintf(&out, "  Max GPU temp: %.1f°C\n", res.maxGPUTemp)
+		fmt.Fprintf(&out, "  Max sys power: %.0f W\n", res.maxPower)
+
+		if !res.throttled {
+			fmt.Fprintf(&out, "  Throttle: none\n")
+		} else {
+			fmt.Fprintf(&out, "  Throttle: DETECTED\n")
+			throttledCycles++
+		}
+
+		// Fan lines are only meaningful when fan data existed at the load cut.
+		if res.fanAtCutAvg > 0 {
+			fmt.Fprintf(&out, "  Fan at load cut: %.0f RPM avg\n", res.fanAtCutAvg)
+			fmt.Fprintf(&out, "  Fan min (first 15s idle): %.0f RPM (drop %.0f%%)\n", res.fanMin15s, res.fanDropPct)
+			if res.fanDropPct > 20 {
+				fmt.Fprintf(&out, "  Fan response: WARN — fast spindown (>20%% drop in 15s)\n")
+				fanWarnCycles++
+			} else {
+				fmt.Fprintf(&out, "  Fan response: OK\n")
+			}
+		}
+		out.WriteString("\n")
+	}
+
+	fmt.Fprintf(&out, "%s\n", rule)
+	switch {
+	case throttledCycles > 0:
+		fmt.Fprintf(&out, "Overall: FAIL — throttle detected in %d/%d cycles\n", throttledCycles, len(analyses))
+	case fanWarnCycles > 0:
+		fmt.Fprintf(&out, "Overall: WARN — fast fan spindown in %d/%d cycles (cooling recovery risk)\n", fanWarnCycles, len(analyses))
+	default:
+		fmt.Fprintf(&out, "Overall: PASS\n")
+	}
+	return out.String()
+}
+
+// writePlatformCSV encodes the rows as CSV with a header line. Elapsed time,
+// load, temperatures and power use one decimal place, fan speeds use none,
+// and the throttle flag is written as "0" or "1".
+func writePlatformCSV(rows []platformStressRow) []byte {
+	var out bytes.Buffer
+	enc := csv.NewWriter(&out)
+	num := func(v float64, prec int) string {
+		return strconv.FormatFloat(v, 'f', prec, 64)
+	}
+
+	header := []string{
+		"elapsed_sec", "cycle", "phase",
+		"cpu_load_pct", "max_cpu_temp_c", "max_gpu_temp_c",
+		"sys_power_w", "fan_min_rpm", "fan_max_rpm", "gpu_throttled",
+	}
+	_ = enc.Write(header)
+
+	for i := range rows {
+		row := rows[i]
+		flag := "0"
+		if row.GPUThrottled {
+			flag = "1"
+		}
+		record := []string{
+			num(row.ElapsedSec, 1),
+			strconv.Itoa(row.Cycle),
+			row.Phase,
+			num(row.CPULoadPct, 1),
+			num(row.MaxCPUTempC, 1),
+			num(row.MaxGPUTempC, 1),
+			num(row.SysPowerW, 1),
+			num(row.FanMinRPM, 0),
+			num(row.FanMaxRPM, 0),
+			flag,
+		}
+		_ = enc.Write(record)
+	}
+	enc.Flush()
+	return out.Bytes()
+}
+
+// buildCPUStressCmd locates stressapptest, starts it at nice level 15 in its
+// own process group, and returns the running command. The tool is given a
+// 24-hour duration, so in practice it runs until ctx is done, at which point
+// the whole process group is sent SIGKILL. Thread count and memory size are
+// passed only when the platform helpers return a positive value.
+// It returns an error when the binary cannot be found or fails to start.
+func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
+	binary, lookErr := satLookPath("stressapptest")
+	if lookErr != nil {
+		return nil, fmt.Errorf("stressapptest not found: %w", lookErr)
+	}
+
+	// Use a very long duration; the context timeout will kill it at the right time.
+	argv := []string{"-s", "86400", "-W", "--cc_test"}
+	threads := platformStressCPUThreads()
+	if threads > 0 {
+		argv = append(argv, "-m", strconv.Itoa(threads))
+	}
+	memMB := platformStressMemoryMB()
+	if memMB > 0 {
+		argv = append(argv, "-M", strconv.Itoa(memMB))
+	}
+
+	stress := exec.CommandContext(ctx, binary, argv...)
+	stress.Stdout, stress.Stderr = nil, nil
+	stress.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+	stress.Cancel = func() error {
+		// Negative pid: signal the whole process group created by Setpgid.
+		if proc := stress.Process; proc != nil {
+			_ = syscall.Kill(-proc.Pid, syscall.SIGKILL)
+		}
+		return nil
+	}
+
+	if startErr := startLowPriorityCmd(stress, 15); startErr != nil {
+		return nil, fmt.Errorf("stressapptest start: %w", startErr)
+	}
+	return stress, nil
+}
+
+// buildGPUStressCmd dispatches to the stress command builder for the detected
+// GPU vendor ("amd" or "nvidia", compared case-insensitively).
+// It returns nil for any other vendor, or when the vendor-specific builder
+// returns nil (CPU-only cycling is still useful in that case).
+func buildGPUStressCmd(ctx context.Context, vendor string, durSec int) *exec.Cmd {
+	normalized := strings.ToLower(vendor)
+	if normalized == "amd" {
+		return buildAMDGPUStressCmd(ctx, durSec)
+	}
+	if normalized == "nvidia" {
+		return buildNvidiaGPUStressCmd(ctx, durSec)
+	}
+	return nil
+}
+
+// buildAMDGPUStressCmd writes an RVS "gst" configuration for a durSec-second
+// run on all devices and starts rvs with it at nice level 10 in its own
+// process group. When ctx is done the whole process group is sent SIGKILL.
+//
+// It returns the running command, or nil when rvs cannot be resolved, the
+// configuration file cannot be written, or the process fails to start. nil
+// tells the caller that no GPU load is running for this cycle.
+func buildAMDGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
+	rvsArgs, err := resolveRVSCommand()
+	if err != nil || len(rvsArgs) == 0 {
+		return nil
+	}
+	rvsPath := rvsArgs[0]
+	// The gst module takes its duration in milliseconds, hence durSec*1000.
+	cfg := fmt.Sprintf(`actions:
+- name: gst_platform
+  device: all
+  module: gst
+  parallel: true
+  duration: %d
+  copy_matrix: false
+  target_stress: 90
+  matrix_size_a: 8640
+  matrix_size_b: 8640
+  matrix_size_c: 8640
+`, durSec*1000)
+	cfgFile := "/tmp/bee-platform-gst.conf"
+	if err := os.WriteFile(cfgFile, []byte(cfg), 0644); err != nil {
+		// Without the config rvs would run with stale or missing settings.
+		return nil
+	}
+	cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+	cmd.Cancel = func() error {
+		// Negative pid: signal the whole process group created by Setpgid.
+		if cmd.Process != nil {
+			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		}
+		return nil
+	}
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	if err := startLowPriorityCmd(cmd, 10); err != nil {
+		// Never hand back a command that was not started: Wait on it would
+		// only fail, hiding the fact that no GPU load is running.
+		return nil
+	}
+	return cmd
+}
+
+// buildNvidiaGPUStressCmd starts the NVIDIA burn tool for durSec seconds at
+// nice level 10 in its own process group. It looks for "bee-gpu-burn" first
+// and falls back to "bee-gpu-stress".
+//
+// It returns the running command, or nil when neither binary is found or the
+// process fails to start. nil tells the caller that no GPU load is running
+// for this cycle.
+func buildNvidiaGPUStressCmd(ctx context.Context, durSec int) *exec.Cmd {
+	path, err := satLookPath("bee-gpu-burn")
+	if err != nil {
+		path, err = satLookPath("bee-gpu-stress")
+	}
+	if err != nil {
+		return nil
+	}
+	// Pass exact duration so bee-gpu-burn exits on its own when the cycle ends.
+	// Process group kill via Setpgid+Cancel is kept as a safety net for cases
+	// where the context is cancelled early (user stop, parent timeout).
+	cmd := exec.CommandContext(ctx, path, "--seconds", strconv.Itoa(durSec))
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+	cmd.Cancel = func() error {
+		if cmd.Process != nil {
+			_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
+		}
+		return nil
+	}
+	cmd.Stdout = nil
+	cmd.Stderr = nil
+	if err := startLowPriorityCmd(cmd, 10); err != nil {
+		// Never hand back a command that was not started: Wait on it would
+		// only fail, hiding the fact that no GPU load is running.
+		return nil
+	}
+	return cmd
+}
+
+// startLowPriorityCmd starts cmd and then tries to set the niceness of the new
+// process to nice. Only the start error is returned; a failure to change the
+// priority is deliberately ignored.
+func startLowPriorityCmd(cmd *exec.Cmd, nice int) error {
+	err := cmd.Start()
+	if err == nil && cmd.Process != nil {
+		_ = syscall.Setpriority(syscall.PRIO_PROCESS, cmd.Process.Pid, nice)
+	}
+	return err
+}
+
+// platformStressCPUThreads returns the number of stress threads to run.
+// A positive BEE_PLATFORM_STRESS_THREADS value wins. Otherwise the count is
+// derived from runtime.NumCPU(): 1 for up to two CPUs, NumCPU-1 for up to
+// eight, and NumCPU-2 beyond that.
+func platformStressCPUThreads() int {
+	if override := envInt("BEE_PLATFORM_STRESS_THREADS", 0); override > 0 {
+		return override
+	}
+	total := runtime.NumCPU()
+	if total <= 2 {
+		return 1
+	}
+	if total <= 8 {
+		return total - 1
+	}
+	return total - 2
+}
+
+// platformStressMemoryMB returns the amount of memory, in MiB, to hand to the
+// stress tool. A positive BEE_PLATFORM_STRESS_MB value wins. Otherwise it is
+// 60% of the value reported by freeMemBytes, but never less than 1024. It
+// returns 0 when freeMemBytes reports nothing.
+func platformStressMemoryMB() int {
+	if override := envInt("BEE_PLATFORM_STRESS_MB", 0); override > 0 {
+		return override
+	}
+	available := freeMemBytes()
+	if available <= 0 {
+		return 0
+	}
+	budget := int((available * 60) / 100 / (1024 * 1024))
+	if budget >= 1024 {
+		return budget
+	}
+	return 1024
+}
+
+// containsComponent reports whether name occurs in components. The comparison
+// is exact (case-sensitive, no trimming).
+func containsComponent(components []string, name string) bool {
+	for i := 0; i < len(components); i++ {
+		if components[i] == name {
+			return true
+		}
+	}
+	return false
+}
+
+// packPlatformDir writes the regular files that sit directly inside dir into a
+// gzip-compressed tar archive at dest. Entries are stored as "<base>/<file>",
+// where base is the last element of dir, with mode 0644 and the current time
+// as modification time. Subdirectories are not descended into, and files that
+// cannot be read are skipped.
+//
+// It returns the first error from creating dest, listing dir, writing an
+// entry, or closing the tar, gzip and file writers. The close errors matter:
+// the tar footer and the compressed tail are only flushed on Close, so
+// ignoring them could report a truncated archive as a success.
+func packPlatformDir(dir, dest string) (err error) {
+	f, err := os.Create(dest)
+	if err != nil {
+		return err
+	}
+	gz := gzip.NewWriter(f)
+	tw := tar.NewWriter(gz)
+	defer func() {
+		// Close innermost first so each layer flushes into the next one, and
+		// keep the earliest error.
+		if cerr := tw.Close(); err == nil {
+			err = cerr
+		}
+		if cerr := gz.Close(); err == nil {
+			err = cerr
+		}
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return err
+	}
+	base := filepath.Base(dir)
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+		data, readErr := os.ReadFile(filepath.Join(dir, e.Name()))
+		if readErr != nil {
+			// Best-effort: an unreadable file is left out of the archive.
+			continue
+		}
+		hdr := &tar.Header{
+			Name:    filepath.Join(base, e.Name()),
+			Size:    int64(len(data)),
+			Mode:    0644,
+			ModTime: time.Now(),
+		}
+		if err := tw.WriteHeader(hdr); err != nil {
+			return err
+		}
+		if _, err := tw.Write(data); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/audit/internal/platform/platform_stress_test.go
+++ b/audit/internal/platform/platform_stress_test.go
@@ -0,0 +1,34 @@
+package platform
+
+import (
+	"runtime"
+	"testing"
+)
+
+func TestPlatformStressCPUThreadsOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "7")
+	if got := platformStressCPUThreads(); got != 7 {
+		t.Fatalf("platformStressCPUThreads=%d want 7", got)
+	}
+}
+
+// TestPlatformStressCPUThreadsDefaultLeavesHeadroom checks the derived default:
+// at least one thread, never more than NumCPU, and strictly fewer than NumCPU
+// on machines with more than two CPUs.
+func TestPlatformStressCPUThreadsDefaultLeavesHeadroom(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_THREADS", "")
+	got := platformStressCPUThreads()
+	switch {
+	case got < 1:
+		t.Fatalf("platformStressCPUThreads=%d want >= 1", got)
+	case got > runtime.NumCPU():
+		t.Fatalf("platformStressCPUThreads=%d want <= NumCPU=%d", got, runtime.NumCPU())
+	case runtime.NumCPU() > 2 && got >= runtime.NumCPU():
+		t.Fatalf("platformStressCPUThreads=%d want headroom below NumCPU=%d", got, runtime.NumCPU())
+	}
+}
+
+func TestPlatformStressMemoryMBOverride(t *testing.T) {
+	t.Setenv("BEE_PLATFORM_STRESS_MB", "8192")
+	if got := platformStressMemoryMB(); got != 8192 {
+		t.Fatalf("platformStressMemoryMB=%d want 8192", got)
+	}
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -0,0 +1,344 @@
+package platform
+
+import (
+	"bufio"
+	"os"
+	"os/exec"
+	"strings"
+	"time"
+
+	"bee/audit/internal/schema"
+)
+
+// runtimeRequiredTools lists the executables that runtimeToolStatuses always
+// checks through CheckTools, independent of the detected GPU vendor. A tool
+// reported as not OK results in a "tool_missing" warning in
+// CollectRuntimeHealth.
+var runtimeRequiredTools = []string{
+	"dmidecode",
+	"lspci",
+	"lsblk",
+	"smartctl",
+	"nvme",
+	"ipmitool",
+	"dhclient",
+	"mount",
+}
+
+// runtimeTrackedServices lists the service names whose state (as returned by
+// ServiceState) CollectRuntimeHealth records in RuntimeHealth.Services. The
+// state is only recorded; no issue is derived from it here.
+var runtimeTrackedServices = []string{
+	"bee-network",
+	"bee-nvidia",
+	"bee-preflight",
+	"bee-audit",
+	"bee-web",
+	"bee-sshsetup",
+	"nvidia-dcgm",
+	"nvidia-fabricmanager",
+}
+
+// CollectRuntimeHealth gathers a point-in-time health snapshot of the live
+// environment: export directory availability, per-interface IPv4 status,
+// presence of required tools, tracked service states, GPU driver readiness,
+// run-from-RAM state and a USB export mount.
+//
+// Status starts as "OK", becomes "FAILED" when exportDir cannot be created,
+// and is downgraded to "PARTIAL" when any issue was recorded without a FAILED
+// status. The returned error is always nil in the current implementation.
+func (s *System) CollectRuntimeHealth(exportDir string) (schema.RuntimeHealth, error) {
+	checkedAt := time.Now().UTC().Format(time.RFC3339)
+	health := schema.RuntimeHealth{
+		Status:    "OK",
+		CheckedAt: checkedAt,
+		ExportDir: strings.TrimSpace(exportDir),
+	}
+
+	// An empty export dir is accepted and simply not checked.
+	if health.ExportDir != "" {
+		if err := os.MkdirAll(health.ExportDir, 0755); err != nil {
+			health.Status = "FAILED"
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "export_dir_unavailable",
+				Severity:    "critical",
+				Description: err.Error(),
+			})
+		}
+	}
+
+	// A ListInterfaces error leaves Interfaces and NetworkStatus unset and
+	// records no issue.
+	interfaces, err := s.ListInterfaces()
+	if err == nil {
+		health.Interfaces = make([]schema.RuntimeInterface, 0, len(interfaces))
+		hasIPv4 := false
+		missingIPv4 := false
+		for _, iface := range interfaces {
+			// Outcome: "lease_acquired" when the interface has any IPv4 address,
+			// "link_down" when its state is DOWN, otherwise "no_offer". Only the
+			// last case counts as a missing address; a down link does not.
+			outcome := "no_offer"
+			if len(iface.IPv4) > 0 {
+				outcome = "lease_acquired"
+				hasIPv4 = true
+			} else if strings.EqualFold(iface.State, "DOWN") {
+				outcome = "link_down"
+			} else {
+				missingIPv4 = true
+			}
+			health.Interfaces = append(health.Interfaces, schema.RuntimeInterface{
+				Name:    iface.Name,
+				State:   iface.State,
+				IPv4:    iface.IPv4,
+				Outcome: outcome,
+			})
+		}
+		switch {
+		case hasIPv4 && !missingIPv4:
+			health.NetworkStatus = "OK"
+		case hasIPv4:
+			health.NetworkStatus = "PARTIAL"
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "dhcp_partial",
+				Severity:    "warning",
+				Description: "At least one interface did not obtain IPv4 connectivity.",
+			})
+		default:
+			// Also reached when the interface list is empty.
+			health.NetworkStatus = "FAILED"
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "dhcp_failed",
+				Severity:    "warning",
+				Description: "No physical interface obtained IPv4 connectivity.",
+			})
+		}
+	}
+
+	// Tool checks depend on the GPU vendor; every missing tool adds a warning.
+	vendor := s.DetectGPUVendor()
+	for _, tool := range s.runtimeToolStatuses(vendor) {
+		health.Tools = append(health.Tools, schema.RuntimeToolStatus{
+			Name: tool.Name,
+			Path: tool.Path,
+			OK:   tool.OK,
+		})
+		if !tool.OK {
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "tool_missing",
+				Severity:    "warning",
+				Description: "Required tool missing: " + tool.Name,
+			})
+		}
+	}
+
+	// Service states are recorded as-is; they do not generate issues here.
+	for _, name := range runtimeTrackedServices {
+		health.Services = append(health.Services, schema.RuntimeServiceStatus{
+			Name:   name,
+			Status: s.ServiceState(name),
+		})
+	}
+
+	s.collectGPURuntimeHealth(vendor, &health)
+	s.collectToRAMHealth(&health)
+	s.collectUSBExportHealth(&health)
+
+	// Any recorded issue, regardless of severity, downgrades OK to PARTIAL.
+	if health.Status != "FAILED" && len(health.Issues) > 0 {
+		health.Status = "PARTIAL"
+	}
+	return health, nil
+}
+
+// commandText runs name with args and returns its combined stdout/stderr as a
+// string. Output is returned even when the command fails, as long as it
+// produced any; a failure without output yields "".
+func commandText(name string, args ...string) string {
+	cmd := exec.Command(name, args...)
+	output, runErr := cmd.CombinedOutput()
+	if len(output) == 0 && runErr != nil {
+		return ""
+	}
+	return string(output)
+}
+
+// runtimeToolStatuses returns the status of every tool relevant for the given
+// GPU vendor: the common runtimeRequiredTools first, then the vendor-specific
+// additions. For "nvidia" these are the NVIDIA utilities plus one
+// "dcgmproftester" entry resolved from dcgmProfTesterCandidates. For "amd" it
+// is a single "rocm-smi" entry whose Path is the first element of the resolved
+// command, or the second element when that one ends in "rocm_smi.py".
+// Any other vendor gets only the common tools.
+func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
+	statuses := s.CheckTools(runtimeRequiredTools)
+
+	if vendor == "nvidia" {
+		nvidiaTools := []string{
+			"nvidia-smi",
+			"dcgmi",
+			"nv-hostengine",
+			"nvidia-bug-report.sh",
+			"bee-gpu-burn",
+			"bee-john-gpu-stress",
+			"bee-nccl-gpu-stress",
+			"all_reduce_perf",
+		}
+		statuses = append(statuses, s.CheckTools(nvidiaTools)...)
+		statuses = append(statuses, resolvedToolStatus("dcgmproftester", dcgmProfTesterCandidates...))
+		return statuses
+	}
+
+	if vendor == "amd" {
+		rocm := ToolStatus{Name: "rocm-smi"}
+		cmd, err := resolveROCmSMICommand()
+		if err == nil && len(cmd) > 0 {
+			rocm.OK = true
+			rocm.Path = cmd[0]
+			// Report the script itself when the command is "<something> .../rocm_smi.py".
+			if len(cmd) > 1 && strings.HasSuffix(cmd[1], "rocm_smi.py") {
+				rocm.Path = cmd[1]
+			}
+		}
+		statuses = append(statuses, rocm)
+	}
+	return statuses
+}
+
+// resolvedToolStatus looks up each candidate with exec.LookPath, in order, and
+// returns an OK status carrying the resolved path of the first one found,
+// labelled with display. When no candidate resolves, the status carries only
+// the display name.
+func resolvedToolStatus(display string, candidates ...string) ToolStatus {
+	status := ToolStatus{Name: display}
+	for i := range candidates {
+		resolved, err := exec.LookPath(candidates[i])
+		if err != nil {
+			continue
+		}
+		status.Path = resolved
+		status.OK = true
+		break
+	}
+	return status
+}
+
+// collectToRAMHealth evaluates whether the live system is fully running from RAM.
+// Status values: "ok" = fully in RAM, "warning" = not copied, "partial" = stale or
+// incomplete RAM copy exists but runtime still depends on the boot medium,
+// "failed" = toram was requested but medium is not in RAM.
+func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
+	state := s.LiveMediaRAMState()
+	health.ToRAMStatus = state.Status
+	switch state.Status {
+	case "ok":
+		return
+	case "failed":
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_failed",
+			Severity:    "warning",
+			Description: state.Message,
+		})
+	case "partial":
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_partial",
+			Severity:    "warning",
+			Description: state.Message,
+		})
+	}
+}
+
+// collectUSBExportHealth stores in health.USBExportPath the mount point
+// returned by findUSBExportMount, i.e. the first writable USB-backed
+// filesystem suitable for log export, or "" when there is none.
+func (s *System) collectUSBExportHealth(health *schema.RuntimeHealth) {
+	mountPoint := findUSBExportMount()
+	health.USBExportPath = mountPoint
+}
+
+// findUSBExportMount returns the mount point of the first writable USB filesystem
+// found in /proc/mounts (vfat, exfat, ext2/3/4, ntfs) whose backing block device
+// has USB transport. Returns "" if none found.
+func findUSBExportMount() string {
+	f, err := os.Open("/proc/mounts")
+	if err != nil {
+		return ""
+	}
+	defer f.Close()
+
+	// fs types that are expected on USB export drives
+	exportFSTypes := map[string]bool{
+		"vfat":    true,
+		"exfat":   true,
+		"ext2":    true,
+		"ext3":    true,
+		"ext4":    true,
+		"ntfs":    true,
+		"ntfs3":   true,
+		"fuseblk": true,
+	}
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		// fields: device mountpoint fstype options dump pass
+		fields := strings.Fields(scanner.Text())
+		if len(fields) < 4 {
+			continue
+		}
+		device, mountPoint, fsType, options := fields[0], fields[1], fields[2], fields[3]
+		if !exportFSTypes[strings.ToLower(fsType)] {
+			continue
+		}
+		// Skip read-only mounts
+		opts := strings.Split(options, ",")
+		readOnly := false
+		for _, o := range opts {
+			if strings.TrimSpace(o) == "ro" {
+				readOnly = true
+				break
+			}
+		}
+		if readOnly {
+			continue
+		}
+		// Check USB transport via lsblk on the device (or its parent disk for partitions).
+		if !strings.HasPrefix(device, "/dev/") {
+			continue
+		}
+		checkDev := device
+		// lsblk only reports TRAN for the whole disk, not for partitions (e.g. /dev/sdc1).
+		// Strip trailing partition digits to get the parent disk name.
+		if trimmed := strings.TrimRight(device, "0123456789"); trimmed != device && len(trimmed) > len("/dev/") {
+			checkDev = trimmed
+		}
+		if blockDeviceTransport(checkDev) == "usb" {
+			return mountPoint
+		}
+	}
+	return ""
+}
+
+// collectGPURuntimeHealth fills the GPU-related fields of health (DriverReady,
+// CUDAReady, NvidiaGSPMode) and appends issues for the given vendor. Vendors
+// other than "nvidia" and "amd" are left untouched.
+//
+// Driver presence is first judged from `lsmod` output and may afterwards be
+// overridden to true by a working vendor tool (nvidia-smi / ROCm SMI); an issue
+// appended by the lsmod check is not removed in that case.
+func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
+	lsmodText := commandText("lsmod")
+
+	switch vendor {
+	case "nvidia":
+		// /run/bee-nvidia-mode is optional; when it cannot be read the GSP mode
+		// stays empty and no GSP issue is reported.
+		if raw, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
+			health.NvidiaGSPMode = strings.TrimSpace(string(raw))
+			if health.NvidiaGSPMode == "gsp-stuck" {
+				health.Issues = append(health.Issues, schema.RuntimeIssue{
+					Code:        "nvidia_gsp_stuck",
+					Severity:    "critical",
+					Description: "NVIDIA GSP firmware init timed out and the kernel module is stuck. Reboot and select 'GSP=off' in the boot menu.",
+				})
+			} else if health.NvidiaGSPMode == "gsp-off" {
+				health.Issues = append(health.Issues, schema.RuntimeIssue{
+					Code:        "nvidia_gsp_disabled",
+					Severity:    "warning",
+					Description: "NVIDIA GSP firmware disabled (fallback). Power management runs via CPU path — power draw readings may differ from reference hardware.",
+				})
+			}
+		}
+		// The trailing space keeps names such as "nvidia_uvm" from matching.
+		health.DriverReady = strings.Contains(lsmodText, "nvidia ")
+		if !health.DriverReady {
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "nvidia_kernel_module_missing",
+				Severity:    "warning",
+				Description: "NVIDIA kernel module is not loaded.",
+			})
+		}
+		if health.DriverReady && !strings.Contains(lsmodText, "nvidia_modeset") {
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "nvidia_modeset_failed",
+				Severity:    "warning",
+				Description: "nvidia-modeset is not loaded; display/CUDA stack may be partial.",
+			})
+		}
+		// A successful, non-empty `nvidia-smi -L` marks the driver ready even
+		// when the lsmod check above did not find the module.
+		if out, err := exec.Command("nvidia-smi", "-L").CombinedOutput(); err == nil && strings.TrimSpace(string(out)) != "" {
+			health.DriverReady = true
+		}
+
+		// CUDA probe: a 1-second, 1 MB bee-gpu-burn run. Skipped entirely when
+		// the binary is not on PATH. A failure only produces an issue when the
+		// output mentions cuda_error_system_not_ready; other failures just
+		// leave CUDAReady false.
+		if _, lookErr := exec.LookPath("bee-gpu-burn"); lookErr == nil {
+			out, err := exec.Command("bee-gpu-burn", "--seconds", "1", "--size-mb", "1").CombinedOutput()
+			if err == nil {
+				health.CUDAReady = true
+			} else if strings.Contains(strings.ToLower(string(out)), "cuda_error_system_not_ready") {
+				health.Issues = append(health.Issues, schema.RuntimeIssue{
+					Code:        "cuda_runtime_not_ready",
+					Severity:    "warning",
+					Description: "CUDA runtime is not ready for GPU SAT.",
+				})
+			}
+		}
+	case "amd":
+		health.DriverReady = strings.Contains(lsmodText, "amdgpu ") || strings.Contains(lsmodText, "amdkfd")
+		if !health.DriverReady {
+			health.Issues = append(health.Issues, schema.RuntimeIssue{
+				Code:        "amdgpu_kernel_module_missing",
+				Severity:    "warning",
+				Description: "AMD GPU driver is not loaded.",
+			})
+		}
+
+		// For AMD the CUDAReady flag is reused to signal that ROCm SMI answers;
+		// a working ROCm SMI also marks the driver ready.
+		out, err := runROCmSMI("--showproductname", "--csv")
+		if err == nil && strings.TrimSpace(string(out)) != "" {
+			health.CUDAReady = true
+			health.DriverReady = true
+			return
+		}
+
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "rocm_smi_unavailable",
+			Severity:    "warning",
+			Description: "ROCm SMI is not available for AMD GPU SAT.",
+		})
+	}
+}
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -0,0 +1,810 @@
+package platform
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+// FanStressOptions configures the fan-stress / thermal cycling test.
+// Durations that are zero or negative are replaced by the defaults noted
+// below (see applyFanStressDefaults); SizeMB is passed through unchanged.
+type FanStressOptions struct {
+	BaselineSec  int   // idle monitoring before and after load (default 30)
+	Phase1DurSec int   // first load phase duration in seconds (default 300)
+	PauseSec     int   // pause between the two load phases (default 60)
+	Phase2DurSec int   // second load phase duration in seconds (default 300)
+	SizeMB       int   // GPU memory to allocate per GPU during stress (0 = auto: 95% of VRAM)
+	GPUIndices   []int // which GPU indices to stress (empty = all detected)
+}
+
+// FanReading holds one fan sensor reading.
+type FanReading struct {
+	// Name is the sensor name as reported by ipmitool, or a "chip / feature"
+	// label when the reading comes from lm-sensors.
+	Name string
+	// RPM is the reported fan speed in revolutions per minute.
+	RPM float64
+}
+
+// GPUStressMetric holds per-GPU metrics during the stress test. The values
+// are taken from one nvidia-smi query (index, temperature.gpu,
+// utilization.gpu, power.draw, clocks.current.graphics,
+// clocks_throttle_reasons.active).
+type GPUStressMetric struct {
+	Index     int
+	TempC     float64
+	UsagePct  float64
+	PowerW    float64
+	ClockMHz  float64
+	Throttled bool // true if any throttle reason is active
+}
+
+// FanStressRow is one second-interval telemetry sample covering all monitored dimensions.
+// Rows are produced by sampleFanStressRow and exported by WriteFanStressCSV
+// and WriteFanSensorsCSV.
+type FanStressRow struct {
+	TimestampUTC string
+	ElapsedSec   float64
+	Phase        string // "baseline", "load1", "pause", "load2", "cooldown"
+	GPUs         []GPUStressMetric
+	Fans         []FanReading
+	CPUMaxTempC  float64 // highest CPU temperature from ipmitool / sensors
+	SysPowerW    float64 // DCMI system power reading
+}
+
+// cachedPowerReading remembers the last positive system power value and the
+// time it was taken, so that a later failed or zero sample can be bridged
+// (see effectiveSystemPowerReading).
+type cachedPowerReading struct {
+	Value     float64
+	UpdatedAt time.Time
+}
+
+// systemPowerCache is the package-wide cache used by sampleSystemPower;
+// systemPowerCacheMu guards every read and write of it.
+var (
+	systemPowerCacheMu sync.Mutex
+	systemPowerCache   cachedPowerReading
+)
+
+// systemPowerHoldTTL is how long a cached power reading may stand in for a
+// sample that produced no positive value.
+const systemPowerHoldTTL = 15 * time.Second
+
+// RunFanStressTest runs a two-phase GPU stress test while monitoring fan speeds,
+// temperatures, and power draw every second. Exports metrics.csv and fan-sensors.csv.
+// Designed to reproduce case-04 fan-speed lag and detect GPU thermal throttling.
+//
+// Phases run in order: baseline (idle), load1, pause (idle), load2, cooldown
+// (idle). Results are written to a new "fan-stress-<UTC timestamp>" directory
+// below baseDir ("/var/log/bee-sat" when baseDir is empty): verbose.log, one
+// <step>.log per load phase, metrics.csv, fan-sensors.csv and summary.txt.
+//
+// Once ctx is done, remaining phases are skipped, but analysis and output
+// files are still produced from the samples collected so far. The returned
+// string is the run directory; an error is returned only when the directory,
+// metrics.csv or summary.txt cannot be written.
+func (s *System) RunFanStressTest(ctx context.Context, baseDir string, opts FanStressOptions) (string, error) {
+	if baseDir == "" {
+		baseDir = "/var/log/bee-sat"
+	}
+	applyFanStressDefaults(&opts)
+
+	ts := time.Now().UTC().Format("20060102-150405")
+	runDir := filepath.Join(baseDir, "fan-stress-"+ts)
+	if err := os.MkdirAll(runDir, 0755); err != nil {
+		return "", err
+	}
+	verboseLog := filepath.Join(runDir, "verbose.log")
+
+	// Phase name shared between sampler goroutine and main goroutine.
+	var phaseMu sync.Mutex
+	currentPhase := "init"
+	setPhase := func(name string) {
+		phaseMu.Lock()
+		currentPhase = name
+		phaseMu.Unlock()
+	}
+	getPhase := func() string {
+		phaseMu.Lock()
+		defer phaseMu.Unlock()
+		return currentPhase
+	}
+
+	start := time.Now()
+	var rowsMu sync.Mutex
+	var allRows []FanStressRow
+
+	// Start background sampler (every second).
+	// stopCh asks the sampler to exit; doneCh is closed once it has returned,
+	// so allRows is no longer being appended to after <-doneCh.
+	stopCh := make(chan struct{})
+	doneCh := make(chan struct{})
+	go func() {
+		defer close(doneCh)
+		ticker := time.NewTicker(time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-stopCh:
+				return
+			case <-ticker.C:
+				row := sampleFanStressRow(opts.GPUIndices, getPhase(), time.Since(start).Seconds())
+				rowsMu.Lock()
+				allRows = append(allRows, row)
+				rowsMu.Unlock()
+			}
+		}
+	}()
+
+	var summary strings.Builder
+	fmt.Fprintf(&summary, "run_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
+
+	stats := satStats{}
+
+	// idlePhase sleeps for durSec while the sampler stamps phaseName on each row.
+	// An idle phase that was started is always recorded as OK, even when ctx is
+	// cancelled while it is waiting.
+	idlePhase := func(phaseName, stepName string, durSec int) {
+		if ctx.Err() != nil {
+			return
+		}
+		setPhase(phaseName)
+		appendSATVerboseLog(verboseLog,
+			fmt.Sprintf("[%s] start %s (idle %ds)", time.Now().UTC().Format(time.RFC3339), stepName, durSec),
+		)
+		select {
+		case <-ctx.Done():
+		case <-time.After(time.Duration(durSec) * time.Second):
+		}
+		appendSATVerboseLog(verboseLog,
+			fmt.Sprintf("[%s] finish %s", time.Now().UTC().Format(time.RFC3339), stepName),
+		)
+		fmt.Fprintf(&summary, "%s_status=OK\n", stepName)
+		stats.OK++
+	}
+
+	// loadPhase runs bee-gpu-burn for durSec; sampler stamps phaseName on each row.
+	loadPhase := func(phaseName, stepName string, durSec int) {
+		if ctx.Err() != nil {
+			return
+		}
+		setPhase(phaseName)
+		cmd := []string{
+			"bee-gpu-burn",
+			"--seconds", strconv.Itoa(durSec),
+			"--size-mb", strconv.Itoa(opts.SizeMB),
+		}
+		if len(opts.GPUIndices) > 0 {
+			cmd = append(cmd, "--devices", joinIndexList(dedupeSortedIndices(opts.GPUIndices)))
+		}
+		out, err := runSATCommandCtx(ctx, verboseLog, stepName, cmd, nil, nil)
+		// Best effort: failing to save the step log does not fail the phase.
+		_ = os.WriteFile(filepath.Join(runDir, stepName+".log"), out, 0644)
+		// Cancellation and a killed process are not counted as test failures.
+		if err != nil && err != context.Canceled && err.Error() != "signal: killed" {
+			fmt.Fprintf(&summary, "%s_status=FAILED\n", stepName)
+			stats.Failed++
+		} else {
+			fmt.Fprintf(&summary, "%s_status=OK\n", stepName)
+			stats.OK++
+		}
+	}
+
+	// Execute test phases.
+	idlePhase("baseline", "01-baseline", opts.BaselineSec)
+	loadPhase("load1", "02-load1", opts.Phase1DurSec)
+	idlePhase("pause", "03-pause", opts.PauseSec)
+	loadPhase("load2", "04-load2", opts.Phase2DurSec)
+	idlePhase("cooldown", "05-cooldown", opts.BaselineSec)
+
+	// Stop sampler and collect rows.
+	close(stopCh)
+	<-doneCh
+
+	rowsMu.Lock()
+	rows := allRows
+	rowsMu.Unlock()
+
+	// Analysis.
+	throttled := analyzeThrottling(rows)
+	maxGPUTemp := analyzeMaxTemp(rows, func(r FanStressRow) float64 {
+		// Hottest GPU within this sample.
+		var m float64
+		for _, g := range r.GPUs {
+			if g.TempC > m {
+				m = g.TempC
+			}
+		}
+		return m
+	})
+	maxCPUTemp := analyzeMaxTemp(rows, func(r FanStressRow) float64 {
+		return r.CPUMaxTempC
+	})
+	fanResponseSec := analyzeFanResponse(rows)
+
+	fmt.Fprintf(&summary, "throttling_detected=%v\n", throttled)
+	fmt.Fprintf(&summary, "max_gpu_temp_c=%.1f\n", maxGPUTemp)
+	fmt.Fprintf(&summary, "max_cpu_temp_c=%.1f\n", maxCPUTemp)
+	// analyzeFanResponse returns -1 when the response time is undetermined.
+	if fanResponseSec >= 0 {
+		fmt.Fprintf(&summary, "fan_response_sec=%.1f\n", fanResponseSec)
+	} else {
+		fmt.Fprintf(&summary, "fan_response_sec=N/A\n")
+	}
+
+	// Throttling failure counts against overall result.
+	if throttled {
+		stats.Failed++
+	}
+	writeSATStats(&summary, stats)
+
+	// Write CSV outputs.
+	if err := WriteFanStressCSV(filepath.Join(runDir, "metrics.csv"), rows, opts.GPUIndices); err != nil {
+		return "", err
+	}
+	// The per-fan CSV is supplementary; a write failure is ignored.
+	_ = WriteFanSensorsCSV(filepath.Join(runDir, "fan-sensors.csv"), rows)
+
+	if err := os.WriteFile(filepath.Join(runDir, "summary.txt"), []byte(summary.String()), 0644); err != nil {
+		return "", err
+	}
+
+	return runDir, nil
+}
+
+// applyFanStressDefaults replaces every non-positive duration in opts with its
+// default: 30s baseline, 300s per load phase, 60s pause. SizeMB and GPUIndices
+// are left as provided.
+func applyFanStressDefaults(opts *FanStressOptions) {
+	orDefault := func(field *int, def int) {
+		if *field <= 0 {
+			*field = def
+		}
+	}
+	orDefault(&opts.BaselineSec, 30)
+	orDefault(&opts.Phase1DurSec, 300)
+	orDefault(&opts.PauseSec, 60)
+	orDefault(&opts.Phase2DurSec, 300)
+	// SizeMB == 0 means "auto" (worker picks 95% of GPU VRAM for maximum power draw).
+	// It stays at 0 so that no too-small size is passed that would starve the
+	// tensor-core path.
+}
+
+// sampleFanStressRow takes one telemetry sample: GPU metrics, fan speeds, the
+// maximum CPU temperature and system power, stamped with the current UTC time
+// plus the given phase name and elapsed seconds. A fan sampling error is
+// dropped; the row then simply carries no fan readings.
+func sampleFanStressRow(gpuIndices []int, phase string, elapsed float64) FanStressRow {
+	stamp := time.Now().UTC().Format(time.RFC3339)
+	gpus := sampleGPUStressMetrics(gpuIndices)
+	fans, _ := sampleFanSpeeds()
+	cpuTemp := sampleCPUMaxTemp()
+	sysPower := sampleSystemPower()
+	return FanStressRow{
+		TimestampUTC: stamp,
+		ElapsedSec:   elapsed,
+		Phase:        phase,
+		GPUs:         gpus,
+		Fans:         fans,
+		CPUMaxTempC:  cpuTemp,
+		SysPowerW:    sysPower,
+	}
+}
+
+// sampleGPUStressMetrics queries nvidia-smi for temperature, utilization, power,
+// clock frequency, and active throttle reasons for each GPU.
+//
+// gpuIndices restricts the query to those GPUs (passed via --id); an empty
+// slice queries every GPU. nil is returned when nvidia-smi fails. Lines with
+// fewer than six comma-separated columns are skipped.
+func sampleGPUStressMetrics(gpuIndices []int) []GPUStressMetric {
+	args := []string{
+		"--query-gpu=index,temperature.gpu,utilization.gpu,power.draw,clocks.current.graphics,clocks_throttle_reasons.active",
+		"--format=csv,noheader,nounits",
+	}
+	if len(gpuIndices) > 0 {
+		ids := make([]string, len(gpuIndices))
+		for i, idx := range gpuIndices {
+			ids[i] = strconv.Itoa(idx)
+		}
+		args = append([]string{"--id=" + strings.Join(ids, ",")}, args...)
+	}
+	out, err := exec.Command("nvidia-smi", args...).Output()
+	if err != nil {
+		return nil
+	}
+	var metrics []GPUStressMetric
+	for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		parts := strings.Split(line, ", ")
+		if len(parts) < 6 {
+			continue
+		}
+		// A malformed index falls back to 0.
+		idx, _ := strconv.Atoi(strings.TrimSpace(parts[0]))
+		metrics = append(metrics, GPUStressMetric{
+			Index:     idx,
+			TempC:     parseGPUFloat(parts[1]),
+			UsagePct:  parseGPUFloat(parts[2]),
+			PowerW:    parseGPUFloat(parts[3]),
+			ClockMHz:  parseGPUFloat(parts[4]),
+			Throttled: gpuThrottleReasonsActive(parts[5]),
+		})
+	}
+	return metrics
+}
+
+// gpuThrottleReasonsActive interprets the clocks_throttle_reasons.active
+// bitmask printed by nvidia-smi (hexadecimal, e.g. "0x0000000000000004").
+// It reports true when any reason other than "GPU idle" (0x1) or
+// "applications clocks setting" (0x2) is set; those two describe an idle or
+// user-configured clock state rather than throttling, and counting them would
+// flag the first, still idle, sample of a load phase as a failure.
+// Values that are not a number (empty, "N/A", "[N/A]", ...) carry no throttle
+// information and report false.
+func gpuThrottleReasonsActive(raw string) bool {
+	const benignReasons = 0x1 | 0x2
+	value := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(raw)), "0x")
+	mask, err := strconv.ParseUint(value, 16, 64)
+	if err != nil {
+		return false
+	}
+	return mask&^benignReasons != 0
+}
+
+// sampleFanSpeeds reads fan RPM values from `ipmitool sdr type Fan` and falls
+// back to lm-sensors JSON when ipmitool fails or yields no readings. When
+// neither source yields readings, the ipmitool error is returned if there was
+// one, otherwise the lm-sensors error (which may be nil).
+func sampleFanSpeeds() ([]FanReading, error) {
+	ipmiOut, ipmiErr := exec.Command("ipmitool", "sdr", "type", "Fan").Output()
+	if ipmiErr == nil {
+		ipmiFans := parseFanSpeeds(string(ipmiOut))
+		if len(ipmiFans) != 0 {
+			return ipmiFans, nil
+		}
+	}
+
+	sensorFans, sensorsErr := sampleFanSpeedsViaSensorsJSON()
+	switch {
+	case len(sensorFans) > 0:
+		return sensorFans, nil
+	case ipmiErr != nil:
+		return nil, ipmiErr
+	default:
+		return nil, sensorsErr
+	}
+}
+
+// parseFanSpeeds extracts fan readings from "ipmitool sdr type Fan" output.
+// Two line layouts are understood:
+//
+//	Old: "FAN1 | 2400.000 | RPM | ok"           (value in col[1], unit in col[2])
+//	New: "FAN1 | 41h | ok | 29.1 | 4340 RPM"   (value+unit combined in last col)
+//
+// Lines without a parseable RPM value (e.g. "na" or "disabled") are dropped.
+func parseFanSpeeds(raw string) []FanReading {
+	var readings []FanReading
+	rows := strings.Split(strings.TrimSpace(raw), "\n")
+	for _, row := range rows {
+		cols := strings.Split(row, "|")
+		if len(cols) < 2 {
+			continue
+		}
+
+		rpm, ok := 0.0, false
+
+		// Pass 1: first column that carries both a number and the "RPM" unit.
+		for _, col := range cols[1:] {
+			cell := strings.TrimSpace(col)
+			mentionsRPM := strings.Contains(strings.ToUpper(cell), "RPM")
+			// A bare "RPM" cell is the unit column of the old layout.
+			if !mentionsRPM || strings.EqualFold(cell, "RPM") {
+				continue
+			}
+			if parsed, err := parseFanRPMValue(cell); err == nil {
+				rpm, ok = parsed, true
+				break
+			}
+		}
+
+		// Pass 2 (old layout): unit "RPM" in col[2], value in col[1].
+		if !ok && len(cols) >= 3 && strings.EqualFold(strings.TrimSpace(cols[2]), "RPM") {
+			value := strings.TrimSpace(cols[1])
+			unusable := value == "" || strings.EqualFold(value, "na") || strings.EqualFold(value, "disabled")
+			if !unusable {
+				if parsed, err := parseFanRPMValue(value); err == nil {
+					rpm, ok = parsed, true
+				}
+			}
+		}
+
+		if ok {
+			readings = append(readings, FanReading{Name: strings.TrimSpace(cols[0]), RPM: rpm})
+		}
+	}
+	return readings
+}
+
+// parseFanRPMValue parses the leading number of a cell such as "4340 RPM".
+// Commas are removed first (so "1,200 RPM" parses as 1200) and only the first
+// whitespace-separated token is interpreted. An empty cell yields
+// strconv.ErrSyntax.
+func parseFanRPMValue(raw string) (float64, error) {
+	withoutCommas := strings.ReplaceAll(raw, ",", "")
+	tokens := strings.Fields(withoutCommas)
+	if len(tokens) < 1 {
+		return 0, strconv.ErrSyntax
+	}
+	return strconv.ParseFloat(tokens[0], 64)
+}
+
+// sampleFanSpeedsViaSensorsJSON reads fan speeds from `sensors -j`.
+// Chips and their features are visited in sorted order; a feature contributes
+// a reading when it has a positive fan*_input value. Readings are labelled
+// "chip / feature" unless the feature name already contains the chip name,
+// and duplicate labels are reported only once.
+func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
+	raw, err := exec.Command("sensors", "-j").Output()
+	if err != nil || len(raw) == 0 {
+		return nil, err
+	}
+	var chipsByName map[string]map[string]any
+	if err := json.Unmarshal(raw, &chipsByName); err != nil {
+		return nil, err
+	}
+
+	chipNames := make([]string, 0, len(chipsByName))
+	for chipName := range chipsByName {
+		chipNames = append(chipNames, chipName)
+	}
+	sort.Strings(chipNames)
+
+	var readings []FanReading
+	emitted := make(map[string]struct{})
+	for _, chipName := range chipNames {
+		chip := chipsByName[chipName]
+		featureNames := make([]string, 0, len(chip))
+		for featureName := range chip {
+			featureNames = append(featureNames, featureName)
+		}
+		sort.Strings(featureNames)
+
+		for _, featureName := range featureNames {
+			// Non-object entries (such as the adapter description) are skipped.
+			values, isMap := chip[featureName].(map[string]any)
+			if !isMap {
+				continue
+			}
+			rpm, found := firstFanInputValue(values)
+			if !found || rpm <= 0 {
+				continue
+			}
+			label := strings.TrimSpace(featureName)
+			alreadyQualified := strings.Contains(strings.ToLower(label), strings.ToLower(chipName))
+			if chipName != "" && !alreadyQualified {
+				label = chipName + " / " + label
+			}
+			if _, dup := emitted[label]; dup {
+				continue
+			}
+			emitted[label] = struct{}{}
+			readings = append(readings, FanReading{Name: label, RPM: rpm})
+		}
+	}
+	return readings, nil
+}
+
+// sampleFanDutyCyclePct runs `sensors -j` and returns the average fan duty
+// cycle (in percent) across all PWM controls found in its output. The boolean
+// is false when the command fails, prints nothing, or exposes no PWM value.
+func sampleFanDutyCyclePct() (float64, bool) {
+	raw, err := exec.Command("sensors", "-j").Output()
+	if err == nil && len(raw) > 0 {
+		return parseFanDutyCyclePctSensorsJSON(raw)
+	}
+	return 0, false
+}
+
+// parseFanDutyCyclePctSensorsJSON extracts every PWM duty value from
+// `sensors -j` output and returns their mean in percent. The boolean is false
+// when the JSON cannot be decoded or contains no usable PWM value.
+func parseFanDutyCyclePctSensorsJSON(raw []byte) (float64, bool) {
+	var chips map[string]map[string]any
+	if json.Unmarshal(raw, &chips) != nil {
+		return 0, false
+	}
+
+	var duties []float64
+	for _, chip := range chips {
+		for featureName, rawFeature := range chip {
+			values, isMap := rawFeature.(map[string]any)
+			// The "Adapter" entry and any non-object value hold no readings.
+			if strings.EqualFold(featureName, "Adapter") || !isMap {
+				continue
+			}
+			duty, found := firstFanDutyValue(featureName, values)
+			if found {
+				duties = append(duties, duty)
+			}
+		}
+	}
+
+	if len(duties) > 0 {
+		return benchmarkMean(duties), true
+	}
+	return 0, false
+}
+
+// firstFanDutyValue returns the duty cycle (percent) found in one lm-sensors
+// feature. Features whose name contains "enable", "mode" or "alarm" never
+// yield a value. For a feature named like a PWM control the keys "input",
+// "value" and "current" are tried first; after that (and for any other
+// feature) the keys containing "pwm" are tried in sorted order, again
+// skipping enable/mode/alarm keys.
+func firstFanDutyValue(featureName string, feature map[string]any) (float64, bool) {
+	// Enable/mode/alarm entries are excluded by name.
+	isFlag := func(s string) bool {
+		return strings.Contains(s, "enable") || strings.Contains(s, "mode") || strings.Contains(s, "alarm")
+	}
+
+	normalized := strings.ToLower(strings.TrimSpace(featureName))
+	if isFlag(normalized) {
+		return 0, false
+	}
+
+	if strings.Contains(normalized, "pwm") {
+		for _, key := range [...]string{"input", "value", "current"} {
+			rawValue, present := feature[key]
+			if !present {
+				continue
+			}
+			if duty, ok := parseFanDutyValue(rawValue); ok {
+				return duty, true
+			}
+		}
+	}
+
+	sortedKeys := make([]string, 0, len(feature))
+	for key := range feature {
+		sortedKeys = append(sortedKeys, key)
+	}
+	sort.Strings(sortedKeys)
+	for _, key := range sortedKeys {
+		lowered := strings.ToLower(key)
+		if !strings.Contains(lowered, "pwm") || isFlag(lowered) {
+			continue
+		}
+		if duty, ok := parseFanDutyValue(feature[key]); ok {
+			return duty, true
+		}
+	}
+	return 0, false
+}
+
+// parseFanDutyValue converts a decoded JSON value (a number, or a string
+// holding a number) into a duty-cycle percentage via normalizePWMAsDutyPct.
+// Other value types and unparseable strings report false.
+func parseFanDutyValue(value any) (float64, bool) {
+	var raw float64
+	switch typed := value.(type) {
+	case float64:
+		raw = typed
+	case string:
+		parsed, err := strconv.ParseFloat(strings.TrimSpace(typed), 64)
+		if err != nil {
+			return 0, false
+		}
+		raw = parsed
+	default:
+		return 0, false
+	}
+	return normalizePWMAsDutyPct(raw)
+}
+
+// normalizePWMAsDutyPct maps a raw PWM reading to a percentage. Values in
+// [0, 100] are taken as already being a percentage; values in (100, 255] are
+// scaled from the 0-255 range. Negative values and values above 255 are
+// rejected.
+func normalizePWMAsDutyPct(raw float64) (float64, bool) {
+	switch {
+	case raw < 0:
+		return 0, false
+	case raw <= 100:
+		return raw, true
+	case raw <= 255:
+		return raw / 255.0 * 100.0, true
+	}
+	return 0, false
+}
+
+func firstFanInputValue(feature map[string]any) (float64, bool) {
+	keys := make([]string, 0, len(feature))
+	for key := range feature {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	for _, key := range keys {
+		lower := strings.ToLower(key)
+		if !strings.Contains(lower, "fan") || !strings.HasSuffix(lower, "_input") {
+			continue
+		}
+		switch value := feature[key].(type) {
+		case float64:
+			return value, true
+		case string:
+			f, err := strconv.ParseFloat(value, 64)
+			if err == nil {
+				return f, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// sampleCPUMaxTemp returns the highest CPU/inlet temperature from ipmitool or sensors.
+// lm-sensors is consulted when ipmitool fails and also when ipmitool answers
+// without any usable temperature reading, mirroring the fallback used for fan
+// speeds. 0 means no source produced a reading.
+func sampleCPUMaxTemp() float64 {
+	out, err := exec.Command("ipmitool", "sdr", "type", "Temperature").Output()
+	if err == nil {
+		if temp := parseIPMIMaxTemp(string(out)); temp > 0 {
+			return temp
+		}
+	}
+	return sampleCPUTempViaSensors()
+}
+
+// parseIPMIMaxTemp extracts the maximum temperature from ipmitool sensor output.
+// Both column layouts are understood:
+//
+//	Split:    "CPU Temp | 45.000 | degrees C | ok"           (value, then unit column)
+//	Combined: "CPU Temp | 30h | ok | 3.1 | 45 degrees C"     ("sdr type Temperature")
+//
+// Lines without a numeric reading ("na", "No Reading", ...) are ignored.
+// Returns 0 when no reading is found.
+func parseIPMIMaxTemp(raw string) float64 {
+	var highest float64
+	for _, line := range strings.Split(strings.TrimSpace(raw), "\n") {
+		cols := strings.Split(line, "|")
+		if len(cols) < 3 {
+			continue
+		}
+		if val, ok := ipmiTemperatureFromColumns(cols); ok && val > highest {
+			highest = val
+		}
+	}
+	return highest
+}
+
+// ipmiTemperatureFromColumns finds the temperature in one split ipmitool line.
+// It looks for a column mentioning "degrees" and takes the number either from
+// the start of that column or, when the column holds only the unit, from the
+// column right before it. cols[0] is the sensor name and is never inspected.
+func ipmiTemperatureFromColumns(cols []string) (float64, bool) {
+	for i := 1; i < len(cols); i++ {
+		cell := strings.TrimSpace(cols[i])
+		if !strings.Contains(strings.ToLower(cell), "degrees") {
+			continue
+		}
+		// Combined form: "45 degrees C".
+		if fields := strings.Fields(cell); len(fields) > 1 {
+			if val, err := strconv.ParseFloat(fields[0], 64); err == nil {
+				return val, true
+			}
+		}
+		// Split form: the value sits in the column before the unit.
+		if i > 1 {
+			if val, err := strconv.ParseFloat(strings.TrimSpace(cols[i-1]), 64); err == nil {
+				return val, true
+			}
+		}
+	}
+	return 0, false
+}
+
+// sampleCPUTempViaSensors falls back to lm-sensors when ipmitool is unavailable.
+// It runs `sensors -u` and returns the highest temperature input found, or 0
+// when the command fails or reports no temperature.
+func sampleCPUTempViaSensors() float64 {
+	out, err := exec.Command("sensors", "-u").Output()
+	if err != nil {
+		return 0
+	}
+	return maxSensorsTempInput(string(out))
+}
+
+// maxSensorsTempInput returns the highest "temp*_input:" value in `sensors -u`
+// output. Only temperature inputs are considered: fan, power and voltage
+// inputs use the same "_input:" suffix and would otherwise be mistaken for
+// temperatures. Values outside (0, 150) are discarded as implausible.
+func maxSensorsTempInput(raw string) float64 {
+	var highest float64
+	for _, line := range strings.Split(raw, "\n") {
+		fields := strings.Fields(line)
+		if len(fields) < 2 {
+			continue
+		}
+		key := fields[0]
+		if !strings.HasPrefix(key, "temp") || !strings.HasSuffix(key, "_input:") {
+			continue
+		}
+		val, err := strconv.ParseFloat(fields[1], 64)
+		if err != nil {
+			continue
+		}
+		if val > 0 && val < 150 && val > highest {
+			highest = val
+		}
+	}
+	return highest
+}
+
+// sampleSystemPower reads system power draw via DCMI.
+func sampleSystemPower() float64 {
+	now := time.Now()
+	current := 0.0
+	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
+	if err == nil {
+		current = parseDCMIPowerReading(string(out))
+	}
+	systemPowerCacheMu.Lock()
+	defer systemPowerCacheMu.Unlock()
+	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	systemPowerCache = updated
+	return value
+}
+
+// parseDCMIPowerReading pulls the instantaneous power value out of
+// `ipmitool dcmi power reading` output, e.g.
+//
+//	"    Instantaneous power reading:                   500 Watts"
+//
+// On lines mentioning "instantaneous" (any case) it returns the first number
+// that directly precedes a "Watts" token. Returns 0 when nothing matches.
+func parseDCMIPowerReading(raw string) float64 {
+	for _, line := range strings.Split(raw, "\n") {
+		if !strings.Contains(strings.ToLower(line), "instantaneous") {
+			continue
+		}
+		tokens := strings.Fields(line)
+		for i := 1; i < len(tokens); i++ {
+			if !strings.EqualFold(tokens[i], "Watts") {
+				continue
+			}
+			if watts, err := strconv.ParseFloat(tokens[i-1], 64); err == nil {
+				return watts
+			}
+		}
+	}
+	return 0
+}
+
+// effectiveSystemPowerReading decides which power value to report for a
+// sample and what the cache should hold afterwards. A positive current value
+// is reported and cached with timestamp now. Otherwise the cached value is
+// reported while it is positive and no older than systemPowerHoldTTL; beyond
+// that 0 is reported. The cache is returned unchanged in both fallback cases.
+func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
+	if current > 0 {
+		return current, cachedPowerReading{Value: current, UpdatedAt: now}
+	}
+	usable := cache.Value > 0 &&
+		!cache.UpdatedAt.IsZero() &&
+		now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL
+	if !usable {
+		return 0, cache
+	}
+	return cache.Value, cache
+}
+
+// analyzeThrottling reports whether any GPU was flagged as throttled in a
+// sample taken during the "load1" or "load2" phase. Samples from other phases
+// are ignored.
+func analyzeThrottling(rows []FanStressRow) bool {
+	for i := range rows {
+		phase := rows[i].Phase
+		underLoad := phase == "load1" || phase == "load2"
+		if !underLoad {
+			continue
+		}
+		for j := range rows[i].GPUs {
+			if rows[i].GPUs[j].Throttled {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// analyzeMaxTemp returns the maximum value of the given extractor across all rows.
+func analyzeMaxTemp(rows []FanStressRow, extract func(FanStressRow) float64) float64 {
+	var max float64
+	for _, row := range rows {
+		if v := extract(row); v > max {
+			max = v
+		}
+	}
+	return max
+}
+
+// analyzeFanResponse measures how long the fans took to react to load: the
+// seconds between the first "load1" sample and the first "load1" sample whose
+// average fan RPM is at least 5% above the average over all "baseline"
+// samples. Returns -1 if undetermined (no baseline fan data, no load1 samples,
+// or the threshold was never reached during load1).
+func analyzeFanResponse(rows []FanStressRow) float64 {
+	// Average RPM over every fan reading taken during the baseline phase.
+	var baselineSum, baselineN float64
+	for i := range rows {
+		if rows[i].Phase != "baseline" {
+			continue
+		}
+		for _, fan := range rows[i].Fans {
+			baselineSum += fan.RPM
+			baselineN++
+		}
+	}
+	if baselineN == 0 || baselineSum == 0 {
+		return -1
+	}
+	// 5% increase signals fan ramp-up.
+	rampThreshold := (baselineSum / baselineN) * 1.05
+
+	// Elapsed time of the first load1 sample.
+	loadStart := float64(-1)
+	for i := range rows {
+		if rows[i].Phase == "load1" {
+			loadStart = rows[i].ElapsedSec
+			break
+		}
+	}
+	if loadStart < 0 {
+		return -1
+	}
+
+	// First load1 sample whose average RPM reaches the threshold.
+	for i := range rows {
+		if rows[i].Phase != "load1" {
+			continue
+		}
+		var sum, n float64
+		for _, fan := range rows[i].Fans {
+			sum += fan.RPM
+			n++
+		}
+		if n > 0 && sum/n >= rampThreshold {
+			return rows[i].ElapsedSec - loadStart
+		}
+	}
+	return -1
+}
+
+// WriteFanStressCSV writes the wide-format metrics CSV with one row per second.
+// GPU columns are generated per index in gpuIndices order. When gpuIndices is
+// empty (the "all detected GPUs" setting) the columns are derived from the GPU
+// indices that actually occur in rows, in ascending order, so the sampled GPU
+// data is not silently dropped. A GPU missing from a particular row is written
+// as zeros. With no rows at all the file contains just "no data".
+func WriteFanStressCSV(path string, rows []FanStressRow, gpuIndices []int) error {
+	if len(rows) == 0 {
+		return os.WriteFile(path, []byte("no data\n"), 0644)
+	}
+
+	columns := gpuIndices
+	if len(columns) == 0 {
+		columns = fanStressGPUIndicesInRows(rows)
+	}
+
+	var b strings.Builder
+
+	// Header: fixed system columns + per-GPU columns.
+	b.WriteString("timestamp_utc,elapsed_sec,phase,fan_avg_rpm,fan_min_rpm,fan_max_rpm,cpu_max_temp_c,sys_power_w")
+	for _, idx := range columns {
+		fmt.Fprintf(&b, ",gpu%d_temp_c,gpu%d_usage_pct,gpu%d_power_w,gpu%d_clock_mhz,gpu%d_throttled",
+			idx, idx, idx, idx, idx)
+	}
+	b.WriteRune('\n')
+
+	for _, row := range rows {
+		favg, fmin, fmax := fanRPMStats(row.Fans)
+		fmt.Fprintf(&b, "%s,%.1f,%s,%.0f,%.0f,%.0f,%.1f,%.1f",
+			row.TimestampUTC,
+			row.ElapsedSec,
+			row.Phase,
+			favg, fmin, fmax,
+			row.CPUMaxTempC,
+			row.SysPowerW,
+		)
+		gpuByIdx := make(map[int]GPUStressMetric, len(row.GPUs))
+		for _, g := range row.GPUs {
+			gpuByIdx[g.Index] = g
+		}
+		for _, idx := range columns {
+			// A GPU absent from this sample yields the zero metric.
+			g := gpuByIdx[idx]
+			throttled := 0
+			if g.Throttled {
+				throttled = 1
+			}
+			fmt.Fprintf(&b, ",%.1f,%.1f,%.1f,%.0f,%d",
+				g.TempC, g.UsagePct, g.PowerW, g.ClockMHz, throttled)
+		}
+		b.WriteRune('\n')
+	}
+
+	return os.WriteFile(path, []byte(b.String()), 0644)
+}
+
+// fanStressGPUIndicesInRows returns the distinct GPU indices that appear in
+// rows, sorted ascending.
+func fanStressGPUIndicesInRows(rows []FanStressRow) []int {
+	seen := make(map[int]struct{})
+	var indices []int
+	for i := range rows {
+		for _, g := range rows[i].GPUs {
+			if _, dup := seen[g.Index]; dup {
+				continue
+			}
+			seen[g.Index] = struct{}{}
+			indices = append(indices, g.Index)
+		}
+	}
+	sort.Ints(indices)
+	return indices
+}
+
+// WriteFanSensorsCSV writes every individual fan reading in long (tidy)
+// format: one line per fan per sample, preceded by a header line. Fan names
+// are written as-is, without CSV quoting.
+func WriteFanSensorsCSV(path string, rows []FanStressRow) error {
+	var out strings.Builder
+	out.WriteString("timestamp_utc,elapsed_sec,phase,fan_name,rpm\n")
+	for i := range rows {
+		sample := rows[i]
+		for j := range sample.Fans {
+			fan := sample.Fans[j]
+			fmt.Fprintf(&out, "%s,%.1f,%s,%s,%.0f\n",
+				sample.TimestampUTC, sample.ElapsedSec, sample.Phase, fan.Name, fan.RPM)
+		}
+	}
+	content := []byte(out.String())
+	return os.WriteFile(path, content, 0644)
+}
+
+// fanRPMStats returns the average, lowest and highest RPM among the fans of
+// one sample. All three are 0 when there are no fans.
+func fanRPMStats(fans []FanReading) (avg, min, max float64) {
+	count := len(fans)
+	if count == 0 {
+		return 0, 0, 0
+	}
+	var sum float64
+	min, max = fans[0].RPM, fans[0].RPM
+	for i := 0; i < count; i++ {
+		rpm := fans[i].RPM
+		sum += rpm
+		switch {
+		case rpm < min:
+			min = rpm
+		case rpm > max:
+			max = rpm
+		}
+	}
+	avg = sum / float64(count)
+	return avg, min, max
+}
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -0,0 +1,88 @@
+package platform
+
+import (
+	"testing"
+	"time"
+)
+
+func TestParseFanSpeeds(t *testing.T) {
+	raw := "FAN1 | 2400.000 | RPM | ok\nFAN2 | 1800 RPM | ok | ok\nFAN3 | na | RPM | ns\n"
+	got := parseFanSpeeds(raw)
+	if len(got) != 2 {
+		t.Fatalf("fans=%d want 2 (%v)", len(got), got)
+	}
+	if got[0].Name != "FAN1" || got[0].RPM != 2400 {
+		t.Fatalf("fan0=%+v", got[0])
+	}
+	if got[1].Name != "FAN2" || got[1].RPM != 1800 {
+		t.Fatalf("fan1=%+v", got[1])
+	}
+}
+
+func TestFirstFanInputValue(t *testing.T) {
+	// A lone fanN_input entry must be returned as the reading.
+	rpm, found := firstFanInputValue(map[string]any{
+		"fan1_input": 9200.0,
+	})
+	if !found || rpm != 9200 {
+		t.Fatalf("got=%v ok=%v", rpm, found)
+	}
+}
+
+func TestParseFanDutyCyclePctSensorsJSON(t *testing.T) {
+	payload := []byte(`{
+		"chip0": {
+			"fan1": {"input": 9000},
+			"pwm1": {"input": 128},
+			"pwm1_enable": {"input": 1}
+		},
+		"chip1": {
+			"pwm2": {"input": 64}
+		}
+	}`)
+
+	pct, parsed := parseFanDutyCyclePctSensorsJSON(payload)
+	if !parsed {
+		t.Fatalf("expected duty cycle telemetry to be parsed")
+	}
+	if pct > 58 || pct < 57 {
+		t.Fatalf("got=%v want ~57.1", pct)
+	}
+}
+
+func TestParseDCMIPowerReading(t *testing.T) {
+	raw := `
+Instantaneous power reading:                   512 Watts
+Minimum during sampling period:               498 Watts
+`
+	if got := parseDCMIPowerReading(raw); got != 512 {
+		t.Fatalf("parseDCMIPowerReading()=%v want 512", got)
+	}
+}
+
+func TestEffectiveSystemPowerReading(t *testing.T) {
+	ts := time.Now()
+	held := cachedPowerReading{Value: 480, UpdatedAt: ts.Add(-5 * time.Second)}
+
+	watts, next := effectiveSystemPowerReading(held, 0, ts)
+	if watts != 480 {
+		t.Fatalf("got=%v want cached 480", watts)
+	}
+	if next.Value != 480 {
+		t.Fatalf("updated=%+v", next)
+	}
+
+	watts, next = effectiveSystemPowerReading(held, 530, ts)
+	if watts != 530 {
+		t.Fatalf("got=%v want 530", watts)
+	}
+	if next.Value != 530 {
+		t.Fatalf("updated=%+v", next)
+	}
+
+	stale := cachedPowerReading{Value: 480, UpdatedAt: ts.Add(-systemPowerHoldTTL - time.Second)}
+	watts, _ = effectiveSystemPowerReading(stale, 0, ts)
+	if watts != 0 {
+		t.Fatalf("expired cache returned %v want 0", watts)
+	}
+}
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -0,0 +1,582 @@
+package platform
+
+import (
+	"context"
+	"errors"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestStorageSATCommands(t *testing.T) {
+	t.Parallel()
+
+	// The NVMe path must yield three jobs, the last one driven by the nvme tool.
+	if jobs := storageSATCommands("/dev/nvme0n1", false); len(jobs) != 3 || jobs[2].cmd[0] != "nvme" {
+		t.Fatalf("unexpected nvme commands: %#v", jobs)
+	}
+
+	// The /dev/sda path must yield two jobs, starting with smartctl.
+	if jobs := storageSATCommands("/dev/sda", false); len(jobs) != 2 || jobs[0].cmd[0] != "smartctl" {
+		t.Fatalf("unexpected sata commands: %#v", jobs)
+	}
+}
+
+func TestRunNvidiaAcceptancePackIncludesGPUStress(t *testing.T) {
+	t.Parallel()
+
+	pack := nvidiaSATJobs()
+	if n := len(pack); n != 6 {
+		t.Fatalf("jobs=%d want 6", n)
+	}
+	preflight := pack[0].cmd
+	if preflight[0] != "nvidia-smi" {
+		t.Fatalf("preflight command=%q want nvidia-smi", preflight[0])
+	}
+	if joined := strings.Join(preflight, " "); joined != "nvidia-smi -pm 1" {
+		t.Fatalf("preflight=%q want %q", joined, "nvidia-smi -pm 1")
+	}
+	if burn := pack[5].cmd[0]; burn != "bee-gpu-burn" {
+		t.Fatalf("gpu stress command=%q want bee-gpu-burn", burn)
+	}
+	if reportFlag := pack[4].cmd[1]; reportFlag != "--output-file" {
+		t.Fatalf("bug report flag=%q want --output-file", reportFlag)
+	}
+}
+
+func TestAMDStressConfigUsesSingleGSTAction(t *testing.T) {
+	t.Parallel()
+
+	rvsConf := amdStressRVSConfig(123)
+	if !strings.Contains(rvsConf, "module: gst") {
+		t.Fatalf("config missing gst module:\n%s", rvsConf)
+	}
+	if strings.Contains(rvsConf, "module: mem") {
+		t.Fatalf("config should not include mem module:\n%s", rvsConf)
+	}
+	if !strings.Contains(rvsConf, "copy_matrix: false") {
+		t.Fatalf("config should use copy_matrix=false:\n%s", rvsConf)
+	}
+	if hits := strings.Count(rvsConf, "duration: 123000"); hits != 1 {
+		t.Fatalf("config should apply duration once:\n%s", rvsConf)
+	}
+	for _, wanted := range []string{"matrix_size_a: 8640", "matrix_size_b: 8640", "matrix_size_c: 8640"} {
+		if !strings.Contains(rvsConf, wanted) {
+			t.Fatalf("config missing %s:\n%s", wanted, rvsConf)
+		}
+	}
+}
+
+func TestAMDStressJobsIncludeBandwidthAndGST(t *testing.T) {
+	t.Parallel()
+
+	plan := amdStressJobs(300, "/tmp/test-amd-gst.conf")
+	if n := len(plan); n != 4 {
+		t.Fatalf("jobs=%d want 4", n)
+	}
+	if tool := plan[1].cmd[0]; tool != "rocm-bandwidth-test" {
+		t.Fatalf("jobs[1]=%q want rocm-bandwidth-test", tool)
+	}
+	if tool := plan[2].cmd[0]; tool != "rvs" {
+		t.Fatalf("jobs[2]=%q want rvs", tool)
+	}
+	if cfgArg := plan[2].cmd[2]; cfgArg != "/tmp/test-amd-gst.conf" {
+		t.Fatalf("jobs[2] cfg=%q want /tmp/test-amd-gst.conf", cfgArg)
+	}
+}
+
+func TestNvidiaSATJobsUseBuiltinBurnDefaults(t *testing.T) {
+	pack := nvidiaSATJobs()
+	burn := pack[5].cmd
+	expected := []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}
+	if len(burn) != len(expected) {
+		t.Fatalf("cmd len=%d want %d", len(burn), len(expected))
+	}
+	for i, arg := range expected {
+		if burn[i] != arg {
+			t.Fatalf("cmd[%d]=%q want %q", i, burn[i], arg)
+		}
+	}
+}
+
+func TestNvidiaDCGMJobsEnablePersistenceModeBeforeDiag(t *testing.T) {
+	plan := nvidiaDCGMJobs(3, []int{2, 0})
+	if n := len(plan); n != 5 {
+		t.Fatalf("jobs=%d want 5", n)
+	}
+	if first := strings.Join(plan[0].cmd, " "); first != "nvidia-smi -pm 1" {
+		t.Fatalf("preflight=%q want %q", first, "nvidia-smi -pm 1")
+	}
+	if last := strings.Join(plan[4].cmd, " "); last != "dcgmi diag -r 3 -i 2,0" {
+		t.Fatalf("diag=%q want %q", last, "dcgmi diag -r 3 -i 2,0")
+	}
+}
+
+func TestBuildNvidiaStressJobUsesSelectedLoaderAndDevices(t *testing.T) {
+	// Deliberately not parallel: the test swaps the package-level satExecCommand
+	// hook, and parallel tests doing the same would race on it (go test -race).
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec:       600,
+		Loader:            NvidiaStressLoaderJohn,
+		ExcludeGPUIndices: []int{1},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-john-gpu-stress", "--seconds", "600", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
+func TestBuildNvidiaStressJobUsesNCCLLoader(t *testing.T) {
+	// Deliberately not parallel: the test swaps the package-level satExecCommand
+	// hook, and parallel tests doing the same would race on it (go test -race).
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			return exec.Command("sh", "-c", "printf '0\n1\n2\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	job, err := buildNvidiaStressJob(NvidiaStressOptions{
+		DurationSec: 120,
+		Loader:      NvidiaStressLoaderNCCL,
+		GPUIndices:  []int{2, 0},
+	})
+	if err != nil {
+		t.Fatalf("buildNvidiaStressJob error: %v", err)
+	}
+	wantCmd := []string{"bee-nccl-gpu-stress", "--seconds", "120", "--devices", "0,2"}
+	if len(job.cmd) != len(wantCmd) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(job.cmd), len(wantCmd), job.cmd)
+	}
+	for i := range wantCmd {
+		if job.cmd[i] != wantCmd[i] {
+			t.Fatalf("cmd[%d]=%q want %q", i, job.cmd[i], wantCmd[i])
+		}
+	}
+	if got := joinIndexList(job.gpuIndices); got != "0,2" {
+		t.Fatalf("gpuIndices=%q want 0,2", got)
+	}
+}
+
+func TestResolveDCGMGPUIndicesUsesDetectedGPUsWhenUnset(t *testing.T) {
+	// Deliberately not parallel: the test swaps the package-level satExecCommand
+	// hook, and parallel tests doing the same would race on it (go test -race).
+	oldExecCommand := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name == "nvidia-smi" {
+			return exec.Command("sh", "-c", "printf '2\n0\n1\n'")
+		}
+		return exec.Command(name, args...)
+	}
+	t.Cleanup(func() { satExecCommand = oldExecCommand })
+
+	got, err := resolveDCGMGPUIndices(nil)
+	if err != nil {
+		t.Fatalf("resolveDCGMGPUIndices error: %v", err)
+	}
+	if want := "0,1,2"; joinIndexList(got) != want {
+		t.Fatalf("gpuIndices=%q want %q", joinIndexList(got), want)
+	}
+}
+
+func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
+	t.Parallel()
+
+	indices, err := resolveDCGMGPUIndices([]int{3, 1, 3})
+	if err != nil {
+		t.Fatalf("resolveDCGMGPUIndices error: %v", err)
+	}
+	if joined := joinIndexList(indices); joined != "1,3" {
+		t.Fatalf("gpuIndices=%q want %q", joined, "1,3")
+	}
+}
+
+func TestParseNvidiaGPUHealthDetectsResetRequired(t *testing.T) {
+	t.Parallel()
+
+	gpus := parseNvidiaGPUHealth("0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n")
+	if n := len(gpus); n != 2 {
+		t.Fatalf("len=%d want 2", n)
+	}
+	if healthy := gpus[0]; healthy.NeedsReset {
+		t.Fatalf("gpu0 unexpectedly marked reset-required")
+	}
+	if stuck := gpus[1]; !stuck.NeedsReset {
+		t.Fatalf("gpu1 should be marked reset-required: %#v", stuck)
+	}
+}
+
+func TestCheckNvidiaJobHealthReturnsErrorForSelectedResetRequiredGPU(t *testing.T) {
+	saved := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name != "nvidia-smi" {
+			return exec.Command(name, args...)
+		}
+		return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
+	}
+	t.Cleanup(func() { satExecCommand = saved })
+
+	report, err := checkNvidiaJobHealth([]int{1})
+	if err == nil {
+		t.Fatal("expected health check error")
+	}
+	if lower := strings.ToLower(report); !strings.Contains(report, "gpu 1") || !strings.Contains(lower, "requires reset") {
+		t.Fatalf("unexpected message: %q", report)
+	}
+}
+
+func TestWriteNvidiaGPUStatusFilesCreatesPerGPUFiles(t *testing.T) {
+	outDir := t.TempDir()
+	saved := satExecCommand
+	satExecCommand = func(name string, args ...string) *exec.Cmd {
+		if name != "nvidia-smi" {
+			return exec.Command(name, args...)
+		}
+		return exec.Command("sh", "-c", "printf '0, NVIDIA H100 PCIe, 38, 46.89, 0, 0, 81559\n1, NVIDIA H100 PCIe, [GPU requires reset], [N/A], [N/A], 0, 81559\n'")
+	}
+	t.Cleanup(func() { satExecCommand = saved })
+
+	statuses := map[int]*nvidiaGPUStatusFile{
+		0: {Index: 0, RunStatus: "OK"},
+		1: {Index: 1, RunStatus: "FAILED", FailingJob: "02-dcgmi-targeted-stress.log", Reason: "NVIDIA GPU health check failed:"},
+	}
+	if err := writeNvidiaGPUStatusFiles(outDir, "FAILED", statuses, map[int]struct{}{0: {}, 1: {}}); err != nil {
+		t.Fatalf("writeNvidiaGPUStatusFiles error: %v", err)
+	}
+	content, err := os.ReadFile(filepath.Join(outDir, "gpu-1-status.txt"))
+	if err != nil {
+		t.Fatalf("ReadFile gpu-1-status.txt: %v", err)
+	}
+	body := string(content)
+	if !strings.Contains(body, "run_status=FAILED") {
+		t.Fatalf("missing run status:\n%s", body)
+	}
+	if !strings.Contains(body, "health_status=RESET_REQUIRED") {
+		t.Fatalf("missing health status:\n%s", body)
+	}
+	if !strings.Contains(body, "failing_job=02-dcgmi-targeted-stress.log") {
+		t.Fatalf("missing failing job:\n%s", body)
+	}
+}
+
+func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
+	saved := satLookPath
+	// The stub resolves only the versioned binary name; every other
+	// lookup reports exec.ErrNotFound.
+	satLookPath = func(file string) (string, error) {
+		if file != "dcgmproftester13" {
+			return "", exec.ErrNotFound
+		}
+		return "/usr/bin/dcgmproftester13", nil
+	}
+	t.Cleanup(func() { satLookPath = saved })
+
+	argv, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004")
+	if err != nil {
+		t.Fatalf("resolveDCGMProfTesterCommand error: %v", err)
+	}
+	if n := len(argv); n != 4 {
+		t.Fatalf("cmd len=%d want 4 (%v)", n, argv)
+	}
+	if bin := argv[0]; bin != "/usr/bin/dcgmproftester13" {
+		t.Fatalf("cmd[0]=%q want /usr/bin/dcgmproftester13", bin)
+	}
+}
+
+func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
+	expected := []string{"dcgmi", "diag", "-r", "targeted_power", "-p", "targeted_power.test_duration=900", "-i", "3,1"}
+	argv := nvidiaDCGMNamedDiagCommand("targeted_power", 900, []int{3, 1})
+	if len(argv) != len(expected) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(argv), len(expected), argv)
+	}
+	for i, arg := range expected {
+		if argv[i] != arg {
+			t.Fatalf("cmd[%d]=%q want %q", i, argv[i], arg)
+		}
+	}
+}
+
+func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
+	vars := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
+	if n := len(vars); n != 2 {
+		t.Fatalf("env len=%d want 2 (%v)", n, vars)
+	}
+	if order := vars[0]; order != "CUDA_DEVICE_ORDER=PCI_BUS_ID" {
+		t.Fatalf("env[0]=%q want CUDA_DEVICE_ORDER=PCI_BUS_ID", order)
+	}
+	if visible := vars[1]; visible != "CUDA_VISIBLE_DEVICES=0,2,4" {
+		t.Fatalf("env[1]=%q want CUDA_VISIBLE_DEVICES=0,2,4", visible)
+	}
+}
+
+func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		loader string
+		want   string
+	}{
+		{loader: NvidiaStressLoaderBuiltin, want: "gpu-nvidia-burn"},
+		{loader: NvidiaStressLoaderJohn, want: "gpu-nvidia-john"},
+		{loader: NvidiaStressLoaderNCCL, want: "gpu-nvidia-nccl"},
+		{loader: "", want: "gpu-nvidia-burn"},
+	}
+	for _, tc := range cases {
+		if prefix := nvidiaStressArchivePrefix(tc.loader); prefix != tc.want {
+			t.Fatalf("loader=%q prefix=%q want %q", tc.loader, prefix, tc.want)
+		}
+	}
+}
+
+func TestEnvIntFallback(t *testing.T) {
+	os.Unsetenv("BEE_MEMTESTER_SIZE_MB")
+	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
+		t.Fatalf("got %d want 123", got)
+	}
+	t.Setenv("BEE_MEMTESTER_SIZE_MB", "bad")
+	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 123 {
+		t.Fatalf("got %d want 123", got)
+	}
+	t.Setenv("BEE_MEMTESTER_SIZE_MB", "256")
+	if got := envInt("BEE_MEMTESTER_SIZE_MB", 123); got != 256 {
+		t.Fatalf("got %d want 256", got)
+	}
+}
+
+func TestMemoryStressSizeArgUsesAvailableMemory(t *testing.T) {
+	saved := satFreeMemBytes
+	satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
+	t.Cleanup(func() { satFreeMemBytes = saved })
+
+	if size := memoryStressSizeArg(); size != "65536M" {
+		t.Fatalf("sizeArg=%q want 65536M", size)
+	}
+}
+
+func TestMemoryStressSizeArgRespectsOverride(t *testing.T) {
+	saved := satFreeMemBytes
+	satFreeMemBytes = func() int64 { return 96 * 1024 * 1024 * 1024 }
+	t.Cleanup(func() { satFreeMemBytes = saved })
+	t.Setenv("BEE_VM_STRESS_SIZE_MB", "4096")
+
+	if size := memoryStressSizeArg(); size != "4096M" {
+		t.Fatalf("sizeArg=%q want 4096M", size)
+	}
+}
+
+func TestMemoryStressSizeArgFallsBackWhenFreeMemoryUnknown(t *testing.T) {
+	saved := satFreeMemBytes
+	satFreeMemBytes = func() int64 { return 0 }
+	t.Cleanup(func() { satFreeMemBytes = saved })
+
+	if size := memoryStressSizeArg(); size != "80%" {
+		t.Fatalf("sizeArg=%q want 80%%", size)
+	}
+}
+
+func TestClassifySATResult(t *testing.T) {
+	cases := []struct {
+		name   string
+		job    string
+		out    string
+		err    error
+		status string
+	}{
+		{name: "ok", job: "memtester", out: "done", err: nil, status: "OK"},
+		{name: "unsupported", job: "smartctl-self-test-short", out: "Self-test not supported", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "nvme wait timeout without progress", job: "nvme-device-self-test", out: "Short Device self-test started\nWaiting for self test completion...\nno progress for 78 seconds, stop waiting", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+		{name: "failed", job: "bee-gpu-burn", out: "cuda error", err: errors.New("rc 1"), status: "FAILED"},
+		{name: "cuda not ready", job: "bee-gpu-burn", out: "cuInit failed: CUDA_ERROR_SYSTEM_NOT_READY", err: errors.New("rc 1"), status: "UNSUPPORTED"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			verdict, _ := classifySATResult(tc.job, []byte(tc.out), tc.err)
+			if verdict != tc.status {
+				t.Fatalf("status=%q want %q", verdict, tc.status)
+			}
+		})
+	}
+}
+
+func TestRunAcceptancePackCtxReturnsContextErrorWithoutArchive(t *testing.T) {
+	workDir := t.TempDir()
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+
+	cancelled := make(chan struct{})
+	go func() {
+		time.Sleep(100 * time.Millisecond)
+		cancel()
+		close(cancelled)
+	}()
+
+	tarball, err := runAcceptancePackCtx(ctx, workDir, "cancelled-pack", []satJob{
+		{name: "01-sleep.log", cmd: []string{"sh", "-c", "sleep 5"}},
+	}, nil)
+	<-cancelled
+
+	if !errors.Is(err, context.Canceled) {
+		t.Fatalf("err=%v want context.Canceled", err)
+	}
+	if tarball != "" {
+		t.Fatalf("archive=%q want empty", tarball)
+	}
+	leftovers, globErr := filepath.Glob(filepath.Join(workDir, "cancelled-pack-*.tar.gz"))
+	if globErr != nil {
+		t.Fatalf("Glob error: %v", globErr)
+	}
+	if len(leftovers) != 0 {
+		t.Fatalf("archives=%v want none", leftovers)
+	}
+}
+
+func TestParseStorageDevicesSkipsUSBDisks(t *testing.T) {
+	t.Parallel()
+
+	raw := "nvme0n1 disk nvme\nsda disk usb\nloop0 loop\nsdb disk sata\n"
+	got := parseStorageDevices(raw)
+	want := []string{"/dev/nvme0n1", "/dev/sdb"}
+	if len(got) != len(want) {
+		t.Fatalf("len(devices)=%d want %d (%v)", len(got), len(want), got)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("devices[%d]=%q want %q", i, got[i], want[i])
+		}
+	}
+}
+
+func TestResolveROCmSMICommandFromPATH(t *testing.T) {
+	binDir := t.TempDir()
+	t.Setenv("PATH", binDir)
+	toolPath := filepath.Join(binDir, "rocm-smi")
+	if err := os.WriteFile(toolPath, []byte("#!/bin/sh\nexit 0\n"), 0755); err != nil {
+		t.Fatalf("write rocm-smi: %v", err)
+	}
+
+	argv, err := resolveROCmSMICommand("--showproductname")
+	if err != nil {
+		t.Fatalf("resolveROCmSMICommand error: %v", err)
+	}
+	if n := len(argv); n != 2 {
+		t.Fatalf("cmd len=%d want 2 (%v)", n, argv)
+	}
+	if bin := argv[0]; bin != toolPath {
+		t.Fatalf("cmd[0]=%q want %q", bin, toolPath)
+	}
+}
+
+func TestResolveSATCommandUsesLookPathForGenericTools(t *testing.T) {
+	saved := satLookPath
+	satLookPath = func(file string) (string, error) {
+		if file != "stress-ng" {
+			return "", exec.ErrNotFound
+		}
+		return "/usr/bin/stress-ng", nil
+	}
+	t.Cleanup(func() { satLookPath = saved })
+
+	argv, err := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	if err != nil {
+		t.Fatalf("resolveSATCommand error: %v", err)
+	}
+	if n := len(argv); n != 3 {
+		t.Fatalf("cmd len=%d want 3 (%v)", n, argv)
+	}
+	if bin := argv[0]; bin != "/usr/bin/stress-ng" {
+		t.Fatalf("cmd[0]=%q want /usr/bin/stress-ng", bin)
+	}
+}
+
+func TestResolveSATCommandFailsForMissingGenericTool(t *testing.T) {
+	saved := satLookPath
+	satLookPath = func(string) (string, error) {
+		return "", exec.ErrNotFound
+	}
+	t.Cleanup(func() { satLookPath = saved })
+
+	_, resolveErr := resolveSATCommand([]string{"stress-ng", "--cpu", "0"})
+	if resolveErr == nil {
+		t.Fatal("expected error")
+	}
+	if msg := resolveErr.Error(); !strings.Contains(msg, "stress-ng not found in PATH") {
+		t.Fatalf("error=%q", resolveErr)
+	}
+}
+
+func TestResolveROCmSMICommandFallsBackToROCmTree(t *testing.T) {
+	root := t.TempDir()
+	binary := filepath.Join(root, "opt", "rocm", "bin", "rocm-smi")
+	if err := os.MkdirAll(filepath.Dir(binary), 0755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	if err := os.WriteFile(binary, []byte("#!/bin/sh\nexit 0\n"), 0755); err != nil {
+		t.Fatalf("write rocm-smi: %v", err)
+	}
+
+	prevExec, prevScripts := rocmSMIExecutableGlobs, rocmSMIScriptGlobs
+	rocmSMIExecutableGlobs = []string{binary}
+	rocmSMIScriptGlobs = nil
+	// Restore both glob lists once the test is over.
+	t.Cleanup(func() {
+		rocmSMIExecutableGlobs = prevExec
+		rocmSMIScriptGlobs = prevScripts
+	})
+
+	t.Setenv("PATH", "")
+
+	argv, err := resolveROCmSMICommand("--showallinfo")
+	if err != nil {
+		t.Fatalf("resolveROCmSMICommand error: %v", err)
+	}
+	if n := len(argv); n != 2 {
+		t.Fatalf("cmd len=%d want 2 (%v)", n, argv)
+	}
+	if bin := argv[0]; bin != binary {
+		t.Fatalf("cmd[0]=%q want %q", bin, binary)
+	}
+}
+
+func TestRunROCmSMIReportsMissingCommand(t *testing.T) {
+	prevLook := satLookPath
+	prevExec := rocmSMIExecutableGlobs
+	prevScripts := rocmSMIScriptGlobs
+	satLookPath = func(string) (string, error) { return "", exec.ErrNotFound }
+	rocmSMIExecutableGlobs = nil
+	rocmSMIScriptGlobs = nil
+	t.Cleanup(func() {
+		satLookPath = prevLook
+		rocmSMIExecutableGlobs = prevExec
+		rocmSMIScriptGlobs = prevScripts
+	})
+
+	if _, runErr := runROCmSMI("--showproductname"); runErr == nil {
+		t.Fatal("expected missing rocm-smi error")
+	}
+}
--- a/audit/internal/platform/services.go
+++ b/audit/internal/platform/services.go
@@ -10,13 +10,30 @@ import (
 func (s *System) ListBeeServices() ([]string, error) {
 	seen := map[string]bool{}
 	var out []string
-	for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
+	for _, pattern := range []string{
+		"/etc/systemd/system/bee-*.service",
+		"/lib/systemd/system/bee-*.service",
+		"/etc/systemd/system/bee-*.timer",
+		"/lib/systemd/system/bee-*.timer",
+	} {
 		matches, err := filepath.Glob(pattern)
 		if err != nil {
 			return nil, err
 		}
 		for _, match := range matches {
-			name := strings.TrimSuffix(filepath.Base(match), ".service")
+			base := filepath.Base(match)
+			name := base
+			if strings.HasSuffix(base, ".service") {
+				name = strings.TrimSuffix(base, ".service")
+			}
+			// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
+			if strings.HasSuffix(name, "@") {
+				continue
+			}
+			// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
+			if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
+				continue
+			}
 			if !seen[name] {
 				seen[name] = true
 				out = append(out, name)
@@ -44,7 +61,9 @@ func (s *System) ServiceState(name string) string {
 }

 func (s *System) ServiceDo(name string, action ServiceAction) (string, error) {
-	raw, err := exec.Command("systemctl", string(action), name).CombinedOutput()
+	// bee-web runs as the bee user, so systemctl has to be invoked through sudo.
+	// This relies on /etc/sudoers.d/bee granting bee NOPASSWD:ALL.
+	raw, err := exec.Command("sudo", "systemctl", string(action), name).CombinedOutput()
 	return string(raw), err
 }

--- a/audit/internal/platform/techdump.go
+++ b/audit/internal/platform/techdump.go
@@ -0,0 +1,151 @@
+package platform
+
+import (
+	"encoding/json"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"sort"
+	"strings"
+)
+
+var techDumpFixedCommands = []struct { // run unconditionally by CaptureTechnicalDump; output goes to File in the dump dir
+	Name string
+	Args []string
+	File string
+}{
+	{Name: "dmidecode", Args: []string{"-t", "0"}, File: "dmidecode-type0.txt"},
+	{Name: "dmidecode", Args: []string{"-t", "1"}, File: "dmidecode-type1.txt"},
+	{Name: "dmidecode", Args: []string{"-t", "2"}, File: "dmidecode-type2.txt"},
+	{Name: "dmidecode", Args: []string{"-t", "4"}, File: "dmidecode-type4.txt"},
+	{Name: "dmidecode", Args: []string{"-t", "17"}, File: "dmidecode-type17.txt"},
+	{Name: "lspci", Args: []string{"-vmm", "-D"}, File: "lspci-vmm.txt"},
+	{Name: "lspci", Args: []string{"-vvv"}, File: "lspci-vvv.txt"},
+	{Name: "lsblk", Args: []string{"-J", "-d", "-o", "NAME,TYPE,SIZE,SERIAL,MODEL,TRAN,HCTL"}, File: "lsblk.json"},
+	{Name: "sensors", Args: []string{"-j"}, File: "sensors.json"},
+	{Name: "ipmitool", Args: []string{"fru", "print"}, File: "ipmitool-fru.txt"},
+	{Name: "ipmitool", Args: []string{"sdr"}, File: "ipmitool-sdr.txt"},
+	{Name: "nvme", Args: []string{"list", "-o", "json"}, File: "nvme-list.json"},
+}
+
+var techDumpNvidiaCommands = []struct { // run by CaptureTechnicalDump only when DetectGPUVendor returns "nvidia"
+	Name string
+	Args []string
+	File string
+}{
+	{Name: "nvidia-smi", Args: []string{"-q"}, File: "nvidia-smi-q.txt"},
+	{Name: "nvidia-smi", Args: []string{"--query-gpu=index,pci.bus_id,serial,vbios_version,temperature.gpu,power.draw,ecc.errors.uncorrected.aggregate.total,ecc.errors.corrected.aggregate.total,clocks_throttle_reasons.hw_slowdown", "--format=csv,noheader,nounits"}, File: "nvidia-smi-query.csv"},
+}
+
+type lsblkDumpRoot struct { // the fields lsblkDumpDevices reads back from the lsblk.json dump
+	Blockdevices []struct {
+		Name string `json:"name"`
+		Type string `json:"type"`
+		Tran string `json:"tran"` // entries whose Tran is "usb" are skipped by lsblkDumpDevices
+	} `json:"blockdevices"`
+}
+
+type nvmeDumpRoot struct { // the fields nvmeDumpDevices reads back from the nvme-list.json dump
+	Devices []struct {
+		DevicePath string `json:"DevicePath"`
+	} `json:"Devices"`
+}
+
+// CaptureTechnicalDump writes raw tool output into baseDir; per-command failures are ignored.
+func (s *System) CaptureTechnicalDump(baseDir string) error {
+	if err := os.MkdirAll(baseDir, 0755); err != nil {
+		return err
+	}
+	target := func(file string) string { return filepath.Join(baseDir, file) }
+	for _, c := range techDumpFixedCommands {
+		writeCommandDump(target(c.File), c.Name, c.Args...)
+	}
+	if vendor := s.DetectGPUVendor(); vendor == "nvidia" {
+		for _, c := range techDumpNvidiaCommands {
+			writeCommandDump(target(c.File), c.Name, c.Args...)
+		}
+	} else if vendor == "amd" {
+		writeROCmSMIDump(target("rocm-smi.txt"))
+		writeROCmSMIDump(target("rocm-smi-showallinfo.txt"), "--showallinfo")
+	}
+	for _, dev := range lsblkDumpDevices(target("lsblk.json")) {
+		writeCommandDump(target("smartctl-"+sanitizeDumpName(dev)+".json"), "smartctl", "-j", "-a", "/dev/"+dev)
+	}
+	for _, dev := range nvmeDumpDevices(target("nvme-list.json")) {
+		id := sanitizeDumpName(dev)
+		writeCommandDump(target("nvme-id-ctrl-"+id+".json"), "nvme", "id-ctrl", dev, "-o", "json")
+		writeCommandDump(target("nvme-smart-log-"+id+".json"), "nvme", "smart-log", dev, "-o", "json")
+	}
+	return nil
+}
+
+// writeCommandDump runs name with args and saves the combined output to path.
+// Nothing is written when the command failed and produced no output at all.
+func writeCommandDump(path, name string, args ...string) {
+	output, runErr := exec.Command(name, args...).CombinedOutput()
+	if runErr == nil || len(output) > 0 {
+		_ = os.WriteFile(path, output, 0644) // best-effort: a write failure is ignored
+	}
+}
+
+// writeROCmSMIDump saves the output of runROCmSMI(args...) to path.
+// Nothing is written when the call failed and returned no output at all.
+func writeROCmSMIDump(path string, args ...string) {
+	output, runErr := runROCmSMI(args...)
+	if runErr == nil || len(output) > 0 {
+		_ = os.WriteFile(path, output, 0644) // best-effort: a write failure is ignored
+	}
+}
+
+// lsblkDumpDevices reads an lsblk JSON dump and returns the sorted names of its non-USB disks.
+func lsblkDumpDevices(path string) []string {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil
+	}
+	var parsed lsblkDumpRoot
+	if json.Unmarshal(data, &parsed) != nil {
+		return nil
+	}
+	var names []string
+	for _, bd := range parsed.Blockdevices {
+		name := strings.TrimSpace(bd.Name)
+		if bd.Type != "disk" || name == "" || strings.EqualFold(strings.TrimSpace(bd.Tran), "usb") {
+			continue
+		}
+		names = append(names, name)
+	}
+	sort.Strings(names)
+	return names
+}
+
+// nvmeDumpDevices reads an nvme-list JSON dump and returns its device paths,
+// trimmed, de-duplicated and sorted; nil when the file is missing or invalid.
+func nvmeDumpDevices(path string) []string {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil
+	}
+	var parsed nvmeDumpRoot
+	if json.Unmarshal(data, &parsed) != nil {
+		return nil
+	}
+	var paths []string
+	known := map[string]bool{}
+	for _, entry := range parsed.Devices {
+		if p := strings.TrimSpace(entry.DevicePath); p != "" && !known[p] {
+			known[p] = true
+			paths = append(paths, p)
+		}
+	}
+	sort.Strings(paths)
+	return paths
+}
+
+// sanitizeDumpName turns a device path into a flat file-name fragment: it trims
+// whitespace, drops a leading "/dev/" and maps "/" to "_"; empty results become "unknown".
+func sanitizeDumpName(value string) string {
+	trimmed := strings.TrimPrefix(strings.TrimSpace(value), "/dev/")
+	if flat := strings.ReplaceAll(trimmed, "/", "_"); flat != "" {
+		return flat
+	}
+	return "unknown"
+}
--- a/audit/internal/platform/techdump_test.go
+++ b/audit/internal/platform/techdump_test.go
@@ -0,0 +1,48 @@
+package platform
+
+import (
+	"os"
+	"path/filepath"
+	"reflect"
+	"testing"
+)
+
+func TestLSBLKDumpDevices(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "lsblk.json")
+	if err := os.WriteFile(path, []byte(`{"blockdevices":[{"name":"sda","type":"disk","tran":"usb"},{"name":"sda1","type":"part"},{"name":"nvme0n1","type":"disk","tran":"nvme"},{"name":"sdb","type":"disk","tran":"sata"}]}`), 0644); err != nil {
+		t.Fatalf("write lsblk fixture: %v", err)
+	}
+
+	got := lsblkDumpDevices(path)
+	want := []string{"nvme0n1", "sdb"}
+	if !reflect.DeepEqual(got, want) {
+		t.Fatalf("lsblkDumpDevices=%v want %v", got, want)
+	}
+}
+
+func TestNVMEDumpDevices(t *testing.T) {
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "nvme-list.json")
+	if err := os.WriteFile(path, []byte(`{"Devices":[{"DevicePath":"/dev/nvme1n1"},{"DevicePath":"/dev/nvme0n1"},{"DevicePath":"/dev/nvme1n1"}]}`), 0644); err != nil {
+		t.Fatalf("write nvme fixture: %v", err)
+	}
+
+	got := nvmeDumpDevices(path)
+	want := []string{"/dev/nvme0n1", "/dev/nvme1n1"}
+	if !reflect.DeepEqual(got, want) {
+		t.Fatalf("nvmeDumpDevices=%v want %v", got, want)
+	}
+}
+
+func TestSanitizeDumpName(t *testing.T) {
+	t.Parallel()
+
+	if got := sanitizeDumpName("/dev/nvme0n1"); got != "nvme0n1" {
+		t.Fatalf("sanitizeDumpName=%q want nvme0n1", got)
+	}
+}
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -2,12 +2,42 @@ package platform

 type System struct{}

+type LiveBootSource struct {
+	InRAM  bool   `json:"in_ram"`
+	Kind   string `json:"kind"`
+	Source string `json:"source,omitempty"`
+	Device string `json:"device,omitempty"`
+}
+
+type LiveMediaRAMState struct {
+	LiveBootSource
+	State        string `json:"state"`
+	Status       string `json:"status"`
+	ToramActive  bool   `json:"toram_active,omitempty"`
+	CopyPresent  bool   `json:"copy_present,omitempty"`
+	CopyComplete bool   `json:"copy_complete,omitempty"`
+	CanStartCopy bool   `json:"can_start_copy,omitempty"`
+	Message      string `json:"message,omitempty"`
+}
+
 type InterfaceInfo struct {
 	Name  string
 	State string
 	IPv4  []string
 }

+type NetworkInterfaceSnapshot struct {
+	Name string
+	Up   bool
+	IPv4 []string
+}
+
+type NetworkSnapshot struct {
+	Interfaces    []NetworkInterfaceSnapshot
+	DefaultRoutes []string
+	ResolvConf    string
+}
+
 type ServiceAction string

 const (
@@ -25,12 +55,12 @@ type StaticIPv4Config struct {
 }

 type RemovableTarget struct {
-	Device     string
-	FSType     string
-	Size       string
-	Label      string
-	Model      string
-	Mountpoint string
+	Device     string `json:"device"`
+	FSType     string `json:"fs_type"`
+	Size       string `json:"size"`
+	Label      string `json:"label"`
+	Model      string `json:"model"`
+	Mountpoint string `json:"mountpoint"`
 }

 type ToolStatus struct {
@@ -39,6 +69,21 @@ type ToolStatus struct {
 	OK   bool
 }

+const (
+	NvidiaStressLoaderBuiltin = "builtin" // sat tests map it to the archive prefix "gpu-nvidia-burn"
+	NvidiaStressLoaderJohn    = "john"    // sat tests expect the bee-john-gpu-stress command
+	NvidiaStressLoaderNCCL    = "nccl"    // sat tests expect the bee-nccl-gpu-stress command
+)
+
+type NvidiaStressOptions struct {
+	DurationSec       int    // run length in seconds; the sat tests show it passed to the loader as --seconds
+	SizeMB            int    // presumably the buffer size handed to the loader — TODO confirm
+	Loader            string // one of the NvidiaStressLoader* constants
+	GPUIndices        []int  // explicit GPU selection; the sat tests show it emitted sorted as --devices
+	ExcludeGPUIndices []int  // GPUs removed from the detected set (see the sat tests)
+	StaggerSeconds    int    // NOTE(review): looks like a delay between per-GPU starts — confirm
+}
+
 func New() *System {
 	return &System{}
 }
--- a/audit/internal/platform/types_test.go
+++ b/audit/internal/platform/types_test.go
@@ -0,0 +1,31 @@
+package platform
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
+	t.Parallel()
+
+	encoded, err := json.Marshal(RemovableTarget{
+		Device: "/dev/sdb1",
+		FSType: "exfat",
+		Size:   "1.8T",
+		Label:  "USB",
+		Model:  "Flash",
+	})
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	doc := string(encoded)
+	for _, wanted := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`} {
+		if !strings.Contains(doc, wanted) {
+			t.Fatalf("json missing key %s: %s", wanted, doc)
+		}
+	}
+	if leaksDevice, leaksFSType := strings.Contains(doc, `"Device"`), strings.Contains(doc, `"FSType"`); leaksDevice || leaksFSType {
+		t.Fatalf("json still contains Go field names: %s", doc)
+	}
+}
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -5,14 +5,57 @@ package schema
 // HardwareIngestRequest is the top-level output document produced by `bee audit`.
 // It is accepted as-is by the core /api/ingest/hardware endpoint.
 type HardwareIngestRequest struct {
-	Filename    *string          `json:"filename"`
-	SourceType  *string          `json:"source_type"`
-	Protocol    *string          `json:"protocol"`
-	TargetHost  string           `json:"target_host"`
+	Filename    *string          `json:"filename,omitempty"`
+	SourceType  *string          `json:"source_type,omitempty"`
+	Protocol    *string          `json:"protocol,omitempty"`
+	TargetHost  *string          `json:"target_host,omitempty"` // changed from string to *string: a nil value now drops the key instead of emitting ""
 	CollectedAt string           `json:"collected_at"`
+	Runtime     *RuntimeHealth   `json:"runtime,omitempty"` // nil omits the "runtime" key entirely
 	Hardware    HardwareSnapshot `json:"hardware"`
 }

+type RuntimeHealth struct { // RuntimeHealth is the optional "runtime" section of HardwareIngestRequest.
+	Status        string `json:"status"`
+	CheckedAt     string `json:"checked_at"`
+	ExportDir     string `json:"export_dir,omitempty"`
+	DriverReady   bool   `json:"driver_ready,omitempty"` // omitempty on a bool drops false, so "false" and "not reported" are indistinguishable in the JSON
+	CUDAReady     bool   `json:"cuda_ready,omitempty"`
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
+	NetworkStatus string `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
+	ToRAMStatus string `json:"toram_status,omitempty"`
+	// USBExportPath: mount point of the first writable USB drive found, empty if none.
+	USBExportPath string                 `json:"usb_export_path,omitempty"`
+	Issues        []RuntimeIssue         `json:"issues,omitempty"`
+	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
+	Services      []RuntimeServiceStatus `json:"services,omitempty"`
+	Interfaces    []RuntimeInterface     `json:"interfaces,omitempty"`
+}
+
+type RuntimeIssue struct { // RuntimeIssue is the element type of RuntimeHealth.Issues.
+	Code        string `json:"code"`
+	Severity    string `json:"severity,omitempty"` // the only optional key; code and description are always emitted
+	Description string `json:"description"`
+}
+
+type RuntimeToolStatus struct { // RuntimeToolStatus is the element type of RuntimeHealth.Tools.
+	Name string `json:"name"`
+	Path string `json:"path,omitempty"`
+	OK   bool   `json:"ok"` // no omitempty: false is emitted explicitly
+}
+
+type RuntimeServiceStatus struct { // RuntimeServiceStatus is the element type of RuntimeHealth.Services; both keys are always emitted.
+	Name   string `json:"name"`
+	Status string `json:"status"`
+}
+
+type RuntimeInterface struct { // RuntimeInterface is the element type of RuntimeHealth.Interfaces.
+	Name    string   `json:"name"`
+	State   string   `json:"state,omitempty"`
+	IPv4    []string `json:"ipv4,omitempty"`
+	Outcome string   `json:"outcome,omitempty"` // NOTE(review): meaning of "outcome" is not visible in this file — confirm against the producer.
+}
+
 type HardwareSnapshot struct {
 	Board         HardwareBoard            `json:"board"`
 	Firmware      []HardwareFirmwareRecord `json:"firmware,omitempty"`
@@ -21,14 +64,33 @@ type HardwareSnapshot struct {
 	Storage       []HardwareStorage        `json:"storage,omitempty"`
 	PCIeDevices   []HardwarePCIeDevice     `json:"pcie_devices,omitempty"`
 	PowerSupplies []HardwarePowerSupply    `json:"power_supplies,omitempty"`
+	Sensors       *HardwareSensors         `json:"sensors,omitempty"`
+	EventLogs     []HardwareEventLog       `json:"event_logs,omitempty"`
+}
+
+type HardwareHealthSummary struct { // NOTE(review): not referenced by any other type visible in this hunk — confirm where it is produced and consumed.
+	Status      string   `json:"status"`
+	Warnings    []string `json:"warnings,omitempty"`
+	Failures    []string `json:"failures,omitempty"`
+	StorageWarn int      `json:"storage_warn,omitempty"` // int counters use omitempty, so a count of 0 is dropped from the JSON
+	StorageFail int      `json:"storage_fail,omitempty"`
+	PCIeWarn    int      `json:"pcie_warn,omitempty"`
+	PCIeFail    int      `json:"pcie_fail,omitempty"`
+	PSUWarn     int      `json:"psu_warn,omitempty"`
+	PSUFail     int      `json:"psu_fail,omitempty"`
+	MemoryWarn  int      `json:"memory_warn,omitempty"`
+	MemoryFail  int      `json:"memory_fail,omitempty"`
+	EmptyDIMMs  int      `json:"empty_dimms,omitempty"`
+	MissingPSUs int      `json:"missing_psus,omitempty"`
+	CollectedAt string   `json:"collected_at,omitempty"`
 }

 type HardwareBoard struct {
-	Manufacturer *string `json:"manufacturer"`
-	ProductName  *string `json:"product_name"`
+	Manufacturer *string `json:"manufacturer,omitempty"` // with omitempty a nil pointer drops the key; previously it was encoded as null
+	ProductName  *string `json:"product_name,omitempty"`
 	SerialNumber string  `json:"serial_number"`
-	PartNumber   *string `json:"part_number"`
-	UUID         *string `json:"uuid"`
+	PartNumber   *string `json:"part_number,omitempty"`
+	UUID         *string `json:"uuid,omitempty"`
 }

 type HardwareFirmwareRecord struct {
@@ -37,77 +99,203 @@ type HardwareFirmwareRecord struct {
 }

 type HardwareCPU struct {
-	Socket          *int    `json:"socket"`
-	Model           *string `json:"model"`
-	Manufacturer    *string `json:"manufacturer"`
-	Status          *string `json:"status"`
-	SerialNumber    *string `json:"serial_number"`
-	Firmware        *string `json:"firmware"`
-	Cores           *int    `json:"cores"`
-	Threads         *int    `json:"threads"`
-	FrequencyMHz    *int    `json:"frequency_mhz"`
-	MaxFrequencyMHz *int    `json:"max_frequency_mhz"`
+	HardwareComponentStatus // embedded without a tag: encoding/json flattens its keys into this object; supplies "status" in place of the removed Status field
+	Socket                  *int     `json:"socket,omitempty"`
+	Model                   *string  `json:"model,omitempty"`
+	Manufacturer            *string  `json:"manufacturer,omitempty"`
+	SerialNumber            *string  `json:"serial_number,omitempty"`
+	Firmware                *string  `json:"firmware,omitempty"`
+	Cores                   *int     `json:"cores,omitempty"`
+	Threads                 *int     `json:"threads,omitempty"`
+	FrequencyMHz            *int     `json:"frequency_mhz,omitempty"`
+	MaxFrequencyMHz         *int     `json:"max_frequency_mhz,omitempty"`
+	TemperatureC            *float64 `json:"temperature_c,omitempty"`
+	PowerW                  *float64 `json:"power_w,omitempty"`
+	Throttled               *bool    `json:"throttled,omitempty"` // pointer + omitempty: nil is dropped, while a pointer to false is still emitted
+	CorrectableErrorCount   *int64   `json:"correctable_error_count,omitempty"`
+	UncorrectableErrorCount *int64   `json:"uncorrectable_error_count,omitempty"`
+	LifeRemainingPct        *float64 `json:"life_remaining_pct,omitempty"`
+	LifeUsedPct             *float64 `json:"life_used_pct,omitempty"`
+	Present                 *bool    `json:"present,omitempty"`
 }

 type HardwareMemory struct {
-	Slot            *string `json:"slot"`
-	Location        *string `json:"location"`
-	Present         *bool   `json:"present"`
-	SizeMB          *int    `json:"size_mb"`
-	Type            *string `json:"type"`
-	MaxSpeedMHz     *int    `json:"max_speed_mhz"`
-	CurrentSpeedMHz *int    `json:"current_speed_mhz"`
-	Manufacturer    *string `json:"manufacturer"`
-	SerialNumber    *string `json:"serial_number"`
-	PartNumber      *string `json:"part_number"`
-	Status          *string `json:"status"`
+	HardwareComponentStatus // embedded without a tag: encoding/json flattens its keys into this object; supplies "status" in place of the removed Status field
+	Slot                       *string  `json:"slot,omitempty"`
+	Location                   *string  `json:"location,omitempty"`
+	Present                    *bool    `json:"present,omitempty"` // pointer + omitempty: nil is dropped, while a pointer to false is still emitted
+	SizeMB                     *int     `json:"size_mb,omitempty"`
+	Type                       *string  `json:"type,omitempty"`
+	MaxSpeedMHz                *int     `json:"max_speed_mhz,omitempty"`
+	CurrentSpeedMHz            *int     `json:"current_speed_mhz,omitempty"`
+	Manufacturer               *string  `json:"manufacturer,omitempty"`
+	SerialNumber               *string  `json:"serial_number,omitempty"`
+	PartNumber                 *string  `json:"part_number,omitempty"`
+	TemperatureC               *float64 `json:"temperature_c,omitempty"`
+	CorrectableECCErrorCount   *int64   `json:"correctable_ecc_error_count,omitempty"`
+	UncorrectableECCErrorCount *int64   `json:"uncorrectable_ecc_error_count,omitempty"`
+	LifeRemainingPct           *float64 `json:"life_remaining_pct,omitempty"`
+	LifeUsedPct                *float64 `json:"life_used_pct,omitempty"`
+	SpareBlocksRemainingPct    *float64 `json:"spare_blocks_remaining_pct,omitempty"`
+	PerformanceDegraded        *bool    `json:"performance_degraded,omitempty"`
+	DataLossDetected           *bool    `json:"data_loss_detected,omitempty"`
 }

 type HardwareStorage struct {
-	Slot         *string        `json:"slot"`
-	Type         *string        `json:"type"`
-	Model        *string        `json:"model"`
-	SizeGB       *int           `json:"size_gb"`
-	SerialNumber *string        `json:"serial_number"`
-	Manufacturer *string        `json:"manufacturer"`
-	Firmware     *string        `json:"firmware"`
-	Interface    *string        `json:"interface"`
-	Present      *bool          `json:"present"`
-	Status       *string        `json:"status"`
-	Telemetry    map[string]any `json:"telemetry,omitempty"`
+	HardwareComponentStatus // embedded without a tag: encoding/json flattens its keys into this object; supplies "status" in place of the removed Status field
+	Slot                  *string        `json:"slot,omitempty"`
+	Type                  *string        `json:"type,omitempty"`
+	Model                 *string        `json:"model,omitempty"`
+	SizeGB                *int           `json:"size_gb,omitempty"`
+	SerialNumber          *string        `json:"serial_number,omitempty"`
+	Manufacturer          *string        `json:"manufacturer,omitempty"`
+	Firmware              *string        `json:"firmware,omitempty"`
+	Interface             *string        `json:"interface,omitempty"`
+	Present               *bool          `json:"present,omitempty"`
+	TemperatureC          *float64       `json:"temperature_c,omitempty"`
+	PowerOnHours          *int64         `json:"power_on_hours,omitempty"`
+	PowerCycles           *int64         `json:"power_cycles,omitempty"`
+	UnsafeShutdowns       *int64         `json:"unsafe_shutdowns,omitempty"`
+	MediaErrors           *int64         `json:"media_errors,omitempty"`
+	ErrorLogEntries       *int64         `json:"error_log_entries,omitempty"`
+	WrittenBytes          *int64         `json:"written_bytes,omitempty"`
+	ReadBytes             *int64         `json:"read_bytes,omitempty"`
+	LifeUsedPct           *float64       `json:"life_used_pct,omitempty"`
+	LifeRemainingPct      *float64       `json:"life_remaining_pct,omitempty"`
+	AvailableSparePct     *float64       `json:"available_spare_pct,omitempty"`
+	ReallocatedSectors    *int64         `json:"reallocated_sectors,omitempty"`
+	CurrentPendingSectors *int64         `json:"current_pending_sectors,omitempty"`
+	OfflineUncorrectable  *int64         `json:"offline_uncorrectable,omitempty"`
+	Telemetry             map[string]any `json:"-"` // "-" keeps the map in memory but excludes it from JSON; it was previously emitted as "telemetry"
 }

 type HardwarePCIeDevice struct {
-	Slot         *string        `json:"slot"`
-	VendorID     *int           `json:"vendor_id"`
-	DeviceID     *int           `json:"device_id"`
-	BDF          *string        `json:"bdf"`
-	DeviceClass  *string        `json:"device_class"`
-	Manufacturer *string        `json:"manufacturer"`
-	Model        *string        `json:"model"`
-	LinkWidth    *int           `json:"link_width"`
-	LinkSpeed    *string        `json:"link_speed"`
-	MaxLinkWidth *int           `json:"max_link_width"`
-	MaxLinkSpeed *string        `json:"max_link_speed"`
-	SerialNumber *string        `json:"serial_number"`
-	Firmware     *string        `json:"firmware"`
-	Present      *bool          `json:"present"`
-	Status       *string        `json:"status"`
-	Telemetry    map[string]any `json:"telemetry,omitempty"`
+	HardwareComponentStatus // embedded without a tag: encoding/json flattens its keys into this object; supplies "status" in place of the removed Status field
+	Slot                   *string        `json:"slot,omitempty"`
+	VendorID               *int           `json:"vendor_id,omitempty"`
+	DeviceID               *int           `json:"device_id,omitempty"`
+	NUMANode               *int           `json:"numa_node,omitempty"`
+	TemperatureC           *float64       `json:"temperature_c,omitempty"`
+	PowerW                 *float64       `json:"power_w,omitempty"`
+	LifeRemainingPct       *float64       `json:"life_remaining_pct,omitempty"`
+	LifeUsedPct            *float64       `json:"life_used_pct,omitempty"`
+	ECCCorrectedTotal      *int64         `json:"ecc_corrected_total,omitempty"`
+	ECCUncorrectedTotal    *int64         `json:"ecc_uncorrected_total,omitempty"`
+	HWSlowdown             *bool          `json:"hw_slowdown,omitempty"`
+	BatteryChargePct       *float64       `json:"battery_charge_pct,omitempty"`
+	BatteryHealthPct       *float64       `json:"battery_health_pct,omitempty"`
+	BatteryTemperatureC    *float64       `json:"battery_temperature_c,omitempty"`
+	BatteryVoltageV        *float64       `json:"battery_voltage_v,omitempty"`
+	BatteryReplaceRequired *bool          `json:"battery_replace_required,omitempty"`
+	SFPPresent             *bool          `json:"sfp_present,omitempty"`
+	SFPIdentifier          *string        `json:"sfp_identifier,omitempty"`
+	SFPConnector           *string        `json:"sfp_connector,omitempty"`
+	SFPVendor              *string        `json:"sfp_vendor,omitempty"`
+	SFPPartNumber          *string        `json:"sfp_part_number,omitempty"`
+	SFPSerialNumber        *string        `json:"sfp_serial_number,omitempty"`
+	SFPWavelengthNM        *float64       `json:"sfp_wavelength_nm,omitempty"`
+	SFPTemperatureC        *float64       `json:"sfp_temperature_c,omitempty"`
+	SFPTXPowerDBM          *float64       `json:"sfp_tx_power_dbm,omitempty"`
+	SFPRXPowerDBM          *float64       `json:"sfp_rx_power_dbm,omitempty"`
+	SFPVoltageV            *float64       `json:"sfp_voltage_v,omitempty"`
+	SFPBiasMA              *float64       `json:"sfp_bias_ma,omitempty"`
+	BDF                    *string        `json:"-"` // "-" excludes the field from JSON; it was previously emitted as "bdf"
+	DeviceClass            *string        `json:"device_class,omitempty"`
+	Manufacturer           *string        `json:"manufacturer,omitempty"`
+	Model                  *string        `json:"model,omitempty"`
+	LinkWidth              *int           `json:"link_width,omitempty"`
+	LinkSpeed              *string        `json:"link_speed,omitempty"`
+	MaxLinkWidth           *int           `json:"max_link_width,omitempty"`
+	MaxLinkSpeed           *string        `json:"max_link_speed,omitempty"`
+	SerialNumber           *string        `json:"serial_number,omitempty"`
+	Firmware               *string        `json:"firmware,omitempty"`
+	MacAddresses           []string       `json:"mac_addresses,omitempty"`
+	Present                *bool          `json:"present,omitempty"`
+	Telemetry              map[string]any `json:"-"` // "-" keeps the map in memory but excludes it from JSON; it was previously emitted as "telemetry"
 }

 type HardwarePowerSupply struct {
-	Slot         *string  `json:"slot"`
-	Present      *bool    `json:"present"`
-	Model        *string  `json:"model"`
-	Vendor       *string  `json:"vendor"`
-	WattageW     *int     `json:"wattage_w"`
-	SerialNumber *string  `json:"serial_number"`
-	PartNumber   *string  `json:"part_number"`
-	Firmware     *string  `json:"firmware"`
-	Status       *string  `json:"status"`
-	InputType    *string  `json:"input_type"`
-	InputPowerW  *float64 `json:"input_power_w"`
-	OutputPowerW *float64 `json:"output_power_w"`
-	InputVoltage *float64 `json:"input_voltage"`
+	HardwareComponentStatus // embedded without a tag: encoding/json flattens its keys into this object; supplies "status" in place of the removed Status field
+	Slot             *string  `json:"slot,omitempty"`
+	Present          *bool    `json:"present,omitempty"` // pointer + omitempty: nil is dropped, while a pointer to false is still emitted
+	Model            *string  `json:"model,omitempty"`
+	Vendor           *string  `json:"vendor,omitempty"`
+	WattageW         *int     `json:"wattage_w,omitempty"`
+	SerialNumber     *string  `json:"serial_number,omitempty"`
+	PartNumber       *string  `json:"part_number,omitempty"`
+	Firmware         *string  `json:"firmware,omitempty"`
+	InputType        *string  `json:"input_type,omitempty"`
+	InputPowerW      *float64 `json:"input_power_w,omitempty"`
+	OutputPowerW     *float64 `json:"output_power_w,omitempty"`
+	InputVoltage     *float64 `json:"input_voltage,omitempty"`
+	TemperatureC     *float64 `json:"temperature_c,omitempty"`
+	LifeRemainingPct *float64 `json:"life_remaining_pct,omitempty"`
+	LifeUsedPct      *float64 `json:"life_used_pct,omitempty"`
+}
+
+type HardwareComponentStatus struct { // Shared status block embedded in HardwareCPU, HardwareMemory, HardwareStorage, HardwarePCIeDevice and HardwarePowerSupply.
+	Status               *string                 `json:"status,omitempty"`
+	StatusCheckedAt      *string                 `json:"status_checked_at,omitempty"`
+	StatusChangedAt      *string                 `json:"status_changed_at,omitempty"`
+	StatusHistory        []HardwareStatusHistory `json:"status_history,omitempty"`
+	ErrorDescription     *string                 `json:"error_description,omitempty"`
+	ManufacturedYearWeek *string                 `json:"manufactured_year_week,omitempty"` // the schema test in this change uses the form "2024-W07"
+}
+
+type HardwareStatusHistory struct { // HardwareStatusHistory is the element type of HardwareComponentStatus.StatusHistory.
+	Status    string  `json:"status"`
+	ChangedAt string  `json:"changed_at"`
+	Details   *string `json:"details,omitempty"` // the only optional key; status and changed_at are always emitted
+}
+
+type HardwareSensors struct { // HardwareSensors is the optional "sensors" section of HardwareSnapshot; each empty list is omitted.
+	Fans         []HardwareFanSensor         `json:"fans,omitempty"`
+	Power        []HardwarePowerSensor       `json:"power,omitempty"`
+	Temperatures []HardwareTemperatureSensor `json:"temperatures,omitempty"`
+	Other        []HardwareOtherSensor       `json:"other,omitempty"`
+}
+
+type HardwareFanSensor struct { // HardwareFanSensor is the element type of HardwareSensors.Fans; only "name" is always emitted.
+	Name     string  `json:"name"`
+	Location *string `json:"location,omitempty"`
+	RPM      *int    `json:"rpm,omitempty"`
+	Status   *string `json:"status,omitempty"`
+}
+
+type HardwarePowerSensor struct { // HardwarePowerSensor is the element type of HardwareSensors.Power; only "name" is always emitted.
+	Name     string   `json:"name"`
+	Location *string  `json:"location,omitempty"`
+	VoltageV *float64 `json:"voltage_v,omitempty"`
+	CurrentA *float64 `json:"current_a,omitempty"`
+	PowerW   *float64 `json:"power_w,omitempty"`
+	Status   *string  `json:"status,omitempty"`
+}
+
+type HardwareTemperatureSensor struct { // HardwareTemperatureSensor is the element type of HardwareSensors.Temperatures; only "name" is always emitted.
+	Name                     string   `json:"name"`
+	Location                 *string  `json:"location,omitempty"`
+	Celsius                  *float64 `json:"celsius,omitempty"`
+	ThresholdWarningCelsius  *float64 `json:"threshold_warning_celsius,omitempty"`
+	ThresholdCriticalCelsius *float64 `json:"threshold_critical_celsius,omitempty"`
+	Status                   *string  `json:"status,omitempty"`
+}
+
+type HardwareOtherSensor struct { // HardwareOtherSensor is the element type of HardwareSensors.Other; only "name" is always emitted.
+	Name     string   `json:"name"`
+	Location *string  `json:"location,omitempty"`
+	Value    *float64 `json:"value,omitempty"`
+	Unit     *string  `json:"unit,omitempty"` // free-form string; no set of allowed units is defined in this file
+	Status   *string  `json:"status,omitempty"`
+}
+
+type HardwareEventLog struct { // HardwareEventLog is the element type of HardwareSnapshot.EventLogs; "source" and "message" are always emitted.
+	Source       string         `json:"source"`
+	EventTime    *string        `json:"event_time,omitempty"`
+	Severity     *string        `json:"severity,omitempty"`
+	MessageID    *string        `json:"message_id,omitempty"`
+	Message      string         `json:"message"`
+	ComponentRef *string        `json:"component_ref,omitempty"`
+	Fingerprint  *string        `json:"fingerprint,omitempty"`
+	IsActive     *bool          `json:"is_active,omitempty"`
+	RawPayload   map[string]any `json:"raw_payload,omitempty"` // a nil or empty map is omitted from the JSON
 }
--- a/audit/internal/schema/hardware_test.go
+++ b/audit/internal/schema/hardware_test.go
@@ -0,0 +1,46 @@
+package schema
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+func TestHardwareSnapshotMarshalsNewContractFields(t *testing.T) { // Marshals a minimal request and checks two of the new keys by substring match.
+	week := "2024-W07"
+	eventTime := "2026-03-15T14:03:11Z"
+	message := "Correctable ECC error threshold exceeded"
+
+	payload := HardwareIngestRequest{
+		CollectedAt: "2026-03-15T15:00:00Z",
+		Hardware: HardwareSnapshot{
+			Board: HardwareBoard{SerialNumber: "SRV-001"},
+			CPUs: []HardwareCPU{
+				{
+					HardwareComponentStatus: HardwareComponentStatus{ // set through the embedded struct; the key is expected in the CPU's JSON
+						ManufacturedYearWeek: &week,
+					},
+				},
+			},
+			EventLogs: []HardwareEventLog{
+				{
+					Source:    "bmc",
+					EventTime: &eventTime,
+					Message:   message,
+				},
+			},
+		},
+	}
+
+	data, err := json.Marshal(payload)
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	text := string(data)
+	if !strings.Contains(text, `"manufactured_year_week":"2024-W07"`) {
+		t.Fatalf("missing manufactured_year_week: %s", text)
+	}
+	if !strings.Contains(text, `"event_logs":[{"source":"bmc","event_time":"2026-03-15T14:03:11Z","message":"Correctable ECC error threshold exceeded"}]`) { // exact match depends on struct field order and on omitempty dropping every unset optional field
+		t.Fatalf("missing event_logs payload: %s", text)
+	}
+}
--- a/audit/internal/tui/forms.go
+++ b/audit/internal/tui/forms.go
@@ -1,98 +0,0 @@
-package tui
-
-import tea "github.com/charmbracelet/bubbletea"
-
-func (m model) updateStaticForm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "esc":
-		m.screen = screenNetwork
-		m.formFields = nil
-		m.formIndex = 0
-		return m, nil
-	case "up", "shift+tab":
-		if m.formIndex > 0 {
-			m.formIndex--
-		}
-	case "down", "tab":
-		if m.formIndex < len(m.formFields)-1 {
-			m.formIndex++
-		}
-	case "enter":
-		if m.formIndex < len(m.formFields)-1 {
-			m.formIndex++
-			return m, nil
-		}
-		cfg := m.app.ParseStaticIPv4Config(m.selectedIface, []string{
-			m.formFields[0].Value,
-			m.formFields[1].Value,
-			m.formFields[2].Value,
-			m.formFields[3].Value,
-		})
-		m.busy = true
-		return m, func() tea.Msg {
-			result, err := m.app.SetStaticIPv4Result(cfg)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case "backspace":
-		field := &m.formFields[m.formIndex]
-		if len(field.Value) > 0 {
-			field.Value = field.Value[:len(field.Value)-1]
-		}
-	default:
-		if msg.Type == tea.KeyRunes && len(msg.Runes) > 0 {
-			m.formFields[m.formIndex].Value += string(msg.Runes)
-		}
-	}
-	return m, nil
-}
-
-func (m model) updateConfirm(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch msg.String() {
-	case "left", "up", "tab":
-		if m.cursor > 0 {
-			m.cursor--
-		}
-	case "right", "down":
-		if m.cursor < 1 {
-			m.cursor++
-		}
-	case "esc":
-		m.screen = m.confirmCancelTarget()
-		m.cursor = 0
-		return m, nil
-	case "enter":
-		if m.cursor == 1 {
-			m.screen = m.confirmCancelTarget()
-			m.cursor = 0
-			return m, nil
-		}
-		m.busy = true
-		switch m.pendingAction {
-		case actionExportAudit:
-			target := *m.selectedTarget
-			return m, func() tea.Msg {
-				result, err := m.app.ExportLatestAuditResult(target)
-				return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
-			}
-		case actionRunNvidiaSAT:
-			return m, func() tea.Msg {
-				result, err := m.app.RunNvidiaAcceptancePackResult("")
-				return resultMsg{title: result.Title, body: result.Body, err: err, back: screenAcceptance}
-			}
-		}
-	case "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) confirmCancelTarget() screen {
-	switch m.pendingAction {
-	case actionExportAudit:
-		return screenExportTargets
-	case actionRunNvidiaSAT:
-		return screenAcceptance
-	default:
-		return screenMain
-	}
-}
--- a/audit/internal/tui/messages.go
+++ b/audit/internal/tui/messages.go
@@ -1,25 +0,0 @@
-package tui
-
-import "bee/audit/internal/platform"
-
-type resultMsg struct {
-	title string
-	body  string
-	err   error
-	back  screen
-}
-
-type servicesMsg struct {
-	services []string
-	err      error
-}
-
-type interfacesMsg struct {
-	ifaces []platform.InterfaceInfo
-	err    error
-}
-
-type exportTargetsMsg struct {
-	targets []platform.RemovableTarget
-	err     error
-}
--- a/audit/internal/tui/screen_acceptance.go
+++ b/audit/internal/tui/screen_acceptance.go
@@ -1,14 +0,0 @@
-package tui
-
-import tea "github.com/charmbracelet/bubbletea"
-
-func (m model) handleAcceptanceMenu() (tea.Model, tea.Cmd) {
-	if m.cursor == 1 {
-		m.screen = screenMain
-		m.cursor = 0
-		return m, nil
-	}
-	m.pendingAction = actionRunNvidiaSAT
-	m.screen = screenConfirm
-	return m, nil
-}
--- a/audit/internal/tui/screen_export.go
+++ b/audit/internal/tui/screen_export.go
@@ -1,14 +0,0 @@
-package tui
-
-import tea "github.com/charmbracelet/bubbletea"
-
-func (m model) handleExportTargetsMenu() (tea.Model, tea.Cmd) {
-	if len(m.targets) == 0 {
-		return m, resultCmd("Export audit", "No removable filesystems found", nil, screenMain)
-	}
-	target := m.targets[m.cursor]
-	m.selectedTarget = &target
-	m.pendingAction = actionExportAudit
-	m.screen = screenConfirm
-	return m, nil
-}
--- a/audit/internal/tui/screen_main.go
+++ b/audit/internal/tui/screen_main.go
@@ -1,51 +0,0 @@
-package tui
-
-import (
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleMainMenu() (tea.Model, tea.Cmd) {
-	switch m.cursor {
-	case 0:
-		m.screen = screenNetwork
-		m.cursor = 0
-		return m, nil
-	case 1:
-		m.busy = true
-		return m, func() tea.Msg {
-			services, err := m.app.ListBeeServices()
-			return servicesMsg{services: services, err: err}
-		}
-	case 2:
-		m.screen = screenAcceptance
-		m.cursor = 0
-		return m, nil
-	case 3:
-		m.busy = true
-		return m, func() tea.Msg {
-			result, err := m.app.RunAuditNow(m.runtimeMode)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenMain}
-		}
-	case 4:
-		m.busy = true
-		return m, func() tea.Msg {
-			targets, err := m.app.ListRemovableTargets()
-			return exportTargetsMsg{targets: targets, err: err}
-		}
-	case 5:
-		m.busy = true
-		return m, func() tea.Msg {
-			result := m.app.ToolCheckResult([]string{"dmidecode", "smartctl", "nvme", "ipmitool", "lspci", "bee", "nvidia-smi", "dhclient", "lsblk", "mount"})
-			return resultMsg{title: result.Title, body: result.Body, back: screenMain}
-		}
-	case 6:
-		m.busy = true
-		return m, func() tea.Msg {
-			result := m.app.AuditLogTailResult()
-			return resultMsg{title: result.Title, body: result.Body, back: screenMain}
-		}
-	case 7:
-		return m, tea.Quit
-	}
-	return m, nil
-}
--- a/audit/internal/tui/screen_network.go
+++ b/audit/internal/tui/screen_network.go
@@ -1,71 +0,0 @@
-package tui
-
-import (
-	"strings"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleNetworkMenu() (tea.Model, tea.Cmd) {
-	switch m.cursor {
-	case 0:
-		m.busy = true
-		return m, func() tea.Msg {
-			result, err := m.app.NetworkStatus()
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case 1:
-		m.busy = true
-		return m, func() tea.Msg {
-			result, err := m.app.DHCPAllResult()
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case 2:
-		m.pendingAction = actionDHCPOne
-		m.busy = true
-		return m, func() tea.Msg {
-			ifaces, err := m.app.ListInterfaces()
-			return interfacesMsg{ifaces: ifaces, err: err}
-		}
-	case 3:
-		m.pendingAction = actionStaticIPv4
-		m.busy = true
-		return m, func() tea.Msg {
-			ifaces, err := m.app.ListInterfaces()
-			return interfacesMsg{ifaces: ifaces, err: err}
-		}
-	case 4:
-		m.screen = screenMain
-		m.cursor = 0
-		return m, nil
-	}
-	return m, nil
-}
-
-func (m model) handleInterfacePickMenu() (tea.Model, tea.Cmd) {
-	if len(m.interfaces) == 0 {
-		return m, resultCmd("interfaces", "No physical interfaces found", nil, screenNetwork)
-	}
-	m.selectedIface = m.interfaces[m.cursor].Name
-	switch m.pendingAction {
-	case actionDHCPOne:
-		m.busy = true
-		return m, func() tea.Msg {
-			result, err := m.app.DHCPOneResult(m.selectedIface)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenNetwork}
-		}
-	case actionStaticIPv4:
-		defaults := m.app.DefaultStaticIPv4FormFields(m.selectedIface)
-		m.formFields = []formField{
-			{Label: "IPv4 address", Value: defaults[0]},
-			{Label: "Prefix", Value: defaults[1]},
-			{Label: "Gateway", Value: strings.TrimSpace(defaults[2])},
-			{Label: "DNS (space-separated)", Value: defaults[3]},
-		}
-		m.formIndex = 0
-		m.screen = screenStaticForm
-		return m, nil
-	default:
-		return m, nil
-	}
-}
--- a/audit/internal/tui/screen_services.go
+++ b/audit/internal/tui/screen_services.go
@@ -1,46 +0,0 @@
-package tui
-
-import (
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) handleServicesMenu() (tea.Model, tea.Cmd) {
-	if len(m.services) == 0 {
-		return m, resultCmd("bee services", "No bee-* services found", nil, screenMain)
-	}
-	m.selectedService = m.services[m.cursor]
-	m.screen = screenServiceAction
-	m.cursor = 0
-	return m, nil
-}
-
-func (m model) handleServiceActionMenu() (tea.Model, tea.Cmd) {
-	action := m.serviceMenu[m.cursor]
-	if action == "back" {
-		m.screen = screenServices
-		m.cursor = 0
-		return m, nil
-	}
-
-	m.busy = true
-	return m, func() tea.Msg {
-		switch action {
-		case "status":
-			result, err := m.app.ServiceStatusResult(m.selectedService)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "restart":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceRestart)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "start":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStart)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		case "stop":
-			result, err := m.app.ServiceActionResult(m.selectedService, platform.ServiceStop)
-			return resultMsg{title: result.Title, body: result.Body, err: err, back: screenServiceAction}
-		default:
-			return resultMsg{title: "service", body: "unknown action", back: screenServiceAction}
-		}
-	}
-}
--- a/audit/internal/tui/tui_test.go
+++ b/audit/internal/tui/tui_test.go
@@ -1,349 +0,0 @@
-package tui
-
-import (
-	"testing"
-
-	"bee/audit/internal/app"
-	"bee/audit/internal/platform"
-	"bee/audit/internal/runtimeenv"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func newTestModel() model {
-	return newModel(app.New(platform.New()), runtimeenv.ModeLocal)
-}
-
-func sendKey(t *testing.T, m model, key tea.KeyType) model {
-	t.Helper()
-
-	next, _ := m.Update(tea.KeyMsg{Type: key})
-	return next.(model)
-}
-
-func TestUpdateMainMenuCursorNavigation(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	m = sendKey(t, m, tea.KeyDown)
-	if m.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after down", m.cursor)
-	}
-
-	m = sendKey(t, m, tea.KeyDown)
-	if m.cursor != 2 {
-		t.Fatalf("cursor=%d want 2 after second down", m.cursor)
-	}
-
-	m = sendKey(t, m, tea.KeyUp)
-	if m.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after up", m.cursor)
-	}
-}
-
-func TestUpdateMainMenuEnterActions(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		cursor     int
-		wantScreen screen
-		wantBusy   bool
-		wantCmd    bool
-	}{
-		{name: "network", cursor: 0, wantScreen: screenNetwork},
-		{name: "services", cursor: 1, wantScreen: screenMain, wantBusy: true, wantCmd: true},
-		{name: "acceptance", cursor: 2, wantScreen: screenAcceptance},
-		{name: "run audit", cursor: 3, wantScreen: screenMain, wantBusy: true, wantCmd: true},
-		{name: "export", cursor: 4, wantScreen: screenMain, wantBusy: true, wantCmd: true},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.cursor = test.cursor
-
-			next, cmd := m.Update(tea.KeyMsg{Type: tea.KeyEnter})
-			got := next.(model)
-
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.busy != test.wantBusy {
-				t.Fatalf("busy=%v want %v", got.busy, test.wantBusy)
-			}
-			if (cmd != nil) != test.wantCmd {
-				t.Fatalf("cmd present=%v want %v", cmd != nil, test.wantCmd)
-			}
-		})
-	}
-}
-
-func TestUpdateConfirmCancelViaKeys(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenConfirm
-	m.pendingAction = actionRunNvidiaSAT
-
-	next, _ := m.Update(tea.KeyMsg{Type: tea.KeyRight})
-	got := next.(model)
-	if got.cursor != 1 {
-		t.Fatalf("cursor=%d want 1 after right", got.cursor)
-	}
-
-	next, _ = got.Update(tea.KeyMsg{Type: tea.KeyEnter})
-	got = next.(model)
-	if got.screen != screenAcceptance {
-		t.Fatalf("screen=%q want %q", got.screen, screenAcceptance)
-	}
-	if got.cursor != 0 {
-		t.Fatalf("cursor=%d want 0 after cancel", got.cursor)
-	}
-}
-
-func TestMainMenuSimpleTransitions(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		cursor     int
-		wantScreen screen
-	}{
-		{name: "network", cursor: 0, wantScreen: screenNetwork},
-		{name: "acceptance", cursor: 2, wantScreen: screenAcceptance},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.cursor = test.cursor
-
-			next, cmd := m.handleMainMenu()
-			got := next.(model)
-
-			if cmd != nil {
-				t.Fatalf("expected nil cmd for %s", test.name)
-			}
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.cursor != 0 {
-				t.Fatalf("cursor=%d want 0", got.cursor)
-			}
-		})
-	}
-}
-
-func TestMainMenuAsyncActionsSetBusy(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name   string
-		cursor int
-	}{
-		{name: "services", cursor: 1},
-		{name: "run audit", cursor: 3},
-		{name: "export", cursor: 4},
-		{name: "check tools", cursor: 5},
-		{name: "log tail", cursor: 6},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.cursor = test.cursor
-
-			next, cmd := m.handleMainMenu()
-			got := next.(model)
-
-			if !got.busy {
-				t.Fatalf("busy=false for %s", test.name)
-			}
-			if cmd == nil {
-				t.Fatalf("expected async cmd for %s", test.name)
-			}
-		})
-	}
-}
-
-func TestEscapeNavigation(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name       string
-		screen     screen
-		wantScreen screen
-	}{
-		{name: "network to main", screen: screenNetwork, wantScreen: screenMain},
-		{name: "services to main", screen: screenServices, wantScreen: screenMain},
-		{name: "acceptance to main", screen: screenAcceptance, wantScreen: screenMain},
-		{name: "service action to services", screen: screenServiceAction, wantScreen: screenServices},
-		{name: "export targets to main", screen: screenExportTargets, wantScreen: screenMain},
-		{name: "interface pick to network", screen: screenInterfacePick, wantScreen: screenNetwork},
-	}
-
-	for _, test := range tests {
-		test := test
-		t.Run(test.name, func(t *testing.T) {
-			t.Parallel()
-
-			m := newTestModel()
-			m.screen = test.screen
-			m.cursor = 3
-
-			next, _ := m.updateKey(tea.KeyMsg{Type: tea.KeyEsc})
-			got := next.(model)
-
-			if got.screen != test.wantScreen {
-				t.Fatalf("screen=%q want %q", got.screen, test.wantScreen)
-			}
-			if got.cursor != 0 {
-				t.Fatalf("cursor=%d want 0", got.cursor)
-			}
-		})
-	}
-}
-
-func TestOutputScreenReturnsToPreviousScreen(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenOutput
-	m.prevScreen = screenNetwork
-	m.title = "title"
-	m.body = "body"
-
-	next, _ := m.updateKey(tea.KeyMsg{Type: tea.KeyEnter})
-	got := next.(model)
-
-	if got.screen != screenNetwork {
-		t.Fatalf("screen=%q want %q", got.screen, screenNetwork)
-	}
-	if got.title != "" || got.body != "" {
-		t.Fatalf("expected output state cleared, got title=%q body=%q", got.title, got.body)
-	}
-}
-
-func TestAcceptanceConfirmFlow(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenAcceptance
-	m.cursor = 0
-
-	next, cmd := m.handleAcceptanceMenu()
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.screen != screenConfirm {
-		t.Fatalf("screen=%q want %q", got.screen, screenConfirm)
-	}
-	if got.pendingAction != actionRunNvidiaSAT {
-		t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionRunNvidiaSAT)
-	}
-
-	next, _ = got.updateConfirm(tea.KeyMsg{Type: tea.KeyEsc})
-	got = next.(model)
-	if got.screen != screenAcceptance {
-		t.Fatalf("screen after esc=%q want %q", got.screen, screenAcceptance)
-	}
-}
-
-func TestExportTargetSelectionOpensConfirm(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenExportTargets
-	m.targets = []platform.RemovableTarget{{Device: "/dev/sdb1", FSType: "vfat", Size: "16G"}}
-
-	next, cmd := m.handleExportTargetsMenu()
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.screen != screenConfirm {
-		t.Fatalf("screen=%q want %q", got.screen, screenConfirm)
-	}
-	if got.pendingAction != actionExportAudit {
-		t.Fatalf("pendingAction=%q want %q", got.pendingAction, actionExportAudit)
-	}
-	if got.selectedTarget == nil || got.selectedTarget.Device != "/dev/sdb1" {
-		t.Fatalf("selectedTarget=%+v want /dev/sdb1", got.selectedTarget)
-	}
-}
-
-func TestInterfacePickStaticIPv4OpensForm(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.pendingAction = actionStaticIPv4
-	m.interfaces = []platform.InterfaceInfo{{Name: "eth0"}}
-
-	next, cmd := m.handleInterfacePickMenu()
-	got := next.(model)
-
-	if cmd != nil {
-		t.Fatal("expected nil cmd")
-	}
-	if got.screen != screenStaticForm {
-		t.Fatalf("screen=%q want %q", got.screen, screenStaticForm)
-	}
-	if got.selectedIface != "eth0" {
-		t.Fatalf("selectedIface=%q want eth0", got.selectedIface)
-	}
-	if len(got.formFields) != 4 {
-		t.Fatalf("len(formFields)=%d want 4", len(got.formFields))
-	}
-}
-
-func TestResultMsgUsesExplicitBackScreen(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-	m.screen = screenConfirm
-
-	next, _ := m.Update(resultMsg{title: "done", body: "ok", back: screenNetwork})
-	got := next.(model)
-
-	if got.screen != screenOutput {
-		t.Fatalf("screen=%q want %q", got.screen, screenOutput)
-	}
-	if got.prevScreen != screenNetwork {
-		t.Fatalf("prevScreen=%q want %q", got.prevScreen, screenNetwork)
-	}
-}
-
-func TestConfirmCancelTarget(t *testing.T) {
-	t.Parallel()
-
-	m := newTestModel()
-
-	m.pendingAction = actionExportAudit
-	if got := m.confirmCancelTarget(); got != screenExportTargets {
-		t.Fatalf("export cancel target=%q want %q", got, screenExportTargets)
-	}
-
-	m.pendingAction = actionRunNvidiaSAT
-	if got := m.confirmCancelTarget(); got != screenAcceptance {
-		t.Fatalf("sat cancel target=%q want %q", got, screenAcceptance)
-	}
-
-	m.pendingAction = actionNone
-	if got := m.confirmCancelTarget(); got != screenMain {
-		t.Fatalf("default cancel target=%q want %q", got, screenMain)
-	}
-}
--- a/audit/internal/tui/types.go
+++ b/audit/internal/tui/types.go
@@ -1,111 +0,0 @@
-package tui
-
-import (
-	"bee/audit/internal/app"
-	"bee/audit/internal/platform"
-	"bee/audit/internal/runtimeenv"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-type screen string
-
-const (
-	screenMain          screen = "main"
-	screenNetwork       screen = "network"
-	screenInterfacePick screen = "interface_pick"
-	screenServices      screen = "services"
-	screenServiceAction screen = "service_action"
-	screenAcceptance    screen = "acceptance"
-	screenExportTargets screen = "export_targets"
-	screenOutput        screen = "output"
-	screenStaticForm    screen = "static_form"
-	screenConfirm       screen = "confirm"
-)
-
-type actionKind string
-
-const (
-	actionNone         actionKind = ""
-	actionDHCPOne      actionKind = "dhcp_one"
-	actionStaticIPv4   actionKind = "static_ipv4"
-	actionExportAudit  actionKind = "export_audit"
-	actionRunNvidiaSAT actionKind = "run_nvidia_sat"
-)
-
-type model struct {
-	app         *app.App
-	runtimeMode runtimeenv.Mode
-
-	screen      screen
-	prevScreen  screen
-	cursor      int
-	busy        bool
-	title       string
-	body        string
-	mainMenu    []string
-	networkMenu []string
-	serviceMenu []string
-
-	services        []string
-	interfaces      []platform.InterfaceInfo
-	targets         []platform.RemovableTarget
-	selectedService string
-	selectedIface   string
-	selectedTarget  *platform.RemovableTarget
-	pendingAction   actionKind
-
-	formFields []formField
-	formIndex  int
-}
-
-type formField struct {
-	Label string
-	Value string
-}
-
-func Run(application *app.App, runtimeMode runtimeenv.Mode) error {
-	options := []tea.ProgramOption{}
-	if runtimeMode != runtimeenv.ModeLiveCD {
-		options = append(options, tea.WithAltScreen())
-	}
-	program := tea.NewProgram(newModel(application, runtimeMode), options...)
-	_, err := program.Run()
-	return err
-}
-
-func newModel(application *app.App, runtimeMode runtimeenv.Mode) model {
-	return model{
-		app:         application,
-		runtimeMode: runtimeMode,
-		screen:      screenMain,
-		mainMenu: []string{
-			"Network setup",
-			"bee service management",
-			"System acceptance tests",
-			"Run audit now",
-			"Export audit to removable drive",
-			"Check required tools",
-			"Show last audit log tail",
-			"Exit",
-		},
-		networkMenu: []string{
-			"Show network status",
-			"DHCP on all interfaces",
-			"DHCP on one interface",
-			"Set static IPv4 on one interface",
-			"Back",
-		},
-		serviceMenu: []string{
-			"status",
-			"restart",
-			"start",
-			"stop",
-			"back",
-		},
-	}
-}
-
-func (m model) Init() tea.Cmd {
-	return nil
-}
--- a/audit/internal/tui/update.go
+++ b/audit/internal/tui/update.go
@@ -1,154 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"strings"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
-	switch msg := msg.(type) {
-	case tea.KeyMsg:
-		if m.busy {
-			switch msg.String() {
-			case "ctrl+c":
-				return m, tea.Quit
-			default:
-				return m, nil
-			}
-		}
-		return m.updateKey(msg)
-	case resultMsg:
-		m.busy = false
-		m.title = msg.title
-		if msg.err != nil {
-			m.body = fmt.Sprintf("%s\n\nERROR: %v", strings.TrimSpace(msg.body), msg.err)
-		} else {
-			m.body = msg.body
-		}
-		if msg.back != "" {
-			m.prevScreen = msg.back
-		} else {
-			m.prevScreen = m.screen
-		}
-		m.screen = screenOutput
-		m.cursor = 0
-		return m, nil
-	case servicesMsg:
-		m.busy = false
-		if msg.err != nil {
-			m.title = "bee services"
-			m.body = msg.err.Error()
-			m.prevScreen = screenMain
-			m.screen = screenOutput
-			return m, nil
-		}
-		m.services = msg.services
-		m.screen = screenServices
-		m.cursor = 0
-		return m, nil
-	case interfacesMsg:
-		m.busy = false
-		if msg.err != nil {
-			m.title = "interfaces"
-			m.body = msg.err.Error()
-			m.prevScreen = screenMain
-			m.screen = screenOutput
-			return m, nil
-		}
-		m.interfaces = msg.ifaces
-		m.screen = screenInterfacePick
-		m.cursor = 0
-		return m, nil
-	case exportTargetsMsg:
-		m.busy = false
-		if msg.err != nil {
-			m.title = "export"
-			m.body = msg.err.Error()
-			m.prevScreen = screenMain
-			m.screen = screenOutput
-			return m, nil
-		}
-		m.targets = msg.targets
-		m.screen = screenExportTargets
-		m.cursor = 0
-		return m, nil
-	}
-
-	return m, nil
-}
-
-func (m model) updateKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) {
-	switch m.screen {
-	case screenMain:
-		return m.updateMenu(msg, len(m.mainMenu), m.handleMainMenu)
-	case screenNetwork:
-		return m.updateMenu(msg, len(m.networkMenu), m.handleNetworkMenu)
-	case screenServices:
-		return m.updateMenu(msg, len(m.services), m.handleServicesMenu)
-	case screenServiceAction:
-		return m.updateMenu(msg, len(m.serviceMenu), m.handleServiceActionMenu)
-	case screenAcceptance:
-		return m.updateMenu(msg, 2, m.handleAcceptanceMenu)
-	case screenExportTargets:
-		return m.updateMenu(msg, len(m.targets), m.handleExportTargetsMenu)
-	case screenInterfacePick:
-		return m.updateMenu(msg, len(m.interfaces), m.handleInterfacePickMenu)
-	case screenOutput:
-		switch msg.String() {
-		case "esc", "enter", "q":
-			m.screen = m.prevScreen
-			m.body = ""
-			m.title = ""
-			return m, nil
-		case "ctrl+c":
-			return m, tea.Quit
-		}
-	case screenStaticForm:
-		return m.updateStaticForm(msg)
-	case screenConfirm:
-		return m.updateConfirm(msg)
-	}
-
-	if msg.String() == "ctrl+c" {
-		return m, tea.Quit
-	}
-	return m, nil
-}
-
-func (m model) updateMenu(msg tea.KeyMsg, size int, onEnter func() (tea.Model, tea.Cmd)) (tea.Model, tea.Cmd) {
-	if size == 0 {
-		size = 1
-	}
-	switch msg.String() {
-	case "up", "k":
-		if m.cursor > 0 {
-			m.cursor--
-		}
-	case "down", "j":
-		if m.cursor < size-1 {
-			m.cursor++
-		}
-	case "enter":
-		return onEnter()
-	case "esc":
-		switch m.screen {
-		case screenNetwork, screenServices, screenAcceptance:
-			m.screen = screenMain
-			m.cursor = 0
-		case screenServiceAction:
-			m.screen = screenServices
-			m.cursor = 0
-		case screenExportTargets:
-			m.screen = screenMain
-			m.cursor = 0
-		case screenInterfacePick:
-			m.screen = screenNetwork
-			m.cursor = 0
-		}
-	case "q", "ctrl+c":
-		return m, tea.Quit
-	}
-	return m, nil
-}
--- a/audit/internal/tui/view.go
+++ b/audit/internal/tui/view.go
@@ -1,137 +0,0 @@
-package tui
-
-import (
-	"fmt"
-	"strings"
-
-	"bee/audit/internal/platform"
-
-	tea "github.com/charmbracelet/bubbletea"
-)
-
-func (m model) View() string {
-	if m.busy {
-		return "bee\n\nWorking...\n"
-	}
-	switch m.screen {
-	case screenMain:
-		return renderMenu("bee", "Select action", m.mainMenu, m.cursor)
-	case screenNetwork:
-		return renderMenu("Network", "Select action", m.networkMenu, m.cursor)
-	case screenServices:
-		return renderMenu("bee services", "Select service", m.services, m.cursor)
-	case screenServiceAction:
-		items := make([]string, len(m.serviceMenu))
-		copy(items, m.serviceMenu)
-		return renderMenu("Service: "+m.selectedService, "Select action", items, m.cursor)
-	case screenAcceptance:
-		return renderMenu("System acceptance tests", "Select action", []string{"Run NVIDIA command pack", "Back"}, m.cursor)
-	case screenExportTargets:
-		return renderMenu("Export audit", "Select removable filesystem", renderTargetItems(m.targets), m.cursor)
-	case screenInterfacePick:
-		return renderMenu("Interfaces", "Select interface", renderInterfaceItems(m.interfaces), m.cursor)
-	case screenStaticForm:
-		return renderForm("Static IPv4: "+m.selectedIface, m.formFields, m.formIndex)
-	case screenConfirm:
-		title, body := m.confirmBody()
-		return renderConfirm(title, body, m.cursor)
-	case screenOutput:
-		return fmt.Sprintf("%s\n\n%s\n\n[enter/esc] back  [ctrl+c] quit\n", m.title, strings.TrimSpace(m.body))
-	default:
-		return "bee\n"
-	}
-}
-
-func (m model) confirmBody() (string, string) {
-	switch m.pendingAction {
-	case actionExportAudit:
-		if m.selectedTarget == nil {
-			return "Export audit", "No target selected"
-		}
-		return "Export audit", fmt.Sprintf("Copy latest audit JSON to %s?", m.selectedTarget.Device)
-	case actionRunNvidiaSAT:
-		return "NVIDIA SAT", "Run NVIDIA acceptance command pack?"
-	default:
-		return "Confirm", "Proceed?"
-	}
-}
-
-func renderTargetItems(targets []platform.RemovableTarget) []string {
-	items := make([]string, 0, len(targets))
-	for _, target := range targets {
-		desc := fmt.Sprintf("%s [%s %s]", target.Device, target.FSType, target.Size)
-		if target.Label != "" {
-			desc += " label=" + target.Label
-		}
-		if target.Mountpoint != "" {
-			desc += " mounted=" + target.Mountpoint
-		}
-		items = append(items, desc)
-	}
-	return items
-}
-
-func renderInterfaceItems(interfaces []platform.InterfaceInfo) []string {
-	items := make([]string, 0, len(interfaces))
-	for _, iface := range interfaces {
-		label := iface.Name
-		if len(iface.IPv4) > 0 {
-			label += " [" + strings.Join(iface.IPv4, ", ") + "]"
-		}
-		items = append(items, label)
-	}
-	return items
-}
-
-func renderMenu(title, subtitle string, items []string, cursor int) string {
-	var body strings.Builder
-	fmt.Fprintf(&body, "%s\n\n%s\n\n", title, subtitle)
-	if len(items) == 0 {
-		body.WriteString("(no items)\n")
-	} else {
-		for i, item := range items {
-			prefix := "  "
-			if i == cursor {
-				prefix = "> "
-			}
-			fmt.Fprintf(&body, "%s%s\n", prefix, item)
-		}
-	}
-	body.WriteString("\n[↑/↓] move  [enter] select  [esc] back  [ctrl+c] quit\n")
-	return body.String()
-}
-
-func renderForm(title string, fields []formField, idx int) string {
-	var body strings.Builder
-	fmt.Fprintf(&body, "%s\n\n", title)
-	for i, field := range fields {
-		prefix := "  "
-		if i == idx {
-			prefix = "> "
-		}
-		fmt.Fprintf(&body, "%s%s: %s\n", prefix, field.Label, field.Value)
-	}
-	body.WriteString("\n[tab/↑/↓] move  [enter] next/submit  [backspace] delete  [esc] cancel\n")
-	return body.String()
-}
-
-func renderConfirm(title, body string, cursor int) string {
-	options := []string{"Confirm", "Cancel"}
-	var out strings.Builder
-	fmt.Fprintf(&out, "%s\n\n%s\n\n", title, body)
-	for i, option := range options {
-		prefix := "  "
-		if i == cursor {
-			prefix = "> "
-		}
-		fmt.Fprintf(&out, "%s%s\n", prefix, option)
-	}
-	out.WriteString("\n[←/→/↑/↓] move  [enter] select  [esc] cancel\n")
-	return out.String()
-}
-
-func resultCmd(title, body string, err error, back screen) tea.Cmd {
-	return func() tea.Msg {
-		return resultMsg{title: title, body: body, err: err, back: back}
-	}
-}
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
--- a/Show More
+++ b/Show More