Refine validate UI and runtime health table

Refine burn UI and NVIDIA stress flows
Unify metrics charts on custom SVG renderer
2026-04-05 16:24:45 +03:00 · 2026-04-05 13:43:43 +03:00 · 2026-04-05 12:17:50 +03:00 · 2026-04-05 12:05:00 +03:00 · 2026-04-05 11:52:32 +03:00 · 2026-04-05 10:39:09 +03:00
27 changed files with 3069 additions and 709 deletions
--- a/audit/Makefile
+++ b/audit/Makefile
@@ -1,9 +1,10 @@
 LISTEN ?= :8080
 AUDIT_PATH ?=
+EXPORT_DIR ?= $(CURDIR)/.tmp/export
 VERSION ?= $(shell sh ./scripts/resolve-version.sh)
 GO_LDFLAGS := -X main.Version=$(VERSION)

-RUN_ARGS := web --listen $(LISTEN)
+RUN_ARGS := web --listen $(LISTEN) --export-dir $(EXPORT_DIR)
 ifneq ($(AUDIT_PATH),)
 RUN_ARGS += --audit-path $(AUDIT_PATH)
 endif
@@ -11,6 +12,7 @@ endif
 .PHONY: run build test

 run:
+	mkdir -p $(EXPORT_DIR)
 	go run -ldflags "$(GO_LDFLAGS)" ./cmd/bee $(RUN_ARGS)

 build:
--- a/audit/cmd/bee/main.go
+++ b/audit/cmd/bee/main.go
@@ -87,7 +87,7 @@ func printRootUsage(w io.Writer) {
  bee preflight --output stdout|file:<path>
  bee export  --target <device>
  bee support-bundle --output stdout|file:<path>
-  bee web     --listen :80 --audit-path `+app.DefaultAuditJSONPath+`
+  bee web     --listen :80 [--audit-path `+app.DefaultAuditJSONPath+`]
  bee sat nvidia|memory|storage|cpu [--duration <seconds>]
  bee benchmark nvidia [--profile standard|stability|overnight]
  bee version
@@ -296,7 +296,7 @@ func runWeb(args []string, stdout, stderr io.Writer) int {
 	fs := flag.NewFlagSet("web", flag.ContinueOnError)
 	fs.SetOutput(stderr)
 	listenAddr := fs.String("listen", ":8080", "listen address, e.g. :80")
-	auditPath := fs.String("audit-path", app.DefaultAuditJSONPath, "path to the latest audit JSON snapshot")
+	auditPath := fs.String("audit-path", "", "optional path to the latest audit JSON snapshot")
 	exportDir := fs.String("export-dir", app.DefaultExportDir, "directory with logs, SAT results, and support bundles")
 	title := fs.String("title", "Bee Hardware Audit", "page title")
 	fs.Usage = func() {
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -115,7 +115,12 @@ func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 type satRunner interface {
 	RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error)
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
+	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+	RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaStressPack(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error)
 	RunMemoryAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
 	RunStorageAcceptancePack(ctx context.Context, baseDir string, logFunc func(string)) (string, error)
@@ -528,6 +533,13 @@ func (a *App) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir st
 	return ActionResult{Title: "NVIDIA DCGM", Body: body}, err
 }

+func (a *App) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" { // blank or whitespace-only baseDir falls back to DefaultSATBaseDir
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaTargetedStressValidatePack(ctx, baseDir, durationSec, gpuIndices, logFunc) // delegate unchanged to the SAT runner
+}
+
 func (a *App) RunNvidiaStressPack(baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
 	return a.RunNvidiaStressPackCtx(context.Background(), baseDir, opts, logFunc)
 }
@@ -543,6 +555,34 @@ func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts pl
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }

+func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" { // blank or whitespace-only baseDir falls back to DefaultSATBaseDir
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc) // delegate unchanged to the SAT runner
+}
+
+func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" { // blank or whitespace-only baseDir falls back to DefaultSATBaseDir
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaTargetedPowerPack(ctx, baseDir, durationSec, gpuIndices, logFunc) // delegate unchanged to the SAT runner
+}
+
+func (a *App) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" { // blank or whitespace-only baseDir falls back to DefaultSATBaseDir
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaPulseTestPack(ctx, baseDir, durationSec, gpuIndices, logFunc) // delegate unchanged to the SAT runner
+}
+
+func (a *App) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" { // blank or whitespace-only baseDir falls back to DefaultSATBaseDir
+		baseDir = DefaultSATBaseDir
+	}
+	return a.sat.RunNvidiaBandwidthPack(ctx, baseDir, gpuIndices, logFunc) // delegate unchanged to the SAT runner
+}
+
 func (a *App) RunNvidiaStressPackCtx(ctx context.Context, baseDir string, opts platform.NvidiaStressOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
@@ -893,6 +933,12 @@ func latestSATSummaries() []string {
 		prefix string
 	}{
 		{label: "NVIDIA SAT", prefix: "gpu-nvidia-"},
+		{label: "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)", prefix: "gpu-nvidia-targeted-stress-"},
+		{label: "NVIDIA Max Compute Load (dcgmproftester)", prefix: "gpu-nvidia-compute-"},
+		{label: "NVIDIA Targeted Power (dcgmi diag targeted_power)", prefix: "gpu-nvidia-targeted-power-"},
+		{label: "NVIDIA Pulse Test (dcgmi diag pulse_test)", prefix: "gpu-nvidia-pulse-"},
+		{label: "NVIDIA Interconnect Test (NCCL all_reduce_perf)", prefix: "gpu-nvidia-nccl-"},
+		{label: "NVIDIA Bandwidth Test (NVBandwidth)", prefix: "gpu-nvidia-bandwidth-"},
 		{label: "Memory SAT", prefix: "memory-"},
 		{label: "Storage SAT", prefix: "storage-"},
 		{label: "CPU SAT", prefix: "cpu-"},
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -123,6 +123,11 @@ type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
+	runNvidiaComputeFn        func(string, int, []int) (string, error)
+	runNvidiaPowerFn          func(string, int, []int) (string, error)
+	runNvidiaPulseFn          func(string, int, []int) (string, error)
+	runNvidiaBandwidthFn      func(string, []int) (string, error)
+	runNvidiaTargetedStressFn func(string, int, []int) (string, error)
 	runMemoryFn               func(string) (string, error)
 	runStorageFn              func(string) (string, error)
 	runCPUFn                  func(string, int) (string, error)
@@ -147,6 +152,41 @@ func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts plat
 	return f.runNvidiaFn(baseDir)
 }

+func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if stub := f.runNvidiaTargetedStressFn; stub != nil {
+		return stub(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir) // no dedicated stub set: reuse the generic NVIDIA fake
+}
+
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if stub := f.runNvidiaComputeFn; stub != nil {
+		return stub(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir) // no dedicated stub set: reuse the generic NVIDIA fake
+}
+
+func (f fakeSAT) RunNvidiaTargetedPowerPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if stub := f.runNvidiaPowerFn; stub != nil {
+		return stub(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir) // no dedicated stub set: reuse the generic NVIDIA fake
+}
+
+func (f fakeSAT) RunNvidiaPulseTestPack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+	if stub := f.runNvidiaPulseFn; stub != nil {
+		return stub(baseDir, durationSec, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir) // no dedicated stub set: reuse the generic NVIDIA fake
+}
+
+func (f fakeSAT) RunNvidiaBandwidthPack(_ context.Context, baseDir string, gpuIndices []int, _ func(string)) (string, error) {
+	if stub := f.runNvidiaBandwidthFn; stub != nil {
+		return stub(baseDir, gpuIndices)
+	}
+	return f.runNvidiaFn(baseDir) // no dedicated stub set: reuse the generic NVIDIA fake
+}
+
 func (f fakeSAT) RunNvidiaStressPack(_ context.Context, baseDir string, opts platform.NvidiaStressOptions, _ func(string)) (string, error) {
 	if f.runNvidiaStressFn != nil {
 		return f.runNvidiaStressFn(baseDir, opts)
--- a/audit/internal/app/component_status_db.go
+++ b/audit/internal/app/component_status_db.go
@@ -179,7 +179,9 @@ func ApplySATResultToDB(db *ComponentStatusDB, target, archivePath string) {

 	// Map SAT target to component keys.
 	switch target {
-	case "nvidia", "amd", "nvidia-stress", "amd-stress", "amd-mem", "amd-bandwidth":
+	case "nvidia", "nvidia-targeted-stress", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-interconnect", "nvidia-bandwidth", "amd", "nvidia-stress",
+		"amd-stress", "amd-mem", "amd-bandwidth":
 		db.Record("pcie:gpu:"+target, source, dbStatus, target+" SAT: "+overall)
 	case "memory", "memory-stress", "sat-stress":
 		db.Record("memory:all", source, dbStatus, target+" SAT: "+overall)
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -274,9 +274,6 @@ func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) Nv
 	}
 	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
 	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
-	if !opts.RunNCCL {
-		opts.RunNCCL = true
-	}
 	return opts
 }

--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -41,6 +41,21 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 	}
 }

+// TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice checks that an explicit RunNCCL=false survives normalization.
+func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
+	t.Parallel()
+	got := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
+		Profile: "stability",
+		RunNCCL: false,
+	})
+	if got.Profile != NvidiaBenchmarkProfileStability {
+		t.Fatalf("profile=%q want %q", got.Profile, NvidiaBenchmarkProfileStability)
+	}
+	if got.RunNCCL {
+		t.Fatalf("RunNCCL should stay false when explicitly disabled")
+	}
+}
+
 func TestParseBenchmarkBurnLog(t *testing.T) {
 	t.Parallel()

--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -120,10 +120,45 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 		log(fmt.Sprintf("Warning: rebind /run/live/medium failed: %v", err))
 	}

+	log("Verifying live medium now served from RAM...")
+	status := s.LiveBootSource()
+	if err := verifyInstallToRAMStatus(status); err != nil {
+		return err
+	}
+	log(fmt.Sprintf("Verification passed: live medium now served from %s.", describeLiveBootSource(status)))
 	log("Done. Installation media can be safely disconnected.")
 	return nil
 }

+func verifyInstallToRAMStatus(status LiveBootSource) error {
+	if !status.InRAM { // not RAM-backed: report where the live medium is still mounted from
+		return fmt.Errorf("install to RAM verification failed: live medium still mounted from %s", describeLiveBootSource(status))
+	}
+	return nil
+}
+
+// describeLiveBootSource labels status for log and error messages: "RAM" for kind "ram",
+// "<medium> (<source>)" for usb/cdrom/disk, and the bare source for any other kind.
+func describeLiveBootSource(status LiveBootSource) string {
+	source := "unknown source"
+	if dev := strings.TrimSpace(status.Device); dev != "" {
+		source = dev
+	} else if src := strings.TrimSpace(status.Source); src != "" {
+		source = src
+	}
+	switch strings.TrimSpace(status.Kind) {
+	case "ram":
+		return "RAM"
+	case "usb":
+		return "USB (" + source + ")"
+	case "cdrom":
+		return "CD-ROM (" + source + ")"
+	case "disk":
+		return "disk (" + source + ")"
+	}
+	return source
+}
+
 func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) error {
 	in, err := os.Open(src)
 	if err != nil {
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -3,6 +3,8 @@ package platform
 import "testing"

 func TestInferLiveBootKind(t *testing.T) {
+	t.Parallel()
+
 	tests := []struct {
 		name       string
 		fsType     string
@@ -18,6 +20,7 @@ func TestInferLiveBootKind(t *testing.T) {
 		{name: "unknown", source: "overlay", want: "unknown"},
 	}
 	for _, tc := range tests {
+		tc := tc
 		t.Run(tc.name, func(t *testing.T) {
 			got := inferLiveBootKind(tc.fsType, tc.source, tc.deviceType, tc.transport)
 			if got != tc.want {
@@ -26,3 +29,29 @@ func TestInferLiveBootKind(t *testing.T) {
 		})
 	}
 }
+
+func TestVerifyInstallToRAMStatus(t *testing.T) {
+	t.Parallel()
+	const want = "install to RAM verification failed: live medium still mounted from USB (/dev/sdb1)"
+	if err := verifyInstallToRAMStatus(LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}); err != nil {
+		t.Fatalf("expected success for RAM-backed status, got %v", err)
+	}
+	err := verifyInstallToRAMStatus(LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"})
+	if err == nil {
+		t.Fatal("expected verification failure when media is still on USB")
+	}
+	if got := err.Error(); got != want {
+		t.Fatalf("error=%q", got)
+	}
+}
+
+// TestDescribeLiveBootSource covers the "ram" label and the bare-source result for an unrecognized kind.
+func TestDescribeLiveBootSource(t *testing.T) {
+	t.Parallel()
+	if label := describeLiveBootSource(LiveBootSource{InRAM: true, Kind: "ram"}); label != "RAM" {
+		t.Fatalf("got %q want RAM", label)
+	}
+	if label := describeLiveBootSource(LiveBootSource{Kind: "unknown", Source: "/run/live/medium"}); label != "/run/live/medium" {
+		t.Fatalf("got %q want /run/live/medium", label)
+	}
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -135,12 +135,15 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
 	case "nvidia":
 		tools = append(tools, s.CheckTools([]string{
 			"nvidia-smi",
+			"dcgmi",
+			"nv-hostengine",
 			"nvidia-bug-report.sh",
 			"bee-gpu-burn",
 			"bee-john-gpu-stress",
 			"bee-nccl-gpu-stress",
 			"all_reduce_perf",
 		})...)
+		tools = append(tools, resolvedToolStatus("dcgmproftester", dcgmProfTesterCandidates...))
 	case "amd":
 		tool := ToolStatus{Name: "rocm-smi"}
 		if cmd, err := resolveROCmSMICommand(); err == nil && len(cmd) > 0 {
@@ -155,6 +158,16 @@ func (s *System) runtimeToolStatuses(vendor string) []ToolStatus {
 	return tools
 }

+// resolvedToolStatus reports display as OK with the path of the first candidate exec.LookPath resolves.
+func resolvedToolStatus(display string, candidates ...string) ToolStatus {
+	for _, binary := range candidates {
+		if resolved, err := exec.LookPath(binary); err == nil {
+			return ToolStatus{Name: display, Path: resolved, OK: true}
+		}
+	}
+	return ToolStatus{Name: display}
+}
+
 func (s *System) collectGPURuntimeHealth(vendor string, health *schema.RuntimeHealth) {
 	lsmodText := commandText("lsmod")

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -12,11 +12,11 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
-	"syscall"
 	"sort"
 	"strconv"
 	"strings"
 	"sync"
+	"syscall"
 	"time"
 )

@@ -38,6 +38,12 @@ var (
 		"/opt/rocm/bin/rvs",
 		"/opt/rocm-*/bin/rvs",
 	}
+	dcgmProfTesterCandidates = []string{
+		"dcgmproftester",
+		"dcgmproftester13",
+		"dcgmproftester12",
+		"dcgmproftester11",
+	}
 )

 // streamExecOutput runs cmd and streams each output line to logFunc (if non-nil).
@@ -76,15 +82,15 @@ func streamExecOutput(cmd *exec.Cmd, logFunc func(string)) ([]byte, error) {

 // NvidiaGPU holds basic GPU info from nvidia-smi.
 type NvidiaGPU struct {
-	Index    int
-	Name     string
-	MemoryMB int
+	Index    int    `json:"index"`
+	Name     string `json:"name"`
+	MemoryMB int    `json:"memory_mb"`
 }

 // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
 type AMDGPUInfo struct {
-	Index int
-	Name  string
+	Index int    `json:"index"`
+	Name  string `json:"name"`
 }

 // DetectGPUVendor returns "nvidia" if /dev/nvidia0 exists, "amd" if /dev/kfd exists, or "" otherwise.
@@ -277,6 +283,80 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
 	}, logFunc)
 }

+// RunNvidiaOfficialComputePack runs dcgmproftester (-t 1004) for the normalized duration between
+// nvidia-smi/dcgmi snapshots, handing the jobs to runAcceptancePackCtx as "gpu-nvidia-compute".
+func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	duration := strconv.Itoa(normalizeNvidiaBurnDuration(durationSec))
+	profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", duration)
+	if err != nil {
+		return "", err
+	}
+	// The load generator gets the GPU selection through its environment (CUDA_VISIBLE_DEVICES).
+	afterCmd := []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-dcgmi-version.log", cmd: []string{"dcgmi", "-v"}},
+		{name: "03-dcgmproftester.log", cmd: profCmd, env: nvidiaVisibleDevicesEnv(selected), collectGPU: true, gpuIndices: selected},
+		{name: "04-nvidia-smi-after.log", cmd: afterCmd},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", jobs, logFunc)
+}
+
+// RunNvidiaTargetedPowerPack runs `dcgmi diag -r targeted_power` between two nvidia-smi
+// snapshots, handing the jobs to runAcceptancePackCtx as "gpu-nvidia-targeted-power".
+func (s *System) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	diagCmd := nvidiaDCGMNamedDiagCommand("targeted_power", normalizeNvidiaBurnDuration(durationSec), selected)
+	afterCmd := []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-dcgmi-targeted-power.log", cmd: diagCmd, collectGPU: true, gpuIndices: selected},
+		{name: "03-nvidia-smi-after.log", cmd: afterCmd},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-power", jobs, logFunc)
+}
+
+// RunNvidiaPulseTestPack runs `dcgmi diag -r pulse_test` between two nvidia-smi
+// snapshots, handing the jobs to runAcceptancePackCtx as "gpu-nvidia-pulse".
+func (s *System) RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	diagCmd := nvidiaDCGMNamedDiagCommand("pulse_test", normalizeNvidiaBurnDuration(durationSec), selected)
+	afterCmd := []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-dcgmi-pulse-test.log", cmd: diagCmd, collectGPU: true, gpuIndices: selected},
+		{name: "03-nvidia-smi-after.log", cmd: afterCmd},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-pulse", jobs, logFunc)
+}
+
+// RunNvidiaBandwidthPack runs `dcgmi diag -r nvbandwidth` (no test_duration parameter) between
+// two nvidia-smi snapshots, handing the jobs to runAcceptancePackCtx as "gpu-nvidia-bandwidth".
+func (s *System) RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	diagCmd := nvidiaDCGMNamedDiagCommand("nvbandwidth", 0, selected)
+	afterCmd := []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-dcgmi-nvbandwidth.log", cmd: diagCmd, collectGPU: true, gpuIndices: selected},
+		{name: "03-nvidia-smi-after.log", cmd: afterCmd},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-bandwidth", jobs, logFunc)
+}
+
 func (s *System) RunNvidiaAcceptancePack(baseDir string, logFunc func(string)) (string, error) {
 	return runAcceptancePackCtx(context.Background(), baseDir, "gpu-nvidia", nvidiaSATJobs(), logFunc)
 }
@@ -293,6 +373,23 @@ func (s *System) RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia", nvidiaDCGMJobs(diagLevel, resolvedGPUIndices), logFunc)
 }

+// RunNvidiaTargetedStressValidatePack runs `dcgmi diag -r targeted_stress` between two nvidia-smi
+// snapshots, handing the jobs to runAcceptancePackCtx as "gpu-nvidia-targeted-stress".
+func (s *System) RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+	selected, err := resolveDCGMGPUIndices(gpuIndices)
+	if err != nil {
+		return "", err
+	}
+	diagCmd := nvidiaDCGMNamedDiagCommand("targeted_stress", normalizeNvidiaBurnDuration(durationSec), selected)
+	afterCmd := []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}
+	jobs := []satJob{
+		{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
+		{name: "02-dcgmi-targeted-stress.log", cmd: diagCmd, collectGPU: true, gpuIndices: selected},
+		{name: "03-nvidia-smi-after.log", cmd: afterCmd},
+	}
+	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-targeted-stress", jobs, logFunc)
+}
+
 func resolveDCGMGPUIndices(gpuIndices []int) ([]int, error) {
 	if len(gpuIndices) > 0 {
 		return dedupeSortedIndices(gpuIndices), nil
@@ -473,6 +570,31 @@ func nvidiaDCGMJobs(diagLevel int, gpuIndices []int) []satJob {
 	}
 }

+func nvidiaDCGMNamedDiagCommand(name string, durationSec int, gpuIndices []int) []string {
+	cmd := []string{"dcgmi", "diag", "-r", name}
+	if durationSec > 0 { // non-positive duration: omit the -p test_duration parameter entirely
+		cmd = append(cmd, "-p", name+".test_duration="+strconv.Itoa(durationSec))
+	}
+	if len(gpuIndices) > 0 { // empty selection: omit -i
+		cmd = append(cmd, "-i", joinIndexList(gpuIndices))
+	}
+	return cmd
+}
+
+func normalizeNvidiaBurnDuration(durationSec int) int {
+	if durationSec > 0 {
+		return durationSec
+	}
+	return 300 // default applied when the requested duration is zero or negative
+}
+
+func nvidiaVisibleDevicesEnv(gpuIndices []int) []string {
+	if len(gpuIndices) > 0 {
+		return []string{"CUDA_VISIBLE_DEVICES=" + joinIndexList(gpuIndices)}
+	}
+	return nil // empty selection: no CUDA_VISIBLE_DEVICES override is produced
+}
+
 func runAcceptancePackCtx(ctx context.Context, baseDir, prefix string, jobs []satJob, logFunc func(string)) (string, error) {
 	if ctx == nil {
 		ctx = context.Background()
@@ -642,6 +764,7 @@ func classifySATResult(name string, out []byte, err error) (string, int) {
 	}
 	if strings.Contains(text, "unsupported") ||
 		strings.Contains(text, "not supported") ||
+		strings.Contains(text, "not found in path") ||
 		strings.Contains(text, "invalid opcode") ||
 		strings.Contains(text, "unknown command") ||
 		strings.Contains(text, "not implemented") ||
@@ -748,6 +871,15 @@ func resolveROCmSMICommand(args ...string) ([]string, error) {
 	return nil, errors.New("rocm-smi not found in PATH or under /opt/rocm")
 }

+func resolveDCGMProfTesterCommand(args ...string) ([]string, error) {
+	for _, binary := range dcgmProfTesterCandidates { // first candidate that resolves wins, in list order
+		if resolved, err := satLookPath(binary); err == nil {
+			return append([]string{resolved}, args...), nil
+		}
+	}
+	return nil, errors.New("dcgmproftester not found in PATH")
+}
+
 func ensureAMDRuntimeReady() error {
 	if _, err := os.Stat("/dev/kfd"); err == nil {
 		return nil
--- a/audit/internal/platform/sat_test.go
+++ b/audit/internal/platform/sat_test.go
@@ -195,6 +195,53 @@ func TestResolveDCGMGPUIndicesKeepsExplicitSelection(t *testing.T) {
 	}
 }

+// TestResolveDCGMProfTesterCommandUsesVersionedBinary stubs satLookPath so only dcgmproftester13 resolves.
+func TestResolveDCGMProfTesterCommandUsesVersionedBinary(t *testing.T) {
+	const resolved = "/usr/bin/dcgmproftester13"
+	saved := satLookPath
+	t.Cleanup(func() { satLookPath = saved })
+	satLookPath = func(file string) (string, error) {
+		if file == "dcgmproftester13" {
+			return resolved, nil
+		}
+		return "", exec.ErrNotFound
+	}
+
+	cmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004")
+	if err != nil {
+		t.Fatalf("resolveDCGMProfTesterCommand error: %v", err)
+	}
+	if len(cmd) != 4 {
+		t.Fatalf("cmd len=%d want 4 (%v)", len(cmd), cmd)
+	}
+	if cmd[0] != resolved {
+		t.Fatalf("cmd[0]=%q want /usr/bin/dcgmproftester13", cmd[0])
+	}
+}
+
+func TestNvidiaDCGMNamedDiagCommandUsesDurationAndSelection(t *testing.T) {
+	got := nvidiaDCGMNamedDiagCommand("targeted_power", 900, []int{3, 1})
+	want := []string{"dcgmi", "diag", "-r", "targeted_power", "-p", "targeted_power.test_duration=900", "-i", "3,1"}
+	if len(got) != len(want) {
+		t.Fatalf("cmd len=%d want %d (%v)", len(got), len(want), got)
+	}
+	for i, wantArg := range want {
+		if got[i] != wantArg {
+			t.Fatalf("cmd[%d]=%q want %q", i, got[i], wantArg)
+		}
+	}
+}
+
+func TestNvidiaVisibleDevicesEnvUsesSelectedGPUs(t *testing.T) {
+	got := nvidiaVisibleDevicesEnv([]int{0, 2, 4})
+	if len(got) != 1 { // exactly one KEY=value entry is expected
+		t.Fatalf("env len=%d want 1 (%v)", len(got), got)
+	}
+	if got[0] != "CUDA_VISIBLE_DEVICES=0,2,4" {
+		t.Fatalf("env[0]=%q want CUDA_VISIBLE_DEVICES=0,2,4", got[0])
+	}
+}
+
 func TestNvidiaStressArchivePrefixByLoader(t *testing.T) {
 	t.Parallel()

--- a/audit/internal/platform/services.go
+++ b/audit/internal/platform/services.go
@@ -10,17 +10,30 @@ import (
 func (s *System) ListBeeServices() ([]string, error) {
 	seen := map[string]bool{}
 	var out []string
-	for _, pattern := range []string{"/etc/systemd/system/bee-*.service", "/lib/systemd/system/bee-*.service"} {
+	for _, pattern := range []string{
+		"/etc/systemd/system/bee-*.service",
+		"/lib/systemd/system/bee-*.service",
+		"/etc/systemd/system/bee-*.timer",
+		"/lib/systemd/system/bee-*.timer",
+	} {
 		matches, err := filepath.Glob(pattern)
 		if err != nil {
 			return nil, err
 		}
 		for _, match := range matches {
-			name := strings.TrimSuffix(filepath.Base(match), ".service")
+			base := filepath.Base(match)
+			name := base
+			if strings.HasSuffix(base, ".service") {
+				name = strings.TrimSuffix(base, ".service")
+			}
 			// Skip template units (e.g. bee-journal-mirror@) — they have no instances to query.
 			if strings.HasSuffix(name, "@") {
 				continue
 			}
+			// bee-selfheal is timer-managed; showing the oneshot service as inactive is misleading.
+			if name == "bee-selfheal" && strings.HasSuffix(base, ".service") {
+				continue
+			}
 			if !seen[name] {
 				seen[name] = true
 				out = append(out, name)
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -44,12 +44,12 @@ type StaticIPv4Config struct {
 }

 type RemovableTarget struct {
-	Device     string
-	FSType     string
-	Size       string
-	Label      string
-	Model      string
-	Mountpoint string
+	Device     string `json:"device"`
+	FSType     string `json:"fs_type"`
+	Size       string `json:"size"`
+	Label      string `json:"label"`
+	Model      string `json:"model"`
+	Mountpoint string `json:"mountpoint"`
 }

 type ToolStatus struct {
--- a/audit/internal/platform/types_test.go
+++ b/audit/internal/platform/types_test.go
@@ -0,0 +1,31 @@
+package platform
+
+import (
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+// TestRemovableTargetJSONUsesFrontendFieldNames pins every snake_case JSON key of RemovableTarget, mountpoint included.
+func TestRemovableTargetJSONUsesFrontendFieldNames(t *testing.T) {
+	t.Parallel()
+	data, err := json.Marshal(RemovableTarget{
+		Device: "/dev/sdb1",
+		FSType: "exfat",
+		Size:   "1.8T",
+		Label:  "USB",
+		Model:  "Flash",
+	})
+	if err != nil {
+		t.Fatalf("marshal: %v", err)
+	}
+	raw := string(data)
+	for _, key := range []string{`"device"`, `"fs_type"`, `"size"`, `"label"`, `"model"`, `"mountpoint"`} {
+		if !strings.Contains(raw, key) {
+			t.Fatalf("json missing key %s: %s", key, raw)
+		}
+	}
+	if strings.Contains(raw, `"Device"`) || strings.Contains(raw, `"FSType"`) {
+		t.Fatalf("json still contains Go field names: %s", raw)
+	}
+}
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -232,6 +232,54 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 	}
 }

+// handleAPIBenchmarkNvidiaRun decodes an optional JSON body, enqueues an "nvidia-benchmark" task on
+// globalQueue, and replies with its ID under both task_id and job_id; run_nccl defaults to true.
+func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	var body struct {
+		Profile           string `json:"profile"`
+		SizeMB            int    `json:"size_mb"`
+		GPUIndices        []int  `json:"gpu_indices"`
+		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+		RunNCCL           *bool  `json:"run_nccl"`
+		DisplayName       string `json:"display_name"`
+	}
+	if r.Body != nil {
+		// An empty body (io.EOF) is accepted and leaves every field at its zero value.
+		err := json.NewDecoder(r.Body).Decode(&body)
+		if err != nil && !errors.Is(err, io.EOF) {
+			writeError(w, http.StatusBadRequest, "invalid request body")
+			return
+		}
+	}
+
+	name := taskDisplayName("nvidia-benchmark", "", "")
+	if strings.TrimSpace(body.DisplayName) != "" {
+		name = body.DisplayName
+	}
+	task := &Task{
+		ID:        newJobID("benchmark-nvidia"),
+		Name:      name,
+		Target:    "nvidia-benchmark",
+		Priority:  15,
+		Status:    TaskPending,
+		CreatedAt: time.Now(),
+		params: taskParams{
+			GPUIndices:        body.GPUIndices,
+			ExcludeGPUIndices: body.ExcludeGPUIndices,
+			SizeMB:            body.SizeMB,
+			BenchmarkProfile:  body.Profile,
+			RunNCCL:           body.RunNCCL == nil || *body.RunNCCL,
+			DisplayName:       body.DisplayName,
+		},
+	}
+	globalQueue.enqueue(task)
+	writeJSON(w, map[string]string{"task_id": task.ID, "job_id": task.ID})
+}
+
 func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
 	id := r.URL.Query().Get("job_id")
 	if id == "" {
@@ -491,6 +539,22 @@ func (h *handler) handleAPIExportUSBBundle(w http.ResponseWriter, r *http.Reques

 // ── GPU presence ──────────────────────────────────────────────────────────────

+func (h *handler) handleAPIGNVIDIAGPUs(w http.ResponseWriter, _ *http.Request) {
+	if h.opts.App == nil {
+		writeError(w, http.StatusServiceUnavailable, "app not configured")
+		return
+	}
+	gpus, err := h.opts.App.ListNvidiaGPUs()
+	switch {
+	case err != nil:
+		writeError(w, http.StatusInternalServerError, err.Error())
+		return
+	case gpus == nil:
+		gpus = []platform.NvidiaGPU{} // presumably so the response body is [] rather than null
+	}
+	writeJSON(w, gpus)
+}
+
 func (h *handler) handleAPIGPUPresence(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
@@ -516,14 +580,33 @@ func (h *handler) handleAPIGPUTools(w http.ResponseWriter, _ *http.Request) {
 	_, amdErr := os.Stat("/dev/kfd")
 	nvidiaUp := nvidiaErr == nil
 	amdUp := amdErr == nil
+	_, dcgmErr := exec.LookPath("dcgmi")
+	_, ncclStressErr := exec.LookPath("bee-nccl-gpu-stress")
+	_, johnErr := exec.LookPath("bee-john-gpu-stress")
+	_, beeBurnErr := exec.LookPath("bee-gpu-burn")
+	_, nvBandwidthErr := exec.LookPath("nvbandwidth")
+	profErr := lookPathAny("dcgmproftester", "dcgmproftester13", "dcgmproftester12", "dcgmproftester11")
 	writeJSON(w, []toolEntry{
-		{ID: "bee-gpu-burn", Available: nvidiaUp, Vendor: "nvidia"},
-		{ID: "john", Available: nvidiaUp, Vendor: "nvidia"},
-		{ID: "nccl", Available: nvidiaUp, Vendor: "nvidia"},
+		{ID: "nvidia-compute", Available: nvidiaUp && profErr == nil, Vendor: "nvidia"},
+		{ID: "nvidia-targeted-power", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
+		{ID: "nvidia-pulse", Available: nvidiaUp && dcgmErr == nil, Vendor: "nvidia"},
+		{ID: "nvidia-interconnect", Available: nvidiaUp && ncclStressErr == nil, Vendor: "nvidia"},
+		{ID: "nvidia-bandwidth", Available: nvidiaUp && dcgmErr == nil && nvBandwidthErr == nil, Vendor: "nvidia"},
+		{ID: "bee-gpu-burn", Available: nvidiaUp && beeBurnErr == nil, Vendor: "nvidia"},
+		{ID: "john", Available: nvidiaUp && johnErr == nil, Vendor: "nvidia"},
 		{ID: "rvs", Available: amdUp, Vendor: "amd"},
 	})
 }

+func lookPathAny(names ...string) error {
+	for _, candidate := range names { // succeed on the first name exec.LookPath resolves
+		if _, lookErr := exec.LookPath(candidate); lookErr == nil {
+			return nil
+		}
+	}
+	return exec.ErrNotFound // also the result for an empty names list
+}
+
 // ── System ────────────────────────────────────────────────────────────────────

 func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
@@ -562,7 +645,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request)

 var standardTools = []string{
 	"dmidecode", "smartctl", "nvme", "lspci", "ipmitool",
-	"nvidia-smi", "memtester", "stress-ng", "nvtop",
+	"nvidia-smi", "dcgmi", "nv-hostengine", "memtester", "stress-ng", "nvtop",
 	"mstflint", "qrencode",
 }

--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -64,6 +64,42 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	}
 }

+func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) { // the benchmark endpoint must enqueue one task carrying the request's GPU selection
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks // saved so the shared queue can be restored after the test
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
+	rec := httptest.NewRecorder()
+
+	h.handleAPIBenchmarkNvidiaRun(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	globalQueue.mu.Lock() // held for the rest of the test while the queued task is inspected
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 1 {
+		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
+	}
+	task := globalQueue.tasks[0]
+	if task.Target != "nvidia-benchmark" {
+		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
+	}
+	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
+		t.Fatalf("gpu indices=%v want [1 3]", got)
+	}
+	if task.params.RunNCCL { // the request sent run_nccl=false explicitly
+		t.Fatal("RunNCCL should reflect explicit false from request")
+	}
+}

 func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
 	h := &handler{}
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -0,0 +1,713 @@
+package webui
+
+import (
+	"fmt"
+	"math"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	"bee/audit/internal/platform"
+)
+
+type chartTimelineSegment struct { // one span of a chart's time range, classified as active or idle
+	Start  time.Time // start of the span
+	End    time.Time // end of the span
+	Active bool      // true for spans covered by a task run interval, false for the idle gaps between them
+}
+
+type chartScale struct { // value-axis range plus the tick values drawn along it
+	Min   float64   // value that chartYForValue maps to the bottom edge of the plot
+	Max   float64   // value that chartYForValue maps to the top edge of the plot
+	Ticks []float64 // tick values; the scale builders in this file set Min/Max to the first/last tick
+}
+
+type chartLayout struct { // geometry of the SVG canvas and of the plot rectangle inside it, in SVG user units
+	Width      int // canvas width
+	Height     int // canvas height
+	PlotLeft   int // x of the plot rectangle's left edge
+	PlotRight  int // x of the plot rectangle's right edge
+	PlotTop    int // y of the plot rectangle's top edge
+	PlotBottom int // y of the plot rectangle's bottom edge
+}
+
+type metricChartSeries struct { // one plotted line together with its legend and axis metadata
+	Name      string    // label shown in the legend
+	AxisTitle string    // caption drawn above the series' own Y axis in the GPU overview chart
+	Color     string    // color written verbatim into SVG stroke/fill attributes
+	Values    []float64 // Y values, one per time point
+}
+
+var metricChartPalette = []string{ // series colors; renderMetricChartSVG picks entry i modulo the palette length
+	"#5794f2",
+	"#73bf69",
+	"#f2cc0c",
+	"#ff9830",
+	"#f2495c",
+	"#b877d9",
+	"#56d2f7",
+	"#8ab8ff",
+	"#9adf8f",
+	"#ffbe5c",
+}
+
+func renderMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMin, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) { // renders a single-Y-axis line chart as one SVG document; the returned error is always nil here
+	pointCount := len(labels) // number of X positions: the longer of labels and times
+	if len(times) > pointCount {
+		pointCount = len(times)
+	}
+	if pointCount == 0 { // nothing to plot: use one placeholder point so the frame still renders
+		pointCount = 1
+		labels = []string{""}
+		times = []time.Time{time.Time{}}
+	}
+	if len(labels) < pointCount { // pad missing labels with empty strings
+		padded := make([]string, pointCount)
+		copy(padded, labels)
+		labels = padded
+	}
+	if len(times) < pointCount {
+		times = synthesizeChartTimes(times, pointCount)
+	}
+	for i := range datasets {
+		if len(datasets[i]) == 0 {
+			datasets[i] = make([]float64, pointCount) // NOTE: replaces the empty entry inside the caller's slice
+		}
+	}
+
+	mn, avg, mx := globalStats(datasets)
+	if mx > 0 { // the min/avg/max suffix is appended to the title only when the maximum is positive
+		title = fmt.Sprintf("%s    ↓%s  ~%s  ↑%s",
+			title,
+			chartLegendNumber(mn),
+			chartLegendNumber(avg),
+			chartLegendNumber(mx),
+		)
+	}
+
+	legendItems := []metricChartSeries{}
+	for i, name := range names { // one series per name; a name without a dataset plots as all zeros
+		color := metricChartPalette[i%len(metricChartPalette)]
+		values := make([]float64, pointCount)
+		if i < len(datasets) {
+			copy(values, coalesceDataset(datasets[i], pointCount))
+		}
+		legendItems = append(legendItems, metricChartSeries{
+			Name:   name,
+			Color:  color,
+			Values: values,
+		})
+	}
+
+	scale := singleAxisChartScale(datasets, yMin, yMax)
+	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
+	start, end := chartTimeBounds(times)
+
+	var b strings.Builder // elements are emitted back to front: overlays and grids first, series and legend last
+	writeSVGOpen(&b, layout.Width, layout.Height)
+	writeChartFrame(&b, title, layout.Width, layout.Height)
+	writeTimelineIdleSpans(&b, layout, start, end, timeline)
+	writeVerticalGrid(&b, layout, times, pointCount, 8)
+	writeHorizontalGrid(&b, layout, scale)
+	writeTimelineBoundaries(&b, layout, start, end, timeline)
+	writePlotBorder(&b, layout)
+	writeSingleAxisY(&b, layout, scale)
+	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
+	for _, item := range legendItems {
+		writeSeriesPolyline(&b, layout, times, start, end, item.Values, scale, item.Color)
+	}
+	writeLegend(&b, layout, legendItems)
+	writeSVGClose(&b)
+	return []byte(b.String()), nil
+}
+
+func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample, timeline []chartTimelineSegment) ([]byte, bool, error) { // builds the four-axis overview chart for GPU idx; the bool reports whether a chart was produced
+	temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
+	power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
+	coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
+	memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
+	if temp == nil && power == nil && coreClock == nil && memClock == nil { // all four lookups came back nil: report "no chart" without an error
+		return nil, false, nil
+	}
+	labels := sampleTimeLabels(samples)
+	times := sampleTimes(samples)
+	svg, err := drawGPUOverviewChartSVG(
+		fmt.Sprintf("GPU %d Overview", idx),
+		labels,
+		times,
+		[]metricChartSeries{
+			{Name: "Temp C", Values: coalesceDataset(temp, len(labels)), Color: "#f05a5a", AxisTitle: "Temp C"},
+			{Name: "Power W", Values: coalesceDataset(power, len(labels)), Color: "#ffb357", AxisTitle: "Power W"},
+			{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(labels)), Color: "#73bf69", AxisTitle: "Core MHz"},
+			{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(labels)), Color: "#5794f2", AxisTitle: "Memory MHz"},
+		},
+		timeline,
+	)
+	if err != nil {
+		return nil, false, err
+	}
+	return svg, true, nil
+}
+
+func drawGPUOverviewChartSVG(title string, labels []string, times []time.Time, series []metricChartSeries, timeline []chartTimelineSegment) ([]byte, error) { // draws exactly four series over one time axis, each with its own Y axis and scale
+	if len(series) != 4 {
+		return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
+	}
+	const (
+		width      = 1400
+		height     = 840
+		plotLeft   = 180
+		plotRight  = 1220
+		plotTop    = 96
+		plotBottom = 660
+	)
+	const (
+		leftOuterAxis  = 72
+		leftInnerAxis  = 132
+		rightInnerAxis = 1268
+		rightOuterAxis = 1328
+	)
+	layout := chartLayout{
+		Width:      width,
+		Height:     height,
+		PlotLeft:   plotLeft,
+		PlotRight:  plotRight,
+		PlotTop:    plotTop,
+		PlotBottom: plotBottom,
+	}
+	axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis} // axis i belongs to series[i]
+	pointCount := len(labels)
+	if len(times) > pointCount {
+		pointCount = len(times)
+	}
+	if pointCount == 0 { // nothing to plot: use one placeholder point so the frame still renders
+		pointCount = 1
+		labels = []string{""}
+		times = []time.Time{time.Time{}}
+	}
+	if len(labels) < pointCount {
+		padded := make([]string, pointCount)
+		copy(padded, labels)
+		labels = padded
+	}
+	if len(times) < pointCount {
+		times = synthesizeChartTimes(times, pointCount)
+	}
+	for i := range series {
+		if len(series[i].Values) == 0 {
+			series[i].Values = make([]float64, pointCount) // NOTE: writes into the caller's series slice
+		}
+	}
+
+	scales := make([]chartScale, len(series)) // one independent scale per series
+	for i := range series {
+		min, max := chartSeriesBounds(series[i].Values)
+		ticks := chartNiceTicks(min, max, 8)
+		scales[i] = chartScale{
+			Min:   ticks[0],
+			Max:   ticks[len(ticks)-1],
+			Ticks: ticks,
+		}
+	}
+	start, end := chartTimeBounds(times)
+
+	var b strings.Builder
+	writeSVGOpen(&b, width, height)
+	writeChartFrame(&b, title, width, height)
+	writeTimelineIdleSpans(&b, layout, start, end, timeline)
+	writeVerticalGrid(&b, layout, times, pointCount, 8)
+	writeHorizontalGrid(&b, layout, scales[0]) // horizontal grid lines follow the first series' ticks only
+	writeTimelineBoundaries(&b, layout, start, end, timeline)
+	writePlotBorder(&b, layout)
+
+	for i, axisLineX := range axisX {
+		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
+			axisLineX, layout.PlotTop, axisLineX, layout.PlotBottom, series[i].Color)
+		fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
+			axisLineX, 64, series[i].Color, sanitizeChartText(series[i].AxisTitle))
+		for _, tick := range scales[i].Ticks {
+			y := chartYForValue(valueClamp(tick, scales[i]), scales[i], layout.PlotTop, layout.PlotBottom)
+			label := sanitizeChartText(chartYAxisNumber(tick))
+			if i < 2 { // the two left-hand axes: tick marks point right, labels sit to the left
+				fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
+					axisLineX, y, axisLineX+6, y, series[i].Color)
+				fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
+					axisLineX-8, y, series[i].Color, label)
+				continue
+			}
+			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
+				axisLineX, y, axisLineX-6, y, series[i].Color)
+			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
+				axisLineX+8, y, series[i].Color, label)
+		}
+	}
+
+	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
+	for i := range series {
+		writeSeriesPolyline(&b, layout, times, start, end, series[i].Values, scales[i], series[i].Color)
+	}
+	writeLegend(&b, layout, series)
+	writeSVGClose(&b)
+	return []byte(b.String()), nil
+}
+
+func metricsTimelineSegments(samples []platform.LiveMetricSample, now time.Time) []chartTimelineSegment {
+	// Without samples there is no time range to classify.
+	if len(samples) == 0 {
+		return nil
+	}
+	first, last := chartTimeBounds(sampleTimes(samples))
+	if !first.IsZero() && !last.IsZero() {
+		return chartTimelineSegmentsForRange(first, last, now, snapshotTaskHistory())
+	}
+	return nil
+}
+
+func snapshotTaskHistory() []Task { // value copies of every task in globalQueue, taken while holding its mutex
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	snapshot := make([]Task, 0, len(globalQueue.tasks))
+	for _, task := range globalQueue.tasks {
+		snapshot = append(snapshot, *task)
+	}
+	return snapshot
+}
+
+func chartTimelineSegmentsForRange(start, end, now time.Time, tasks []Task) []chartTimelineSegment { // splits [start, end] into idle and active segments derived from task run intervals
+	if start.IsZero() || end.IsZero() {
+		return nil
+	}
+	if end.Before(start) {
+		start, end = end, start
+	}
+	type interval struct {
+		start time.Time
+		end   time.Time
+	}
+	active := make([]interval, 0, len(tasks))
+	for _, task := range tasks {
+		if task.StartedAt == nil { // no start time recorded: contributes no active time
+			continue
+		}
+		intervalStart := task.StartedAt.UTC()
+		intervalEnd := now.UTC() // without DoneAt the interval runs up to now
+		if task.DoneAt != nil {
+			intervalEnd = task.DoneAt.UTC()
+		}
+		if !intervalEnd.After(intervalStart) {
+			continue
+		}
+		if intervalEnd.Before(start) || intervalStart.After(end) { // entirely outside the charted range
+			continue
+		}
+		if intervalStart.Before(start) { // clip to the charted range
+			intervalStart = start
+		}
+		if intervalEnd.After(end) {
+			intervalEnd = end
+		}
+		active = append(active, interval{start: intervalStart, end: intervalEnd})
+	}
+	sort.Slice(active, func(i, j int) bool {
+		if active[i].start.Equal(active[j].start) {
+			return active[i].end.Before(active[j].end)
+		}
+		return active[i].start.Before(active[j].start)
+	})
+	merged := make([]interval, 0, len(active)) // overlapping or touching intervals collapsed into one
+	for _, span := range active {
+		if len(merged) == 0 {
+			merged = append(merged, span)
+			continue
+		}
+		last := &merged[len(merged)-1]
+		if !span.start.After(last.end) {
+			if span.end.After(last.end) {
+				last.end = span.end
+			}
+			continue
+		}
+		merged = append(merged, span)
+	}
+
+	segments := make([]chartTimelineSegment, 0, len(merged)*2+1)
+	cursor := start // end of the last segment emitted so far
+	for _, span := range merged {
+		if span.start.After(cursor) { // gap before this active span becomes an idle segment
+			segments = append(segments, chartTimelineSegment{Start: cursor, End: span.start, Active: false})
+		}
+		segments = append(segments, chartTimelineSegment{Start: span.start, End: span.end, Active: true})
+		cursor = span.end
+	}
+	if cursor.Before(end) {
+		segments = append(segments, chartTimelineSegment{Start: cursor, End: end, Active: false})
+	}
+	if len(segments) == 0 { // nothing emitted above: report the whole range as one idle segment
+		segments = append(segments, chartTimelineSegment{Start: start, End: end, Active: false})
+	}
+	return segments
+}
+
+func sampleTimes(samples []platform.LiveMetricSample) []time.Time { // Timestamp of every sample, in sample order
+	stamps := make([]time.Time, len(samples))
+	for i := range samples {
+		stamps[i] = samples[i].Timestamp
+	}
+	return stamps
+}
+
+func singleAxisChartScale(datasets [][]float64, yMin, yMax *float64) chartScale {
+	var lo, hi float64
+	switch {
+	case yMin != nil && yMax != nil:
+		lo, hi = *yMin, *yMax // both bounds pinned by the caller: the data is not consulted
+	default:
+		lo, hi = chartSeriesBounds(flattenDatasets(datasets))
+		if yMin != nil { // at most one override can be set on this path
+			lo = *yMin
+		} else if yMax != nil {
+			hi = *yMax
+		}
+	}
+	ticks := chartNiceTicks(lo, hi, 8)
+	return chartScale{Min: ticks[0], Max: ticks[len(ticks)-1], Ticks: ticks}
+}
+
+func flattenDatasets(datasets [][]float64) []float64 {
+	size := 0 // combined length, counted first so the result is allocated once
+	for i := range datasets {
+		size += len(datasets[i])
+	}
+	flat := make([]float64, 0, size)
+	for i := range datasets {
+		flat = append(flat, datasets[i]...)
+	}
+	return flat
+}
+
+// singleAxisChartLayout sizes the 1400-wide canvas for a single-axis chart and
+// reserves room under the plot for the legend (24 units per row of up to four
+// entries plus 24 units of padding) whenever chartLegendVisible allows one.
+func singleAxisChartLayout(canvasHeight int, seriesCount int) chartLayout {
+	legendHeight := 0
+	if chartLegendVisible(seriesCount) && seriesCount > 0 {
+		perRow := 4
+		if seriesCount < perRow {
+			perRow = seriesCount
+		}
+		// Integer ceiling of seriesCount/perRow.
+		if rows := (seriesCount + perRow - 1) / perRow; rows > 0 {
+			legendHeight = rows*24 + 24
+		}
+	}
+	// Horizontal margins and the top edge are fixed; only the bottom edge moves with the legend.
+	layout := chartLayout{Width: 1400, Height: canvasHeight}
+	layout.PlotLeft, layout.PlotRight = 96, 1352
+	layout.PlotTop = 72
+	layout.PlotBottom = canvasHeight - 60 - legendHeight
+	return layout
+}
+
+// chartTimeBounds returns the earliest and latest entries of times, both
+// converted to UTC. An empty slice yields two zero times.
+func chartTimeBounds(times []time.Time) (time.Time, time.Time) {
+	if len(times) == 0 {
+		return time.Time{}, time.Time{}
+	}
+	earliest, latest := times[0].UTC(), times[0].UTC()
+	for i := 1; i < len(times); i++ {
+		switch current := times[i].UTC(); {
+		case current.Before(earliest):
+			earliest = current
+		case current.After(latest):
+			latest = current
+		}
+	}
+	return earliest, latest
+}
+
+// synthesizeChartTimes returns count timestamps one minute apart, seeded by a
+// single known time or else ending at the current UTC time; a full slice is returned as is.
+func synthesizeChartTimes(times []time.Time, count int) []time.Time {
+	switch {
+	case count <= 0:
+		return nil
+	case len(times) == count:
+		return times
+	}
+	var origin time.Time
+	if len(times) == 1 {
+		origin = times[0]
+	} else {
+		origin = time.Now().UTC().Add(-time.Duration(count-1) * time.Minute)
+	}
+	filled := make([]time.Time, count)
+	for i := range filled {
+		filled[i] = origin.Add(time.Duration(i) * time.Minute)
+	}
+	return filled
+}
+
+func writeSVGOpen(b *strings.Builder, width, height int) { // writes the opening <svg> tag; the viewBox mirrors width and height
+	fmt.Fprintf(b, `<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`+"\n", width, height, width, height)
+}
+
+func writeSVGClose(b *strings.Builder) { // writes the closing </svg> tag followed by a newline
+	b.WriteString("</svg>\n")
+}
+
+func writeChartFrame(b *strings.Builder, title string, width, height int) { // writes the rounded white background and the horizontally centered title
+	fmt.Fprintf(b, `<rect width="%d" height="%d" rx="10" ry="10" fill="#ffffff" stroke="#d7e0ea"/>`+"\n", width, height)
+	fmt.Fprintf(b, `<text x="%d" y="30" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#1f2937">%s</text>`+"\n",
+		width/2, sanitizeChartText(title))
+}
+
+func writePlotBorder(b *strings.Builder, layout chartLayout) { // outlines the plot rectangle with an unfilled <rect>
+	fmt.Fprintf(b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#cbd5e1" stroke-width="1"/>`+"\n",
+		layout.PlotLeft, layout.PlotTop, layout.PlotRight-layout.PlotLeft, layout.PlotBottom-layout.PlotTop)
+}
+
+func writeHorizontalGrid(b *strings.Builder, layout chartLayout, scale chartScale) { // one full-width grid line per tick of scale
+	b.WriteString(`<g stroke="#e2e8f0" stroke-width="1">` + "\n")
+	for _, tick := range scale.Ticks {
+		y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
+		fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n",
+			layout.PlotLeft, y, layout.PlotRight, y)
+	}
+	b.WriteString(`</g>` + "\n")
+}
+
+func writeVerticalGrid(b *strings.Builder, layout chartLayout, times []time.Time, pointCount, target int) { // full-height grid lines at the point indices chosen by gpuChartLabelIndices
+	if pointCount <= 0 {
+		return
+	}
+	start, end := chartTimeBounds(times)
+	b.WriteString(`<g stroke="#edf2f7" stroke-width="1">` + "\n")
+	for _, idx := range gpuChartLabelIndices(pointCount, target) {
+		ts := chartPointTime(times, idx)
+		x := chartXForTime(ts, start, end, layout.PlotLeft, layout.PlotRight)
+		fmt.Fprintf(b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n",
+			x, layout.PlotTop, x, layout.PlotBottom)
+	}
+	b.WriteString(`</g>` + "\n")
+}
+
+func writeSingleAxisY(b *strings.Builder, layout chartLayout, scale chartScale) { // Y axis on the plot's left edge: axis line, then a tick mark and right-aligned label per tick
+	fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#64748b" stroke-width="1"/>`+"\n",
+		layout.PlotLeft, layout.PlotTop, layout.PlotLeft, layout.PlotBottom)
+	for _, tick := range scale.Ticks {
+		y := chartYForValue(tick, scale, layout.PlotTop, layout.PlotBottom)
+		fmt.Fprintf(b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="#64748b" stroke-width="1"/>`+"\n",
+			layout.PlotLeft, y, layout.PlotLeft-6, y)
+		fmt.Fprintf(b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="#475569">%s</text>`+"\n",
+			layout.PlotLeft-10, y, sanitizeChartText(chartYAxisNumber(tick)))
+	}
+}
+
+func writeXAxisLabels(b *strings.Builder, layout chartLayout, times []time.Time, labels []string, start, end time.Time, target int) { // time labels under the plot plus the "Time" axis caption
+	pointCount := len(labels) // the longer of labels and times
+	if len(times) > pointCount {
+		pointCount = len(times)
+	}
+	b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#64748b" text-anchor="middle">` + "\n")
+	for _, idx := range gpuChartLabelIndices(pointCount, target) {
+		x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
+		label := "" // indices beyond labels get an empty label
+		if idx < len(labels) {
+			label = labels[idx]
+		}
+		fmt.Fprintf(b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, layout.PlotBottom+28, sanitizeChartText(label))
+	}
+	b.WriteString(`</g>` + "\n")
+	fmt.Fprintf(b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#64748b">Time</text>`+"\n",
+		(layout.PlotLeft+layout.PlotRight)/2, layout.PlotBottom+48)
+}
+
+func writeSeriesPolyline(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, values []float64, scale chartScale, color string) { // draws values as one <polyline>; writes nothing for an empty series
+	if len(values) == 0 {
+		return
+	}
+	var points strings.Builder // space-separated "x,y" pairs for the points attribute
+	for idx, value := range values {
+		if idx > 0 {
+			points.WriteByte(' ')
+		}
+		x := chartXForTime(chartPointTime(times, idx), start, end, layout.PlotLeft, layout.PlotRight)
+		y := chartYForValue(value, scale, layout.PlotTop, layout.PlotBottom) // values are not clamped to the scale here
+		points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
+		points.WriteByte(',')
+		points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
+	}
+	fmt.Fprintf(b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2.2" stroke-linejoin="round" stroke-linecap="round"/>`+"\n",
+		points.String(), color)
+	if len(values) == 1 { // a single point additionally gets a dot marker
+		x := chartXForTime(chartPointTime(times, 0), start, end, layout.PlotLeft, layout.PlotRight)
+		y := chartYForValue(values[0], scale, layout.PlotTop, layout.PlotBottom)
+		fmt.Fprintf(b, `<circle cx="%.1f" cy="%.1f" r="3.5" fill="%s"/>`+"\n", x, y, color)
+	}
+}
+
+func writeLegend(b *strings.Builder, layout chartLayout, series []metricChartSeries) { // legend grid under the plot: a color swatch and name per series, up to four per row
+	if !chartLegendVisible(len(series)) || len(series) == 0 {
+		return
+	}
+	cols := 4
+	if len(series) < cols {
+		cols = len(series)
+	}
+	cellWidth := float64(layout.PlotRight-layout.PlotLeft) / float64(cols) // the plot width is split evenly between columns
+	baseY := layout.PlotBottom + 74
+	for i, item := range series {
+		row := i / cols
+		col := i % cols
+		x := float64(layout.PlotLeft) + cellWidth*float64(col) + 8
+		y := float64(baseY + row*24) // rows are 24 units apart
+		fmt.Fprintf(b, `<line x1="%.1f" y1="%.1f" x2="%.1f" y2="%.1f" stroke="%s" stroke-width="3"/>`+"\n",
+			x, y, x+28, y, item.Color)
+		fmt.Fprintf(b, `<text x="%.1f" y="%.1f" font-family="sans-serif" font-size="12" fill="#1f2937">%s</text>`+"\n",
+			x+38, y+4, sanitizeChartText(item.Name))
+	}
+}
+
+func writeTimelineIdleSpans(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) { // shades every idle segment with a translucent plot-height rectangle
+	if len(segments) == 0 {
+		return
+	}
+	b.WriteString(`<g data-role="timeline-overlay">` + "\n")
+	for _, segment := range segments {
+		if segment.Active || !segment.End.After(segment.Start) { // only idle spans with positive duration are shaded
+			continue
+		}
+		x0 := chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)
+		x1 := chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)
+		fmt.Fprintf(b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="#475569" opacity="0.10"/>`+"\n",
+			x0, layout.PlotTop, math.Max(1, x1-x0), layout.PlotBottom-layout.PlotTop)
+	}
+	b.WriteString(`</g>` + "\n")
+}
+
+func writeTimelineBoundaries(b *strings.Builder, layout chartLayout, start, end time.Time, segments []chartTimelineSegment) { // vertical lines where one timeline segment meets the next
+	if len(segments) == 0 {
+		return
+	}
+	seen := map[int]bool{} // rounded x positions already drawn, so each boundary is emitted once
+	b.WriteString(`<g data-role="timeline-boundaries" stroke="#94a3b8" stroke-width="1.2">` + "\n")
+	for i, segment := range segments {
+		if i > 0 { // the first segment's start is not drawn
+			x := int(math.Round(chartXForTime(segment.Start, start, end, layout.PlotLeft, layout.PlotRight)))
+			if !seen[x] {
+				seen[x] = true
+				fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
+			}
+		}
+		if i < len(segments)-1 { // the last segment's end is not drawn
+			x := int(math.Round(chartXForTime(segment.End, start, end, layout.PlotLeft, layout.PlotRight)))
+			if !seen[x] {
+				seen[x] = true
+				fmt.Fprintf(b, `<line x1="%d" y1="%d" x2="%d" y2="%d"/>`+"\n", x, layout.PlotTop, x, layout.PlotBottom)
+			}
+		}
+	}
+	b.WriteString(`</g>` + "\n")
+}
+
+// chartXForTime maps ts linearly from [start, end] onto [left, right], clamping ts; an empty range maps to the middle.
+func chartXForTime(ts, start, end time.Time, left, right int) float64 {
+	if !end.After(start) {
+		return float64(left+right) / 2
+	}
+	switch {
+	case ts.Before(start):
+		ts = start
+	case ts.After(end):
+		ts = end
+	}
+	return float64(left) + float64(ts.Sub(start))/float64(end.Sub(start))*float64(right-left)
+}
+
+func chartPointTime(times []time.Time, idx int) time.Time {
+	switch {
+	case idx >= 0 && idx < len(times) && !times[idx].IsZero():
+		return times[idx].UTC() // a real timestamp exists for this point
+	case len(times) > 0 && !times[0].IsZero():
+		return times[0].UTC().Add(time.Duration(idx) * time.Minute) // extrapolate from the first timestamp
+	}
+	return time.Now().UTC().Add(time.Duration(idx) * time.Minute)
+}
+
+func chartYForValue(value float64, scale chartScale, plotTop, plotBottom int) float64 { // maps value linearly so scale.Min lands on plotBottom and scale.Max on plotTop
+	if scale.Max <= scale.Min { // degenerate scale: place everything on the vertical midpoint
+		return float64(plotTop+plotBottom) / 2
+	}
+	return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotBottom-plotTop)
+}
+
+// chartSeriesBounds returns a padded display range for values: 0..1 for empty or all-zero input,
+// a flat series widened by 10% of its magnitude, and 20% headroom (floor kept at 0) when the minimum is positive.
+func chartSeriesBounds(values []float64) (float64, float64) {
+	if len(values) == 0 {
+		return 0, 1
+	}
+	lo, hi := values[0], values[0]
+	for i := 1; i < len(values); i++ {
+		switch v := values[i]; {
+		case v < lo:
+			lo = v
+		case v > hi:
+			hi = v
+		}
+	}
+	if lo == hi {
+		if hi == 0 {
+			return 0, 1
+		}
+		margin := math.Abs(hi) * 0.1
+		if margin == 0 {
+			margin = 1
+		}
+		lo, hi = lo-margin, hi+margin
+	}
+	if lo > 0 {
+		margin := (hi - lo) * 0.2
+		if margin == 0 {
+			margin = hi * 0.1
+		}
+		lo, hi = lo-margin, hi+margin
+		if lo < 0 {
+			lo = 0
+		}
+	}
+	return lo, hi
+}
+
+func chartNiceTicks(min, max float64, target int) (ticks []float64) {
+	if span := max - min; math.IsNaN(span) || math.IsInf(span, 0) || target < 1 {
+		return []float64{0, 1} // unusable input; callers index ticks[0], so never hand back an empty slice
+	}
+	min, max = math.Min(min, max), math.Max(min, max) // a one-sided axis override can invert the bounds
+	if min == max {
+		max = min + 1
+	}
+	step := math.Pow(10, math.Floor(math.Log10((max-min)/float64(target))))
+	for _, factor := range []float64{1, 2, 5, 10} { // smallest 1/2/5/10 multiple keeping the tick count near target
+		if (max-min)/(factor*step) <= float64(target)*1.5 {
+			step *= factor
+			break
+		}
+	}
+	for value, high := math.Floor(min/step)*step, math.Ceil(max/step)*step; value <= high+step*0.001; value += step {
+		ticks = append(ticks, math.Round(value*1e9)/1e9) // rounding hides accumulated float drift in labels
+	}
+	return ticks
+}
+
+func valueClamp(value float64, scale chartScale) float64 { // limits value to the [scale.Min, scale.Max] range
+	switch {
+	case value < scale.Min:
+		return scale.Min
+	case value > scale.Max:
+		return scale.Max
+	}
+	return value
+}
--- a/audit/internal/webui/kmsg_watcher.go
+++ b/audit/internal/webui/kmsg_watcher.go
@@ -232,7 +232,8 @@ func truncate(s string, max int) string {
 // isSATTarget returns true for task targets that run hardware acceptance tests.
 func isSATTarget(target string) bool {
 	switch target {
-	case "nvidia", "nvidia-stress", "memory", "memory-stress", "storage",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
 		"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
 		"platform-stress":
 		return true
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -1,19 +1,20 @@
 package webui

 import (
+	"bufio"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"html"
+	"io"
 	"log/slog"
-	"math"
 	"mime"
+	"net"
 	"net/http"
 	"os"
 	"path/filepath"
 	"runtime/debug"
 	"sort"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -21,7 +22,6 @@ import (
 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/runtimeenv"
-	gocharts "github.com/go-analyze/charts"
 	"reanimator/chart/viewer"
 	"reanimator/chart/web"
 )
@@ -237,6 +237,12 @@ func NewHandler(opts HandlerOptions) http.Handler {

 	// SAT
 	mux.HandleFunc("POST /api/sat/nvidia/run", h.handleAPISATRun("nvidia"))
+	mux.HandleFunc("POST /api/sat/nvidia-targeted-stress/run", h.handleAPISATRun("nvidia-targeted-stress"))
+	mux.HandleFunc("POST /api/sat/nvidia-compute/run", h.handleAPISATRun("nvidia-compute"))
+	mux.HandleFunc("POST /api/sat/nvidia-targeted-power/run", h.handleAPISATRun("nvidia-targeted-power"))
+	mux.HandleFunc("POST /api/sat/nvidia-pulse/run", h.handleAPISATRun("nvidia-pulse"))
+	mux.HandleFunc("POST /api/sat/nvidia-interconnect/run", h.handleAPISATRun("nvidia-interconnect"))
+	mux.HandleFunc("POST /api/sat/nvidia-bandwidth/run", h.handleAPISATRun("nvidia-bandwidth"))
 	mux.HandleFunc("POST /api/sat/nvidia-stress/run", h.handleAPISATRun("nvidia-stress"))
 	mux.HandleFunc("POST /api/sat/memory/run", h.handleAPISATRun("memory"))
 	mux.HandleFunc("POST /api/sat/storage/run", h.handleAPISATRun("storage"))
@@ -250,6 +256,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
+	mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)

 	// Tasks
 	mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
@@ -286,6 +293,7 @@ func NewHandler(opts HandlerOptions) http.Handler {

 	// GPU presence / tools
 	mux.HandleFunc("GET /api/gpu/presence", h.handleAPIGPUPresence)
+	mux.HandleFunc("GET /api/gpu/nvidia", h.handleAPIGNVIDIAGPUs)
 	mux.HandleFunc("GET /api/gpu/tools", h.handleAPIGPUTools)

 	// System
@@ -373,6 +381,38 @@ func (w *trackingResponseWriter) Write(p []byte) (int, error) {
 	return w.ResponseWriter.Write(p)
 }

+func (w *trackingResponseWriter) Flush() { // forwards Flush to the wrapped writer when it implements http.Flusher
+	w.wroteHeader = true // recorded even when the wrapped writer cannot flush
+	if f, ok := w.ResponseWriter.(http.Flusher); ok {
+		f.Flush()
+	}
+}
+
+func (w *trackingResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { // delegates to the wrapped writer's http.Hijacker
+	h, ok := w.ResponseWriter.(http.Hijacker)
+	if !ok { // wrap the sentinel Push also returns so callers can match it with errors.Is
+		return nil, nil, fmt.Errorf("hijacking not supported: %w", http.ErrNotSupported)
+	}
+	return h.Hijack()
+}
+
+func (w *trackingResponseWriter) Push(target string, opts *http.PushOptions) error { // delegates to the wrapped writer's http.Pusher
+	p, ok := w.ResponseWriter.(http.Pusher)
+	if !ok { // the wrapped writer cannot push
+		return http.ErrNotSupported
+	}
+	return p.Push(target, opts)
+}
+
+func (w *trackingResponseWriter) ReadFrom(r io.Reader) (int64, error) { // streams r to the client, using the wrapped writer's io.ReaderFrom when available
+	w.wroteHeader = true // both paths below write to the wrapped writer directly, bypassing this wrapper's Write
+	rf, ok := w.ResponseWriter.(io.ReaderFrom)
+	if !ok {
+		return io.Copy(w.ResponseWriter, r)
+	}
+	return rf.ReadFrom(r)
+}
+
 func recoverMiddleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		tw := &trackingResponseWriter{ResponseWriter: w}
@@ -520,13 +560,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
 		return
 	}
+	samples, err := h.metricsDB.LoadAll()
+	if err != nil || len(samples) == 0 {
+		http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
+		return
+	}
+	timeline := metricsTimelineSegments(samples, time.Now())
 	if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
-		samples, err := h.metricsDB.LoadAll()
-		if err != nil || len(samples) == 0 {
-			http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
-			return
-		}
-		buf, ok, err := renderGPUOverviewChartSVG(idx, samples)
+		buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
@@ -540,13 +581,23 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		_, _ = w.Write(buf)
 		return
 	}
-	datasets, names, labels, title, yMin, yMax, ok := h.chartDataFromDB(path)
+	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
 	if !ok {
 		http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 		return
 	}

-	buf, err := renderChartSVG(title, datasets, names, labels, yMin, yMax)
+	buf, err := renderMetricChartSVG(
+		title,
+		labels,
+		sampleTimes(samples),
+		datasets,
+		names,
+		yMin,
+		yMax,
+		chartCanvasHeightForPath(path, len(names)),
+		timeline,
+	)
 	if err != nil {
 		http.Error(w, err.Error(), http.StatusInternalServerError)
 		return
@@ -556,14 +607,6 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	_, _ = w.Write(buf)
 }

-func (h *handler) chartDataFromDB(path string) ([][]float64, []string, []string, string, *float64, *float64, bool) {
-	samples, err := h.metricsDB.LoadAll()
-	if err != nil || len(samples) == 0 {
-		return nil, nil, nil, "", nil, nil, false
-	}
-	return chartDataFromSamples(path, samples)
-}
-
 func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) {
 	var datasets [][]float64
 	var names []string
@@ -961,247 +1004,6 @@ func autoBounds120(datasets ...[]float64) (*float64, *float64) {
 	return floatPtr(low), floatPtr(high)
 }

-func renderGPUOverviewChartSVG(idx int, samples []platform.LiveMetricSample) ([]byte, bool, error) {
-	temp := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC })
-	power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW })
-	coreClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz })
-	memClock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz })
-	if temp == nil && power == nil && coreClock == nil && memClock == nil {
-		return nil, false, nil
-	}
-	labels := sampleTimeLabels(samples)
-	svg, err := drawGPUOverviewChartSVG(
-		fmt.Sprintf("GPU %d Overview", idx),
-		labels,
-		[]gpuOverviewSeries{
-			{Name: "Temp C", Values: coalesceDataset(temp, len(samples)), Color: "#f05a5a", AxisTitle: "Temp C"},
-			{Name: "Power W", Values: coalesceDataset(power, len(samples)), Color: "#ffb357", AxisTitle: "Power W"},
-			{Name: "Core Clock MHz", Values: coalesceDataset(coreClock, len(samples)), Color: "#73bf69", AxisTitle: "Core MHz"},
-			{Name: "Memory Clock MHz", Values: coalesceDataset(memClock, len(samples)), Color: "#5794f2", AxisTitle: "Memory MHz"},
-		},
-	)
-	if err != nil {
-		return nil, false, err
-	}
-	return svg, true, nil
-}
-
-type gpuOverviewSeries struct {
-	Name      string
-	AxisTitle string
-	Color     string
-	Values    []float64
-}
-
-func drawGPUOverviewChartSVG(title string, labels []string, series []gpuOverviewSeries) ([]byte, error) {
-	if len(series) != 4 {
-		return nil, fmt.Errorf("gpu overview requires 4 series, got %d", len(series))
-	}
-	const (
-		width      = 1400
-		height     = 420
-		plotLeft   = 180
-		plotRight  = 1220
-		plotTop    = 74
-		plotBottom = 292
-	)
-	const (
-		leftOuterAxis  = 72
-		leftInnerAxis  = 132
-		rightInnerAxis = 1268
-		rightOuterAxis = 1328
-	)
-	axisX := []int{leftOuterAxis, leftInnerAxis, rightInnerAxis, rightOuterAxis}
-	plotWidth := plotRight - plotLeft
-	plotHeight := plotBottom - plotTop
-
-	pointCount := len(labels)
-	if pointCount == 0 {
-		pointCount = 1
-		labels = []string{""}
-	}
-	for i := range series {
-		if len(series[i].Values) == 0 {
-			series[i].Values = make([]float64, pointCount)
-		}
-	}
-
-	type axisScale struct {
-		Min   float64
-		Max   float64
-		Ticks []float64
-	}
-	scales := make([]axisScale, len(series))
-	for i := range series {
-		min, max := gpuChartSeriesBounds(series[i].Values)
-		ticks := gpuChartNiceTicks(min, max, 8)
-		scales[i] = axisScale{
-			Min:   ticks[0],
-			Max:   ticks[len(ticks)-1],
-			Ticks: ticks,
-		}
-	}
-
-	xFor := func(index int) float64 {
-		if pointCount <= 1 {
-			return float64(plotLeft + plotWidth/2)
-		}
-		return float64(plotLeft) + float64(index)*float64(plotWidth)/float64(pointCount-1)
-	}
-	yFor := func(value float64, scale axisScale) float64 {
-		if scale.Max <= scale.Min {
-			return float64(plotTop + plotHeight/2)
-		}
-		return float64(plotBottom) - (value-scale.Min)/(scale.Max-scale.Min)*float64(plotHeight)
-	}
-
-	var b strings.Builder
-	b.WriteString(fmt.Sprintf(`<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" viewBox="0 0 %d %d">`, width, height, width, height))
-	b.WriteString("\n")
-	b.WriteString(`<rect width="100%" height="100%" rx="10" ry="10" fill="#111217" stroke="#2f3440"/>` + "\n")
-	b.WriteString(`<text x="700" y="28" text-anchor="middle" font-family="sans-serif" font-size="16" font-weight="700" fill="#f5f7fa">` + sanitizeChartText(title) + `</text>` + "\n")
-
-	b.WriteString(`<g stroke="#2f3440" stroke-width="1">` + "\n")
-	for _, tick := range scales[0].Ticks {
-		y := yFor(tick, scales[0])
-		fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f"/>`+"\n", plotLeft, y, plotRight, y)
-	}
-	for _, idx := range gpuChartLabelIndices(pointCount, 8) {
-		x := xFor(idx)
-		fmt.Fprintf(&b, `<line x1="%.1f" y1="%d" x2="%.1f" y2="%d"/>`+"\n", x, plotTop, x, plotBottom)
-	}
-	b.WriteString("</g>\n")
-
-	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d" fill="none" stroke="#454c5c" stroke-width="1"/>`+"\n",
-		plotLeft, plotTop, plotWidth, plotHeight)
-
-	for i, axisLineX := range axisX {
-		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="1"/>`+"\n",
-			axisLineX, plotTop, axisLineX, plotBottom, series[i].Color)
-		fmt.Fprintf(&b, `<text x="%d" y="%d" text-anchor="middle" font-family="sans-serif" font-size="11" font-weight="700" fill="%s">%s</text>`+"\n",
-			axisLineX, 52, series[i].Color, sanitizeChartText(series[i].AxisTitle))
-		for _, tick := range scales[i].Ticks {
-			y := yFor(tick, scales[i])
-			label := sanitizeChartText(gpuChartFormatTick(tick))
-			if i < 2 {
-				fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
-					axisLineX, y, axisLineX+6, y, series[i].Color)
-				fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="end" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
-					axisLineX-8, y, series[i].Color, label)
-				continue
-			}
-			fmt.Fprintf(&b, `<line x1="%d" y1="%.1f" x2="%d" y2="%.1f" stroke="%s" stroke-width="1"/>`+"\n",
-				axisLineX, y, axisLineX-6, y, series[i].Color)
-			fmt.Fprintf(&b, `<text x="%d" y="%.1f" text-anchor="start" dy="4" font-family="sans-serif" font-size="10" fill="%s">%s</text>`+"\n",
-				axisLineX+8, y, series[i].Color, label)
-		}
-	}
-
-	b.WriteString(`<g font-family="sans-serif" font-size="11" fill="#c8d0d8" text-anchor="middle">` + "\n")
-	for _, idx := range gpuChartLabelIndices(pointCount, 8) {
-		x := xFor(idx)
-		fmt.Fprintf(&b, `<text x="%.1f" y="%d">%s</text>`+"\n", x, plotBottom+22, sanitizeChartText(labels[idx]))
-	}
-	b.WriteString(`</g>` + "\n")
-	b.WriteString(`<text x="700" y="338" text-anchor="middle" font-family="sans-serif" font-size="12" fill="#c8d0d8">Time</text>` + "\n")
-
-	for i := range series {
-		var points strings.Builder
-		for j, value := range series[i].Values {
-			if j > 0 {
-				points.WriteByte(' ')
-			}
-			points.WriteString(strconv.FormatFloat(xFor(j), 'f', 1, 64))
-			points.WriteByte(',')
-			points.WriteString(strconv.FormatFloat(yFor(value, scales[i]), 'f', 1, 64))
-		}
-		fmt.Fprintf(&b, `<polyline points="%s" fill="none" stroke="%s" stroke-width="2"/>`+"\n",
-			points.String(), series[i].Color)
-		if len(series[i].Values) == 1 {
-			fmt.Fprintf(&b, `<circle cx="%.1f" cy="%.1f" r="3" fill="%s"/>`+"\n",
-				xFor(0), yFor(series[i].Values[0], scales[i]), series[i].Color)
-		}
-	}
-
-	const legendY = 372
-	legendX := []int{190, 470, 790, 1090}
-	for i := range series {
-		fmt.Fprintf(&b, `<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="%s" stroke-width="3"/>`+"\n",
-			legendX[i], legendY, legendX[i]+28, legendY, series[i].Color)
-		fmt.Fprintf(&b, `<text x="%d" y="%d" font-family="sans-serif" font-size="12" fill="#f5f7fa">%s</text>`+"\n",
-			legendX[i]+38, legendY+4, sanitizeChartText(series[i].Name))
-	}
-
-	b.WriteString("</svg>\n")
-	return []byte(b.String()), nil
-}
-
-func gpuChartSeriesBounds(values []float64) (float64, float64) {
-	if len(values) == 0 {
-		return 0, 1
-	}
-	min, max := values[0], values[0]
-	for _, value := range values[1:] {
-		if value < min {
-			min = value
-		}
-		if value > max {
-			max = value
-		}
-	}
-	if min == max {
-		if max == 0 {
-			return 0, 1
-		}
-		pad := math.Abs(max) * 0.1
-		if pad == 0 {
-			pad = 1
-		}
-		min -= pad
-		max += pad
-	}
-	if min > 0 {
-		pad := (max - min) * 0.2
-		if pad == 0 {
-			pad = max * 0.1
-		}
-		min -= pad
-		if min < 0 {
-			min = 0
-		}
-		max += pad
-	}
-	return min, max
-}
-
-func gpuChartNiceTicks(min, max float64, target int) []float64 {
-	if min == max {
-		max = min + 1
-	}
-	span := max - min
-	step := math.Pow(10, math.Floor(math.Log10(span/float64(target))))
-	for _, factor := range []float64{1, 2, 5, 10} {
-		if span/(factor*step) <= float64(target)*1.5 {
-			step = factor * step
-			break
-		}
-	}
-	low := math.Floor(min/step) * step
-	high := math.Ceil(max/step) * step
-	var ticks []float64
-	for value := low; value <= high+step*0.001; value += step {
-		ticks = append(ticks, math.Round(value*1e9)/1e9)
-	}
-	return ticks
-}
-
-func gpuChartFormatTick(value float64) string {
-	if value == math.Trunc(value) {
-		return strconv.Itoa(int(value))
-	}
-	return strconv.FormatFloat(value, 'f', 1, 64)
-}
-
 func gpuChartLabelIndices(total, target int) []int {
 	if total <= 0 {
 		return nil
@@ -1223,64 +1025,16 @@ func gpuChartLabelIndices(total, target int) []int {
 	return indices
 }

-// renderChartSVG renders a line chart SVG with a fixed Y-axis range.
-func renderChartSVG(title string, datasets [][]float64, names []string, labels []string, yMin, yMax *float64) ([]byte, error) {
-	n := len(labels)
-	if n == 0 {
-		n = 1
-		labels = []string{""}
+func chartCanvasHeightForPath(path string, seriesCount int) int {
+	height := chartCanvasHeight(seriesCount)
+	if isGPUChartPath(path) {
+		return height * 2
 	}
-	for i := range datasets {
-		if len(datasets[i]) == 0 {
-			datasets[i] = make([]float64, n)
-		}
-	}
-	// Append global min/avg/max to title.
-	mn, avg, mx := globalStats(datasets)
-	if mx > 0 {
-		title = fmt.Sprintf("%s    ↓%s  ~%s  ↑%s",
-			title,
-			chartLegendNumber(mn),
-			chartLegendNumber(avg),
-			chartLegendNumber(mx),
-		)
-	}
-	title = sanitizeChartText(title)
-	names = sanitizeChartTexts(names)
-	sparse := sanitizeChartTexts(sparseLabels(labels, 6))
+	return height
+}

-	opt := gocharts.NewLineChartOptionWithData(datasets)
-	opt.Title = gocharts.TitleOption{Text: title}
-	opt.XAxis.Labels = sparse
-	opt.Legend = gocharts.LegendOption{SeriesNames: names}
-	if chartLegendVisible(len(names)) {
-		opt.Legend.Offset = gocharts.OffsetStr{Top: gocharts.PositionBottom}
-		opt.Legend.OverlayChart = gocharts.Ptr(false)
-	} else {
-		opt.Legend.Show = gocharts.Ptr(false)
-	}
-	opt.Symbol = gocharts.SymbolNone
-	// Right padding: reserve space for the MarkLine label (library recommendation).
-	opt.Padding = gocharts.NewBox(20, 20, 80, 20)
-	if yMin != nil || yMax != nil {
-		opt.YAxis = []gocharts.YAxisOption{chartYAxisOption(yMin, yMax)}
-	}
-
-	// Add a single peak mark line on the series that holds the global maximum.
-	peakIdx, _ := globalPeakSeries(datasets)
-	if peakIdx >= 0 && peakIdx < len(opt.SeriesList) {
-		opt.SeriesList[peakIdx].MarkLine = gocharts.NewMarkLine(gocharts.SeriesMarkTypeMax)
-	}
-
-	p := gocharts.NewPainter(gocharts.PainterOptions{
-		OutputFormat: gocharts.ChartOutputSVG,
-		Width:        1400,
-		Height:       chartCanvasHeight(len(names)),
-	}, gocharts.PainterThemeOption(gocharts.GetTheme("grafana")))
-	if err := p.LineChart(opt); err != nil {
-		return nil, err
-	}
-	return p.Bytes()
+func isGPUChartPath(path string) bool {
+	return strings.HasPrefix(path, "gpu-all-") || strings.HasPrefix(path, "gpu/")
 }

 func chartLegendVisible(seriesCount int) bool {
@@ -1294,30 +1048,6 @@ func chartCanvasHeight(seriesCount int) int {
 	return 288
 }

-func chartYAxisOption(yMin, yMax *float64) gocharts.YAxisOption {
-	return gocharts.YAxisOption{
-		Min:            yMin,
-		Max:            yMax,
-		LabelCount:     11,
-		ValueFormatter: chartYAxisNumber,
-	}
-}
-
-// globalPeakSeries returns the index of the series containing the global maximum
-// value across all datasets, and that maximum value.
-func globalPeakSeries(datasets [][]float64) (idx int, peak float64) {
-	idx = -1
-	for i, ds := range datasets {
-		for _, v := range ds {
-			if v > peak {
-				peak = v
-				idx = i
-			}
-		}
-	}
-	return idx, peak
-}
-
 // globalStats returns min, average, and max across all values in all datasets.
 func globalStats(datasets [][]float64) (mn, avg, mx float64) {
 	var sum float64
@@ -1357,21 +1087,6 @@ func sanitizeChartText(s string) string {
 	}, s))
 }

-func sanitizeChartTexts(in []string) []string {
-	out := make([]string, len(in))
-	for i, s := range in {
-		out[i] = sanitizeChartText(s)
-	}
-	return out
-}
-
-func safeIdx(s []float64, i int) float64 {
-	if i < len(s) {
-		return s[i]
-	}
-	return 0
-}
-
 func snapshotNamedRings(rings []*namedMetricsRing) ([][]float64, []string, []string) {
 	var datasets [][]float64
 	var names []string
@@ -1458,20 +1173,6 @@ func chartYAxisNumber(v float64) string {
 	return out
 }

-func sparseLabels(labels []string, n int) []string {
-	out := make([]string, len(labels))
-	step := len(labels) / n
-	if step < 1 {
-		step = 1
-	}
-	for i, l := range labels {
-		if i%step == 0 {
-			out[i] = l
-		}
-	}
-	return out
-}
-
 func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Request) {
 	if h.metricsDB == nil {
 		http.Error(w, "metrics database not available", http.StatusServiceUnavailable)
@@ -1487,6 +1188,11 @@ func (h *handler) handleAPIMetricsExportCSV(w http.ResponseWriter, r *http.Reque

 func (h *handler) handleReady(w http.ResponseWriter, r *http.Request) {
 	w.Header().Set("Cache-Control", "no-store")
+	if strings.TrimSpace(h.opts.AuditPath) == "" {
+		w.WriteHeader(http.StatusOK)
+		_, _ = w.Write([]byte("ready"))
+		return
+	}
 	if _, err := os.Stat(h.opts.AuditPath); err != nil {
 		w.WriteHeader(http.StatusServiceUnavailable)
 		_, _ = w.Write([]byte("starting"))
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -51,6 +51,32 @@ func TestRecoverMiddlewareReturns500OnPanic(t *testing.T) {
 	}
 }

+// TestRecoverMiddlewarePreservesStreamingInterfaces checks that an SSE handler wrapped in recoverMiddleware still streams its event.
+func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
+	stream := func(w http.ResponseWriter, _ *http.Request) {
+		if !sseStart(w) {
+			return
+		}
+		if !sseWrite(w, "tick", "ok") {
+			t.Fatal("expected sse write to succeed")
+		}
+	}
+	rec := httptest.NewRecorder()
+	recoverMiddleware(http.HandlerFunc(stream)).ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/stream", nil))
+	if code := rec.Code; code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", code, rec.Body.String())
+	}
+	if got := rec.Header().Get("Content-Type"); got != "text/event-stream" {
+		t.Fatalf("content-type=%q", got)
+	}
+	out := rec.Body.String()
+	for _, want := range []string{"event: tick\n", "data: ok\n\n"} {
+		if !strings.Contains(out, want) {
+			t.Fatalf("body=%q", out)
+		}
+	}
+}
+
 func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 	samples := []platform.LiveMetricSample{
 		{
@@ -278,6 +304,124 @@ func TestChartCanvasHeight(t *testing.T) {
 	}
 }

+// TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps passes three task windows (two of them
+// overlapping) over a ten-minute range and expects five alternating idle/active segments.
+func TestChartTimelineSegmentsForRangeMergesActiveSpansAndIdleGaps(t *testing.T) {
+	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
+	end := start.Add(10 * time.Minute)
+	at := func(minute int) *time.Time {
+		ts := start.Add(time.Duration(minute) * time.Minute)
+		return &ts
+	}
+	tasks := []Task{
+		{Name: "task", Status: TaskDone, StartedAt: at(1), DoneAt: at(3)},
+		{Name: "task", Status: TaskDone, StartedAt: at(2), DoneAt: at(5)},
+		{Name: "task", Status: TaskDone, StartedAt: at(7), DoneAt: at(8)},
+	}
+	segments := chartTimelineSegmentsForRange(start, end, end, tasks)
+	if len(segments) != 5 {
+		t.Fatalf("segments=%d want 5: %#v", len(segments), segments)
+	}
+	wantActive := []bool{false, true, false, true, false}
+	wantMinutes := [][2]int{{0, 1}, {1, 5}, {5, 7}, {7, 8}, {8, 10}}
+
+	for i, segment := range segments {
+		if segment.Active != wantActive[i] {
+			t.Fatalf("segment[%d].Active=%v want %v", i, segment.Active, wantActive[i])
+		}
+		gotStart := int(segment.Start.Sub(start).Minutes())
+		if gotStart != wantMinutes[i][0] {
+			t.Fatalf("segment[%d] start=%d want %d", i, gotStart, wantMinutes[i][0])
+		}
+		gotEnd := int(segment.End.Sub(start).Minutes())
+		if gotEnd != wantMinutes[i][1] {
+			t.Fatalf("segment[%d] end=%d want %d", i, gotEnd, wantMinutes[i][1])
+		}
+	}
+}
+
+// TestRenderMetricChartSVGIncludesTimelineOverlay renders a chart with one idle and one active segment and checks the overlay markup and title.
+func TestRenderMetricChartSVGIncludesTimelineOverlay(t *testing.T) {
+	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
+	mid, end := start.Add(time.Minute), start.Add(2*time.Minute)
+	timeline := []chartTimelineSegment{
+		{Start: start, End: mid, Active: false},
+		{Start: mid, End: end, Active: true},
+	}
+	svg, err := renderMetricChartSVG(
+		"System Power",
+		[]string{"12:00", "12:01", "12:02"},
+		[]time.Time{start, mid, end},
+		[][]float64{{300, 320, 310}},
+		[]string{"Power W"},
+		floatPtr(0), floatPtr(400),
+		360,
+		timeline,
+	)
+	if err != nil {
+		t.Fatal(err)
+	}
+	checks := []struct{ needle, failure string }{
+		{`data-role="timeline-overlay"`, "svg missing timeline overlay: %s"},
+		{`opacity="0.10"`, "svg missing idle overlay opacity: %s"},
+		{`System Power`, "svg missing chart title: %s"},
+	}
+	for _, check := range checks {
+		if body := string(svg); !strings.Contains(body, check.needle) {
+			t.Fatalf(check.failure, body)
+		}
+	}
+}
+
+// TestHandleMetricsChartSVGRendersCustomSVG writes three power samples and installs one finished task, then
+// checks that the server-power chart endpoint answers 200 with the timeline overlay and round line caps.
+func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
+	dir := t.TempDir()
+	db, err := openMetricsDB(filepath.Join(dir, "metrics.db"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Cleanup(func() { _ = db.db.Close() })
+
+	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
+	samples := []platform.LiveMetricSample{
+		{Timestamp: start, PowerW: 300},
+		{Timestamp: start.Add(time.Minute), PowerW: 320},
+		{Timestamp: start.Add(2 * time.Minute), PowerW: 310},
+	}
+	for i := range samples {
+		if err := db.Write(samples[i]); err != nil {
+			t.Fatalf("write sample %d: %v", i, err)
+		}
+	}
+
+	taskStart, taskEnd := start.Add(30*time.Second), start.Add(90*time.Second)
+	burn := &Task{Name: "Burn", Status: TaskDone, StartedAt: &taskStart, DoneAt: &taskEnd}
+	globalQueue.mu.Lock()
+	prevTasks := globalQueue.tasks
+	globalQueue.tasks = []*Task{burn}
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		defer globalQueue.mu.Unlock()
+		globalQueue.tasks = prevTasks
+	})
+
+	h := &handler{opts: HandlerOptions{ExportDir: dir}, metricsDB: db}
+	rec := httptest.NewRecorder()
+	h.handleMetricsChartSVG(rec, httptest.NewRequest(http.MethodGet, "/api/metrics/chart/server-power.svg", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	body := rec.Body.String()
+	if !strings.Contains(body, `data-role="timeline-overlay"`) {
+		t.Fatalf("custom svg response missing timeline overlay: %s", body)
+	}
+	if !strings.Contains(body, `stroke-linecap="round"`) {
+		t.Fatalf("custom svg response missing custom polyline styling: %s", body)
+	}
+}
+
 func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
 	got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
 	want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -291,21 +435,6 @@ func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
 	}
 }

-func TestChartYAxisOption(t *testing.T) {
-	min := floatPtr(0)
-	max := floatPtr(100)
-	opt := chartYAxisOption(min, max)
-	if opt.Min != min || opt.Max != max {
-		t.Fatalf("chartYAxisOption min/max mismatch: %#v", opt)
-	}
-	if opt.LabelCount != 11 {
-		t.Fatalf("chartYAxisOption labelCount=%d want 11", opt.LabelCount)
-	}
-	if got := opt.ValueFormatter(1000); got != "1к" {
-		t.Fatalf("chartYAxisOption formatter(1000)=%q want 1к", got)
-	}
-}
-
 func TestSnapshotFanRingsUsesTimelineLabels(t *testing.T) {
 	r1 := newMetricsRing(4)
 	r2 := newMetricsRing(4)
@@ -414,7 +543,7 @@ func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
 		t.Fatalf("status=%d", rec.Code)
 	}
 	body := rec.Body.String()
-	if !strings.Contains(body, `Run Audit`) {
+	if !strings.Contains(body, `onclick="auditModalRun()">Run audit</button>`) {
 		t.Fatalf("dashboard missing run audit button: %s", body)
 	}
 	if strings.Contains(body, `No audit data`) {
@@ -422,6 +551,18 @@ func TestRootShowsRunAuditButtonWhenSnapshotMissing(t *testing.T) {
 	}
 }

+// TestReadyIsOKWhenAuditPathIsUnset checks that /api/ready answers 200 "ready" when HandlerOptions has no AuditPath.
+func TestReadyIsOKWhenAuditPathIsUnset(t *testing.T) {
+	rec := httptest.NewRecorder()
+	NewHandler(HandlerOptions{}).ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/ready", nil))
+	if got := rec.Code; got != http.StatusOK {
+		t.Fatalf("status=%d body=%s", got, rec.Body.String())
+	}
+	if got := rec.Body.String(); strings.TrimSpace(got) != "ready" {
+		t.Fatalf("body=%q want ready", got)
+	}
+}
+
 func TestAuditPageRendersViewerFrameAndActions(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
@@ -488,6 +629,68 @@ func TestToolsPageRendersRestartGPUDriversButton(t *testing.T) {
 	}
 }

+// TestBenchmarkPageRendersGPUSelectionControls requests /benchmark and checks the page carries the GPU list and benchmark run markers.
+func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
+	rec := httptest.NewRecorder()
+	NewHandler(HandlerOptions{}).ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/benchmark", nil))
+	if got := rec.Code; got != http.StatusOK {
+		t.Fatalf("status=%d", got)
+	}
+	needles := []string{
+		`href="/benchmark"`,
+		`id="benchmark-gpu-list"`,
+		`/api/gpu/nvidia`,
+		`/api/benchmark/nvidia/run`,
+		`benchmark-run-nccl`,
+	}
+	for _, needle := range needles {
+		if page := rec.Body.String(); !strings.Contains(page, needle) {
+			t.Fatalf("benchmark page missing %q: %s", needle, page)
+		}
+	}
+}
+
+// TestValidatePageRendersNvidiaTargetedStressCard requests /validate and checks the page carries the NVIDIA targeted stress card text.
+func TestValidatePageRendersNvidiaTargetedStressCard(t *testing.T) {
+	rec := httptest.NewRecorder()
+	NewHandler(HandlerOptions{}).ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/validate", nil))
+	if got := rec.Code; got != http.StatusOK {
+		t.Fatalf("status=%d", got)
+	}
+	needles := []string{
+		`NVIDIA GPU Targeted Stress`,
+		`nvidia-targeted-stress`,
+		`controlled NVIDIA DCGM load`,
+		`<code>dcgmi diag targeted_stress</code>`,
+	}
+	for _, needle := range needles {
+		if page := rec.Body.String(); !strings.Contains(page, needle) {
+			t.Fatalf("validate page missing %q: %s", needle, page)
+		}
+	}
+}
+
+// TestBurnPageRendersGoalBasedNVIDIACards requests /burn and checks the page carries the NVIDIA burn card texts and the GPU list element.
+func TestBurnPageRendersGoalBasedNVIDIACards(t *testing.T) {
+	rec := httptest.NewRecorder()
+	NewHandler(HandlerOptions{}).ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/burn", nil))
+	if got := rec.Code; got != http.StatusOK {
+		t.Fatalf("status=%d", got)
+	}
+	needles := []string{
+		`NVIDIA Max Compute Load`,
+		`dcgmproftester`,
+		`targeted_stress remain in <a href="/validate">Validate</a>`,
+		`NVIDIA Interconnect Test (NCCL all_reduce_perf)`,
+		`id="burn-gpu-list"`,
+	}
+	for _, needle := range needles {
+		if page := rec.Body.String(); !strings.Contains(page, needle) {
+			t.Fatalf("burn page missing %q: %s", needle, page)
+		}
+	}
+}
+
 func TestTasksPageRendersScrollableLogModal(t *testing.T) {
 	dir := t.TempDir()
 	path := filepath.Join(dir, "audit.json")
@@ -643,3 +846,98 @@ func TestRuntimeHealthEndpointReturnsJSON(t *testing.T) {
 		t.Fatalf("body=%q want %q", strings.TrimSpace(rec.Body.String()), body)
 	}
 }
+
+// TestDashboardRendersRuntimeHealthTable seeds an audit snapshot, runtime-health.json and component-status.json,
+// then checks that the dashboard at "/" renders the runtime health rows, issue texts and SAT sources.
+func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
+	dir := t.TempDir()
+	exportDir := filepath.Join(dir, "export")
+	if err := os.MkdirAll(exportDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	auditPath := filepath.Join(dir, "audit.json")
+	writeFixture := func(name, content string) {
+		if err := os.WriteFile(name, []byte(content), 0644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	writeFixture(auditPath, `{"collected_at":"2026-03-15T00:00:00Z","hardware":{"board":{"serial_number":"SERIAL-1"}}}`)
+	writeFixture(filepath.Join(exportDir, "runtime-health.json"), `{
+  "status":"PARTIAL",
+  "checked_at":"2026-03-16T10:00:00Z",
+  "export_dir":"/tmp/export",
+  "driver_ready":true,
+  "cuda_ready":false,
+  "network_status":"PARTIAL",
+  "issues":[
+    {"code":"dhcp_partial","description":"At least one interface did not obtain IPv4 connectivity."},
+    {"code":"cuda_runtime_not_ready","description":"CUDA runtime is not ready for GPU SAT."}
+  ],
+  "tools":[
+    {"name":"dmidecode","ok":true},
+    {"name":"nvidia-smi","ok":false}
+  ],
+  "services":[
+    {"name":"bee-web","status":"active"},
+    {"name":"bee-nvidia","status":"inactive"}
+  ]
+}`)
+	writeFixture(filepath.Join(exportDir, "component-status.json"), `[
+  {
+    "component_key":"cpu:all",
+    "status":"Warning",
+    "error_summary":"cpu SAT: FAILED",
+    "history":[{"at":"2026-03-16T10:00:00Z","status":"Warning","source":"sat:cpu","detail":"cpu SAT: FAILED"}]
+  },
+  {
+    "component_key":"memory:all",
+    "status":"OK",
+    "history":[{"at":"2026-03-16T10:01:00Z","status":"OK","source":"sat:memory","detail":"memory SAT: OK"}]
+  },
+  {
+    "component_key":"storage:nvme0n1",
+    "status":"Critical",
+    "error_summary":"storage SAT: FAILED",
+    "history":[{"at":"2026-03-16T10:02:00Z","status":"Critical","source":"sat:storage","detail":"storage SAT: FAILED"}]
+  },
+  {
+    "component_key":"pcie:gpu:nvidia",
+    "status":"Warning",
+    "error_summary":"nvidia SAT: FAILED",
+    "history":[{"at":"2026-03-16T10:03:00Z","status":"Warning","source":"sat:nvidia","detail":"nvidia SAT: FAILED"}]
+  }
+]`)
+
+	dashboard := NewHandler(HandlerOptions{AuditPath: auditPath, ExportDir: exportDir})
+	rec := httptest.NewRecorder()
+	dashboard.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	page := rec.Body.String()
+	for _, needle := range []string{
+		`Runtime Health`,
+		`<th>Check</th><th>Status</th><th>Source</th><th>Issue</th>`,
+		`Export Directory`,
+		`Network`,
+		`NVIDIA/AMD Driver`,
+		`CUDA / ROCm`,
+		`Required Utilities`,
+		`Bee Services`,
+		`<td>CPU</td>`,
+		`<td>Memory</td>`,
+		`<td>Storage</td>`,
+		`<td>GPU</td>`,
+		`CUDA runtime is not ready for GPU SAT.`,
+		`Missing: nvidia-smi`,
+		`bee-nvidia=inactive`,
+		`cpu SAT: FAILED`,
+		`storage SAT: FAILED`,
+		`sat:nvidia`,
+	} {
+		if !strings.Contains(page, needle) {
+			t.Fatalf("dashboard missing %q: %s", needle, page)
+		}
+	}
+}
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -31,6 +31,13 @@ const (
 // taskNames maps target → human-readable name for validate (SAT) runs.
 var taskNames = map[string]string{
 	"nvidia":                 "NVIDIA SAT",
+	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
+	"nvidia-benchmark":       "NVIDIA Benchmark",
+	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
+	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
+	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
+	"nvidia-interconnect":    "NVIDIA Interconnect Test (NCCL all_reduce_perf)",
+	"nvidia-bandwidth":       "NVIDIA Bandwidth Test (NVBandwidth)",
 	"nvidia-stress":          "NVIDIA GPU Stress",
 	"memory":                 "Memory SAT",
 	"storage":                "Storage SAT",
@@ -108,8 +115,11 @@ type taskParams struct {
 	DiagLevel          int      `json:"diag_level,omitempty"`
 	GPUIndices         []int    `json:"gpu_indices,omitempty"`
 	ExcludeGPUIndices  []int    `json:"exclude_gpu_indices,omitempty"`
+	SizeMB             int      `json:"size_mb,omitempty"`
 	Loader             string   `json:"loader,omitempty"`
 	BurnProfile        string   `json:"burn_profile,omitempty"`
+	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
+	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	DisplayName        string   `json:"display_name,omitempty"`
 	Device             string   `json:"device,omitempty"` // for install
 	PlatformComponents []string `json:"platform_components,omitempty"`
@@ -130,45 +140,53 @@ type persistedTask struct {
 }

 type burnPreset struct {
-	NvidiaDiag  int
 	DurationSec int
 }

 func resolveBurnPreset(profile string) burnPreset {
 	switch profile {
 	case "overnight":
-		return burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}
+		return burnPreset{DurationSec: 8 * 60 * 60}
 	case "acceptance":
-		return burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}
+		return burnPreset{DurationSec: 60 * 60}
 	default:
-		return burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}
+		return burnPreset{DurationSec: 5 * 60}
 	}
 }

 func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
+	acceptanceCycles := []platform.PlatformStressCycle{
+		{LoadSec: 85, IdleSec: 5},
+		{LoadSec: 80, IdleSec: 10},
+		{LoadSec: 55, IdleSec: 5},
+		{LoadSec: 60, IdleSec: 0},
+		{LoadSec: 100, IdleSec: 10},
+		{LoadSec: 145, IdleSec: 15},
+		{LoadSec: 190, IdleSec: 20},
+		{LoadSec: 235, IdleSec: 25},
+		{LoadSec: 280, IdleSec: 30},
+		{LoadSec: 325, IdleSec: 35},
+		{LoadSec: 370, IdleSec: 40},
+		{LoadSec: 415, IdleSec: 45},
+		{LoadSec: 460, IdleSec: 50},
+		{LoadSec: 510, IdleSec: 0},
+	}
+
 	switch profile {
 	case "overnight":
-		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
-			{LoadSec: 600, IdleSec: 120},
-			{LoadSec: 600, IdleSec: 60},
-			{LoadSec: 600, IdleSec: 30},
-			{LoadSec: 600, IdleSec: 120},
-			{LoadSec: 600, IdleSec: 60},
-			{LoadSec: 600, IdleSec: 30},
-			{LoadSec: 600, IdleSec: 120},
-			{LoadSec: 600, IdleSec: 60},
-		}}
+		cycles := make([]platform.PlatformStressCycle, 0, len(acceptanceCycles)*8)
+		for range 8 {
+			cycles = append(cycles, acceptanceCycles...)
+		}
+		return platform.PlatformStressOptions{Cycles: cycles}
 	case "acceptance":
-		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
-			{LoadSec: 300, IdleSec: 60},
-			{LoadSec: 300, IdleSec: 30},
-			{LoadSec: 300, IdleSec: 60},
-			{LoadSec: 300, IdleSec: 30},
-		}}
+		return platform.PlatformStressOptions{Cycles: acceptanceCycles}
 	default: // smoke
 		return platform.PlatformStressOptions{Cycles: []platform.PlatformStressCycle{
-			{LoadSec: 90, IdleSec: 60},
-			{LoadSec: 90, IdleSec: 30},
+			{LoadSec: 85, IdleSec: 5},
+			{LoadSec: 80, IdleSec: 10},
+			{LoadSec: 55, IdleSec: 5},
+			{LoadSec: 60, IdleSec: 0},
 		}}
 	}
 }
@@ -429,6 +447,31 @@ func (q *taskQueue) worker() {
 				wg.Add(1)
 				goRecoverOnce("task "+t.Target, func() {
 					defer wg.Done()
+					defer taskCancel()
+					q.executeTask(t, j, taskCtx)
+				})
+			}
+			wg.Wait()
+
+			if len(batch) > 0 {
+				q.mu.Lock()
+				q.prune()
+				q.persistLocked()
+				q.mu.Unlock()
+			}
+		}()
+
+	}
+}
+
+func (q *taskQueue) executeTask(t *Task, j *jobState, ctx context.Context) {
+	startedKmsgWatch := false
+	defer q.finalizeTaskRun(t, j)
+	defer func() {
+		if startedKmsgWatch && q.kmsgWatcher != nil {
+			q.kmsgWatcher.NotifyTaskFinished(t.ID)
+		}
+	}()
 	defer func() {
 		if rec := recover(); rec != nil {
 			msg := fmt.Sprintf("task panic: %v", rec)
@@ -445,40 +488,28 @@ func (q *taskQueue) worker() {

 	if q.kmsgWatcher != nil && isSATTarget(t.Target) {
 		q.kmsgWatcher.NotifyTaskStarted(t.ID, t.Target)
+		startedKmsgWatch = true
 	}

-					q.runTask(t, j, taskCtx)
-
-					if q.kmsgWatcher != nil {
-						q.kmsgWatcher.NotifyTaskFinished(t.ID)
-					}
+	q.runTask(t, j, ctx)
+}

+func (q *taskQueue) finalizeTaskRun(t *Task, j *jobState) {
 	q.mu.Lock()
-					now2 := time.Now()
-					t.DoneAt = &now2
+	defer q.mu.Unlock()
+
+	now := time.Now()
+	t.DoneAt = &now
 	if t.Status == TaskRunning {
 		if j.err != "" {
 			t.Status = TaskFailed
 			t.ErrMsg = j.err
 		} else {
 			t.Status = TaskDone
+			t.ErrMsg = ""
 		}
 	}
 	q.persistLocked()
-					q.mu.Unlock()
-				})
-			}
-			wg.Wait()
-
-			if len(batch) > 0 {
-				q.mu.Lock()
-				q.prune()
-				q.persistLocked()
-				q.mu.Unlock()
-			}
-		}()
-
-	}
 }

 // setCPUGovernor writes the given governor to all CPU scaling_governor sysfs files.
@@ -519,9 +550,6 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			break
 		}
 		diagLevel := t.params.DiagLevel
-		if t.params.BurnProfile != "" && diagLevel <= 0 {
-			diagLevel = resolveBurnPreset(t.params.BurnProfile).NvidiaDiag
-		}
 		if len(t.params.GPUIndices) > 0 || diagLevel > 0 {
 			result, e := a.RunNvidiaAcceptancePackWithOptions(
 				ctx, "", diagLevel, t.params.GPUIndices, j.append,
@@ -534,6 +562,78 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 		} else {
 			archive, err = a.RunNvidiaAcceptancePack("", j.append)
 		}
+	case "nvidia-targeted-stress":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		dur := t.params.Duration
+		if dur <= 0 {
+			dur = 300
+		}
+		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
+	case "nvidia-benchmark":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
+			Profile:           t.params.BenchmarkProfile,
+			SizeMB:            t.params.SizeMB,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			RunNCCL:           t.params.RunNCCL,
+		}, j.append)
+	case "nvidia-compute":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
+	case "nvidia-targeted-power":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		archive, err = a.RunNvidiaTargetedPowerPack(ctx, "", dur, t.params.GPUIndices, j.append)
+	case "nvidia-pulse":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		archive, err = a.RunNvidiaPulseTestPack(ctx, "", dur, t.params.GPUIndices, j.append)
+	case "nvidia-bandwidth":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaBandwidthPack(ctx, "", t.params.GPUIndices, j.append)
+	case "nvidia-interconnect":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		dur := t.params.Duration
+		if t.params.BurnProfile != "" && dur <= 0 {
+			dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
+		}
+		archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
+			DurationSec: dur,
+			Loader:      platform.NvidiaStressLoaderNCCL,
+			GPUIndices:  t.params.GPUIndices,
+		}, j.append)
 	case "nvidia-stress":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -253,10 +253,10 @@ func TestResolveBurnPreset(t *testing.T) {
 		profile string
 		want    burnPreset
 	}{
-		{profile: "smoke", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
-		{profile: "acceptance", want: burnPreset{NvidiaDiag: 3, DurationSec: 60 * 60}},
-		{profile: "overnight", want: burnPreset{NvidiaDiag: 4, DurationSec: 8 * 60 * 60}},
-		{profile: "", want: burnPreset{NvidiaDiag: 1, DurationSec: 5 * 60}},
+		{profile: "smoke", want: burnPreset{DurationSec: 5 * 60}},
+		{profile: "acceptance", want: burnPreset{DurationSec: 60 * 60}},
+		{profile: "overnight", want: burnPreset{DurationSec: 8 * 60 * 60}},
+		{profile: "", want: burnPreset{DurationSec: 5 * 60}},
 	}
 	for _, tc := range tests {
 		if got := resolveBurnPreset(tc.profile); got != tc.want {
@@ -467,3 +467,52 @@ func TestRunTaskInstallUsesSharedCommandStreaming(t *testing.T) {
 		t.Fatalf("unexpected error: %q", j.err)
 	}
 }
+
+// TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow makes the CPU
+// acceptance runner panic and checks that executeTask still marks the task as
+// failed, stamps DoneAt, reports the panic on both the task and the job, and
+// leaves the kmsg watcher with no active tasks and no open window.
+func TestExecuteTaskMarksPanicsAsFailedAndClosesKmsgWindow(t *testing.T) {
+	// Swap in a panicking runner; the real one is restored when the test returns.
+	savedRunner := runCPUAcceptancePackCtx
+	defer func() { runCPUAcceptancePackCtx = savedRunner }()
+	runCPUAcceptancePackCtx = func(_ *app.App, _ context.Context, _ string, _ int, _ func(string)) (string, error) {
+		panic("boom")
+	}
+
+	stateDir := t.TempDir()
+	queue := &taskQueue{
+		opts:        &HandlerOptions{App: &app.App{}},
+		statePath:   filepath.Join(stateDir, "tasks-state.json"),
+		logsDir:     filepath.Join(stateDir, "tasks"),
+		kmsgWatcher: newKmsgWatcher(nil),
+	}
+	job := &jobState{}
+	task := &Task{
+		ID:        "cpu-panic-1",
+		Name:      "CPU SAT",
+		Target:    "cpu",
+		Status:    TaskRunning,
+		CreatedAt: time.Now(),
+	}
+
+	queue.executeTask(task, job, context.Background())
+
+	if got := task.Status; got != TaskFailed {
+		t.Fatalf("status=%q want %q", got, TaskFailed)
+	}
+	if task.DoneAt == nil {
+		t.Fatal("expected done_at to be set")
+	}
+	const wantErr = "task panic: boom"
+	if !strings.Contains(task.ErrMsg, wantErr) {
+		t.Fatalf("task error=%q", task.ErrMsg)
+	}
+	if !strings.Contains(job.err, wantErr) {
+		t.Fatalf("job error=%q", job.err)
+	}
+
+	// Snapshot the watcher state under its own lock before asserting on it.
+	watcher := queue.kmsgWatcher
+	watcher.mu.Lock()
+	active, win := watcher.activeCount, watcher.window
+	watcher.mu.Unlock()
+	if active != 0 {
+		t.Fatalf("activeCount=%d want 0", active)
+	}
+	if win != nil {
+		t.Fatalf("expected kmsg window to be cleared, got %+v", win)
+	}
+}
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -302,6 +302,12 @@ memtest_fail() {
    return 0
 }

+# nvidia_runtime_fail MESSAGE
+# Writes "ERROR: MESSAGE" to stderr and exits with status 1.
+nvidia_runtime_fail() {
+    msg=$1
+    echo "ERROR: $msg" 1>&2
+    exit 1
+}
+
 iso_memtest_present() {
    iso_path="$1"
    iso_files="$(mktemp)"
@@ -439,6 +445,44 @@ validate_iso_memtest() {
    echo "=== memtest validation OK ==="
 }

+# validate_iso_nvidia_runtime ISO_PATH
+# For NVIDIA builds (BEE_GPU_VENDOR=nvidia) extracts live/filesystem.squashfs
+# from the ISO, lists it with unsquashfs, and calls nvidia_runtime_fail unless
+# dcgmi, nv-hostengine and a dcgmproftester binary (optionally carrying a
+# numeric suffix) are listed under usr/bin. Returns 0 immediately for any
+# other vendor. Temporary files are removed on every exit path.
+validate_iso_nvidia_runtime() {
+    iso_path="$1"
+    [ "$BEE_GPU_VENDOR" = "nvidia" ] || return 0
+
+    echo "=== validating NVIDIA runtime in ISO ==="
+
+    [ -f "$iso_path" ] || nvidia_runtime_fail "ISO not found for NVIDIA runtime validation: $iso_path"
+    require_iso_reader "$iso_path" >/dev/null 2>&1 || nvidia_runtime_fail "ISO reader unavailable for NVIDIA runtime validation"
+    command -v unsquashfs >/dev/null 2>&1 || nvidia_runtime_fail "unsquashfs is required for NVIDIA runtime validation"
+
+    # Check mktemp explicitly: an empty path would otherwise be handed to the
+    # extraction and listing steps and surface as a misleading error.
+    squashfs_tmp="$(mktemp)" || nvidia_runtime_fail "failed to create temporary file for NVIDIA runtime validation"
+    squashfs_list="$(mktemp)" || {
+        rm -f "$squashfs_tmp"
+        nvidia_runtime_fail "failed to create temporary file for NVIDIA runtime validation"
+    }
+    iso_read_member "$iso_path" live/filesystem.squashfs "$squashfs_tmp" || {
+        rm -f "$squashfs_tmp" "$squashfs_list"
+        nvidia_runtime_fail "failed to extract live/filesystem.squashfs from ISO"
+    }
+    unsquashfs -ll "$squashfs_tmp" > "$squashfs_list" 2>/dev/null || {
+        rm -f "$squashfs_tmp" "$squashfs_list"
+        nvidia_runtime_fail "failed to inspect filesystem.squashfs from ISO"
+    }
+
+    # Every required tool must show up as a listing line ending in
+    # usr/bin/<tool>; dcgmproftester may additionally carry a numeric suffix.
+    for nvidia_tool in dcgmi nv-hostengine dcgmproftester; do
+        case "$nvidia_tool" in
+            dcgmproftester) nvidia_tool_re='usr/bin/dcgmproftester([0-9]+)?$' ;;
+            *) nvidia_tool_re="usr/bin/${nvidia_tool}\$" ;;
+        esac
+        grep -Eq "$nvidia_tool_re" "$squashfs_list" || {
+            rm -f "$squashfs_tmp" "$squashfs_list"
+            nvidia_runtime_fail "${nvidia_tool} missing from final NVIDIA ISO"
+        }
+    done
+
+    rm -f "$squashfs_tmp" "$squashfs_list"
+    echo "=== NVIDIA runtime validation OK ==="
+}
+
 append_memtest_grub_entry() {
    grub_cfg="$1"
    [ -f "$grub_cfg" ] || return 1
@@ -1144,6 +1188,7 @@ if [ -f "$ISO_RAW" ]; then
        fi
    fi
    validate_iso_memtest "$ISO_RAW"
+    validate_iso_nvidia_runtime "$ISO_RAW"
    cp "$ISO_RAW" "$ISO_OUT"
    echo ""
    echo "=== done (${BEE_GPU_VENDOR}) ==="
--- a/iso/builder/config/package-lists/bee-nvidia.list.chroot
+++ b/iso/builder/config/package-lists/bee-nvidia.list.chroot
@@ -1,6 +1,10 @@
-# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing.
-# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with CUDA 13 userspace,
-# so install the CUDA 13 build plus proprietary diagnostic components explicitly.
+# NVIDIA DCGM (Data Center GPU Manager).
+# Validate uses dcgmi diagnostics; Burn uses dcgmproftester as the official
+# NVIDIA max-compute recipe. The smoketest/runtime contract treats
+# dcgmproftester as required in the LiveCD.
+# DCGM 4 is packaged per CUDA major. The image ships NVIDIA driver 590 with
+# CUDA 13 userspace, so install the CUDA 13 build plus proprietary components
+# explicitly.
 datacenter-gpu-manager-4-cuda13=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary=1:%%DCGM_VERSION%%
 datacenter-gpu-manager-4-proprietary-cuda13=1:%%DCGM_VERSION%%
--- a/iso/builder/smoketest.sh
+++ b/iso/builder/smoketest.sh
@@ -52,6 +52,31 @@ else
    fail "nvidia-smi: NOT FOUND"
 fi

+# dcgmi and nv-hostengine are both mandatory: a missing binary is reported
+# through fail. /usr/local/bin is searched ahead of the inherited PATH.
+for tool in dcgmi nv-hostengine; do
+    if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
+        ok "$tool found: $p"
+    else
+        fail "$tool: NOT FOUND"
+    fi
+done
+
+
+# Locate dcgmproftester: the unsuffixed name is tried first, then the numbered
+# variants in the order listed. The first hit wins.
+DCGM_PROFTESTER=""
+for tool in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
+    p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null) || continue
+    DCGM_PROFTESTER="$p"
+    break
+done
+if [ -z "$DCGM_PROFTESTER" ]; then
+    fail "dcgmproftester: NOT FOUND"
+else
+    ok "dcgmproftester found: $DCGM_PROFTESTER"
+fi
+
 for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf; do
    if p=$(PATH="/usr/local/bin:$PATH" command -v "$tool" 2>/dev/null); then
        ok "$tool found: $p"
@@ -60,6 +85,12 @@ for tool in bee-gpu-burn bee-john-gpu-stress bee-nccl-gpu-stress all_reduce_perf
    fi
 done

+# A missing nvbandwidth is reported through warn rather than fail.
+if ! p=$(PATH="/usr/local/bin:$PATH" command -v nvbandwidth 2>/dev/null); then
+    warn "nvbandwidth: NOT FOUND"
+else
+    ok "nvbandwidth found: $p"
+fi
+
 echo ""
 echo "-- NVIDIA modules --"
 KO_DIR="/usr/local/lib/nvidia"
Author	SHA1	Message	Date
Michael Chus	33e0a5bef2	Refine validate UI and runtime health table	2026-04-05 16:24:45 +03:00
Michael Chus	38e79143eb	Refine burn UI and NVIDIA stress flows	2026-04-05 13:43:43 +03:00
Michael Chus	25af2df23a	Unify metrics charts on custom SVG renderer	2026-04-05 12:17:50 +03:00
Michael Chus	20abff7f90	WIP: checkpoint current tree	2026-04-05 12:05:00 +03:00
Michael Chus	a14ec8631c	Persist GPU chart mode and expand GPU charts	2026-04-05 11:52:32 +03:00
Michael Chus	f58c7e58d3	Fix webui streaming recovery regressions	2026-04-05 10:39:09 +03:00