Commit remaining workspace changes

Globalize autotuned system power source
audit: switch power benchmark load to dcgmproftester
2026-04-20 07:02:31 +03:00 · 2026-04-20 07:02:12 +03:00 · 2026-04-20 06:57:14 +03:00 · 2026-04-20 06:56:52 +03:00 · 2026-04-19 23:08:09 +03:00 · 2026-04-19 21:26:44 +03:00
36 changed files with 4752 additions and 3295 deletions
--- a/audit/bee
+++ b/audit/bee
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -19,20 +19,22 @@ import (
 )
 var (
-	DefaultExportDir        = "/appdata/bee/export"
+	DefaultExportDir                     = "/appdata/bee/export"
-	DefaultAuditJSONPath    = DefaultExportDir + "/bee-audit.json"
+	DefaultAuditJSONPath                 = DefaultExportDir + "/bee-audit.json"
-	DefaultAuditLogPath     = DefaultExportDir + "/bee-audit.log"
+	DefaultAuditLogPath                  = DefaultExportDir + "/bee-audit.log"
-	DefaultWebLogPath       = DefaultExportDir + "/bee-web.log"
+	DefaultWebLogPath                    = DefaultExportDir + "/bee-web.log"
-	DefaultNetworkLogPath   = DefaultExportDir + "/bee-network.log"
+	DefaultNetworkLogPath                = DefaultExportDir + "/bee-network.log"
-	DefaultNvidiaLogPath    = DefaultExportDir + "/bee-nvidia.log"
+	DefaultNvidiaLogPath                 = DefaultExportDir + "/bee-nvidia.log"
-	DefaultSSHLogPath       = DefaultExportDir + "/bee-sshsetup.log"
+	DefaultSSHLogPath                    = DefaultExportDir + "/bee-sshsetup.log"
-	DefaultRuntimeJSONPath  = DefaultExportDir + "/runtime-health.json"
+	DefaultRuntimeJSONPath               = DefaultExportDir + "/runtime-health.json"
-	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
+	DefaultRuntimeLogPath                = DefaultExportDir + "/runtime-health.log"
-	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
+	DefaultTechDumpDir                   = DefaultExportDir + "/techdump"
-	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
+	DefaultSATBaseDir                    = DefaultExportDir + "/bee-sat"
-	DefaultBeeBenchBaseDir  = DefaultExportDir + "/bee-bench"
+	DefaultBeeBenchBaseDir               = DefaultExportDir + "/bee-bench"
-	DefaultBeeBenchPerfDir  = DefaultBeeBenchBaseDir + "/perf"
+	DefaultBeeBenchAutotuneDir           = DefaultBeeBenchBaseDir + "/autotune"
-	DefaultBeeBenchPowerDir = DefaultBeeBenchBaseDir + "/power"
+	DefaultBeeBenchPerfDir               = DefaultBeeBenchBaseDir + "/perf"
 	DefaultBeeBenchPowerDir              = DefaultBeeBenchBaseDir + "/power"
 	DefaultBeeBenchPowerSourceConfigPath = DefaultBeeBenchBaseDir + "/power-source-autotune.json"
 )
 type App struct {
@@ -125,6 +127,7 @@ type satRunner interface {
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error)
 	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
@@ -572,6 +575,11 @@ func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts pl
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultBeeBenchPerfDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "performance", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }
@@ -579,9 +587,47 @@ func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts p
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultBeeBenchPowerDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "power-fit", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
 }
 // RunNvidiaPowerSourceAutotuneCtx runs the server power source autotune
 // through the configured SAT runner. A blank (empty or whitespace-only)
 // baseDir is replaced with DefaultBeeBenchAutotuneDir; every other argument
 // is forwarded unchanged, and the runner's result is returned as-is.
 func (a *App) RunNvidiaPowerSourceAutotuneCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
 	targetDir := baseDir
 	if strings.TrimSpace(targetDir) == "" {
 		targetDir = DefaultBeeBenchAutotuneDir
 	}
 	return a.sat.RunNvidiaPowerSourceAutotune(ctx, targetDir, opts, benchmarkKind, logFunc)
 }
 // LoadBenchmarkPowerAutotune loads the saved power source autotune config
 // from DefaultBeeBenchPowerSourceConfigPath and returns the loader's result
 // unchanged.
 func (a *App) LoadBenchmarkPowerAutotune() (*platform.BenchmarkPowerAutotuneConfig, error) {
 	const cfgPath = DefaultBeeBenchPowerSourceConfigPath
 	return platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 }
 // ensureBenchmarkPowerAutotune returns the saved server power source
 // selection for a benchmark run, running the power source autotune first when
 // no usable saved config can be loaded.
 //
 // The config path is derived from baseDir via
 // platform.BenchmarkPowerSourceConfigPath. When autotune has to run, it is
 // given an "autotune" directory next to baseDir as its base directory.
 //
 // Any failure of the first load (including a nil config) is treated as
 // "no saved config" and triggers autotune. Errors from the autotune run or
 // from the follow-up load are returned to the caller together with a zero
 // config. logFunc may be nil.
 func (a *App) ensureBenchmarkPowerAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (platform.BenchmarkPowerAutotuneConfig, error) {
 	cfgPath := platform.BenchmarkPowerSourceConfigPath(baseDir)
 	// Require a non-nil config as well as a nil error: dereferencing a nil
 	// config returned alongside a nil error would panic.
 	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath); err == nil && cfg != nil {
 		if logFunc != nil {
 			logFunc(fmt.Sprintf("benchmark autotune: using saved server power source %s", cfg.SelectedSource))
 		}
 		return *cfg, nil
 	}
 	if logFunc != nil {
 		logFunc("benchmark autotune: no saved power source config, running autotune first")
 	}
 	autotuneDir := filepath.Join(filepath.Dir(baseDir), "autotune")
 	if _, err := a.RunNvidiaPowerSourceAutotuneCtx(ctx, autotuneDir, opts, benchmarkKind, logFunc); err != nil {
 		return platform.BenchmarkPowerAutotuneConfig{}, err
 	}
 	cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 	if err != nil {
 		return platform.BenchmarkPowerAutotuneConfig{}, err
 	}
 	if cfg == nil {
 		// Autotune reported success but left nothing loadable behind; surface
 		// that as an error instead of panicking on the dereference below.
 		return platform.BenchmarkPowerAutotuneConfig{}, fmt.Errorf("benchmark autotune: no power source config at %s after autotune", cfgPath)
 	}
 	return *cfg, nil
 }
 func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"bee/audit/internal/platform"
@@ -123,6 +124,7 @@ type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaAutotuneFn       func(string, platform.NvidiaBenchmarkOptions, string) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
@@ -163,6 +165,13 @@ func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts pla
 	return f.runNvidiaFn(baseDir)
 }
 // RunNvidiaPowerSourceAutotune is the fake power source autotune entry point.
 // It calls runNvidiaAutotuneFn when that hook is set and otherwise falls back
 // to runNvidiaFn with only baseDir. The context and log callback are ignored.
 func (f fakeSAT) RunNvidiaPowerSourceAutotune(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, _ func(string)) (string, error) {
 	hook := f.runNvidiaAutotuneFn
 	if hook == nil {
 		return f.runNvidiaFn(baseDir)
 	}
 	return hook(baseDir, opts, benchmarkKind)
 }
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
@@ -809,6 +818,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.MkdirAll(filepath.Join(exportDir, "bee-bench"), 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json"), []byte(`{"version":1,"updated_at":"2026-04-20T01:02:03Z","selected_source":"sdr_psu_input","reason":"selected lowest relative error"}`), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -836,6 +851,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	tr := tar.NewReader(gzr)
 	var names []string
 	var auditJSON string
 	var manifest string
 	for {
 		hdr, err := tr.Next()
 		if errors.Is(err, io.EOF) {
@@ -852,6 +868,13 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 			}
 			auditJSON = string(body)
 		}
 		if strings.HasSuffix(hdr.Name, "/manifest.txt") {
 			body, err := io.ReadAll(tr)
 			if err != nil {
 				t.Fatalf("read manifest entry: %v", err)
 			}
 			manifest = string(body)
 		}
 	}
 	for _, want := range []string{
@@ -895,6 +918,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
 		t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
 	}
 	if !contains(manifest, "files:") {
 		t.Fatalf("support bundle manifest missing files section:\n%s", manifest)
 	}
 	if !strings.Contains(manifest, "power_autotune_selected_source=sdr_psu_input") {
 		t.Fatalf("support bundle manifest missing autotune source:\n%s", manifest)
 	}
 }
 func TestMainBanner(t *testing.T) {
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -2,6 +2,7 @@ package app
 import (
 	"archive/tar"
 	"bee/audit/internal/platform"
 	"compress/gzip"
 	"fmt"
 	"io"
@@ -424,6 +425,13 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	fmt.Fprintf(&body, "host=%s\n", hostnameOr("unknown"))
 	fmt.Fprintf(&body, "generated_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
 	fmt.Fprintf(&body, "export_dir=%s\n", exportDir)
 	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json")); err == nil && cfg != nil {
 		fmt.Fprintf(&body, "power_autotune_selected_source=%s\n", cfg.SelectedSource)
 		fmt.Fprintf(&body, "power_autotune_updated_at=%s\n", cfg.UpdatedAt.UTC().Format(time.RFC3339))
 		if strings.TrimSpace(cfg.Reason) != "" {
 			fmt.Fprintf(&body, "power_autotune_reason=%s\n", cfg.Reason)
 		}
 	}
 	fmt.Fprintf(&body, "\nfiles:\n")
 	var files []string
--- a/audit/internal/collector/psu.go
+++ b/audit/internal/collector/psu.go
@@ -160,6 +160,9 @@ type psuSDR struct {
 }
 var psuSlotPatterns = []*regexp.Regexp{
 	// MSI/underscore style: PSU1_POWER_IN, PSU2_POWER_OUT — underscore is \w so \b
 	// does not fire after the digit; match explicitly with underscore terminator.
 	regexp.MustCompile(`(?i)\bpsu([0-9]+)_`),
 	regexp.MustCompile(`(?i)\bpsu?\s*([0-9]+)\b`),                    // PSU1, PS1, ps 2
 	regexp.MustCompile(`(?i)\bps\s*([0-9]+)\b`),                      // PS 6, PS6
 	regexp.MustCompile(`(?i)\bpws\s*([0-9]+)\b`),                     // PWS1
--- a/audit/internal/collector/psu_sdr_test.go
+++ b/audit/internal/collector/psu_sdr_test.go
@@ -49,6 +49,10 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 		{name: "PWS1 Status", want: 1},
 		{name: "Power Supply Bay 8", want: 8},
 		{name: "PS 6 Input Power", want: 6},
 		// MSI underscore format — \b does not fire between digit and '_'
 		{name: "PSU1_POWER_IN", want: 1},
 		{name: "PSU2_POWER_OUT", want: 2},
 		{name: "PSU4_STATUS", want: 4},
 	}
 	for _, tt := range tests {
@@ -59,6 +63,31 @@ func TestParsePSUSlotVendorVariants(t *testing.T) {
 	}
 }
 // TestParsePSUSDRMSIFormat feeds parsePSUSDR an SDR listing that uses
 // underscore-separated sensor names (PSU<n>_STATUS, PSU<n>_POWER_IN,
 // PSU<n>_POWER_OUT) and checks that two PSU entries come back, keyed 1 and 2,
 // with the expected input/output power readings.
 func TestParsePSUSDRMSIFormat(t *testing.T) {
 	t.Parallel()
 	const sdrListing = `
PSU1_STATUS      | F1h | ok
PSU1_POWER_OUT   | 928 Watts | ok
PSU1_POWER_IN    | 976 Watts | ok
PSU2_STATUS      | F2h | ok
PSU2_POWER_OUT   | 944 Watts | ok
PSU2_POWER_IN    | 992 Watts | ok
`
 	parsed := parsePSUSDR(sdrListing)
 	if count := len(parsed); count != 2 {
 		t.Fatalf("len(got)=%d want 2", count)
 	}
 	// PSU 1: both input and output power are asserted.
 	if in := parsed[1].inputPowerW; in == nil || *in != 976 {
 		t.Fatalf("psu1 input power=%v want 976", in)
 	}
 	if out := parsed[1].outputPowerW; out == nil || *out != 928 {
 		t.Fatalf("psu1 output power=%v want 928", out)
 	}
 	// PSU 2: only the input power is asserted.
 	if in := parsed[2].inputPowerW; in == nil || *in != 992 {
 		t.Fatalf("psu2 input power=%v want 992", in)
 	}
 }
 func TestSynthesizePSUsFromSDR(t *testing.T) {
 	t.Parallel()
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -240,6 +240,47 @@ func setBenchmarkPowerLimit(ctx context.Context, verboseLog string, gpuIndex, po
 	return nil
 }
 // benchmarkPowerEngine selects the load engine for power benchmarks from the
 // BEE_BENCH_POWER_ENGINE environment variable. The value is lower-cased and
 // trimmed before comparison; only BenchmarkPowerEngineTargetedPower is
 // recognised, and anything else (including an unset variable) yields
 // BenchmarkPowerEngineDCGMProfTester.
 func benchmarkPowerEngine() string {
 	requested := strings.TrimSpace(strings.ToLower(os.Getenv("BEE_BENCH_POWER_ENGINE")))
 	if requested == BenchmarkPowerEngineTargetedPower {
 		return BenchmarkPowerEngineTargetedPower
 	}
 	return BenchmarkPowerEngineDCGMProfTester
 }
 // benchmarkPowerEngineLabel returns the human-readable name used in logs and
 // findings for a power benchmark engine. The engine value is lower-cased and
 // trimmed before comparison; BenchmarkPowerEngineTargetedPower maps to
 // "dcgmi diag targeted_power" and every other value maps to "dcgmproftester".
 func benchmarkPowerEngineLabel(engine string) string {
 	normalized := strings.TrimSpace(strings.ToLower(engine))
 	if normalized == BenchmarkPowerEngineTargetedPower {
 		return "dcgmi diag targeted_power"
 	}
 	return "dcgmproftester"
 }
 // resolveBenchmarkPowerLoadCommand builds the command line, and optionally the
 // extra environment, for the power benchmark load chosen by
 // benchmarkPowerEngine. durationSec is first passed through
 // normalizeNvidiaBurnDuration.
 //
 // It returns (argv, env, err):
 //   - targeted-power engine: the DCGM "targeted_power" diag command, no env.
 //   - default engine with more than one GPU: the
 //     "bee-dcgmproftester-staggered" wrapper with a zero stagger and the GPU
 //     list, no env. This branch does not check that the wrapper exists.
 //   - default engine otherwise: the command from resolveDCGMProfTesterCommand
 //     plus the environment from nvidiaVisibleDevicesEnv; a resolution error is
 //     returned with nil argv and env.
 func resolveBenchmarkPowerLoadCommand(durationSec int, gpuIndices []int) ([]string, []string, error) {
 	selectedEngine := benchmarkPowerEngine()
 	seconds := normalizeNvidiaBurnDuration(durationSec)
 	if selectedEngine == BenchmarkPowerEngineTargetedPower {
 		return nvidiaDCGMNamedDiagCommand("targeted_power", seconds, gpuIndices), nil, nil
 	}
 	if len(gpuIndices) > 1 {
 		staggered := []string{
 			"bee-dcgmproftester-staggered",
 			"--seconds", strconv.Itoa(seconds),
 			"--stagger-seconds", "0",
 			"--devices", joinIndexList(gpuIndices),
 		}
 		return staggered, nil, nil
 	}
 	argv, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(seconds))
 	if err != nil {
 		return nil, nil, err
 	}
 	return argv, nvidiaVisibleDevicesEnv(gpuIndices), nil
 }
 func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if ctx == nil {
 		ctx = context.Background()
@@ -384,10 +425,10 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 			// Sample server idle power once (first GPU only — server state is global).
 			if !serverIdleOK {
-				if w, ok := sampleIPMIPowerSeries(ctx, maxInt(spec.BaselineSec, 10)); ok {
+				if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, maxInt(spec.BaselineSec, 10), benchmarkPowerAutotuneSampleInterval); ok {
 					serverIdleW = w
 					serverIdleOK = true
-					logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+					logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 				}
 			}
@@ -430,7 +471,16 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 				"--precision-plan-seconds", benchmarkPlanDurationsCSV(planPhases),
 			}
 			logFunc(fmt.Sprintf("GPU %d: uninterrupted precision plan (%d precision phases x %ds, mixed %ds)", idx, len(supportedPrecisions), basePhaseSec, mixedPhaseSec))
 			serverPowerStopCh := make(chan struct{})
 			serverPowerCh := startSelectedPowerSourceSampler(serverPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 			_, phaseRowsByStage, phaseLogs, planErr := runBenchmarkPlannedCommandWithMetrics(ctx, verboseLog, fmt.Sprintf("gpu-%d-precision-plan.log", idx), planCmd, nil, []int{idx}, planPhases, logFunc)
 			close(serverPowerStopCh)
 			if serverPowerSamples := <-serverPowerCh; len(serverPowerSamples) > 0 {
 				serverLoadedWSum += benchmarkMean(serverPowerSamples)
 				serverLoadedSamples++
 				serverLoadedOK = true
 				logFunc(fmt.Sprintf("GPU %d: server loaded power (%s avg): %.0f W", idx, opts.ServerPowerSource, benchmarkMean(serverPowerSamples)))
 			}
 			for _, phaseSpec := range planPhases {
 				if rows := phaseRowsByStage[phaseSpec.MetricStage]; len(rows) > 0 {
 					appendBenchmarkMetrics(&metricRows, rows, phaseSpec.MetricStage, &metricTimelineSec, float64(phaseSpec.DurationSec))
@@ -461,48 +511,6 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 			beforeThrottle, _ := queryThrottleCounters(idx)
 			logFunc(fmt.Sprintf("GPU %d: steady compute (combined, %ds)", idx, mixedPhaseSec))
 			// Sample server power via IPMI in parallel with the steady phase.
 			// We collect readings every 5s and average them.
 			ipmiStopCh := make(chan struct{})
 			ipmiResultCh := make(chan float64, 1)
 			go func() {
 				defer close(ipmiResultCh)
 				var samples []float64
 				ticker := time.NewTicker(5 * time.Second)
 				defer ticker.Stop()
 				// First sample after a short warmup delay.
 				select {
 				case <-ipmiStopCh:
 					return
 				case <-time.After(15 * time.Second):
 				}
 				for {
 					if w, err := queryIPMIServerPowerW(); err == nil {
 						samples = append(samples, w)
 					}
 					select {
 					case <-ipmiStopCh:
 						if len(samples) > 0 {
 							var sum float64
 							for _, w := range samples {
 								sum += w
 							}
 							ipmiResultCh <- sum / float64(len(samples))
 						}
 						return
 					case <-ticker.C:
 					}
 				}
 			}()
 			close(ipmiStopCh)
 			if loadedW, ok := <-ipmiResultCh; ok {
 				serverLoadedWSum += loadedW
 				serverLoadedSamples++
 				serverLoadedOK = true
 				logFunc(fmt.Sprintf("GPU %d: server loaded power (IPMI): %.0f W", idx, loadedW))
 			}
 			afterThrottle, _ := queryThrottleCounters(idx)
 			if planErr != nil {
 				gpuResult.Notes = append(gpuResult.Notes, "precision plan failed: "+planErr.Error())
@@ -652,7 +660,7 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 	if serverLoadedSamples > 0 {
 		serverLoadedW = serverLoadedWSum / float64(serverLoadedSamples)
 	}
-	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuReportedSumW, serverIdleOK && serverLoadedOK)
+	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuReportedSumW, opts.ServerPowerSource, serverIdleOK && serverLoadedOK)
 	result.Cooling = summarizeBenchmarkCooling(metricRows)
 	// Apply server-power penalty when IPMI reports the server delta is much
@@ -707,6 +715,7 @@ func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) Nv
 	if opts.SizeMB < 0 {
 		opts.SizeMB = 0
 	}
 	opts.ServerPowerSource = normalizeBenchmarkPowerSource(opts.ServerPowerSource)
 	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
 	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
 	return opts
@@ -2535,10 +2544,14 @@ loop:
 }
 // characterizeServerPower computes BenchmarkServerPower from idle and loaded
-// IPMI samples plus the GPU-reported average power during steady state.
+// samples plus the GPU-reported average power during steady state.
-func characterizeServerPower(idleW, loadedW, gpuReportedSumW float64, ipmiAvailable bool) *BenchmarkServerPower {
+func characterizeServerPower(idleW, loadedW, gpuReportedSumW float64, source string, available bool) *BenchmarkServerPower {
-	sp := &BenchmarkServerPower{Available: ipmiAvailable}
+	sp := &BenchmarkServerPower{
-	if !ipmiAvailable {
+		Available:         available,
 		Source:            normalizeBenchmarkPowerSource(source),
 		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
 	}
 	if !available {
 		sp.Notes = append(sp.Notes, "IPMI power reading unavailable; server-side power characterization skipped")
 		return sp
 	}
@@ -2671,10 +2684,10 @@ func runNvidiaBenchmarkParallel(
 	// Sample server idle power once.
 	if !*serverIdleOK {
-		if w, ok := sampleIPMIPowerSeries(ctx, maxInt(spec.BaselineSec, 10)); ok {
+		if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, maxInt(spec.BaselineSec, 10), benchmarkPowerAutotuneSampleInterval); ok {
 			*serverIdleW = w
 			*serverIdleOK = true
-			logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+			logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 		}
 	}
@@ -2728,7 +2741,16 @@ func runNvidiaBenchmarkParallel(
 		"--precision-plan-seconds", benchmarkPlanDurationsCSV(planPhases),
 	}
 	logFunc(fmt.Sprintf("GPUs %s: uninterrupted precision plan (%d precision phases x %ds, mixed %ds)", allDevices, len(supportedPrecisions), basePhaseSec, mixedPhaseSec))
 	serverPowerStopCh := make(chan struct{})
 	serverPowerCh := startSelectedPowerSourceSampler(serverPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 	_, phaseRowsByStage, phaseLogs, planErr := runBenchmarkPlannedCommandWithMetrics(ctx, verboseLog, "gpu-all-precision-plan.log", planCmd, nil, selected, planPhases, logFunc)
 	close(serverPowerStopCh)
 	if serverPowerSamples := <-serverPowerCh; len(serverPowerSamples) > 0 {
 		*serverLoadedWSum += benchmarkMean(serverPowerSamples)
 		(*serverLoadedSamples)++
 		*serverLoadedOK = true
 		logFunc(fmt.Sprintf("GPUs %s: server loaded power (%s avg): %.0f W", allDevices, opts.ServerPowerSource, benchmarkMean(serverPowerSamples)))
 	}
 	for _, phaseSpec := range planPhases {
 		if rows := phaseRowsByStage[phaseSpec.MetricStage]; len(rows) > 0 {
 			appendBenchmarkMetrics(allMetricRows, rows, phaseSpec.MetricStage, metricTimelineSec, float64(phaseSpec.DurationSec))
@@ -2770,46 +2792,6 @@ func runNvidiaBenchmarkParallel(
 	}
 	logFunc(fmt.Sprintf("GPUs %s: parallel steady compute (combined, %ds)", allDevices, mixedPhaseSec))
 	// Sample server power via IPMI in parallel with steady phase.
 	ipmiStopCh := make(chan struct{})
 	ipmiResultCh := make(chan float64, 1)
 	go func() {
 		defer close(ipmiResultCh)
 		var samples []float64
 		ticker := time.NewTicker(5 * time.Second)
 		defer ticker.Stop()
 		select {
 		case <-ipmiStopCh:
 			return
 		case <-time.After(15 * time.Second):
 		}
 		for {
 			if w, err := queryIPMIServerPowerW(); err == nil {
 				samples = append(samples, w)
 			}
 			select {
 			case <-ipmiStopCh:
 				if len(samples) > 0 {
 					var sum float64
 					for _, w := range samples {
 						sum += w
 					}
 					ipmiResultCh <- sum / float64(len(samples))
 				}
 				return
 			case <-ticker.C:
 			}
 		}
 	}()
 	close(ipmiStopCh)
 	if loadedW, ok := <-ipmiResultCh; ok {
 		*serverLoadedWSum += loadedW
 		(*serverLoadedSamples)++
 		*serverLoadedOK = true
 		logFunc(fmt.Sprintf("GPUs %s: server loaded power (IPMI): %.0f W", allDevices, loadedW))
 	}
 	afterThrottle := make(map[int]BenchmarkThrottleCounters, len(selected))
 	for _, idx := range selected {
 		afterThrottle[idx], _ = queryThrottleCounters(idx)
@@ -3040,8 +3022,8 @@ func summarizeCPULoad(samples []float64) *BenchmarkCPULoad {
 	return cl
 }
-// runBenchmarkPowerCalibration runs targeted_power for the supplied GPU set and
+// runBenchmarkPowerCalibration runs the configured power-fit load for the supplied
-// actively watches throttle counters. seedLimits, when provided, are treated as
+// GPU set and actively watches throttle counters. seedLimits, when provided, are treated as
 // the starting point for this calibration pass rather than as immutable fixed
 // limits. This matters during cumulative ramp-up: once an additional GPU is
 // introduced, every already-active GPU must be revalidated under the new
@@ -3070,10 +3052,19 @@ func runBenchmarkPowerCalibration(
 	// doubling each retry until it would exceed the cap, at which point the
 	// next busy response fails the calibration immediately.
 	const dcgmResourceBusyMaxDelaySec = 300
 	engine := benchmarkPowerEngine()
 	engineLabel := benchmarkPowerEngineLabel(engine)
-	if _, err := exec.LookPath("dcgmi"); err != nil {
+	if engine == BenchmarkPowerEngineTargetedPower {
-		logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
+		if _, err := exec.LookPath("dcgmi"); err != nil {
-		return map[int]benchmarkPowerCalibrationResult{}, nil, nil
+			logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
 			return map[int]benchmarkPowerCalibrationResult{}, nil, nil
 		}
 	} else {
 		if _, _, err := resolveBenchmarkPowerLoadCommand(calibDurationSec, gpuIndices); err != nil {
 			logFunc("power calibration: dcgmproftester not found, skipping (will use default power limit)")
 			return map[int]benchmarkPowerCalibrationResult{}, nil, nil
 		}
 	}
 	if killed := KillTestWorkers(); len(killed) > 0 {
 		for _, p := range killed {
@@ -3206,7 +3197,7 @@ calibDone:
 		sharedAttempt++
 		for _, s := range active {
 			s.calib.Attempts++
-			logFunc(fmt.Sprintf("power calibration: GPU %d targeted_power attempt %d at %d W for %ds", s.idx, s.calib.Attempts, s.appliedLimitW, calibDurationSec))
+			logFunc(fmt.Sprintf("power calibration: GPU %d %s attempt %d at %d W for %ds", s.idx, engineLabel, s.calib.Attempts, s.appliedLimitW, calibDurationSec))
 		}
 		// Snapshot throttle counters for all active GPUs before the run.
@@ -3215,14 +3206,22 @@ calibDone:
 			beforeThrottle[s.idx], _ = queryThrottleCounters(s.idx)
 		}
-		// Run targeted_power for ALL gpuIndices simultaneously so every card
+		// Run the selected power-fit load for ALL gpuIndices simultaneously so every card
 		// is under load during calibration — this reflects real server thermals.
 		logName := fmt.Sprintf("power-calibration-attempt-%d.log", sharedAttempt)
-		cmd := nvidiaDCGMNamedDiagCommand("targeted_power", calibDurationSec, gpuIndices)
+		cmd, env, err := resolveBenchmarkPowerLoadCommand(calibDurationSec, gpuIndices)
 		if err != nil {
 			for _, s := range active {
 				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("failed to resolve %s command: %v", engineLabel, err))
 				s.converged = true
 			}
 			logFunc(fmt.Sprintf("power calibration: failed to resolve %s command: %v", engineLabel, err))
 			break calibDone
 		}
 		attemptCtx, cancelAttempt := context.WithCancel(ctx)
 		doneCh := make(chan sharedAttemptResult, 1)
 		go func() {
-			out, rows, err := runBenchmarkCommandWithMetrics(attemptCtx, verboseLog, logName, cmd, nil, gpuIndices, logFunc)
+			out, rows, err := runBenchmarkCommandWithMetrics(attemptCtx, verboseLog, logName, cmd, env, gpuIndices, logFunc)
 			doneCh <- sharedAttemptResult{out: out, rows: rows, err: err}
 		}()
@@ -3245,8 +3244,8 @@ calibDone:
 					if err != nil {
 						continue
 					}
-					// Record throttle but do NOT cancel — let dcgmi finish so
+					// Record throttle but do NOT cancel — let the load command finish so
-					// nv-hostengine releases the slot cleanly before the next attempt.
+					// runtime resources release cleanly before the next attempt.
 					if reason := benchmarkCalibrationThrottleReason(beforeThrottle[s.idx], after); reason != "" {
 						throttleReasons[s.idx] = reason
 						logFunc(fmt.Sprintf("power calibration: GPU %d detected %s throttle at %d W, waiting for run to finish", s.idx, reason, s.appliedLimitW))
@@ -3359,9 +3358,9 @@ calibDone:
 				logFunc(fmt.Sprintf("power calibration: GPU %d throttled (%s) at %d W, reducing power limit", s.idx, throttle, s.appliedLimitW))
 			case ar.err != nil:
 				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("targeted_power attempt %d failed at %d W: %v", s.calib.Attempts, s.appliedLimitW, ar.err))
-				logFunc(fmt.Sprintf("power calibration: GPU %d targeted_power failed at %d W: %v", s.idx, s.appliedLimitW, ar.err))
+				logFunc(fmt.Sprintf("power calibration: GPU %d %s failed at %d W: %v", s.idx, engineLabel, s.appliedLimitW, ar.err))
 			default:
-				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("targeted_power attempt %d at %d W: no valid power telemetry", s.calib.Attempts, s.appliedLimitW))
+				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("%s attempt %d at %d W: no valid power telemetry", engineLabel, s.calib.Attempts, s.appliedLimitW))
 				logFunc(fmt.Sprintf("power calibration: GPU %d attempt %d at %d W: no valid telemetry", s.idx, s.calib.Attempts, s.appliedLimitW))
 			}
@@ -3384,7 +3383,7 @@ calibDone:
 						s.calib.Completed = true
 					}
 				} else {
-					s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable targeted_power limit within %d W of the default", maxDerateW))
+					s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable %s limit within %d W of the default", engineLabel, maxDerateW))
 				}
 				s.calib.MetricRows = filterRowsByGPU(ar.rows, s.idx)
 				s.converged = true
@@ -3399,7 +3398,7 @@ calibDone:
 				next = (s.lo + s.hi) / 2
 			}
 			if next < s.minLimitW {
-				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable targeted_power limit within %d W of the default", maxDerateW))
+				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable %s limit within %d W of the default", engineLabel, maxDerateW))
 				s.converged = true
 				continue
 			}
@@ -4117,13 +4116,13 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 	}
 	durationSec := powerBenchDurationSec(opts.Profile)
-	// Sample IPMI idle power before any GPU load.
+	// Sample server idle power before any GPU load.
 	var serverIdleW float64
 	var serverIdleOK bool
-	if w, ok := sampleIPMIPowerSeries(ctx, 10); ok {
+	if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, 10, benchmarkPowerAutotuneSampleInterval); ok {
 		serverIdleW = w
 		serverIdleOK = true
-		logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+		logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 	}
 	sdrIdle := sampleIPMISDRPowerSensors()
 	psuBefore := psuStatusSnapshot()
@@ -4141,20 +4140,18 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		_ = os.MkdirAll(singleDir, 0755)
 		singleInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
 		logFunc(fmt.Sprintf("power calibration: GPU %d single-card baseline", idx))
-		ipmiSingleCtx, ipmiSingleCancel := context.WithCancel(ctx)
+		singlePowerStopCh := make(chan struct{})
-		ipmiSingleDone := make(chan float64, 1)
+		singlePowerCh := startSelectedPowerSourceSampler(singlePowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 		go func() {
 			defer close(ipmiSingleDone)
 			if w, ok := sampleIPMIPowerSeries(ipmiSingleCtx, 3600); ok {
 				ipmiSingleDone <- w
 			}
 		}()
 		c, restore, singleRows := runBenchmarkPowerCalibration(ctx, verboseLog, singleDir, []int{idx}, singleInfo, logFunc, nil, durationSec)
 		appendBenchmarkMetrics(&allPowerRows, singleRows, fmt.Sprintf("single-gpu-%d", idx), &powerCursor, 0)
-		ipmiSingleCancel()
+		close(singlePowerStopCh)
-		if w, ok := <-ipmiSingleDone; ok {
+		sdrSingle := sampleIPMISDRPowerSensors()
-			singleIPMILoadedW[idx] = w
+		if samples := <-singlePowerCh; len(samples) > 0 {
-			logFunc(fmt.Sprintf("power calibration: GPU %d single-card IPMI loaded: %.0f W", idx, w))
+			singleIPMILoadedW[idx] = benchmarkMean(samples)
 			logFunc(fmt.Sprintf("power calibration: GPU %d single-card server power (%s avg): %.0f W", idx, opts.ServerPowerSource, singleIPMILoadedW[idx]))
 		} else if opts.ServerPowerSource == BenchmarkPowerSourceSDRPSUInput && sdrSingle.PSUInW > 0 {
 			singleIPMILoadedW[idx] = sdrSingle.PSUInW
 			logFunc(fmt.Sprintf("power calibration: GPU %d single-card fallback server power (SDR snapshot): %.0f W", idx, sdrSingle.PSUInW))
 		}
 		allRestoreActions = append(allRestoreActions, restore...)
 		if r, ok := c[idx]; ok {
@@ -4228,11 +4225,11 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		result.RecommendedSlotOrder = append(result.RecommendedSlotOrder, gpu.Index)
 	}
 	if len(result.RecommendedSlotOrder) > 0 {
-		result.Findings = append(result.Findings, fmt.Sprintf("Recommended slot order for installation based on single-card targeted_power: %s.", joinIndexList(result.RecommendedSlotOrder)))
+		result.Findings = append(result.Findings, fmt.Sprintf("Recommended slot order for installation based on single-card %s: %s.", benchmarkPowerEngineLabel(benchmarkPowerEngine()), joinIndexList(result.RecommendedSlotOrder)))
 	}
 	for _, gpu := range gpus {
 		if gpu.Derated {
-			result.Findings = append(result.Findings, fmt.Sprintf("GPU %d required reduced power limit %.0f W to complete targeted_power.", gpu.Index, gpu.AppliedPowerLimitW))
+			result.Findings = append(result.Findings, fmt.Sprintf("GPU %d required reduced power limit %.0f W to complete %s.", gpu.Index, gpu.AppliedPowerLimitW, benchmarkPowerEngineLabel(benchmarkPowerEngine())))
 		}
 		if gpu.CoolingWarning != "" {
 			result.Findings = append(result.Findings, fmt.Sprintf(
@@ -4249,7 +4246,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 	// Phase 2: cumulative thermal ramp.
 	// Each step introduces one new GPU into an environment where all previously
 	// calibrated GPUs are already running at their fixed stable limits. The new
-	// GPU's stable TDP is searched via binary search (targeted_power) under real
+	// GPU's stable TDP is searched via binary search under real
 	// multi-GPU thermal load. Once found, its limit is fixed permanently for all
 	// subsequent steps. This ensures each GPU's limit reflects actual sustained
 	// power in the final full-system thermal state.
@@ -4262,6 +4259,10 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 	// per-step in NvidiaPowerBenchStep.ServerLoadedW.
 	var serverLoadedW float64
 	var serverLoadedOK bool
 	// sdrLastStep retains the SDR snapshot from the last ramp step while GPUs are
 	// still loaded. Used as PSUInputLoadedW in the summary instead of re-sampling
 	// after the test when GPUs have already returned to idle.
 	var sdrLastStep sdrPowerSnapshot
 	// Step 1: reuse single-card calibration result directly.
 	if len(result.RecommendedSlotOrder) > 0 {
@@ -4284,7 +4285,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		}
 		if !firstCalib.Completed {
 			ramp.Status = "FAILED"
-			ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card targeted_power", firstIdx))
+			ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card %s", firstIdx, benchmarkPowerEngineLabel(benchmarkPowerEngine())))
 			result.OverallStatus = "PARTIAL"
 		} else if firstCalib.Derated {
 			ramp.Status = "PARTIAL"
@@ -4330,23 +4331,16 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			step, len(result.RecommendedSlotOrder), len(subset), newGPUIdx))
 		stepInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
-		ipmiStepCtx, ipmiStepCancel := context.WithCancel(ctx)
+		stepPowerStopCh := make(chan struct{})
-		ipmiStepDone := make(chan float64, 1)
+		stepPowerCh := startSelectedPowerSourceSampler(stepPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 		go func() {
 			defer close(ipmiStepDone)
 			if w, ok := sampleIPMIPowerSeries(ipmiStepCtx, 3600); ok {
 				ipmiStepDone <- w
 			}
 		}()
 		stepCalib, stepRestore, stepRows := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, seedForStep, durationSec)
 		appendBenchmarkMetrics(&allPowerRows, stepRows, fmt.Sprintf("ramp-step-%d", step), &powerCursor, 0)
-		ipmiStepCancel()
+		close(stepPowerStopCh)
 		var stepIPMILoadedW float64
 		var stepIPMIOK bool
-		if w, ok := <-ipmiStepDone; ok {
+		if samples := <-stepPowerCh; len(samples) > 0 {
-			stepIPMILoadedW = w
+			stepIPMILoadedW = benchmarkMean(samples)
 			stepIPMIOK = true
 			logFunc(fmt.Sprintf("power ramp: step %d IPMI loaded: %.0f W", step, w))
 		}
 		// Accumulate restore actions; they all run in the outer defer.
 		allRestoreActions = append(allRestoreActions, stepRestore...)
@@ -4382,7 +4376,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 				}
 				ramp.Status = "FAILED"
 				ramp.Notes = append(ramp.Notes,
-					fmt.Sprintf("GPU %d did not complete targeted_power in ramp step %d; keeping previous stable limit %d W", idx, step, fallback))
+					fmt.Sprintf("GPU %d did not complete %s in ramp step %d; keeping previous stable limit %d W", idx, benchmarkPowerEngineLabel(benchmarkPowerEngine()), step, fallback))
 				result.OverallStatus = "PARTIAL"
 				continue
 			}
@@ -4410,20 +4404,33 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			result.Findings = append(result.Findings, fmt.Sprintf("Ramp step %d (GPU %d) required derating to %.0f W under combined thermal load.", step, newGPUIdx, c.AppliedPowerLimitW))
 		}
 		// Per-step PSU slot snapshot — also used as the authoritative loaded power
 		// source when SDR PSU sensors are available (more accurate than DCMI on
 		// servers where DCMI covers only a subset of installed PSUs).
 		sdrStep := sampleIPMISDRPowerSensors()
 		if len(sdrStep.PSUSlots) > 0 {
 			ramp.PSUSlotReadings = sdrStep.PSUSlots
 		}
 		if stepIPMIOK && serverIdleOK && stepIPMILoadedW > 0 {
 			ramp.ServerLoadedW = stepIPMILoadedW
 			ramp.ServerDeltaW = stepIPMILoadedW - serverIdleW
 			logFunc(fmt.Sprintf("power ramp: step %d server loaded power (%s avg): %.0f W", step, opts.ServerPowerSource, stepIPMILoadedW))
 			// The last step has all GPUs loaded — use it as the top-level loaded_w.
 			if step == len(result.RecommendedSlotOrder) {
 				serverLoadedW = stepIPMILoadedW
 				serverLoadedOK = true
 				sdrLastStep = sdrStep
 			}
 		} else if opts.ServerPowerSource == BenchmarkPowerSourceSDRPSUInput && sdrStep.PSUInW > 0 {
 			ramp.ServerLoadedW = sdrStep.PSUInW
 			ramp.ServerDeltaW = sdrStep.PSUInW - sdrIdle.PSUInW
 			logFunc(fmt.Sprintf("power ramp: step %d fallback server loaded power (SDR snapshot): %.0f W", step, sdrStep.PSUInW))
 			if step == len(result.RecommendedSlotOrder) {
 				serverLoadedW = sdrStep.PSUInW
 				serverLoadedOK = true
 				sdrLastStep = sdrStep
 			}
 		}
 		// Per-step PSU slot snapshot.
 		sdrStep := sampleIPMISDRPowerSensors()
 		if len(sdrStep.PSUSlots) > 0 {
 			ramp.PSUSlotReadings = sdrStep.PSUSlots
 		}
 		// Fan state at end of ramp step.
@@ -4480,11 +4487,13 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		gpuActualSumW = result.PlatformMaxTDPW
 	}
 	_ = serverIdleOK // used implicitly via characterizeServerPower
-	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuActualSumW, serverIdleOK && serverLoadedOK)
+	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuActualSumW, opts.ServerPowerSource, serverIdleOK && serverLoadedOK)
 	// Supplement DCMI with SDR multi-source data via collector's PSU slot patterns.
 	// Per-slot readings enable correlation with audit HardwarePowerSupply entries.
 	if result.ServerPower != nil {
-		sdrLoaded := sampleIPMISDRPowerSensors()
+		// Use the SDR snapshot from the last ramp step (GPUs still loaded) rather
 		// than re-sampling here, which would capture post-test idle state.
 		sdrLoaded := sdrLastStep
 		result.ServerPower.PSUInputIdleW = sdrIdle.PSUInW
 		result.ServerPower.PSUInputLoadedW = sdrLoaded.PSUInW
 		result.ServerPower.PSUOutputIdleW = sdrIdle.PSUOutW
--- a/audit/internal/platform/benchmark_power_autotune.go
+++ b/audit/internal/platform/benchmark_power_autotune.go
@@ -0,0 +1,735 @@
 package platform
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"math"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"sort"
 	"strings"
 	"time"
 )
 // Tunables for the server power-source autotune run.
 const (
 	// benchmarkPowerAutotuneVersion is written into saved configs whose
 	// Version field is unset or non-positive.
 	benchmarkPowerAutotuneVersion         = 1
 	// benchmarkPowerAutotuneIdleSec is the idle validation window, in seconds.
 	benchmarkPowerAutotuneIdleSec         = 60
 	// benchmarkPowerAutotuneLoadSec is the full-load sampling window, in seconds.
 	benchmarkPowerAutotuneLoadSec         = 90
 	// benchmarkPowerAutotuneSampleInterval is the pause between telemetry
 	// samples, in seconds; it is also the default sampler interval.
 	benchmarkPowerAutotuneSampleInterval  = 3
 	// defaultBenchmarkPowerSourceConfigPath is returned by
 	// BenchmarkPowerSourceConfigPath when no base directory is given.
 	defaultBenchmarkPowerSourceConfigPath = "/appdata/bee/export/bee-bench/power-source-autotune.json"
 )
 // BenchmarkPowerSourceConfigPath returns the path of the power-source autotune
 // config file. For a non-blank baseDir the file lives in the parent directory
 // of baseDir; a blank baseDir yields the built-in default path.
 func BenchmarkPowerSourceConfigPath(baseDir string) string {
 	trimmed := strings.TrimSpace(baseDir)
 	if trimmed != "" {
 		parent := filepath.Dir(trimmed)
 		return filepath.Join(parent, "power-source-autotune.json")
 	}
 	return defaultBenchmarkPowerSourceConfigPath
 }
 // LoadBenchmarkPowerAutotuneConfig reads and decodes the autotune config stored
 // at path. Read and JSON decode errors are returned unchanged; a config whose
 // SelectedSource is blank is rejected with an error.
 func LoadBenchmarkPowerAutotuneConfig(path string) (*BenchmarkPowerAutotuneConfig, error) {
 	data, readErr := os.ReadFile(path)
 	if readErr != nil {
 		return nil, readErr
 	}
 	cfg := new(BenchmarkPowerAutotuneConfig)
 	if decodeErr := json.Unmarshal(data, cfg); decodeErr != nil {
 		return nil, decodeErr
 	}
 	if source := strings.TrimSpace(cfg.SelectedSource); source == "" {
 		return nil, fmt.Errorf("autotune config missing selected_source")
 	}
 	return cfg, nil
 }
 // SaveBenchmarkPowerAutotuneConfig writes cfg as indented JSON to path,
 // creating the parent directory when needed. A non-positive cfg.Version is
 // replaced with benchmarkPowerAutotuneVersion before encoding.
 //
 // The data is written to "<path>.tmp" first and then renamed over path so
 // readers never observe a half-written config. If the write or the rename
 // fails, the temporary file is removed (best effort) and the original error is
 // returned.
 func SaveBenchmarkPowerAutotuneConfig(path string, cfg BenchmarkPowerAutotuneConfig) error {
 	if strings.TrimSpace(path) == "" {
 		return fmt.Errorf("empty autotune config path")
 	}
 	if cfg.Version <= 0 {
 		cfg.Version = benchmarkPowerAutotuneVersion
 	}
 	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
 		return err
 	}
 	data, err := json.MarshalIndent(cfg, "", "  ")
 	if err != nil {
 		return err
 	}
 	tmp := path + ".tmp"
 	if err := os.WriteFile(tmp, data, 0644); err != nil {
 		// A failed write can leave a truncated temp file behind; cleanup is
 		// best effort because the write error is the one worth reporting.
 		_ = os.Remove(tmp)
 		return err
 	}
 	if err := os.Rename(tmp, path); err != nil {
 		// Do not leave a stale temp file next to the config when the rename fails.
 		_ = os.Remove(tmp)
 		return err
 	}
 	return nil
 }
 // LoadSystemPowerSourceConfig loads the autotune config from the path that
 // BenchmarkPowerSourceConfigPath derives for exportDir.
 func LoadSystemPowerSourceConfig(exportDir string) (*BenchmarkPowerAutotuneConfig, error) {
 	configPath := BenchmarkPowerSourceConfigPath(exportDir)
 	return LoadBenchmarkPowerAutotuneConfig(configPath)
 }
 // ResetBenchmarkPowerAutotuneConfig deletes the autotune config at path.
 // A blank path is an error; a path that does not exist is treated as success.
 func ResetBenchmarkPowerAutotuneConfig(path string) error {
 	if len(strings.TrimSpace(path)) == 0 {
 		return fmt.Errorf("empty autotune config path")
 	}
 	switch err := os.Remove(path); {
 	case err == nil, os.IsNotExist(err):
 		// Already gone counts as reset.
 		return nil
 	default:
 		return err
 	}
 }
 // normalizeBenchmarkPowerSource maps a free-form source name onto one of the
 // known source constants. Matching ignores case and surrounding whitespace;
 // anything that is not the SDR PSU input source resolves to DCMI.
 func normalizeBenchmarkPowerSource(source string) string {
 	canonical := strings.TrimSpace(strings.ToLower(source))
 	if canonical == BenchmarkPowerSourceSDRPSUInput {
 		return BenchmarkPowerSourceSDRPSUInput
 	}
 	return BenchmarkPowerSourceDCMI
 }
 // ResolveSystemPowerDecision decides which server power source to use.
 //
 // When a saved autotune config with a non-blank SelectedSource can be loaded
 // for exportDir, the (normalized) configured source is returned in "autotuned"
 // mode. Otherwise the sources are probed once and a "fallback" decision is
 // returned: the SDR PSU input source when it reports a positive reading, DCMI
 // in every other case.
 func ResolveSystemPowerDecision(exportDir string) SystemPowerSourceDecision {
 	if cfg, err := LoadSystemPowerSourceConfig(exportDir); err == nil && cfg != nil {
 		if strings.TrimSpace(cfg.SelectedSource) != "" {
 			source := normalizeBenchmarkPowerSource(cfg.SelectedSource)
 			return SystemPowerSourceDecision{
 				Configured:      true,
 				SelectedSource:  source,
 				EffectiveSource: source,
 				Mode:            "autotuned",
 				Reason:          strings.TrimSpace(cfg.Reason),
 				ConfiguredAt:    cfg.UpdatedAt,
 			}
 		}
 	}
 	// No usable config: start from the DCMI fallback and upgrade to SDR PSU
 	// input only if that source currently reports a positive reading.
 	fallback := SystemPowerSourceDecision{
 		Configured:      false,
 		EffectiveSource: BenchmarkPowerSourceDCMI,
 		Mode:            "fallback",
 		Reason:          "autotune config not found; using temporary fallback source dcmi",
 	}
 	if sampleBenchmarkPowerSources()[BenchmarkPowerSourceSDRPSUInput] > 0 {
 		fallback.EffectiveSource = BenchmarkPowerSourceSDRPSUInput
 		fallback.Reason = "autotune config not found; using temporary fallback source sdr_psu_input"
 	}
 	return fallback
 }
 // SampleSystemPowerResolved takes one server power reading from the source
 // chosen by ResolveSystemPowerDecision and returns the reading together with
 // the decision that produced it.
 //
 // If the source came from a saved config but yields no positive reading, the
 // other source (DCMI <-> SDR PSU input) is tried and the decision is marked
 // "degraded". When nothing responds, the reading is 0 and the error is always
 // non-nil: a source that answers without error but with a non-positive value
 // is reported as an error rather than passed through as a nil error.
 func SampleSystemPowerResolved(exportDir string) (float64, SystemPowerSourceDecision, error) {
 	decision := ResolveSystemPowerDecision(exportDir)
 	if decision.EffectiveSource == "" {
 		return 0, decision, fmt.Errorf("system power source unavailable")
 	}
 	value, err := queryBenchmarkPowerSourceW(decision.EffectiveSource)
 	if err == nil && value > 0 {
 		return value, decision, nil
 	}
 	if !decision.Configured {
 		// Fallback-mode decisions have no secondary source to try.
 		return 0, decision, fmt.Errorf("system power source unavailable")
 	}
 	if err == nil {
 		// The query succeeded but the value is unusable; make sure the caller
 		// never receives a zero reading paired with a nil error.
 		err = fmt.Errorf("power source %s returned non-positive reading %.0f W", decision.EffectiveSource, value)
 	}
 	fallback := BenchmarkPowerSourceDCMI
 	if decision.EffectiveSource == BenchmarkPowerSourceDCMI {
 		fallback = BenchmarkPowerSourceSDRPSUInput
 	}
 	if fallbackValue, fallbackErr := queryBenchmarkPowerSourceW(fallback); fallbackErr == nil && fallbackValue > 0 {
 		decision.Mode = "degraded"
 		decision.Reason = fmt.Sprintf("configured source %s unavailable; using degraded fallback %s", decision.SelectedSource, fallback)
 		decision.EffectiveSource = fallback
 		return fallbackValue, decision, nil
 	}
 	decision.Mode = "degraded"
 	decision.Reason = fmt.Sprintf("configured source %s unavailable and no fallback source responded", decision.SelectedSource)
 	return 0, decision, err
 }
 // queryBenchmarkPowerSourceW takes a single reading from the named source.
 // The SDR PSU input source is read from an SDR sensor snapshot and must be
 // positive to count; every other source name is served by the IPMI server
 // power query.
 func queryBenchmarkPowerSourceW(source string) (float64, error) {
 	if normalizeBenchmarkPowerSource(source) != BenchmarkPowerSourceSDRPSUInput {
 		return queryIPMIServerPowerW()
 	}
 	if watts := sampleIPMISDRPowerSensors().PSUInW; watts > 0 {
 		return watts, nil
 	}
 	return 0, fmt.Errorf("sdr psu input unavailable")
 }
 // sampleBenchmarkPowerSources reads every known power source once and returns
 // the positive readings keyed by source name. Sources that fail or report a
 // non-positive value are left out of the map.
 func sampleBenchmarkPowerSources() map[string]float64 {
 	readings := map[string]float64{}
 	dcmiW, dcmiErr := queryIPMIServerPowerW()
 	if dcmiErr == nil && dcmiW > 0 {
 		readings[BenchmarkPowerSourceDCMI] = dcmiW
 	}
 	sdrW, sdrErr := queryBenchmarkPowerSourceW(BenchmarkPowerSourceSDRPSUInput)
 	if sdrErr == nil && sdrW > 0 {
 		readings[BenchmarkPowerSourceSDRPSUInput] = sdrW
 	}
 	return readings
 }
 // sampleBenchmarkPowerSourceSeries samples source for durationSec seconds (or
 // until ctx is done) and returns the mean of the collected readings. The
 // boolean is false when the duration is non-positive or nothing was collected.
 func sampleBenchmarkPowerSourceSeries(ctx context.Context, source string, durationSec, intervalSec int) (float64, bool) {
 	if durationSec <= 0 {
 		return 0, false
 	}
 	if readings := collectSelectedPowerSourceSamples(ctx, source, durationSec, intervalSec); len(readings) > 0 {
 		return benchmarkMean(readings), true
 	}
 	return 0, false
 }
 // collectSelectedPowerSourceSamples runs the background sampler for source
 // until durationSec seconds have passed or ctx is done, whichever comes first,
 // then stops it and returns the readings it gathered. A non-positive duration
 // returns nil without sampling.
 func collectSelectedPowerSourceSamples(ctx context.Context, source string, durationSec, intervalSec int) []float64 {
 	if durationSec <= 0 {
 		return nil
 	}
 	stop := make(chan struct{})
 	results := startSelectedPowerSourceSampler(stop, source, intervalSec)
 	window := time.Duration(durationSec) * time.Second
 	select {
 	case <-time.After(window):
 	case <-ctx.Done():
 	}
 	// Closing stop makes the sampler publish its readings and exit.
 	close(stop)
 	return <-results
 }
 // startSelectedPowerSourceSampler launches a goroutine that reads source once
 // immediately and then every intervalSec seconds (the autotune sample interval
 // when intervalSec is non-positive). Only successful, positive readings are
 // kept. When stopCh is closed the goroutine sends the collected readings on
 // the returned channel, closes it, and exits.
 func startSelectedPowerSourceSampler(stopCh <-chan struct{}, source string, intervalSec int) <-chan []float64 {
 	interval := intervalSec
 	if interval <= 0 {
 		interval = benchmarkPowerAutotuneSampleInterval
 	}
 	// Buffer of one so the final send never blocks on a slow receiver.
 	results := make(chan []float64, 1)
 	go func() {
 		defer close(results)
 		var readings []float64
 		sampleOnce := func() {
 			watts, err := queryBenchmarkPowerSourceW(source)
 			if err == nil && watts > 0 {
 				readings = append(readings, watts)
 			}
 		}
 		sampleOnce()
 		tick := time.NewTicker(time.Duration(interval) * time.Second)
 		defer tick.Stop()
 		for {
 			select {
 			case <-tick.C:
 				sampleOnce()
 			case <-stopCh:
 				results <- readings
 				return
 			}
 		}
 	}()
 	return results
 }
 // benchmarkPowerAutotuneSample is one telemetry row captured by
 // collectBenchmarkPowerAutotuneSamples during an autotune phase.
 type benchmarkPowerAutotuneSample struct {
 	// ElapsedSec is the time since the phase's sampling loop started, in seconds.
 	ElapsedSec     float64
 	// GPUAvgUsagePct is the mean UsagePct over the sampled GPUs; it stays 0
 	// when GPU metrics could not be read for this row.
 	GPUAvgUsagePct float64
 	// CPUUsagePct is the value returned by sampleCPULoadPct for this row.
 	CPUUsagePct    float64
 	// GPUSumPowerW is the sum of PowerW over the sampled GPUs.
 	GPUSumPowerW   float64
 	// Sources holds the positive server power readings keyed by source name;
 	// sources that failed or read non-positive are absent.
 	Sources        map[string]float64
 }
 // collectBenchmarkPowerAutotuneSamples gathers telemetry rows for one autotune
 // phase. Each row records CPU load, all server power sources and, when GPU
 // metrics are readable, the summed GPU power and mean GPU utilisation for
 // gpuIndices. Rows are logged as they are taken. Sampling stops once
 // durationSec seconds have elapsed or ctx is done; the rows collected so far
 // are returned in either case. A non-positive duration returns nil.
 func collectBenchmarkPowerAutotuneSamples(ctx context.Context, phase string, gpuIndices []int, durationSec int, logFunc func(string)) []benchmarkPowerAutotuneSample {
 	if durationSec <= 0 {
 		return nil
 	}
 	var samples []benchmarkPowerAutotuneSample
 	deadline := time.Now().Add(time.Duration(durationSec) * time.Second)
 	begin := time.Now()
 	for ctx.Err() == nil {
 		sample := benchmarkPowerAutotuneSample{
 			ElapsedSec:  time.Since(begin).Seconds(),
 			CPUUsagePct: sampleCPULoadPct(),
 			Sources:     sampleBenchmarkPowerSources(),
 		}
 		gpuRows, gpuErr := sampleGPUMetrics(gpuIndices)
 		if gpuErr == nil && len(gpuRows) > 0 {
 			var usageTotal float64
 			for i := range gpuRows {
 				sample.GPUSumPowerW += gpuRows[i].PowerW
 				usageTotal += gpuRows[i].UsagePct
 			}
 			sample.GPUAvgUsagePct = usageTotal / float64(len(gpuRows))
 		}
 		samples = append(samples, sample)
 		logBenchmarkPowerAutotuneSample(phase, sample, logFunc)
 		// The deadline is checked after sampling, so at least one row is
 		// always taken while ctx is live.
 		if time.Now().After(deadline) {
 			break
 		}
 		select {
 		case <-time.After(benchmarkPowerAutotuneSampleInterval * time.Second):
 		case <-ctx.Done():
 			return samples
 		}
 	}
 	return samples
 }
 // logBenchmarkPowerAutotuneSample writes one telemetry row to logFunc. Power
 // sources are always listed in the order DCMI, SDR PSU input; a source with no
 // positive reading in the row is printed as "n/a". A nil logFunc is a no-op.
 func logBenchmarkPowerAutotuneSample(phase string, sample benchmarkPowerAutotuneSample, logFunc func(string)) {
 	if logFunc == nil {
 		return
 	}
 	order := [...]string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput}
 	var parts []string
 	for _, name := range order {
 		reading, present := sample.Sources[name]
 		if present && reading > 0 {
 			parts = append(parts, fmt.Sprintf("%s=%.0fW", name, reading))
 			continue
 		}
 		parts = append(parts, fmt.Sprintf("%s=n/a", name))
 	}
 	line := fmt.Sprintf(
 		"autotune %s sample t=%.0fs gpu_avg_util=%.1f%% gpu_sum_power=%.0fW cpu_load=%.1f%% %s",
 		phase,
 		sample.ElapsedSec,
 		sample.GPUAvgUsagePct,
 		sample.GPUSumPowerW,
 		sample.CPUUsagePct,
 		strings.Join(parts, " "),
 	)
 	logFunc(line)
 }
 // logBenchmarkPowerAutotunePhaseSummary logs aggregate statistics for one
 // phase: sample count, mean and p95 GPU utilisation, mean GPU power, mean and
 // p95 CPU load, and the mean of the positive readings of each power source
 // ("n/a" for a source with none). Nothing is logged when logFunc is nil or
 // samples is empty.
 func logBenchmarkPowerAutotunePhaseSummary(phase string, samples []benchmarkPowerAutotuneSample, logFunc func(string)) {
 	if logFunc == nil || len(samples) == 0 {
 		return
 	}
 	gpuUtil := make([]float64, 0, len(samples))
 	cpuLoad := make([]float64, 0, len(samples))
 	gpuWatts := make([]float64, 0, len(samples))
 	perSource := map[string][]float64{}
 	for i := range samples {
 		row := samples[i]
 		gpuUtil = append(gpuUtil, row.GPUAvgUsagePct)
 		cpuLoad = append(cpuLoad, row.CPUUsagePct)
 		gpuWatts = append(gpuWatts, row.GPUSumPowerW)
 		for name, reading := range row.Sources {
 			if reading > 0 {
 				perSource[name] = append(perSource[name], reading)
 			}
 		}
 	}
 	var parts []string
 	for _, name := range []string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput} {
 		if readings := perSource[name]; len(readings) > 0 {
 			parts = append(parts, fmt.Sprintf("%s_avg=%.0fW", name, benchmarkMean(readings)))
 		} else {
 			parts = append(parts, fmt.Sprintf("%s_avg=n/a", name))
 		}
 	}
 	logFunc(fmt.Sprintf(
 		"autotune %s summary samples=%d gpu_avg_util=%.1f%% gpu_p95_util=%.1f%% gpu_avg_power=%.0fW cpu_avg=%.1f%% cpu_p95=%.1f%% %s",
 		phase,
 		len(samples),
 		benchmarkMean(gpuUtil),
 		benchmarkPercentile(gpuUtil, 95),
 		benchmarkMean(gpuWatts),
 		benchmarkMean(cpuLoad),
 		benchmarkPercentile(cpuLoad, 95),
 		strings.Join(parts, " "),
 	))
 }
 // logBenchmarkPowerAutotuneSelection logs one line per candidate source with
 // its idle/load averages, delta, relative error and confidence, tagging the
 // candidate whose Source equals selectedSource with " SELECTED". Unavailable
 // candidates get a short "unavailable" line instead. A candidate's
 // SelectionNotes, when non-blank, is logged on a follow-up line. A nil
 // logFunc is a no-op.
 func logBenchmarkPowerAutotuneSelection(candidates []BenchmarkPowerAutotuneCandidate, selectedSource string, gpuDelta float64, logFunc func(string)) {
 	if logFunc == nil {
 		return
 	}
 	for i := range candidates {
 		c := candidates[i]
 		if !c.Available {
 			logFunc(fmt.Sprintf("autotune candidate %s unavailable", c.Source))
 			continue
 		}
 		marker := ""
 		if c.Source == selectedSource {
 			marker = " SELECTED"
 		}
 		logFunc(fmt.Sprintf(
 			"autotune candidate %s idle_avg=%.0fW load_avg=%.0fW delta=%.0fW gpu_delta=%.0fW relative_error=%.3f confidence=%.0f%%%s",
 			c.Source,
 			c.IdleAvgW,
 			c.LoadAvgW,
 			c.DeltaW,
 			gpuDelta,
 			c.RelativeError,
 			c.Confidence*100,
 			marker,
 		))
 		if strings.TrimSpace(c.SelectionNotes) == "" {
 			continue
 		}
 		logFunc(fmt.Sprintf("autotune candidate %s reason: %s", c.Source, c.SelectionNotes))
 	}
 }
 // validateBenchmarkPowerAutotuneIdle checks that the idle-phase samples really
 // describe an idle machine. It fills in mean and p95 GPU/CPU utilisation
 // (rounded to one decimal) and marks the result valid only when all four stay
 // within their limits: GPU avg <= 5%, GPU p95 <= 10%, CPU avg <= 20%,
 // CPU p95 <= 35%. The first limit exceeded is reported in Reason. An empty
 // sample set is invalid.
 //
 // CPU rows with a non-positive reading are excluded from the CPU statistics;
 // GPU rows are always included.
 // NOTE(review): a row whose GPU metrics could not be read carries 0% GPU
 // usage and therefore counts toward "idle" — confirm this is intended.
 func validateBenchmarkPowerAutotuneIdle(samples []benchmarkPowerAutotuneSample) *BenchmarkPowerAutotuneValidation {
 	verdict := &BenchmarkPowerAutotuneValidation{}
 	if len(samples) == 0 {
 		verdict.Reason = "no idle telemetry samples collected"
 		return verdict
 	}
 	var gpuUtil []float64
 	var cpuLoad []float64
 	for i := range samples {
 		gpuUtil = append(gpuUtil, samples[i].GPUAvgUsagePct)
 		if cpu := samples[i].CPUUsagePct; cpu > 0 {
 			cpuLoad = append(cpuLoad, cpu)
 		}
 	}
 	roundTenth := func(v float64) float64 {
 		return math.Round(v*10) / 10
 	}
 	verdict.GPUSamples = len(gpuUtil)
 	verdict.CPUSamples = len(cpuLoad)
 	verdict.GPUAvgUsagePct = roundTenth(benchmarkMean(gpuUtil))
 	verdict.GPUP95UsagePct = roundTenth(benchmarkPercentile(gpuUtil, 95))
 	verdict.CPUAvgUsagePct = roundTenth(benchmarkMean(cpuLoad))
 	verdict.CPUP95UsagePct = roundTenth(benchmarkPercentile(cpuLoad, 95))
 	if verdict.GPUAvgUsagePct > 5 {
 		verdict.Reason = fmt.Sprintf("idle validation failed: average GPU load %.1f%% exceeds 5%%", verdict.GPUAvgUsagePct)
 		return verdict
 	}
 	if verdict.GPUP95UsagePct > 10 {
 		verdict.Reason = fmt.Sprintf("idle validation failed: p95 GPU load %.1f%% exceeds 10%%", verdict.GPUP95UsagePct)
 		return verdict
 	}
 	if verdict.CPUAvgUsagePct > 20 {
 		verdict.Reason = fmt.Sprintf("idle validation failed: average CPU load %.1f%% exceeds 20%%", verdict.CPUAvgUsagePct)
 		return verdict
 	}
 	if verdict.CPUP95UsagePct > 35 {
 		verdict.Reason = fmt.Sprintf("idle validation failed: p95 CPU load %.1f%% exceeds 35%%", verdict.CPUP95UsagePct)
 		return verdict
 	}
 	verdict.Valid = true
 	return verdict
 }
 // chooseBenchmarkPowerAutotuneSource picks the server power source whose
 // idle-to-load delta best matches the GPU-reported power delta.
 //
 // It returns the selected source name, one candidate entry per known source
 // (DCMI first, SDR PSU input second), the mean GPU power over the idle and
 // load samples, and an error when no source has readings in both phases with
 // a positive delta.
 //
 // Ranking: a lower relative error wins, except that when two candidates'
 // relative errors differ by at most 0.10 the SDR PSU input source is
 // preferred. The winner's SelectionNotes states which of the two rules
 // applied, so the recorded reason never claims "closest" for a candidate that
 // won only through the preference rule.
 func chooseBenchmarkPowerAutotuneSource(idle, load []benchmarkPowerAutotuneSample) (string, []BenchmarkPowerAutotuneCandidate, float64, float64, error) {
 	// Relative errors closer together than this are treated as a tie.
 	const preferenceTolerance = 0.10
 	idleGPU, idleBySource := groupBenchmarkPowerAutotuneReadings(idle)
 	loadGPU, loadBySource := groupBenchmarkPowerAutotuneReadings(load)
 	idleGPUAvg := benchmarkMean(idleGPU)
 	loadGPUAvg := benchmarkMean(loadGPU)
 	gpuDelta := loadGPUAvg - idleGPUAvg
 	if gpuDelta <= 0 {
 		// No usable idle baseline: compare against the absolute load power.
 		gpuDelta = loadGPUAvg
 	}
 	candidates := []BenchmarkPowerAutotuneCandidate{
 		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceDCMI, idleBySource[BenchmarkPowerSourceDCMI], loadBySource[BenchmarkPowerSourceDCMI], gpuDelta),
 		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceSDRPSUInput, idleBySource[BenchmarkPowerSourceSDRPSUInput], loadBySource[BenchmarkPowerSourceSDRPSUInput], gpuDelta),
 	}
 	available := make([]BenchmarkPowerAutotuneCandidate, 0, len(candidates))
 	for _, candidate := range candidates {
 		if candidate.Available && candidate.DeltaW > 0 {
 			available = append(available, candidate)
 		}
 	}
 	if len(available) == 0 {
 		return "", candidates, idleGPUAvg, loadGPUAvg, fmt.Errorf("no usable server power source samples collected")
 	}
 	sort.Slice(available, func(i, j int) bool {
 		if math.Abs(available[i].RelativeError-available[j].RelativeError) <= preferenceTolerance {
 			if available[i].Source != available[j].Source {
 				return available[i].Source == BenchmarkPowerSourceSDRPSUInput
 			}
 		}
 		if available[i].RelativeError != available[j].RelativeError {
 			return available[i].RelativeError < available[j].RelativeError
 		}
 		return available[i].Samples > available[j].Samples
 	})
 	selected := available[0]
 	// The winner is only "closest" when no other usable candidate has a
 	// strictly smaller relative error; otherwise it won via the preference rule.
 	closest := true
 	for _, other := range available[1:] {
 		if other.RelativeError < selected.RelativeError {
 			closest = false
 			break
 		}
 	}
 	note := fmt.Sprintf("selected because delta %.0f W is closest to GPU delta %.0f W (relative error %.3f)", selected.DeltaW, gpuDelta, selected.RelativeError)
 	if !closest {
 		note = fmt.Sprintf("selected because %s is preferred when relative errors differ by at most %.2f: delta %.0f W vs GPU delta %.0f W (relative error %.3f)", selected.Source, preferenceTolerance, selected.DeltaW, gpuDelta, selected.RelativeError)
 	}
 	for idx := range candidates {
 		if candidates[idx].Source == selected.Source {
 			candidates[idx].Selected = true
 			candidates[idx].SelectionNotes = note
 		}
 	}
 	return selected.Source, candidates, idleGPUAvg, loadGPUAvg, nil
 }
 // groupBenchmarkPowerAutotuneReadings splits samples into the per-row summed
 // GPU power series and, per power source, the series of its positive readings.
 func groupBenchmarkPowerAutotuneReadings(samples []benchmarkPowerAutotuneSample) ([]float64, map[string][]float64) {
 	var gpuPower []float64
 	bySource := map[string][]float64{}
 	for _, sample := range samples {
 		gpuPower = append(gpuPower, sample.GPUSumPowerW)
 		for source, value := range sample.Sources {
 			if value > 0 {
 				bySource[source] = append(bySource[source], value)
 			}
 		}
 	}
 	return gpuPower, bySource
 }
 // buildBenchmarkPowerAutotuneCandidate summarises one power source from its
 // idle and load readings. A source is available only when both series are
 // non-empty; Samples is the shorter series' length. For an available source
 // the idle/load means and their delta are filled in, and when gpuDelta is
 // positive the relative error |delta-gpuDelta|/gpuDelta and the confidence
 // max(0, 1-relative error) are computed as well.
 func buildBenchmarkPowerAutotuneCandidate(source string, idle, load []float64, gpuDelta float64) BenchmarkPowerAutotuneCandidate {
 	usable := len(idle) > 0 && len(load) > 0
 	result := BenchmarkPowerAutotuneCandidate{
 		Source:    source,
 		Available: usable,
 		Samples:   minInt(len(idle), len(load)),
 	}
 	if usable {
 		result.IdleAvgW = benchmarkMean(idle)
 		result.LoadAvgW = benchmarkMean(load)
 		result.DeltaW = result.LoadAvgW - result.IdleAvgW
 		if gpuDelta > 0 {
 			result.RelativeError = math.Abs(result.DeltaW-gpuDelta) / gpuDelta
 			result.Confidence = math.Max(0, 1-result.RelativeError)
 		}
 	}
 	return result
 }
 // renderBenchmarkPowerAutotuneSummary renders result as newline-terminated
 // key=value lines: run metadata, the selected source (when set), the idle
 // validation figures (when present) and, per candidate, its availability plus
 // idle/load averages, delta and relative error for available candidates.
 func renderBenchmarkPowerAutotuneSummary(result BenchmarkPowerAutotuneResult) string {
 	var out strings.Builder
 	put := func(format string, args ...interface{}) {
 		fmt.Fprintf(&out, format, args...)
 	}
 	put("generated_at=%s\n", result.GeneratedAt.UTC().Format(time.RFC3339))
 	put("status=%s\n", result.Status)
 	put("benchmark_kind=%s\n", result.BenchmarkKind)
 	put("profile=%s\n", result.Profile)
 	put("idle_duration_sec=%d\n", result.IdleDurationSec)
 	put("load_duration_sec=%d\n", result.LoadDurationSec)
 	put("sample_interval_sec=%d\n", result.SampleIntervalSec)
 	if result.SelectedSource != "" {
 		put("selected_source=%s\n", result.SelectedSource)
 	}
 	if idle := result.IdleValidation; idle != nil {
 		put("idle_valid=%t\n", idle.Valid)
 		put("idle_gpu_avg_usage_pct=%.1f\n", idle.GPUAvgUsagePct)
 		put("idle_gpu_p95_usage_pct=%.1f\n", idle.GPUP95UsagePct)
 		put("idle_cpu_avg_usage_pct=%.1f\n", idle.CPUAvgUsagePct)
 		put("idle_cpu_p95_usage_pct=%.1f\n", idle.CPUP95UsagePct)
 		if idle.Reason != "" {
 			put("idle_validation_error=%s\n", idle.Reason)
 		}
 	}
 	for _, c := range result.Candidates {
 		put("candidate_%s_available=%t\n", c.Source, c.Available)
 		if !c.Available {
 			continue
 		}
 		put("candidate_%s_idle_avg_w=%.0f\n", c.Source, c.IdleAvgW)
 		put("candidate_%s_load_avg_w=%.0f\n", c.Source, c.LoadAvgW)
 		put("candidate_%s_delta_w=%.0f\n", c.Source, c.DeltaW)
 		put("candidate_%s_relative_error=%.3f\n", c.Source, c.RelativeError)
 	}
 	return out.String()
 }
 // renderBenchmarkPowerAutotuneReport renders result as a Markdown report: a
 // header with run metadata, an "Idle Validation" section when validation data
 // is present, a "Candidates" table when there are candidates (unavailable
 // ones shown with dashes), and finally every note as a bullet.
 func renderBenchmarkPowerAutotuneReport(result BenchmarkPowerAutotuneResult) string {
 	var doc strings.Builder
 	put := func(format string, args ...interface{}) {
 		fmt.Fprintf(&doc, format, args...)
 	}
 	doc.WriteString("# Bee Bench Power Source Autotune\n\n")
 	put("**Status:** %s  \n", result.Status)
 	put("**Benchmark kind:** %s  \n", result.BenchmarkKind)
 	put("**Profile:** %s  \n", result.Profile)
 	put("**Idle window:** %ds  \n", result.IdleDurationSec)
 	put("**Load window:** %ds  \n", result.LoadDurationSec)
 	put("**Sample interval:** %ds  \n", result.SampleIntervalSec)
 	if result.SelectedSource != "" {
 		put("**Selected source:** `%s`  \n", result.SelectedSource)
 	}
 	doc.WriteString("\n")
 	if idle := result.IdleValidation; idle != nil {
 		doc.WriteString("## Idle Validation\n\n")
 		put("- valid: %t\n", idle.Valid)
 		put("- GPU avg usage: %.1f%%\n", idle.GPUAvgUsagePct)
 		put("- GPU p95 usage: %.1f%%\n", idle.GPUP95UsagePct)
 		put("- CPU avg usage: %.1f%%\n", idle.CPUAvgUsagePct)
 		put("- CPU p95 usage: %.1f%%\n", idle.CPUP95UsagePct)
 		if idle.Reason != "" {
 			put("- reason: %s\n", idle.Reason)
 		}
 		doc.WriteString("\n")
 	}
 	if len(result.Candidates) > 0 {
 		doc.WriteString("## Candidates\n\n")
 		doc.WriteString("| Source | Idle avg W | Load avg W | Delta W | Relative error | Selected |\n")
 		doc.WriteString("|--------|------------|------------|---------|----------------|----------|\n")
 		for _, c := range result.Candidates {
 			if !c.Available {
 				put("| %s | — | — | — | — | no |\n", c.Source)
 				continue
 			}
 			mark := "no"
 			if c.Selected {
 				mark = "yes"
 			}
 			put("| %s | %.0f | %.0f | %.0f | %.2f | %s |\n",
 				c.Source, c.IdleAvgW, c.LoadAvgW, c.DeltaW, c.RelativeError, mark)
 		}
 		doc.WriteString("\n")
 	}
 	for i := range result.Notes {
 		put("- %s\n", result.Notes[i])
 	}
 	return doc.String()
 }
 func benchmarkAutotuneLoadCommand(kind string, durationSec int, gpuIndices []int, sizeMB int) ([]string, string) {
 	allDevices := joinIndexList(gpuIndices)
 	switch strings.TrimSpace(strings.ToLower(kind)) {
 	case "power-fit", "power", "nvidia-bench-power":
 		cmd, _, err := resolveBenchmarkPowerLoadCommand(durationSec, gpuIndices)
 		if err == nil {
 			return cmd, "power-fit"
 		}
 		return nvidiaDCGMNamedDiagCommand("targeted_power", durationSec, gpuIndices), "power-fit"
 	default:
 		cmd := []string{
 			"bee-gpu-burn",
 			"--seconds", fmt.Sprintf("%d", durationSec),
 			"--devices", allDevices,
 		}
 		if sizeMB > 0 {
 			cmd = append(cmd, "--size-mb", fmt.Sprintf("%d", sizeMB))
 		}
 		return cmd, "performance"
 	}
 }
 // RunNvidiaPowerSourceAutotune measures the server power sources at idle and
 // under full GPU load, selects the source whose delta best tracks the
 // GPU-reported power delta, and saves the choice as the autotune config.
 //
 // Stages:
 //  1. Create a timestamped run directory under baseDir (default
 //     "/var/log/bee-bench/autotune" when baseDir is blank).
 //  2. Sample telemetry for the idle window and validate the machine is idle;
 //     stop here with an error if it is not.
 //  3. Run the load command for benchmarkKind while sampling telemetry in the
 //     background. Sampling is cancelled as soon as the load command returns,
 //     so a load that fails early does not block for the rest of the window
 //     and readings taken after the load has ended are not averaged in.
 //  4. Choose the source, write the config next to baseDir, and write the
 //     result.json / summary.txt / report.md artifacts.
 //
 // It returns the run directory and, on failure of any stage, an error. The
 // run directory is returned together with the error once artifacts for the
 // failed run were written; it is empty when setup or artifact writing failed.
 // A nil ctx is replaced with context.Background and a nil logFunc with a no-op.
 func (s *System) RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
 	if ctx == nil {
 		ctx = context.Background()
 	}
 	if logFunc == nil {
 		logFunc = func(string) {}
 	}
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = "/var/log/bee-bench/autotune"
 	}
 	if err := os.MkdirAll(baseDir, 0755); err != nil {
 		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
 	}
 	selected, err := resolveNvidiaGPUSelection(nil, nil)
 	if err != nil {
 		return "", err
 	}
 	if len(selected) == 0 {
 		return "", fmt.Errorf("no NVIDIA GPUs detected for autotune")
 	}
 	ts := time.Now().UTC().Format("20060102-150405")
 	runDir := filepath.Join(baseDir, "autotune-"+ts)
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		return "", fmt.Errorf("mkdir %s: %w", runDir, err)
 	}
 	verboseLog := filepath.Join(runDir, "verbose.log")
 	hostname, _ := os.Hostname()
 	loadCmd, normalizedKind := benchmarkAutotuneLoadCommand(benchmarkKind, benchmarkPowerAutotuneLoadSec, selected, opts.SizeMB)
 	result := BenchmarkPowerAutotuneResult{
 		GeneratedAt:       time.Now().UTC(),
 		Hostname:          hostname,
 		ServerModel:       readServerModel(),
 		BenchmarkKind:     normalizedKind,
 		Profile:           opts.Profile,
 		Status:            "FAILED",
 		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
 		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
 		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
 	}
 	logFunc(fmt.Sprintf("autotune: idle validation window %ds on GPUs %s", benchmarkPowerAutotuneIdleSec, joinIndexList(selected)))
 	idleSamples := collectBenchmarkPowerAutotuneSamples(ctx, "idle", selected, benchmarkPowerAutotuneIdleSec, logFunc)
 	logBenchmarkPowerAutotunePhaseSummary("idle", idleSamples, logFunc)
 	result.IdleValidation = validateBenchmarkPowerAutotuneIdle(idleSamples)
 	if result.IdleValidation == nil || !result.IdleValidation.Valid {
 		if result.IdleValidation != nil {
 			result.IdleValidationError = result.IdleValidation.Reason
 			logFunc(result.IdleValidation.Reason)
 		}
 		result.Notes = append(result.Notes, "autotune stopped before load stage because idle validation failed")
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, fmt.Errorf("%s", result.IdleValidationError)
 	}
 	logFunc(fmt.Sprintf("autotune: full-load stage using %s for %ds", normalizedKind, benchmarkPowerAutotuneLoadSec))
 	// The sampler gets its own cancellable context so it can be stopped the
 	// moment the load command exits, whether it succeeded or failed.
 	loadSampleCtx, stopLoadSampling := context.WithCancel(ctx)
 	defer stopLoadSampling()
 	loadSamplesCh := make(chan []benchmarkPowerAutotuneSample, 1)
 	go func() {
 		loadSamplesCh <- collectBenchmarkPowerAutotuneSamples(loadSampleCtx, "load", selected, benchmarkPowerAutotuneLoadSec, logFunc)
 	}()
 	out, runErr := runSATCommandCtx(ctx, verboseLog, "autotune-load.log", loadCmd, nil, logFunc)
 	stopLoadSampling()
 	// Best effort: the load output is diagnostic only, so a failed write must
 	// not abort the run.
 	_ = os.WriteFile(filepath.Join(runDir, "autotune-load.log"), out, 0644)
 	loadSamples := <-loadSamplesCh
 	logBenchmarkPowerAutotunePhaseSummary("load", loadSamples, logFunc)
 	if runErr != nil {
 		result.Notes = append(result.Notes, "full-load stage failed: "+runErr.Error())
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, fmt.Errorf("autotune load stage: %w", runErr)
 	}
 	selectedSource, candidates, idleGPUAvg, loadGPUAvg, chooseErr := chooseBenchmarkPowerAutotuneSource(idleSamples, loadSamples)
 	result.Candidates = candidates
 	result.GPUPowerIdleW = idleGPUAvg
 	result.GPUPowerLoadW = loadGPUAvg
 	if chooseErr != nil {
 		result.Notes = append(result.Notes, chooseErr.Error())
 		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 			return "", err
 		}
 		return runDir, chooseErr
 	}
 	// Mirror the GPU delta used during selection so the log shows the same figure.
 	gpuDelta := loadGPUAvg - idleGPUAvg
 	if gpuDelta <= 0 {
 		gpuDelta = loadGPUAvg
 	}
 	logBenchmarkPowerAutotuneSelection(candidates, selectedSource, gpuDelta, logFunc)
 	result.SelectedSource = selectedSource
 	result.Status = "OK"
 	var confidence float64
 	selectionReason := fmt.Sprintf("selected %s after comparing full-load average against GPU-reported delta", selectedSource)
 	for _, candidate := range candidates {
 		if candidate.Selected {
 			confidence = candidate.Confidence
 			if strings.TrimSpace(candidate.SelectionNotes) != "" {
 				selectionReason = candidate.SelectionNotes
 			}
 			break
 		}
 	}
 	cfg := BenchmarkPowerAutotuneConfig{
 		Version:           benchmarkPowerAutotuneVersion,
 		UpdatedAt:         time.Now().UTC(),
 		SelectedSource:    selectedSource,
 		BenchmarkKind:     normalizedKind,
 		Profile:           opts.Profile,
 		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
 		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
 		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
 		Confidence:        confidence,
 		Reason:            selectionReason,
 	}
 	result.Config = &cfg
 	configPath := BenchmarkPowerSourceConfigPath(baseDir)
 	if err := SaveBenchmarkPowerAutotuneConfig(configPath, cfg); err != nil {
 		result.Status = "FAILED"
 		result.Notes = append(result.Notes, "failed to save autotune config: "+err.Error())
 		if writeErr := writeBenchmarkPowerAutotuneArtifacts(runDir, result); writeErr != nil {
 			return "", writeErr
 		}
 		return runDir, err
 	}
 	logFunc(fmt.Sprintf("autotune conclusion: selected source %s; reason: %s", selectedSource, cfg.Reason))
 	result.Notes = append(result.Notes, "saved autotune config to "+configPath)
 	if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
 		return "", err
 	}
 	return runDir, nil
 }
 // writeBenchmarkPowerAutotuneArtifacts writes the three run artifacts into
 // runDir: result.json (indented JSON of result), summary.txt (key=value
 // summary) and report.md (Markdown report). It stops at the first failure and
 // returns an error naming the artifact that could not be produced.
 func writeBenchmarkPowerAutotuneArtifacts(runDir string, result BenchmarkPowerAutotuneResult) error {
 	encoded, marshalErr := json.MarshalIndent(result, "", "  ")
 	if marshalErr != nil {
 		return fmt.Errorf("marshal autotune result: %w", marshalErr)
 	}
 	jsonPath := filepath.Join(runDir, "result.json")
 	if err := os.WriteFile(jsonPath, encoded, 0644); err != nil {
 		return fmt.Errorf("write autotune result.json: %w", err)
 	}
 	summaryPath := filepath.Join(runDir, "summary.txt")
 	summary := []byte(renderBenchmarkPowerAutotuneSummary(result))
 	if err := os.WriteFile(summaryPath, summary, 0644); err != nil {
 		return fmt.Errorf("write autotune summary.txt: %w", err)
 	}
 	reportPath := filepath.Join(runDir, "report.md")
 	report := []byte(renderBenchmarkPowerAutotuneReport(result))
 	if err := os.WriteFile(reportPath, report, 0644); err != nil {
 		return fmt.Errorf("write autotune report.md: %w", err)
 	}
 	return nil
 }
 // minInt returns the smaller of a and b.
 func minInt(a, b int) int {
 	smaller := b
 	if a < b {
 		smaller = a
 	}
 	return smaller
 }
 // Blank reference to os/exec so the package import counts as used.
 // NOTE(review): no other use of exec is visible in this file — confirm whether
 // the import (and this line) can simply be dropped.
 var _ = exec.ErrNotFound
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -401,11 +401,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		}
 	}
-	// ── Server Power (IPMI) ───────────────────────────────────────────────────
+	// ── Server Power ───────────────────────────────────────────────────────────
 	if sp := result.ServerPower; sp != nil {
-		b.WriteString("## Server Power (IPMI)\n\n")
+		title := "## Server Power\n\n"
 		if sp.Source != "" {
 			title = fmt.Sprintf("## Server Power (`%s`)\n\n", sp.Source)
 		}
 		b.WriteString(title)
 		if !sp.Available {
-			b.WriteString("IPMI power measurement unavailable.\n\n")
+			b.WriteString("Server power measurement unavailable.\n\n")
 		} else {
 			spRows := [][]string{
 				{"Server idle", fmt.Sprintf("%.0f W", sp.IdleW)},
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -43,6 +43,11 @@ const (
 	NvidiaBenchmarkProfileOvernight = "overnight"
 )
 // String identifiers for the power benchmark load engines.
 // NOTE(review): meaning inferred from the constant names and values; confirm against the code that consumes them.
 const (
 	BenchmarkPowerEngineDCGMProfTester = "dcgmproftester"
 	BenchmarkPowerEngineTargetedPower  = "targeted_power"
 )
 // Estimated wall-clock durations for benchmark runs, derived from real _v8 logs.
 // Rule: when changing profile phase durations in resolveBenchmarkProfile(),
 // re-measure from actual task logs and update the constants here.
@@ -61,7 +66,7 @@ const (
 	BenchmarkEstimatedPerfStabilitySec = 5532 // ~92 min; ramp-up 1-8 measured
 	BenchmarkEstimatedPerfOvernightSec = 8 * 3600
-	// Power / Thermal Fit (dcgmi targeted_power binary-search calibration).
+	// Power / Thermal Fit (dcgmproftester load + nvidia-smi power-limit binary search).
 	// Duration is for the full ramp-up run; individual steps vary with convergence speed.
 	BenchmarkEstimatedPowerStandardSec  = 2600 // ~43 min; ramp 1-4: 2663 s, ramp 1-8: 2375 s
 	BenchmarkEstimatedPowerStabilitySec = 5400 // ~90 min; calibDurationSec=300 × 8 GPU × ~2-3 attempts
@@ -74,12 +79,84 @@ type NvidiaBenchmarkOptions struct {
 	GPUIndices        []int
 	ExcludeGPUIndices []int
 	RunNCCL           bool
 	ServerPowerSource string
 	ParallelGPUs      bool   // run all selected GPUs simultaneously instead of sequentially
 	RampStep          int    // 1-based step index within a ramp-up run (0 = not a ramp-up)
 	RampTotal         int    // total number of ramp-up steps in this run
 	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }
 // String identifiers for server power measurement sources.
 // NOTE(review): presumably the values stored in the selected/effective source fields of the autotune types — confirm against the autotune code.
 const (
 	BenchmarkPowerSourceDCMI        = "dcmi"
 	BenchmarkPowerSourceSDRPSUInput = "sdr_psu_input"
 )
 // BenchmarkPowerAutotuneConfig is the JSON-serializable record of an autotune
 // outcome: the selected power source, when it was recorded, and the
 // measurement parameters (idle/load duration, sample interval) plus the
 // confidence and reason attached to the selection. The autotune run passes it
 // to SaveBenchmarkPowerAutotuneConfig and embeds it in its result.
 type BenchmarkPowerAutotuneConfig struct {
 	Version           int       `json:"version"`
 	UpdatedAt         time.Time `json:"updated_at"`
 	SelectedSource    string    `json:"selected_source"`
 	BenchmarkKind     string    `json:"benchmark_kind,omitempty"`
 	Profile           string    `json:"profile,omitempty"`
 	IdleDurationSec   int       `json:"idle_duration_sec,omitempty"`
 	LoadDurationSec   int       `json:"load_duration_sec,omitempty"`
 	SampleIntervalSec int       `json:"sample_interval_sec,omitempty"`
 	Confidence        float64   `json:"confidence,omitempty"`
 	Reason            string    `json:"reason,omitempty"`
 }
 // SystemPowerSourceDecision describes which system power source is in effect
 // and why. EffectiveSource, Mode and Reason are the values that samplers copy
 // next to each power reading they report.
 //
 // Note: ConfiguredAt carries omitempty, but encoding/json never treats a
 // struct value such as time.Time as empty, so "configured_at" is always
 // emitted, even for the zero time.
 type SystemPowerSourceDecision struct {
 	Configured      bool      `json:"configured"`
 	SelectedSource  string    `json:"selected_source,omitempty"`
 	EffectiveSource string    `json:"effective_source,omitempty"`
 	Mode            string    `json:"mode,omitempty"` // autotuned, fallback, degraded
 	Reason          string    `json:"reason,omitempty"`
 	ConfiguredAt    time.Time `json:"configured_at,omitempty"`
 }
 // BenchmarkPowerAutotuneResult is the full outcome of one autotune run. It is
 // what writeBenchmarkPowerAutotuneArtifacts serializes to result.json and
 // feeds to the summary/report renderers.
 //
 // Status is a free-form string; the autotune run sets it to "FAILED" when the
 // resulting config cannot be saved. Notes accumulates human-readable remarks
 // appended during the run. Config points at the configuration derived from
 // the run and is omitted from JSON when nil.
 type BenchmarkPowerAutotuneResult struct {
 	GeneratedAt         time.Time                         `json:"generated_at"`
 	Hostname            string                            `json:"hostname,omitempty"`
 	ServerModel         string                            `json:"server_model,omitempty"`
 	BenchmarkKind       string                            `json:"benchmark_kind,omitempty"`
 	Profile             string                            `json:"profile,omitempty"`
 	Status              string                            `json:"status"`
 	IdleDurationSec     int                               `json:"idle_duration_sec"`
 	LoadDurationSec     int                               `json:"load_duration_sec"`
 	SampleIntervalSec   int                               `json:"sample_interval_sec"`
 	SelectedSource      string                            `json:"selected_source,omitempty"`
 	IdleValidationError string                            `json:"idle_validation_error,omitempty"`
 	IdleValidation      *BenchmarkPowerAutotuneValidation `json:"idle_validation,omitempty"`
 	GPUPowerIdleW       float64                           `json:"gpu_power_idle_w,omitempty"`
 	GPUPowerLoadW       float64                           `json:"gpu_power_load_w,omitempty"`
 	Candidates          []BenchmarkPowerAutotuneCandidate `json:"candidates,omitempty"`
 	Notes               []string                          `json:"notes,omitempty"`
 	Config              *BenchmarkPowerAutotuneConfig     `json:"config,omitempty"`
 }
 // BenchmarkPowerAutotuneValidation is the validation record attached to an
 // autotune result as IdleValidation: a Valid verdict, average and p95 usage
 // percentages for GPU and CPU with their sample counts, and a Reason string.
 // NOTE(review): the thresholds that decide Valid live in the autotune code,
 // not in this type — confirm there before relying on specific limits.
 type BenchmarkPowerAutotuneValidation struct {
 	Valid          bool    `json:"valid"`
 	GPUAvgUsagePct float64 `json:"gpu_avg_usage_pct,omitempty"`
 	GPUP95UsagePct float64 `json:"gpu_p95_usage_pct,omitempty"`
 	CPUAvgUsagePct float64 `json:"cpu_avg_usage_pct,omitempty"`
 	CPUP95UsagePct float64 `json:"cpu_p95_usage_pct,omitempty"`
 	GPUSamples     int     `json:"gpu_samples,omitempty"`
 	CPUSamples     int     `json:"cpu_samples,omitempty"`
 	Reason         string  `json:"reason,omitempty"`
 }
 // BenchmarkPowerAutotuneCandidate is one entry of
 // BenchmarkPowerAutotuneResult.Candidates: the figures recorded for a single
 // power source (idle/load averages and their delta in watts, sample count)
 // together with its availability and whether it was selected.
 // NOTE(review): how RelativeError and Confidence are computed is defined by
 // the autotune code, not by this type — confirm there.
 type BenchmarkPowerAutotuneCandidate struct {
 	Source         string  `json:"source"`
 	IdleAvgW       float64 `json:"idle_avg_w,omitempty"`
 	LoadAvgW       float64 `json:"load_avg_w,omitempty"`
 	DeltaW         float64 `json:"delta_w,omitempty"`
 	Samples        int     `json:"samples,omitempty"`
 	RelativeError  float64 `json:"relative_error,omitempty"`
 	Confidence     float64 `json:"confidence,omitempty"`
 	Selected       bool    `json:"selected,omitempty"`
 	Available      bool    `json:"available"`
 	SelectionNotes string  `json:"selection_notes,omitempty"`
 }
 type NvidiaBenchmarkResult struct {
 	BenchmarkVersion string    `json:"benchmark_version"`
 	GeneratedAt      time.Time `json:"generated_at"`
@@ -294,12 +371,16 @@ type BenchmarkPSUSlotPower struct {
 //   - SDR       — `ipmitool sdr` PSUx_POWER_IN/OUT; per-PSU, reliable
 //   - nvidia-smi — GPU self-reported via internal shunt; accurate for GPU load
 type BenchmarkServerPower struct {
-	Available       bool    `json:"available"`
+	Available         bool    `json:"available"`
-	IdleW           float64 `json:"idle_w,omitempty"`   // DCMI at idle
+	Source            string  `json:"source,omitempty"`
-	LoadedW         float64 `json:"loaded_w,omitempty"` // DCMI at peak load
+	Mode              string  `json:"mode,omitempty"`
-	DeltaW          float64 `json:"delta_w,omitempty"`  // DCMI loaded − idle
+	Reason            string  `json:"reason,omitempty"`
-	GPUReportedSumW float64 `json:"gpu_reported_sum_w,omitempty"`
+	SampleIntervalSec int     `json:"sample_interval_sec,omitempty"`
-	ReportingRatio  float64 `json:"reporting_ratio,omitempty"`
+	IdleW             float64 `json:"idle_w,omitempty"`   // DCMI at idle
 	LoadedW           float64 `json:"loaded_w,omitempty"` // DCMI at peak load
 	DeltaW            float64 `json:"delta_w,omitempty"`  // DCMI loaded − idle
 	GPUReportedSumW   float64 `json:"gpu_reported_sum_w,omitempty"`
 	ReportingRatio    float64 `json:"reporting_ratio,omitempty"`
 	// PSU AC input sum — sampled at idle and at peak load using collector's
 	// slot patterns (PSU1_POWER_IN, PSU1_PIN, PS1 POut, Power1…).
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -12,6 +12,7 @@ import (
 )
 // installToRAMDir is a fixed path under /dev/shm.
 // NOTE(review): presumably the destination of the install-to-RAM copy — confirm against its users.
 const installToRAMDir = "/dev/shm/bee-live"
 // copyProgressLogStep is the byte interval (100 MiB) that shouldLogCopyProgress
 // requires between two consecutive copy progress log lines.
 const copyProgressLogStep int64 = 100 * 1024 * 1024
 func (s *System) IsLiveMediaInRAM() bool {
 	return s.LiveMediaRAMState().InRAM
@@ -319,6 +320,7 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	defer out.Close()
 	total := fi.Size()
 	var copied int64
 	var lastLogged int64
 	buf := make([]byte, 4*1024*1024)
 	for {
 		if err := ctx.Err(); err != nil {
@@ -330,7 +332,8 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 				return werr
 			}
 			copied += int64(n)
-			if logFunc != nil && total > 0 {
+			if shouldLogCopyProgress(copied, total, lastLogged) {
 				lastLogged = copied
 				pct := int(float64(copied) / float64(total) * 100)
 				logFunc(fmt.Sprintf("  %s / %s (%d%%)", humanBytes(copied), humanBytes(total), pct))
 			}
@@ -345,6 +348,19 @@ func copyFileLarge(ctx context.Context, src, dst string, logFunc func(string)) e
 	return out.Sync()
 }
 // shouldLogCopyProgress decides whether a copy progress line is due.
 //
 // copied is the number of bytes written so far, total the expected size and
 // lastLogged the value of copied when the previous line was emitted.
 // Nothing is logged while total or copied is non-positive. Once the copy is
 // complete (copied >= total) exactly one more line is allowed, provided that
 // position has not been logged yet. Before completion a line is due only
 // after at least copyProgressLogStep bytes have been copied in total and
 // since the previous line.
 func shouldLogCopyProgress(copied, total, lastLogged int64) bool {
 	switch {
 	case total <= 0, copied <= 0:
 		return false
 	case copied >= total:
 		return lastLogged < copied
 	case copied < copyProgressLogStep:
 		return false
 	default:
 		return copied-lastLogged >= copyProgressLogStep
 	}
 }
 func cpDir(ctx context.Context, src, dst string, logFunc func(string)) error {
 	return filepath.Walk(src, func(path string, fi os.FileInfo, err error) error {
 		if ctx.Err() != nil {
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -101,3 +101,26 @@ func TestEvaluateLiveMediaRAMState(t *testing.T) {
 		}
 	})
 }
 // TestShouldLogCopyProgress walks a 250 MiB copy through the 100 MiB logging
 // boundaries and checks when a progress line is (and is not) due, including
 // the final completion line.
 func TestShouldLogCopyProgress(t *testing.T) {
 	t.Parallel()
 	const mib = int64(1024 * 1024)
 	total, step := 250*mib, 100*mib
 	checks := []struct {
 		copied     int64
 		lastLogged int64
 		want       bool
 		failMsg    string
 	}{
 		{step - 1, 0, false, "progress logged too early"},
 		{step, 0, true, "expected log at first 100MB boundary"},
 		{step + 16*mib, step, false, "progress logged again before next 100MB"},
 		{2 * step, step, true, "expected log at second 100MB boundary"},
 		{total, 2 * step, true, "expected final completion log"},
 	}
 	for _, check := range checks {
 		if shouldLogCopyProgress(check.copied, total, check.lastLogged) != check.want {
 			t.Fatal(check.failMsg)
 		}
 	}
 }
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -1,8 +1,10 @@
 package platform
 import (
 	"bee/audit/internal/collector"
 	"bufio"
 	"encoding/json"
 	"fmt"
 	"os"
 	"os/exec"
 	"sort"
@@ -14,14 +16,17 @@ import (
 // LiveMetricSample is a single point-in-time snapshot of server metrics
 // collected for the web UI metrics page.
 type LiveMetricSample struct {
-	Timestamp  time.Time      `json:"ts"`
+	Timestamp   time.Time      `json:"ts"`
-	Fans       []FanReading   `json:"fans"`
+	Fans        []FanReading   `json:"fans"`
-	Temps      []TempReading  `json:"temps"`
+	Temps       []TempReading  `json:"temps"`
-	PowerW     float64        `json:"power_w"`
+	PowerW      float64        `json:"power_w"`
-	PSUs       []PSUReading   `json:"psus,omitempty"`
+	PowerSource string         `json:"power_source,omitempty"`
-	CPULoadPct float64        `json:"cpu_load_pct"`
+	PowerMode   string         `json:"power_mode,omitempty"`
-	MemLoadPct float64        `json:"mem_load_pct"`
+	PowerReason string         `json:"power_reason,omitempty"`
-	GPUs       []GPUMetricRow `json:"gpus"`
+	PSUs        []PSUReading   `json:"psus,omitempty"`
 	CPULoadPct  float64        `json:"cpu_load_pct"`
 	MemLoadPct  float64        `json:"mem_load_pct"`
 	GPUs        []GPUMetricRow `json:"gpus"`
 }
 // PSUReading is a per-slot power supply input power reading.
@@ -62,12 +67,18 @@ func SampleLiveMetrics() LiveMetricSample {
 		}
 	}
 	// System power — returns 0 if unavailable
 	s.PowerW = sampleSystemPower()
 	// Per-PSU power — populated when IPMI SDR has Power Supply entities with Watt readings
 	s.PSUs = samplePSUPower()
 	// System power: use the global autotune-selected source when configured,
 	// otherwise fall back to the historical heuristic and mark the mode.
 	if powerW, decision, err := SampleSystemPowerResolved(""); err == nil {
 		s.PowerW = powerW
 		s.PowerSource = decision.EffectiveSource
 		s.PowerMode = decision.Mode
 		s.PowerReason = decision.Reason
 	}
 	// CPU load — from /proc/stat
 	s.CPULoadPct = sampleCPULoadPct()
@@ -339,63 +350,44 @@ func compactAmbientTempName(chip, name string) string {
 }
 // samplePSUPower reads per-PSU input power via IPMI SDR.
-// It parses `ipmitool sdr elist full` output looking for Power Supply entity
+// Uses collector.PSUSlotsFromSDR (name-based matching) which works across
-// sensors (entity ID "10.N") that report a value in Watts.
+// vendors where PSU sensors may not carry entity ID "10.N".
 // Returns nil when IPMI is unavailable or no PSU Watt sensors exist.
 func samplePSUPower() []PSUReading {
-	out, err := exec.Command("ipmitool", "sdr", "elist", "full").Output()
+	out, err := exec.Command("ipmitool", "sdr").Output()
 	if err != nil || len(out) == 0 {
 		return nil
 	}
-	// map slot → reading (keep highest-watt value per slot in case of duplicates)
+	slots := collector.PSUSlotsFromSDR(string(out))
-	type entry struct {
+	if len(slots) == 0 {
 		name   string
 		powerW float64
 	}
 	bySlot := map[int]entry{}
 	for _, line := range strings.Split(string(out), "\n") {
 		parts := strings.Split(line, "|")
 		if len(parts) < 5 {
 			continue
 		}
 		entityID := strings.TrimSpace(parts[3]) // e.g. "10.1"
 		if !strings.HasPrefix(entityID, "10.") {
 			continue // not a Power Supply entity
 		}
 		slotStr := strings.TrimPrefix(entityID, "10.")
 		slot, err := strconv.Atoi(slotStr)
 		if err != nil {
 			continue
 		}
 		valueField := strings.TrimSpace(parts[4]) // e.g. "740.00 Watts"
 		if !strings.Contains(strings.ToLower(valueField), "watts") {
 			continue
 		}
 		valueFields := strings.Fields(valueField)
 		if len(valueFields) < 2 {
 			continue
 		}
 		w, err := strconv.ParseFloat(valueFields[0], 64)
 		if err != nil || w <= 0 {
 			continue
 		}
 		sensorName := strings.TrimSpace(parts[0])
 		if existing, ok := bySlot[slot]; !ok || w > existing.powerW {
 			bySlot[slot] = entry{name: sensorName, powerW: w}
 		}
 	}
 	if len(bySlot) == 0 {
 		return nil
 	}
-	slots := make([]int, 0, len(bySlot))
+	// Collect slot keys and sort for stable output.
-	for s := range bySlot {
+	keys := make([]int, 0, len(slots))
-		slots = append(slots, s)
+	for k := range slots {
 		n, err := strconv.Atoi(k)
 		if err == nil {
 			keys = append(keys, n)
 		}
 	}
-	sort.Ints(slots)
+	sort.Ints(keys)
-	psus := make([]PSUReading, 0, len(slots))
+	psus := make([]PSUReading, 0, len(keys))
-	for _, s := range slots {
+	for _, k := range keys {
-		e := bySlot[s]
+		entry := slots[strconv.Itoa(k)]
-		psus = append(psus, PSUReading{Slot: s, Name: e.name, PowerW: e.powerW})
+		// Prefer AC input power; fall back to DC output power.
 		var w float64
 		if entry.InputW != nil && *entry.InputW > 0 {
 			w = *entry.InputW
 		} else if entry.OutputW != nil && *entry.OutputW > 0 {
 			w = *entry.OutputW
 		}
 		if w <= 0 {
 			continue
 		}
 		psus = append(psus, PSUReading{Slot: k + 1, Name: fmt.Sprintf("PSU%d", k+1), PowerW: w})
 	}
 	if len(psus) == 0 {
 		return nil
 	}
 	return psus
 }
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -443,11 +443,19 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 		profCmd []string
 		profEnv []string
 	)
-	if staggerSec > 0 && len(selected) > 1 {
+	if len(selected) > 1 {
 		// For multiple GPUs, always spawn one dcgmproftester process per GPU via
 		// bee-dcgmproftester-staggered (stagger=0 means all start simultaneously).
 		// A single dcgmproftester process without -i only loads GPU 0 regardless
 		// of CUDA_VISIBLE_DEVICES.
 		stagger := staggerSec
 		if stagger < 0 {
 			stagger = 0
 		}
 		profCmd = []string{
 			"bee-dcgmproftester-staggered",
 			"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
-			"--stagger-seconds", strconv.Itoa(staggerSec),
+			"--stagger-seconds", strconv.Itoa(stagger),
 			"--devices", joinIndexList(selected),
 		}
 	} else {
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -43,17 +43,22 @@ type GPUStressMetric struct {
 // FanStressRow is one second-interval telemetry sample covering all monitored dimensions.
 type FanStressRow struct {
-	TimestampUTC string
+	TimestampUTC   string
-	ElapsedSec   float64
+	ElapsedSec     float64
-	Phase        string // "baseline", "load1", "pause", "load2", "cooldown"
+	Phase          string // "baseline", "load1", "pause", "load2", "cooldown"
-	GPUs         []GPUStressMetric
+	GPUs           []GPUStressMetric
-	Fans         []FanReading
+	Fans           []FanReading
-	CPUMaxTempC  float64 // highest CPU temperature from ipmitool / sensors
+	CPUMaxTempC    float64 // highest CPU temperature from ipmitool / sensors
-	SysPowerW    float64 // DCMI system power reading
+	SysPowerW      float64
 	SysPowerSource string
 	SysPowerMode   string
 }
 // cachedPowerReading is the last positive system power reading together with
 // the source, mode and reason it was taken under and the time it was stored.
 // effectiveSystemPowerReading replaces it whenever a positive reading arrives
 // and consults Value and UpdatedAt (against systemPowerHoldTTL) when the
 // current reading is not positive.
 type cachedPowerReading struct {
 	Value     float64
 	Source    string
 	Mode      string
 	Reason    string
 	UpdatedAt time.Time
 }
@@ -278,7 +283,7 @@ func sampleFanStressRow(gpuIndices []int, phase string, elapsed float64) FanStre
 	row.GPUs = sampleGPUStressMetrics(gpuIndices)
 	row.Fans, _ = sampleFanSpeeds()
 	row.CPUMaxTempC = sampleCPUMaxTemp()
-	row.SysPowerW = sampleSystemPower()
+	row.SysPowerW, row.SysPowerSource, row.SysPowerMode = sampleSystemPowerResolved()
 	return row
 }
@@ -763,19 +768,19 @@ func sampleCPUTempViaSensors() float64 {
 	return max
 }
-// sampleSystemPower reads system power draw via DCMI.
+// sampleSystemPowerResolved reads system power via the global autotune source,
-func sampleSystemPower() float64 {
+// falling back to the historical heuristic before autotune or when degraded.
 func sampleSystemPowerResolved() (float64, string, string) {
 	now := time.Now()
-	current := 0.0
+	current, decision, err := SampleSystemPowerResolved("")
 	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
 	if err == nil {
 		current = parseDCMIPowerReading(string(out))
 	}
 	systemPowerCacheMu.Lock()
 	defer systemPowerCacheMu.Unlock()
-	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	if err != nil {
 		current = 0
 	}
 	value, updated := effectiveSystemPowerReading(systemPowerCache, current, decision.EffectiveSource, decision.Mode, decision.Reason, now)
 	systemPowerCache = updated
-	return value
+	return value, updated.Source, updated.Mode
 }
 // parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
@@ -798,9 +803,9 @@ func parseDCMIPowerReading(raw string) float64 {
 	return 0
 }
-func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
+func effectiveSystemPowerReading(cache cachedPowerReading, current float64, source, mode, reason string, now time.Time) (float64, cachedPowerReading) {
 	if current > 0 {
-		cache = cachedPowerReading{Value: current, UpdatedAt: now}
+		cache = cachedPowerReading{Value: current, Source: source, Mode: mode, Reason: reason, UpdatedAt: now}
 		return current, cache
 	}
 	if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -112,7 +112,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	now := time.Now()
 	cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}
-	got, updated := effectiveSystemPowerReading(cache, 0, now)
+	got, updated := effectiveSystemPowerReading(cache, 0, "", "", "", now)
 	if got != 480 {
 		t.Fatalf("got=%v want cached 480", got)
 	}
@@ -120,7 +120,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 		t.Fatalf("updated=%+v", updated)
 	}
-	got, updated = effectiveSystemPowerReading(cache, 530, now)
+	got, updated = effectiveSystemPowerReading(cache, 530, "dcmi", "fallback", "test", now)
 	if got != 530 {
 		t.Fatalf("got=%v want 530", got)
 	}
@@ -129,7 +129,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	}
 	expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
-	got, _ = effectiveSystemPowerReading(expired, 0, now)
+	got, _ = effectiveSystemPowerReading(expired, 0, "", "", "", now)
 	if got != 0 {
 		t.Fatalf("expired cache returned %v want 0", got)
 	}
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -127,7 +127,7 @@ func defaultTaskPriority(target string, params taskParams) int {
 		return taskPriorityInstallToRAM
 	case "audit":
 		return taskPriorityAudit
-	case "nvidia-bench-perf", "nvidia-bench-power":
+	case "nvidia-bench-perf", "nvidia-bench-power", "nvidia-bench-autotune":
 		return taskPriorityBenchmark
 	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
 		return taskPriorityBurn
@@ -701,6 +701,78 @@ func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFun
 	}
 }
 // handleAPIBenchmarkAutotuneRun builds the HTTP handler that queues one
 // "nvidia-bench-autotune" task.
 //
 // The optional JSON body may carry "profile", "benchmark_kind" and "size_mb".
 // Blank profile and kind fall back to "standard" and "power-fit". The handler
 // answers 503 when no app is configured and 400 when the body is not valid
 // JSON; otherwise the task is enqueued and reported via writeTaskRunResponse.
 func (h *handler) handleAPIBenchmarkAutotuneRun() http.HandlerFunc {
 	// orDefault trims value and substitutes fallback when nothing remains.
 	orDefault := func(value, fallback string) string {
 		if trimmed := strings.TrimSpace(value); trimmed != "" {
 			return trimmed
 		}
 		return fallback
 	}
 	return func(w http.ResponseWriter, r *http.Request) {
 		if h.opts.App == nil {
 			writeError(w, http.StatusServiceUnavailable, "app not configured")
 			return
 		}
 		var payload struct {
 			Profile       string `json:"profile"`
 			BenchmarkKind string `json:"benchmark_kind"`
 			SizeMB        int    `json:"size_mb"`
 		}
 		if r.Body != nil {
 			decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 			// An empty body (io.EOF) is accepted and leaves every field at its zero value.
 			if decodeErr != nil && !errors.Is(decodeErr, io.EOF) {
 				writeError(w, http.StatusBadRequest, "invalid request body")
 				return
 			}
 		}
 		profile := orDefault(payload.Profile, "standard")
 		kind := orDefault(payload.BenchmarkKind, "power-fit")
 		createdAt := time.Now()
 		displayName := fmt.Sprintf("NVIDIA Benchmark Autotune · %s · %s", profile, kind)
 		const target = "nvidia-bench-autotune"
 		task := &Task{
 			ID:        newJobID("bee-bench-autotune"),
 			Name:      displayName,
 			Target:    target,
 			Priority:  defaultTaskPriority(target, taskParams{}),
 			Status:    TaskPending,
 			CreatedAt: createdAt,
 			params: taskParams{
 				BenchmarkProfile: profile,
 				BenchmarkKind:    kind,
 				SizeMB:           payload.SizeMB,
 				DisplayName:      displayName,
 			},
 		}
 		globalQueue.enqueue(task)
 		writeTaskRunResponse(w, []*Task{task})
 	}
 }
 // handleAPIBenchmarkAutotuneStatus reports whether a power autotune config
 // has been saved, together with the currently resolved power source decision.
 //
 // Responses:
 //   - 503 when no app is configured;
 //   - 200 {"configured": false, "decision": …} when the config does not exist;
 //   - 200 {"configured": true, "config": …, "decision": …} when it loads;
 //   - 500 with the error text for any other load failure.
 //
 // The status line is left to writeJSON / the first Write (which implies 200):
 // net/http ignores header changes made after WriteHeader, so committing the
 // status before writeJSON would discard any header it sets afterwards.
 func (h *handler) handleAPIBenchmarkAutotuneStatus(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
 	cfg, err := h.opts.App.LoadBenchmarkPowerAutotune()
 	switch {
 	case err == nil:
 		writeJSON(w, map[string]any{
 			"configured": true,
 			"config":     cfg,
 			"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
 		})
 	case errors.Is(err, os.ErrNotExist):
 		// errors.Is also matches a wrapped not-exist error, which os.IsNotExist would miss.
 		writeJSON(w, map[string]any{
 			"configured": false,
 			"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
 		})
 	default:
 		writeError(w, http.StatusInternalServerError, err.Error())
 	}
 }
 // handleAPIBenchmarkNvidiaRun serves the request with the handler built for
 // the "nvidia-bench-perf" target.
 func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
 	perfHandler := h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf")
 	perfHandler.ServeHTTP(w, r)
 }
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -195,6 +195,40 @@ func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T
 	}
 }
 // TestHandleAPIBenchmarkAutotuneRunQueuesTask posts an autotune run request
 // and verifies that exactly one "nvidia-bench-autotune" task carrying the
 // requested benchmark kind lands in the global queue.
 func TestHandleAPIBenchmarkAutotuneRunQueuesTask(t *testing.T) {
 	// Start from an empty queue and put the previous tasks back afterwards.
 	globalQueue.mu.Lock()
 	savedTasks := globalQueue.tasks
 	globalQueue.tasks = nil
 	globalQueue.mu.Unlock()
 	t.Cleanup(func() {
 		globalQueue.mu.Lock()
 		defer globalQueue.mu.Unlock()
 		globalQueue.tasks = savedTasks
 	})
 	const payload = `{"profile":"standard","benchmark_kind":"power-fit"}`
 	api := &handler{opts: HandlerOptions{App: &app.App{}}}
 	request := httptest.NewRequest("POST", "/api/bee-bench/nvidia/autotune/run", strings.NewReader(payload))
 	recorder := httptest.NewRecorder()
 	api.handleAPIBenchmarkAutotuneRun().ServeHTTP(recorder, request)
 	if recorder.Code != 200 {
 		t.Fatalf("status=%d body=%s", recorder.Code, recorder.Body.String())
 	}
 	globalQueue.mu.Lock()
 	defer globalQueue.mu.Unlock()
 	if queued := len(globalQueue.tasks); queued != 1 {
 		t.Fatalf("tasks=%d want 1", queued)
 	}
 	queuedTask := globalQueue.tasks[0]
 	if queuedTask.Target != "nvidia-bench-autotune" {
 		t.Fatalf("task target=%q want nvidia-bench-autotune", queuedTask.Target)
 	}
 	if queuedTask.params.BenchmarkKind != "power-fit" {
 		t.Fatalf("task benchmark kind=%q want power-fit", queuedTask.params.BenchmarkKind)
 	}
 }
 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	globalQueue.mu.Lock()
 	originalTasks := globalQueue.tasks
--- a/audit/internal/webui/layout.go
+++ b/audit/internal/webui/layout.go
@@ -0,0 +1,137 @@
 package webui
 import (
 	"fmt"
 	"html"
 	"os"
 	"strings"
 )
 // layoutHead returns the opening markup shared by the web UI pages: the
 // doctype, a <head> containing the HTML-escaped title and the inline
 // stylesheet, and the opening <body> tag. The returned string leaves <html>
 // and <body> unclosed.
 func layoutHead(title string) string {
 	return `<!DOCTYPE html>
 <html lang="en">
 <head>
 <meta charset="utf-8">
 <meta name="viewport" content="width=device-width,initial-scale=1">
 <title>` + html.EscapeString(title) + `</title>
 <style>
 :root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6);--accent:#2185d0;--accent-dark:#1678c2;--crit-bg:#fff6f6;--crit-fg:#9f3a38;--crit-border:#e0b4b4;--ok-bg:#fcfff5;--ok-fg:#2c662d;--warn-bg:#fffaf3;--warn-fg:#573a08}
 *{box-sizing:border-box;margin:0;padding:0}
 body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);display:flex;min-height:100vh}
 a{color:var(--accent);text-decoration:none}
 /* Sidebar */
 .sidebar{width:210px;min-height:100vh;background:#1b1c1d;flex-shrink:0;display:flex;flex-direction:column}
 .sidebar-logo{padding:18px 16px 12px;font-size:18px;font-weight:700;color:#fff;letter-spacing:-.5px}
 .sidebar-logo span{color:rgba(255,255,255,.5);font-weight:400;font-size:12px;display:block;margin-top:2px}
 .sidebar-version{padding:0 16px 14px;font-size:11px;color:rgba(255,255,255,.45)}
 .sidebar-badge{margin:0 12px 12px;padding:5px 8px;border-radius:4px;font-size:11px;font-weight:600;text-align:center}
 .sidebar-badge-warn{background:#7a4f00;color:#f6c90e}
 .sidebar-badge-crit{background:#5c1a1a;color:#ff6b6b}
 .nav{flex:1}
 .nav-item{display:block;padding:10px 16px;color:rgba(255,255,255,.7);font-size:13px;border-left:3px solid transparent;transition:all .15s}
 .nav-item:hover{color:#fff;background:rgba(255,255,255,.08)}
 .nav-item.active{color:#fff;background:rgba(33,133,208,.25);border-left-color:var(--accent)}
 /* Content */
 .main{flex:1;display:flex;flex-direction:column;overflow:auto}
 .topbar{padding:13px 24px;background:#1b1c1d;display:flex;align-items:center;gap:12px}
 .topbar h1{font-size:16px;font-weight:700;color:rgba(255,255,255,.9)}
 .content{padding:24px;flex:1}
 /* Cards */
 .card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);margin-bottom:16px;overflow:hidden}
 .card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px;display:flex;align-items:center;gap:8px}
 .card-head-actions{justify-content:space-between}
 .card-head-buttons{display:flex;align-items:center;gap:8px;margin-left:auto;flex-wrap:wrap}
 .card-body{padding:16px}
 /* Buttons */
 .btn{display:inline-flex;align-items:center;gap:6px;padding:8px 16px;border-radius:4px;font-size:13px;font-weight:700;cursor:pointer;border:none;transition:background .1s;font-family:inherit}
 .btn-primary{background:var(--accent);color:#fff}.btn-primary:hover{background:var(--accent-dark)}
 .btn-danger{background:#db2828;color:#fff}.btn-danger:hover{background:#b91c1c}
 .btn-secondary{background:var(--surface-2);color:var(--ink);border:1px solid var(--border)}.btn-secondary:hover{background:#eee}
 .btn-sm{padding:5px 10px;font-size:12px}
 /* Tables */
 table{width:100%;border-collapse:collapse;font-size:13px;background:var(--surface)}
 th{text-align:left;padding:9px 14px;color:var(--ink);font-weight:700;background:var(--surface-2);border-bottom:1px solid var(--border-lite)}
 td{padding:9px 14px;border-top:1px solid var(--border-lite)}
 tr:first-child td{border-top:0}
 tbody tr:hover td{background:rgba(0,0,0,.03)}
 /* Status badges */
 .badge{display:inline-block;padding:2px 9px;border-radius:4px;font-size:11px;font-weight:700}
 .badge-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
 .badge-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
 .badge-err{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
 .badge-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
 /* Component chips — one small square per device */
 .chips{display:inline-flex;flex-wrap:wrap;gap:3px;align-items:center;vertical-align:middle}
 .chip{display:inline-flex;align-items:center;justify-content:center;width:20px;height:20px;border-radius:3px;font-size:10px;font-weight:800;cursor:default;font-family:monospace;letter-spacing:0;user-select:none}
 .chip-ok{background:var(--ok-bg);color:var(--ok-fg);border:1px solid #a3c293}
 .chip-warn{background:var(--warn-bg);color:var(--warn-fg);border:1px solid #c9ba9b}
 .chip-fail{background:var(--crit-bg);color:var(--crit-fg);border:1px solid var(--crit-border)}
 .chip-unknown{background:var(--surface-2);color:var(--muted);border:1px solid var(--border)}
 /* Output terminal */
 .terminal{background:#1b1c1d;border:1px solid rgba(0,0,0,.2);border-radius:4px;padding:14px;font-family:monospace;font-size:12px;color:#b5cea8;max-height:400px;overflow-y:auto;white-space:pre-wrap;word-break:break-all;user-select:text;-webkit-user-select:text}
 .terminal-wrap{position:relative}.terminal-copy{position:absolute;top:6px;right:6px;background:#2d2f30;border:1px solid #444;color:#aaa;font-size:11px;padding:2px 8px;border-radius:3px;cursor:pointer;opacity:.7}.terminal-copy:hover{opacity:1}
 /* Forms */
 .form-row{margin-bottom:14px}
 .form-row label{display:block;font-size:12px;color:var(--muted);margin-bottom:5px;font-weight:700}
 .form-row input,.form-row select{width:100%;padding:8px 10px;background:var(--surface);border:1px solid var(--border);border-radius:4px;color:var(--ink);font-size:13px;outline:none;font-family:inherit}
 .form-row input:focus,.form-row select:focus{border-color:var(--accent);box-shadow:0 0 0 2px rgba(33,133,208,.2)}
 /* Grid */
 .grid2{display:grid;grid-template-columns:1fr 1fr;gap:16px}
 .grid3{display:grid;grid-template-columns:1fr 1fr 1fr;gap:16px}
@media(max-width:900px){.grid2,.grid3{grid-template-columns:1fr}.card-head-actions{align-items:flex-start;flex-direction:column}.card-head-buttons{margin-left:0}}
 /* iframe viewer */
 .viewer-frame{width:100%;height:calc(100vh - 160px);border:0;border-radius:4px;background:var(--surface-2)}
 /* Alerts */
 .alert{padding:10px 14px;border-radius:4px;font-size:13px;margin-bottom:14px}
 .alert-info{background:#dff0ff;border:1px solid #a9d4f5;color:#1e3a5f}
 .alert-warn{background:var(--warn-bg);border:1px solid #c9ba9b;color:var(--warn-fg)}
 </style>
 </head>
 <body>
 `
 }
 // layoutNav renders the sidebar shared by every page: the logo, the build
 // version, an optional NVIDIA GSP badge and the navigation links. The link
 // whose id equals active gets the additional "active" CSS class. A buildLabel
 // that is blank after trimming is displayed as "dev".
 func layoutNav(active string, buildLabel string) string {
 	type navEntry struct{ id, label, href, onclick string }
 	entries := []navEntry{
 		{id: "dashboard", label: "Dashboard", href: "/"},
 		{id: "audit", label: "Audit", href: "/audit"},
 		{id: "validate", label: "Validate", href: "/validate"},
 		{id: "burn", label: "Burn", href: "/burn"},
 		{id: "benchmark", label: "Benchmark", href: "/benchmark"},
 		{id: "tasks", label: "Tasks", href: "/tasks"},
 		{id: "tools", label: "Tools", href: "/tools"},
 	}
 	version := buildLabel
 	if strings.TrimSpace(version) == "" {
 		version = "dev"
 	}
 	var sb strings.Builder
 	sb.WriteString(`<aside class="sidebar">`)
 	sb.WriteString(`<div class="sidebar-logo">bee<span>hardware audit</span></div>`)
 	sb.WriteString(`<div class="sidebar-version">Version `)
 	sb.WriteString(html.EscapeString(version))
 	sb.WriteString(`</div>`)
 	// The mode file is optional: when it cannot be read, or holds any other
 	// value, no badge is rendered.
 	if data, err := os.ReadFile("/run/bee-nvidia-mode"); err == nil {
 		mode := strings.TrimSpace(string(data))
 		if mode == "gsp-off" {
 			sb.WriteString(`<div class="sidebar-badge sidebar-badge-warn">NVIDIA GSP=off</div>`)
 		} else if mode == "gsp-stuck" {
 			sb.WriteString(`<div class="sidebar-badge sidebar-badge-crit">NVIDIA GSP stuck — reboot</div>`)
 		}
 	}
 	sb.WriteString(`<nav class="nav">`)
 	for _, entry := range entries {
 		class := "nav-item"
 		if entry.id == active {
 			class = "nav-item active"
 		}
 		if entry.onclick == "" {
 			fmt.Fprintf(&sb, `<a class="%s" href="%s">%s</a>`, class, entry.href, entry.label)
 			continue
 		}
 		fmt.Fprintf(&sb, `<a class="%s" href="%s" onclick="%s">%s</a>`,
 			class, entry.href, entry.onclick, entry.label)
 	}
 	sb.WriteString(`</nav>`)
 	sb.WriteString(`</aside>`)
 	return sb.String()
 }
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -53,6 +53,9 @@ CREATE TABLE IF NOT EXISTS sys_metrics (
  cpu_load_pct REAL,
  mem_load_pct REAL,
  power_w      REAL,
  power_source TEXT,
  power_mode   TEXT,
  power_reason TEXT,
  PRIMARY KEY (ts)
 );
 CREATE TABLE IF NOT EXISTS gpu_metrics (
@@ -86,7 +89,16 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
 	if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
 		return err
 	}
-	return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
+	if err := ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_source", "TEXT"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_mode", "TEXT"); err != nil {
 		return err
 	}
 	return ensureMetricsColumn(db, "sys_metrics", "power_reason", "TEXT")
 }
 func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
@@ -125,8 +137,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	defer func() { _ = tx.Rollback() }()
 	_, err = tx.Exec(
-		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason) VALUES(?,?,?,?,?,?,?)`,
-		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW, s.PowerSource, s.PowerMode, s.PowerReason,
 	)
 	if err != nil {
 		return err
@@ -213,12 +225,12 @@ func (m *MetricsDB) Prune(before time.Time) error {
 // LoadRecent returns up to n samples in chronological order (oldest first).
 func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
 }
 // LoadAll returns all persisted samples in chronological order (oldest first).
 func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics ORDER BY ts`, nil)
 }
 // LoadBetween returns samples in chronological order within the given time window.
@@ -233,7 +245,7 @@ func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSamp
 		start, end = end, start
 	}
 	return m.loadSamples(
-		`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
+		`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
 		start.Unix(), end.Unix(),
 	)
 }
@@ -249,11 +261,14 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	type sysRow struct {
 		ts            int64
 		cpu, mem, pwr float64
 		powerSource   string
 		powerMode     string
 		powerReason   string
 	}
 	var sysRows []sysRow
 	for rows.Next() {
 		var r sysRow
-		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr, &r.powerSource, &r.powerMode, &r.powerReason); err != nil {
 			continue
 		}
 		sysRows = append(sysRows, r)
@@ -363,10 +378,13 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	samples := make([]platform.LiveMetricSample, len(sysRows))
 	for i, r := range sysRows {
 		s := platform.LiveMetricSample{
-			Timestamp:  time.Unix(r.ts, 0).UTC(),
+			Timestamp:   time.Unix(r.ts, 0).UTC(),
-			CPULoadPct: r.cpu,
+			CPULoadPct:  r.cpu,
-			MemLoadPct: r.mem,
+			MemLoadPct:  r.mem,
-			PowerW:     r.pwr,
+			PowerW:      r.pwr,
 			PowerSource: r.powerSource,
 			PowerMode:   r.powerMode,
 			PowerReason: r.powerReason,
 		}
 		for _, idx := range gpuIndices {
 			if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
--- a/audit/internal/webui/page_benchmark.go
+++ b/audit/internal/webui/page_benchmark.go
@@ -0,0 +1,613 @@
 package webui
 import (
 	"encoding/json"
 	"fmt"
 	"html"
 	"os"
 	"path/filepath"
 	"sort"
 	"strconv"
 	"strings"
 	"time"
 	"bee/audit/internal/app"
 	"bee/audit/internal/platform"
 )
 // benchmarkHistoryRun is one saved performance benchmark run, reduced to the
 // values shown in the results table.
 type benchmarkHistoryRun struct {
 	// generatedAt is the timestamp stored in the result; history is sorted
 	// newest first by this value.
 	generatedAt   time.Time
 	// displayTime is generatedAt in local time, formatted "2006-01-02 15:04:05".
 	displayTime   string
 	// gpuScores holds the composite score of each GPU, keyed by GPU index.
 	gpuScores     map[int]float64
 	// gpuStatuses holds the status string of each GPU, keyed by GPU index.
 	gpuStatuses   map[int]string
 	// overallStatus is the run-level status; the table renders an empty value
 	// as "OK".
 	overallStatus string
 }
 // renderBenchmark returns the body of the Benchmark page: the setup card
 // (profile, GPU selection, run mode, run and autotune buttons), the method
 // overview table, the saved-results section rendered from opts.ExportDir, the
 // output terminal, and the inline CSS/JavaScript that drives the page.
 //
 // The embedded script escapes every API-provided value (GPU name, index,
 // memory size, error text) before assigning it to innerHTML. Both the
 // benchmark and the autotune log streams close their EventSource on error so
 // the browser does not keep reconnecting after a stream drops.
 func renderBenchmark(opts HandlerOptions) string {
 	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Benchmark runs generate a human-readable TXT report and machine-readable result bundle. Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 <div class="grid2">
  <div class="card">
    <div class="card-head">Benchmark Setup</div>
    <div class="card-body">
      <div class="form-row">
        <label>Profile</label>
        <select id="benchmark-profile">
          <option value="standard" selected>Standard — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</option>
          <option value="stability">Stability — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</option>
          <option value="overnight">Overnight — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfOvernightSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerOvernightSec) + `</option>
        </select>
      </div>
      <div class="form-row">
        <label>GPU Selection</label>
        <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectAll()">Select All</button>
          <button class="btn btn-sm btn-secondary" type="button" onclick="benchmarkSelectNone()">Clear</button>
        </div>
        <div id="benchmark-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
          <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
        </div>
      </div>
      <label class="benchmark-cb-row">
        <input type="radio" name="benchmark-mode" value="sequential" onchange="benchmarkUpdateSelectionNote()">
        <span>Sequential — one GPU at a time</span>
      </label>
      <label class="benchmark-cb-row" id="benchmark-parallel-label">
        <input type="radio" name="benchmark-mode" value="parallel" onchange="benchmarkUpdateSelectionNote()">
        <span>Parallel — all selected GPUs simultaneously</span>
      </label>
      <label class="benchmark-cb-row" id="benchmark-ramp-label">
        <input type="radio" name="benchmark-mode" value="ramp-up" checked onchange="benchmarkUpdateSelectionNote()">
        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
      </label>
      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
      <div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center">
        <button id="benchmark-run-performance-btn" class="btn btn-primary" onclick="runNvidiaBenchmark('performance')" disabled>&#9654; Run Performance Benchmark</button>
        <button id="benchmark-run-power-fit-btn" class="btn btn-secondary" onclick="runNvidiaBenchmark('power-fit')" disabled>&#9654; Run Power / Thermal Fit</button>
        <button id="benchmark-run-autotune-btn" class="btn btn-secondary" onclick="runBenchmarkAutotune()">Autotune</button>
      </div>
      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
      <div id="benchmark-autotune-status" style="margin-top:10px;font-size:12px;color:var(--muted)">Autotune status: loading…</div>
      <div style="margin-top:6px;font-size:12px;color:var(--muted)">Autotune overwrites the saved system-power source and applies it to all new power charts and tests.</div>
    </div>
  </div>
  <div class="card">
    <div class="card-head">Method Split</div>
    <div class="card-body">
      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
      <table>
        <tr><th>Run Type</th><th>Engine</th><th>Question</th><th>Standard</th><th>Stability</th></tr>
        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + `</td></tr>
        <tr><td>Power / Thermal Fit</td><td><code>dcgmproftester</code> + <code>nvidia-smi -pl</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</td></tr>
      </table>
      <p style="font-size:12px;color:var(--muted);margin-top:10px">Timings are per full ramp-up run (1 GPU → all selected), measured on 4–8 GPU servers. Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
    </div>
  </div>
 </div>
 ` + `<div id="benchmark-results-section">` + renderBenchmarkResultsCard(opts.ExportDir) + `</div>` + `
 <div id="benchmark-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
  <div class="card-body"><div id="benchmark-terminal" class="terminal"></div></div>
 </div>
 <style>
 .benchmark-cb-row { display:flex; align-items:flex-start; gap:8px; cursor:pointer; font-size:13px; }
 .benchmark-cb-row input[type=checkbox], .benchmark-cb-row input[type=radio] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .benchmark-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .benchmark-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 </style>
 <script>
 let benchmarkES = null;
 function benchmarkEscapeHTML(value) {
  return String(value).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&#39;');
 }
 function benchmarkTaskIDs(payload) {
  if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
  if (payload && payload.task_id) return [payload.task_id];
  return [];
 }
 function benchmarkSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.benchmark-gpu-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function benchmarkMode() {
  const el = document.querySelector('input[name="benchmark-mode"]:checked');
  return el ? el.value : 'sequential';
 }
 function benchmarkUpdateSelectionNote() {
  const selected = benchmarkSelectedGPUIndices();
  const perfBtn = document.getElementById('benchmark-run-performance-btn');
  const fitBtn = document.getElementById('benchmark-run-power-fit-btn');
  const note = document.getElementById('benchmark-selection-note');
  if (!selected.length) {
    perfBtn.disabled = true;
    fitBtn.disabled = true;
    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
    return;
  }
  perfBtn.disabled = false;
  fitBtn.disabled = false;
  const mode = benchmarkMode();
  if (mode === 'ramp-up') {
    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). Performance uses compute benchmark; Power / Thermal Fit uses dcgmproftester load with nvidia-smi power-limit search per step.';
  } else if (mode === 'parallel') {
    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously. Only the performance benchmark supports this mode.';
  } else {
    note.textContent = 'Sequential: each selected GPU benchmarked separately.';
  }
 }
 function benchmarkRenderGPUList(gpus) {
  const root = document.getElementById('benchmark-gpu-list');
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    benchmarkUpdateSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    const mem = gpu.memory_mb > 0 ? ' · ' + benchmarkEscapeHTML(gpu.memory_mb) + ' MiB' : '';
    return '<label class="benchmark-gpu-row">'
      + '<input class="benchmark-gpu-checkbox" type="checkbox" value="' + benchmarkEscapeHTML(gpu.index) + '" checked onchange="benchmarkUpdateSelectionNote()">'
      + '<span><strong>GPU ' + benchmarkEscapeHTML(gpu.index) + '</strong> — ' + benchmarkEscapeHTML(gpu.name) + mem + '</span>'
      + '</label>';
  }).join('');
  benchmarkApplyMultiGPUState(gpus.length);
  benchmarkUpdateSelectionNote();
 }
 function benchmarkApplyMultiGPUState(gpuCount) {
  var multiValues = ['parallel', 'ramp-up'];
  var radios = document.querySelectorAll('input[name="benchmark-mode"]');
  radios.forEach(function(el) {
    var isMulti = multiValues.indexOf(el.value) >= 0;
    if (gpuCount < 2 && isMulti) {
      el.disabled = true;
      if (el.checked) {
        var seq = document.querySelector('input[name="benchmark-mode"][value="sequential"]');
        if (seq) seq.checked = true;
      }
      var label = el.closest('label');
      if (label) label.style.opacity = '0.4';
    } else {
      el.disabled = false;
      if (gpuCount >= 2 && el.value === 'ramp-up') el.checked = true;
      var label = el.closest('label');
      if (label) label.style.opacity = '';
    }
  });
  benchmarkUpdateSelectionNote();
 }
 function benchmarkLoadGPUs() {
  const status = document.getElementById('benchmark-run-status');
  status.textContent = '';
  fetch('/api/gpu/nvidia').then(function(r) {
    return r.json().then(function(body) {
      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
      return body;
    });
  }).then(function(gpus) {
    benchmarkRenderGPUList(gpus);
  }).catch(function(err) {
    document.getElementById('benchmark-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + benchmarkEscapeHTML(err.message) + '</p>';
    benchmarkUpdateSelectionNote();
  });
 }
 function benchmarkSelectAll() {
  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = true; });
  benchmarkUpdateSelectionNote();
 }
 function benchmarkSelectNone() {
  document.querySelectorAll('.benchmark-gpu-checkbox').forEach(function(el) { el.checked = false; });
  benchmarkUpdateSelectionNote();
 }
 function runNvidiaBenchmark(kind) {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  if (!selected.length) {
    status.textContent = 'Select at least one GPU.';
    return;
  }
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  const mode = benchmarkMode();
  const rampUp = mode === 'ramp-up' && selected.length > 1;
  const parallelGPUs = mode === 'parallel' && kind === 'performance';
  if (kind === 'power-fit' && mode === 'parallel') {
    status.textContent = 'Power / Thermal Fit supports sequential or ramp-up only.';
    return;
  }
  const body = {
    profile: document.getElementById('benchmark-profile').value || 'standard',
    gpu_indices: selected,
    run_nccl: kind === 'performance' && selected.length > 1,
    parallel_gpus: parallelGPUs,
    ramp_up: rampUp,
    display_name: kind === 'power-fit' ? 'NVIDIA Power / Thermal Fit' : 'NVIDIA Performance Benchmark'
  };
  document.getElementById('benchmark-output').style.display = 'block';
  document.getElementById('benchmark-title').textContent = '— ' + body.display_name + ' · ' + body.profile + ' [' + selected.join(', ') + ']';
  const term = document.getElementById('benchmark-terminal');
  term.textContent = 'Enqueuing ' + body.display_name + ' for GPUs ' + selected.join(', ') + '...\n';
  status.textContent = 'Queueing...';
  const endpoint = kind === 'power-fit' ? '/api/bee-bench/nvidia/power/run' : '/api/bee-bench/nvidia/perf/run';
  fetch(endpoint, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  }).then(function(d) {
    const taskIds = benchmarkTaskIDs(d);
    if (!taskIds.length) throw new Error('No benchmark task was queued.');
    status.textContent = taskIds.length === 1 ? ('Task ' + taskIds[0] + ' queued.') : ('Queued ' + taskIds.length + ' tasks.');
    const streamNext = function(idx, failures) {
      if (idx >= taskIds.length) {
        status.textContent = failures ? 'Completed with failures.' : 'Completed.';
        return;
      }
      const taskId = taskIds[idx];
      term.textContent += '\n[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming log...\n';
      benchmarkES = new EventSource('/api/tasks/' + taskId + '/stream');
      benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
      benchmarkES.addEventListener('done', function(e) {
        benchmarkES.close();
        benchmarkES = null;
        if (e.data) failures += 1;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
        const isLast = (idx + 1 >= taskIds.length);
        streamNext(idx + 1, failures);
        if (isLast) { benchmarkRefreshResults(); }
      });
      benchmarkES.onerror = function() {
        if (benchmarkES) {
          benchmarkES.close();
          benchmarkES = null;
        }
        term.textContent += '\nERROR: stream disconnected.\n';
        term.scrollTop = term.scrollHeight;
        streamNext(idx + 1, failures + 1);
      };
    };
    streamNext(0, 0);
  }).catch(function(err) {
    status.textContent = 'Error.';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
 }
 function benchmarkRenderAutotuneStatus(payload) {
  const el = document.getElementById('benchmark-autotune-status');
  if (!el) return;
  if (!payload || !payload.configured || !payload.config) {
    el.textContent = 'Autotune status: not configured. Temporary fallback source is used until autotune completes.';
    return;
  }
  const cfg = payload.config || {};
  const decision = payload.decision || {};
  const updated = cfg.updated_at ? new Date(cfg.updated_at).toLocaleString() : 'unknown time';
  const confidence = typeof cfg.confidence === 'number' ? (' · confidence ' + Math.round(cfg.confidence * 100) + '%') : '';
  const effective = decision.effective_source ? (' · effective ' + decision.effective_source) : '';
  const mode = decision.mode ? (' · mode ' + decision.mode) : '';
  el.textContent = 'Autotune status: ' + cfg.selected_source + effective + mode + ' · updated ' + updated + confidence;
 }
 function loadBenchmarkAutotuneStatus() {
  fetch('/api/bee-bench/nvidia/autotune/status')
    .then(function(r) {
      return r.json().then(function(body) {
        if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
        return body;
      });
    })
    .then(function(body) { benchmarkRenderAutotuneStatus(body); })
    .catch(function(err) {
      const el = document.getElementById('benchmark-autotune-status');
      if (el) el.textContent = 'Autotune status error: ' + err.message;
    });
 }
 function runBenchmarkAutotune() {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  const term = document.getElementById('benchmark-terminal');
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  document.getElementById('benchmark-output').style.display = 'block';
  document.getElementById('benchmark-title').textContent = '— NVIDIA Benchmark Autotune';
  term.textContent = 'Enqueuing benchmark autotune...\n';
  status.textContent = 'Queueing autotune...';
  fetch('/api/bee-bench/nvidia/autotune/run', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify({
      profile: document.getElementById('benchmark-profile').value || 'standard',
      benchmark_kind: benchmarkMode() === 'parallel' ? 'performance' : 'power-fit',
      gpu_indices: selected
    })
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  }).then(function(d) {
    const taskIds = benchmarkTaskIDs(d);
    if (!taskIds.length) throw new Error('No autotune task was queued.');
    const taskId = taskIds[0];
    status.textContent = 'Autotune queued: ' + taskId;
    benchmarkES = new EventSource('/api/tasks/' + taskId + '/stream');
    benchmarkES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
    benchmarkES.addEventListener('done', function(e) {
      if (benchmarkES) {
        benchmarkES.close();
        benchmarkES = null;
      }
      term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
      status.textContent = e.data ? 'Autotune failed.' : 'Autotune completed.';
      loadBenchmarkAutotuneStatus();
    });
    benchmarkES.onerror = function() {
      if (benchmarkES) {
        benchmarkES.close();
        benchmarkES = null;
      }
      term.textContent += '\nERROR: stream disconnected.\n';
      term.scrollTop = term.scrollHeight;
      status.textContent = 'Autotune stream disconnected.';
      loadBenchmarkAutotuneStatus();
    };
  }).catch(function(err) {
    status.textContent = 'Autotune error.';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
 }
 benchmarkLoadGPUs();
 loadBenchmarkAutotuneStatus();
 function benchmarkRefreshResults() {
  fetch('/api/benchmark/results')
    .then(function(r) { return r.text(); })
    .then(function(html) {
      const el = document.getElementById('benchmark-results-section');
      if (el) el.innerHTML = html;
    })
    .catch(function() {});
 }
 </script>`
 }
 // renderBenchmarkResultsCard returns the saved-results section of the
 // Benchmark page: the performance results card built from the history found
 // under exportDir, a newline, then the power / thermal fit results card.
 func renderBenchmarkResultsCard(exportDir string) string {
 	const (
 		perfTitle       = "Perf Results"
 		perfDescription = "Composite score by saved benchmark run and GPU."
 		perfEmpty       = "No saved performance benchmark runs yet."
 	)
 	highestGPU, history := loadBenchmarkHistory(exportDir)
 	perfCard := renderBenchmarkResultsCardFromRuns(perfTitle, perfDescription, perfEmpty, highestGPU, history)
 	return perfCard + "\n" + renderPowerBenchmarkResultsCard(exportDir)
 }
 // renderBenchmarkResultsCardFromRuns renders one results card. With no runs
 // it shows emptyMessage under the title; otherwise it emits a table with one
 // row per run (numbered in slice order) and one score column for every GPU
 // index from 0 through maxGPUIndex. A run without a score for an index gets a
 // muted "-". title, description, emptyMessage and the displayed time and
 // status are HTML-escaped; a blank description is omitted.
 func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
 	const (
 		critCell = ` style="color:var(--crit-fg,#9f3a38);font-weight:600"`
 		warnCell = ` style="color:var(--warn);font-weight:600"`
 	)
 	var out strings.Builder
 	out.WriteString(`<div class="card"><div class="card-head">`)
 	out.WriteString(html.EscapeString(title))
 	out.WriteString(`</div><div class="card-body">`)
 	if len(runs) == 0 {
 		out.WriteString(`<p style="color:var(--muted);font-size:13px">`)
 		out.WriteString(html.EscapeString(emptyMessage))
 		out.WriteString(`</p></div></div>`)
 		return out.String()
 	}
 	if strings.TrimSpace(description) != "" {
 		out.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">`)
 		out.WriteString(html.EscapeString(description))
 		out.WriteString(`</p>`)
 	}
 	out.WriteString(`<div style="overflow-x:auto"><table><thead><tr><th>Run</th><th>Time</th><th>Status</th>`)
 	for col := 0; col <= maxGPUIndex; col++ {
 		out.WriteString(`<th>GPU ` + strconv.Itoa(col) + `</th>`)
 	}
 	out.WriteString(`</tr></thead><tbody>`)
 	for rowNum, entry := range runs {
 		// An empty overall status is displayed as OK.
 		status := entry.overallStatus
 		if status == "" {
 			status = "OK"
 		}
 		var statusColor string
 		switch status {
 		case "OK":
 			statusColor = "var(--ok)"
 		case "FAILED":
 			statusColor = "var(--crit-fg,#9f3a38)"
 		default:
 			statusColor = "var(--warn)"
 		}
 		out.WriteString(`<tr><td>#` + strconv.Itoa(rowNum+1) + `</td>`)
 		out.WriteString(`<td>` + html.EscapeString(entry.displayTime) + `</td>`)
 		out.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(status) + `</td>`)
 		for col := 0; col <= maxGPUIndex; col++ {
 			value, found := entry.gpuScores[col]
 			if !found {
 				out.WriteString(`<td style="color:var(--muted)">-</td>`)
 				continue
 			}
 			// Anything that is neither OK/empty nor FAILED is highlighted as a
 			// warning.
 			cellStyle := warnCell
 			switch entry.gpuStatuses[col] {
 			case "FAILED":
 				cellStyle = critCell
 			case "", "OK":
 				cellStyle = ""
 			}
 			out.WriteString(`<td` + cellStyle + `>` + fmt.Sprintf("%.2f", value) + `</td>`)
 		}
 		out.WriteString(`</tr>`)
 	}
 	out.WriteString(`</tbody></table></div></div></div>`)
 	return out.String()
 }
 // loadBenchmarkHistory locates the saved performance benchmark results
 // (perf-*/result.json) and decodes them with loadBenchmarkHistoryFromPaths.
 // Results are searched below <exportDir>/bee-bench/perf, or below
 // app.DefaultBeeBenchPerfDir when exportDir is blank. It returns (-1, nil)
 // when the glob fails or matches nothing.
 func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
 	var root string
 	switch {
 	case strings.TrimSpace(exportDir) == "":
 		root = app.DefaultBeeBenchPerfDir
 	default:
 		root = filepath.Join(exportDir, "bee-bench", "perf")
 	}
 	pattern := filepath.Join(root, "perf-*", "result.json")
 	matches, err := filepath.Glob(pattern)
 	if err == nil && len(matches) > 0 {
 		sort.Strings(matches)
 		return loadBenchmarkHistoryFromPaths(matches)
 	}
 	return -1, nil
 }
 // loadBenchmarkHistoryFromPaths decodes each result file in paths into a
 // benchmarkHistoryRun and returns the runs sorted newest first, together with
 // the highest GPU index seen in any decoded result (-1 when there is none).
 // Files that cannot be read or decoded are skipped.
 func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
 	highest := -1
 	history := make([]benchmarkHistoryRun, 0, len(paths))
 	for _, file := range paths {
 		data, readErr := os.ReadFile(file)
 		if readErr != nil {
 			continue
 		}
 		var parsed platform.NvidiaBenchmarkResult
 		if json.Unmarshal(data, &parsed) != nil {
 			continue
 		}
 		scores := make(map[int]float64)
 		statuses := make(map[int]string)
 		for _, card := range parsed.GPUs {
 			scores[card.Index] = card.Scores.CompositeScore
 			statuses[card.Index] = card.Status
 			if card.Index > highest {
 				highest = card.Index
 			}
 		}
 		history = append(history, benchmarkHistoryRun{
 			generatedAt:   parsed.GeneratedAt,
 			displayTime:   parsed.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
 			gpuScores:     scores,
 			gpuStatuses:   statuses,
 			overallStatus: parsed.OverallStatus,
 		})
 	}
 	sort.Slice(history, func(a, b int) bool {
 		return history[a].generatedAt.After(history[b].generatedAt)
 	})
 	return highest, history
 }
 // renderPowerBenchmarkResultsCard renders the "Power / Thermal Fit Results"
 // card from the power-*/result.json files below <exportDir>/bee-bench/power
 // (or below app.DefaultBeeBenchPowerDir when exportDir is blank).
 //
 // The newest run is shown in detail with one row per GPU; when more than one
 // run was decoded, a collapsible table lists every run newest first. Files
 // that cannot be read or decoded are skipped. If no run remains, the same
 // empty-state card is returned as when no files exist at all.
 func renderPowerBenchmarkResultsCard(exportDir string) string {
 	const emptyCard = `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
 	// statusColorFor maps a status label to its CSS color so the run summary,
 	// the per-GPU rows and the history table all agree.
 	statusColorFor := func(status string) string {
 		switch status {
 		case "OK":
 			return "var(--ok)"
 		case "FAILED":
 			return "var(--crit-fg,#9f3a38)"
 		default:
 			return "var(--warn)"
 		}
 	}
 	baseDir := app.DefaultBeeBenchPowerDir
 	if strings.TrimSpace(exportDir) != "" {
 		baseDir = filepath.Join(exportDir, "bee-bench", "power")
 	}
 	paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
 	if err != nil || len(paths) == 0 {
 		return emptyCard
 	}
 	sort.Strings(paths)
 	type powerRun struct {
 		generatedAt time.Time
 		displayTime string
 		result      platform.NvidiaPowerBenchResult
 	}
 	var runs []powerRun
 	for _, path := range paths {
 		raw, err := os.ReadFile(path)
 		if err != nil {
 			continue
 		}
 		var r platform.NvidiaPowerBenchResult
 		if err := json.Unmarshal(raw, &r); err != nil {
 			continue
 		}
 		runs = append(runs, powerRun{
 			generatedAt: r.GeneratedAt,
 			displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
 			result:      r,
 		})
 	}
 	// Result files may exist while none of them is readable or valid JSON;
 	// without this guard the runs[0] access below would panic.
 	if len(runs) == 0 {
 		return emptyCard
 	}
 	sort.Slice(runs, func(i, j int) bool {
 		return runs[i].generatedAt.After(runs[j].generatedAt)
 	})
 	var b strings.Builder
 	b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
 	latest := runs[0].result
 	b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
 	if latest.Hostname != "" {
 		b.WriteString(` — ` + html.EscapeString(latest.Hostname))
 	}
 	if latest.OverallStatus != "" {
 		b.WriteString(` — <span style="color:` + statusColorFor(latest.OverallStatus) + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
 	}
 	b.WriteString(`</p>`)
 	if len(latest.GPUs) > 0 {
 		b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
 		// NOTE(review): the column is labelled "P95 Observed W" but is filled
 		// from MaxObservedPowerW — confirm which statistic the field holds.
 		b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Single-card W</th><th>Multi-GPU W</th><th>P95 Observed W</th><th>Status</th>`)
 		b.WriteString(`</tr></thead><tbody>`)
 		for _, gpu := range latest.GPUs {
 			// Prefer the stable limit; fall back to the applied limit when no
 			// stable limit was recorded.
 			finalLimitW := gpu.StablePowerLimitW
 			if finalLimitW <= 0 {
 				finalLimitW = gpu.AppliedPowerLimitW
 			}
 			// A GPU counts as derated when flagged, or when its final limit is
 			// more than 1 W below the default limit.
 			derated := gpu.Derated ||
 				(gpu.DefaultPowerLimitW > 0 && finalLimitW > 0 && finalLimitW < gpu.DefaultPowerLimitW-1)
 			rowStyle := ""
 			finalStyle := ""
 			if derated {
 				rowStyle = ` style="background:rgba(255,180,0,0.08)"`
 				finalStyle = ` style="color:#e6a000;font-weight:600"`
 			}
 			statusLabel := gpu.Status
 			if statusLabel == "" {
 				statusLabel = "OK"
 			}
 			nominalStr := "-"
 			if gpu.DefaultPowerLimitW > 0 {
 				nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
 			}
 			singleStr := "-"
 			if gpu.AppliedPowerLimitW > 0 {
 				singleStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
 			}
 			multiStr := "-"
 			if gpu.StablePowerLimitW > 0 {
 				multiStr = fmt.Sprintf("%.0f", gpu.StablePowerLimitW)
 			}
 			p95Str := "-"
 			if gpu.MaxObservedPowerW > 0 {
 				p95Str = fmt.Sprintf("%.0f", gpu.MaxObservedPowerW)
 			}
 			b.WriteString(`<tr` + rowStyle + `>`)
 			b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
 			b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
 			b.WriteString(`<td>` + nominalStr + `</td>`)
 			b.WriteString(`<td>` + singleStr + `</td>`)
 			b.WriteString(`<td` + finalStyle + `>` + multiStr + `</td>`)
 			b.WriteString(`<td>` + p95Str + `</td>`)
 			b.WriteString(`<td style="color:` + statusColorFor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
 			b.WriteString(`</tr>`)
 		}
 		b.WriteString(`</tbody></table></div>`)
 	}
 	if len(runs) > 1 {
 		b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
 		b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
 		for i, run := range runs {
 			// Show an empty status as OK, matching the per-GPU rows above,
 			// instead of a blank warning-coloured cell.
 			statusLabel := run.result.OverallStatus
 			if statusLabel == "" {
 				statusLabel = "OK"
 			}
 			b.WriteString(`<tr>`)
 			b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
 			b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
 			b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
 			b.WriteString(`<td style="color:` + statusColorFor(statusLabel) + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
 			b.WriteString(`</tr>`)
 		}
 		b.WriteString(`</tbody></table></div></details>`)
 	}
 	b.WriteString(`</div></div>`)
 	return b.String()
 }
--- a/audit/internal/webui/page_burn.go
+++ b/audit/internal/webui/page_burn.go
@@ -0,0 +1,383 @@
 package webui
 // renderBurn returns the static HTML, CSS and JavaScript for the Burn page.
 //
 // The markup offers a burn profile preset, an NVIDIA GPU selection list and
 // two groups of stress recipes. The embedded script:
 //   - loads the GPU list from /api/gpu/nvidia and tool availability from
 //     /api/gpu/tools,
 //   - enqueues tasks via POST /api/sat/<target>/run,
 //   - streams task output from /api/tasks/<id>/stream into the terminal box.
 //
 // Every value that comes back from the server and is placed into innerHTML is
 // passed through burnEscapeHTML so that a GPU name or an error message cannot
 // inject markup into the page.
 func renderBurn() string {
 	return `<div class="alert alert-warn" style="margin-bottom:16px"><strong>&#9888; Warning:</strong> Stress tests on this page run hardware at high load. Repeated or prolonged use may reduce hardware lifespan. Use only when necessary.</div>
 <div class="alert alert-info" style="margin-bottom:16px"><strong>Scope:</strong> Burn exposes sustained GPU compute load recipes. DCGM diagnostics (` + "targeted_stress, targeted_power, pulse_test" + `) and LINPACK remain in <a href="/validate">Validate → Stress mode</a>; NCCL and NVBandwidth are available directly from <a href="/validate">Validate</a>.</div>
 <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Burn Profile</div>
  <div class="card-body burn-profile-body">
    <div class="burn-profile-col">
      <div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
      <label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
      <label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
      <label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
    </div>
    <div class="burn-profile-col burn-profile-action">
      <button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
      <p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
    </div>
    <div class="burn-profile-col burn-profile-action">
      <button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>
      <p>Run checked core test modules (CPU, MEM, GPU). Tests start at the same time and run for a period with short cooldown phases to stress the server cooling system.</p>
    </div>
  </div>
  <div class="card-body" style="padding-top:0;display:flex;justify-content:center">
    <span id="burn-all-status" style="font-size:12px;color:var(--muted)"></span>
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">NVIDIA GPU Selection</div>
  <div class="card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Official NVIDIA recipes and custom NVIDIA stressors use only the GPUs selected here. Multi-GPU interconnect tests are limited to this selection as well.</p>
    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectAll()">Select All</button>
      <button class="btn btn-sm btn-secondary" type="button" onclick="burnSelectNone()">Clear</button>
    </div>
 	    <div id="burn-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
 	      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
 	    </div>
 	    <p id="burn-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA burn recipes.</p>
 	    <div style="display:flex;flex-direction:column;gap:4px;margin-top:10px">
 	      <label class="cb-row">
 	        <input type="radio" name="burn-nvidia-mode" value="sequential" checked>
 	        <span>Sequential — selected GPUs one at a time</span>
 	      </label>
 	      <label class="cb-row" id="burn-parallel-label">
 	        <input type="radio" name="burn-nvidia-mode" value="parallel">
 	        <span>Parallel — all selected GPUs simultaneously</span>
 	      </label>
 	      <label class="cb-row" id="burn-ramp-label">
 	        <input type="radio" name="burn-nvidia-mode" value="ramp-up">
 	        <span>Ramp-up — add one GPU at a time</span>
 	      </label>
 	    </div>
 	  </div>
 	</div>
 <div class="burn-section">Core Burn Paths</div>
 <div class="grid2 burn-grid" style="margin-bottom:16px">
 <div class="card burn-card">
  <div class="card-head card-head-actions"><span>GPU Max Load</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},{id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},{id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},{id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'}])">Run</button></div>
  <div class="card-body burn-card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Combine vendor-backed and custom GPU max-load recipes in one run set. ` + "dcgmproftester" + ` is the primary official NVIDIA path; custom stressors remain available as parallel checkbox options.</p>
    <label class="cb-row"><input type="checkbox" id="burn-nvidia-compute" checked disabled><span>NVIDIA Max Compute Load (dcgmproftester) <span class="cb-note" id="note-nvidia-compute"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-bee" checked disabled><span>GPU Burn (bee-gpu-burn) <span class="cb-note" id="note-bee"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-john" disabled><span>John GPU Stress (john/OpenCL) <span class="cb-note" id="note-john"></span></span></label>
    <label class="cb-row"><input type="checkbox" id="burn-gpu-rvs" disabled><span>AMD GPU Stress (rvs gst) <span class="cb-note" id="note-rvs"></span></span></label>
  </div>
 </div>
 <div class="card burn-card">
  <div class="card-head card-head-actions"><span>Compute Stress</span><button class="btn btn-primary btn-sm" onclick="runBurnTaskSet([{id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},{id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},{id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'}])">Run</button></div>
  <div class="card-body burn-card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">Select which subsystems to stress. Each checked item runs as a separate task.</p>
    <label class="cb-row"><input type="checkbox" id="burn-cpu" checked><span>CPU stress (stress-ng)</span></label>
    <label class="cb-row"><input type="checkbox" id="burn-mem-stress" checked><span>Memory stress (stress-ng --vm)</span></label>
    <label class="cb-row"><input type="checkbox" id="burn-sat-stress"><span>stressapptest (CPU + memory bus)</span></label>
  </div>
 </div>
 </div>
 <div id="bi-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Output <span id="bi-title"></span></div>
  <div class="card-body"><div id="bi-terminal" class="terminal"></div></div>
 </div>
 <style>
 .cb-row { display:flex; align-items:flex-start; gap:8px; padding:4px 0; cursor:pointer; font-size:13px; }
 .cb-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .cb-row input[type=checkbox]:disabled { opacity:0.4; cursor:not-allowed; }
 .cb-row input[type=checkbox]:disabled ~ span { opacity:0.45; cursor:not-allowed; }
 .cb-note { font-size:11px; color:var(--muted); font-style:italic; }
 .burn-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .burn-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
 .burn-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
 .burn-profile-col { min-width:0; }
 .burn-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:flex-start; gap:8px; }
 .burn-profile-action p { font-size:12px; color:var(--muted); margin:0; width:100%; text-align:left; }
 .burn-section { font-size:12px; font-weight:700; letter-spacing:.06em; text-transform:uppercase; color:var(--muted); margin:0 0 10px; padding-top:4px; }
 .burn-grid { align-items:stretch; }
 .burn-card { height:100%; display:flex; flex-direction:column; }
 .burn-card-body { flex:1; display:flex; flex-direction:column; }
 .card-head-actions { justify-content:space-between; }
 .card-head-buttons { display:flex; align-items:center; gap:8px; margin-left:auto; }
@media(max-width:900px){ .card-head-actions { align-items:flex-start; flex-direction:column; } .card-head-buttons { margin-left:0; } .burn-profile-body { grid-template-columns:1fr; } }
 </style>
 <script>
 let biES = null;
 // Escapes a value for safe interpolation into innerHTML / attribute values.
 function burnEscapeHTML(value) {
  return String(value == null ? '' : value).replace(/[&<>"']/g, function(ch) {
    return {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[ch];
  });
 }
 // Normalises an enqueue response to a list of task IDs (task_ids wins over task_id).
 function burnTaskIDs(payload) {
  if (payload && Array.isArray(payload.task_ids) && payload.task_ids.length) return payload.task_ids;
  if (payload && payload.task_id) return [payload.task_id];
  return [];
 }
 function burnProfile() {
  const selected = document.querySelector('input[name="burn-profile"]:checked');
  return selected ? selected.value : 'smoke';
 }
 function burnSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.burn-gpu-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function burnNvidiaMode() {
  const el = document.querySelector('input[name="burn-nvidia-mode"]:checked');
  return el ? el.value : 'sequential';
 }
 // Disables the multi-GPU modes when fewer than two GPUs are present and
 // falls back to sequential if one of them was selected.
 function burnApplyMultiGPUState(gpuCount) {
  var multiValues = ['parallel', 'ramp-up'];
  var radios = document.querySelectorAll('input[name="burn-nvidia-mode"]');
  radios.forEach(function(el) {
    var isMulti = multiValues.indexOf(el.value) >= 0;
    if (gpuCount < 2 && isMulti) {
      el.disabled = true;
      if (el.checked) {
        var seq = document.querySelector('input[name="burn-nvidia-mode"][value="sequential"]');
        if (seq) seq.checked = true;
      }
      var label = el.closest('label');
      if (label) label.style.opacity = '0.4';
    } else {
      el.disabled = false;
      var label = el.closest('label');
      if (label) label.style.opacity = '';
    }
  });
 }
 function burnUpdateSelectionNote() {
  const note = document.getElementById('burn-selection-note');
  const selected = burnSelectedGPUIndices();
  if (!selected.length) {
    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA burn recipes.';
    return;
  }
  note.textContent = 'Selected NVIDIA GPUs: ' + selected.join(', ') + '. Official and custom NVIDIA tasks will use only these GPUs.';
 }
 function burnRenderGPUList(gpus) {
  const root = document.getElementById('burn-gpu-list');
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    burnUpdateSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    // Server-provided fields are escaped before they reach innerHTML.
    const index = burnEscapeHTML(gpu.index);
    const mem = gpu.memory_mb > 0 ? ' · ' + burnEscapeHTML(gpu.memory_mb) + ' MiB' : '';
    return '<label class="burn-gpu-row">'
      + '<input class="burn-gpu-checkbox" type="checkbox" value="' + index + '" checked onchange="burnUpdateSelectionNote()">'
      + '<span><strong>GPU ' + index + '</strong> — ' + burnEscapeHTML(gpu.name) + mem + '</span>'
      + '</label>';
  }).join('');
  burnApplyMultiGPUState(gpus.length);
  burnUpdateSelectionNote();
 }
 function burnSelectAll() {
  document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = true; });
  burnUpdateSelectionNote();
 }
 function burnSelectNone() {
  document.querySelectorAll('.burn-gpu-checkbox').forEach(function(el) { el.checked = false; });
  burnUpdateSelectionNote();
 }
 function burnLoadGPUs() {
  fetch('/api/gpu/nvidia').then(function(r) {
    return r.json().then(function(body) {
      if (!r.ok) throw new Error(body.error || ('HTTP ' + r.status));
      return body;
    });
  }).then(function(gpus) {
    burnRenderGPUList(gpus);
  }).catch(function(err) {
    document.getElementById('burn-gpu-list').innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + burnEscapeHTML(err.message) + '</p>';
    burnUpdateSelectionNote();
  });
 }
 // POSTs one task to /api/sat/<target>/run. When useSelectedNvidia is set the
 // selected GPU indices and the chosen multi-GPU mode are added to the body.
 function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
  const body = Object.assign({ profile: burnProfile(), display_name: label }, extra || {});
  if (useSelectedNvidia) {
    const selected = burnSelectedGPUIndices();
    if (!selected.length) {
      return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
    }
    body.gpu_indices = selected;
    const bMode = burnNvidiaMode();
    if (bMode === 'ramp-up' && selected.length > 1) {
      body.stagger_gpu_start = true;
    } else if (bMode === 'parallel' && selected.length > 1) {
      body.parallel_gpus = true;
    }
  }
  return fetch('/api/sat/' + target + '/run', {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
  }).then(function(r) {
    return r.json().then(function(payload) {
      if (!r.ok) throw new Error(payload.error || ('HTTP ' + r.status));
      return payload;
    });
  });
 }
 function streamTask(taskId, label) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  term.textContent = 'Task ' + taskId + ' queued. Streaming...\n';
  biES = new EventSource('/api/tasks/' + taskId + '/stream');
  biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
  biES.addEventListener('done', function(e) {
    biES.close();
    biES = null;
    term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
    term.scrollTop = term.scrollHeight;
  });
 }
 function streamBurnTask(taskId, label, resetTerminal) {
  return streamBurnTaskSet([taskId], label, resetTerminal);
 }
 // Streams the given tasks one after another and resolves with
 // {ok, error}; it never rejects, failures are counted instead.
 function streamBurnTaskSet(taskIds, label, resetTerminal) {
  if (biES) { biES.close(); biES = null; }
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— ' + label + ' [' + burnProfile() + ']';
  const term = document.getElementById('bi-terminal');
  if (resetTerminal) {
    term.textContent = '';
  }
  if (!Array.isArray(taskIds) || !taskIds.length) {
    term.textContent += 'ERROR: no tasks queued.\n';
    return Promise.resolve({ok:false, error:'no tasks queued'});
  }
  const streamNext = function(idx, failures) {
    if (idx >= taskIds.length) {
      return Promise.resolve({ok: failures === 0, error: failures ? (failures + ' task(s) failed') : ''});
    }
    const taskId = taskIds[idx];
    term.textContent += '[' + (idx + 1) + '/' + taskIds.length + '] Task ' + taskId + ' queued. Streaming...\n';
    return new Promise(function(resolve) {
      biES = new EventSource('/api/tasks/' + taskId + '/stream');
      biES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
      biES.addEventListener('done', function(e) {
        biES.close();
        biES = null;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
        resolve(failures + (e.data ? 1 : 0));
      });
      biES.onerror = function() {
        if (biES) {
          biES.close();
          biES = null;
        }
        term.textContent += '\nERROR: stream disconnected.\n';
        term.scrollTop = term.scrollHeight;
        resolve(failures + 1);
      };
    }).then(function(nextFailures) {
      return streamNext(idx + 1, nextFailures);
    });
  };
  return streamNext(0, 0);
 }
 // Enqueues and streams every checked, enabled task in order. An enqueue
 // error stops the sequence and is reported exactly once; stream failures
 // do not stop the sequence but are counted in the final status line.
 function runBurnTaskSet(tasks, statusElId) {
  const enabled = tasks.filter(function(t) {
    const el = document.getElementById(t.id);
    return el && el.checked && !el.disabled;
  });
  const status = statusElId ? document.getElementById(statusElId) : null;
  if (status) status.textContent = '';
  if (!enabled.length) {
    if (status) status.textContent = 'No tasks selected.';
    return;
  }
  const term = document.getElementById('bi-terminal');
  document.getElementById('bi-output').style.display = 'block';
  document.getElementById('bi-title').textContent = '— Burn one by one [' + burnProfile() + ']';
  term.textContent = '';
  let failed = 0;
  const runNext = function(idx) {
    if (idx >= enabled.length) {
      if (status) status.textContent = 'Completed ' + enabled.length + ' task(s)' + (failed ? ', ' + failed + ' with errors.' : '.');
      return Promise.resolve();
    }
    const t = enabled[idx];
    term.textContent += '\n[' + (idx + 1) + '/' + enabled.length + '] ' + t.label + '\n';
    if (status) status.textContent = 'Running ' + (idx + 1) + '/' + enabled.length + '...';
    return enqueueBurnTask(t.target, t.label, t.extra, !!t.nvidia)
      .then(function(d) {
        return streamBurnTaskSet(burnTaskIDs(d), t.label, false);
      })
      .then(function(result) {
        if (result && !result.ok) failed++;
        return runNext(idx + 1);
      });
  };
  // A single handler on the outermost promise: attaching it inside runNext
  // would re-log the same error once per recursion level.
  return runNext(0).catch(function(err) {
    if (status) status.textContent = 'Error: ' + err.message;
    document.getElementById('bi-output').style.display = 'block';
    term.textContent += 'ERROR: ' + err.message + '\n';
  });
 }
 function runPlatformStress() {
  const status = document.getElementById('burn-all-status');
  const comps = [];
  const computeIDs = ['burn-cpu', 'burn-mem-stress', 'burn-sat-stress'];
  const gpuIDs = ['burn-nvidia-compute', 'burn-gpu-bee', 'burn-gpu-john', 'burn-gpu-rvs'];
  const hasChecked = function(ids) {
    return ids.some(function(id) {
      const el = document.getElementById(id);
      return el && el.checked && !el.disabled;
    });
  };
  if (hasChecked(computeIDs)) comps.push('cpu');
  if (hasChecked(gpuIDs)) comps.push('gpu');
  if (!comps.length) {
    if (status) status.textContent = 'Select at least one test in GPU Max Load or Compute Stress.';
    return;
  }
  if (status) status.textContent = '';
  enqueueBurnTask('platform-stress', 'Platform Thermal Cycling', {platform_components: comps}, false).then(function(d) {
    // Accept both task_id and task_ids response shapes.
    const ids = burnTaskIDs(d);
    if (!ids.length) throw new Error('no tasks queued');
    streamTask(ids[0], 'Platform Thermal Cycling');
  }).catch(function(err) {
    if (status) status.textContent = 'Error: ' + err.message;
  });
 }
 function runAllBurnTasks() {
  const status = document.getElementById('burn-all-status');
  const all = [
    {id:'burn-nvidia-compute',target:'nvidia-compute',label:'NVIDIA Max Compute Load (dcgmproftester)',nvidia:true},
    {id:'burn-gpu-bee',target:'nvidia-stress',label:'GPU Burn (bee-gpu-burn)',nvidia:true,extra:{loader:'builtin'}},
    {id:'burn-gpu-john',target:'nvidia-stress',label:'John GPU Stress (john/OpenCL)',nvidia:true,extra:{loader:'john'}},
    {id:'burn-gpu-rvs',target:'amd-stress',label:'AMD GPU Stress (rvs gst)'},
    {id:'burn-cpu',target:'cpu',label:'CPU Burn-in'},
    {id:'burn-mem-stress',target:'memory-stress',label:'Memory Burn-in'},
    {id:'burn-sat-stress',target:'sat-stress',label:'SAT Stress (stressapptest)'},
  ];
  status.textContent = 'Enqueuing...';
  runBurnTaskSet(all, 'burn-all-status');
 }
 // Tool availability is best-effort: on failure the GPU checkboxes simply stay disabled.
 fetch('/api/gpu/tools').then(function(r) { return r.json(); }).then(function(tools) {
  const map = {
    'nvidia-compute': {cb:'burn-nvidia-compute', note:'note-nvidia-compute', reason:'dcgmproftester not available or NVIDIA driver not running'},
    'bee-gpu-burn': {cb:'burn-gpu-bee', note:'note-bee', reason:'bee-gpu-burn not available or NVIDIA driver not running'},
    'john': {cb:'burn-gpu-john', note:'note-john', reason:'bee-john-gpu-stress not available or NVIDIA driver not running'},
    'rvs': {cb:'burn-gpu-rvs', note:'note-rvs', reason:'AMD driver not running'},
  };
  tools.forEach(function(t) {
    const spec = map[t.id];
    if (!spec) return;
    const cb = document.getElementById(spec.cb);
    const note = document.getElementById(spec.note);
    if (!cb) return;
    if (t.available) {
      cb.disabled = false;
    } else if (note) {
      note.textContent = '— ' + spec.reason;
    }
  });
 }).catch(function() {});
 burnLoadGPUs();
 </script>`
 }
--- a/audit/internal/webui/page_export_tools.go
+++ b/audit/internal/webui/page_export_tools.go
@@ -0,0 +1,434 @@
 package webui
 import (
 	"fmt"
 	"html"
 	"net/url"
 	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 )
 // renderExport renders the Export page: the support bundle card, a table of
 // files found under exportDir (each linked through /export/file?path=...),
 // and the USB export card.
 //
 // A failure to list exportDir is shown in the table instead of being silently
 // reported as "No export files found.".
 func renderExport(exportDir string) string {
 	entries, err := listExportFiles(exportDir)
 	var rows strings.Builder
 	switch {
 	case err != nil:
 		// Surface the listing failure; the rest of the page is still usable.
 		fmt.Fprintf(&rows, `<tr><td style="color:var(--crit-fg)">Failed to list export files: %s</td></tr>`,
 			html.EscapeString(err.Error()))
 	case len(entries) == 0:
 		rows.WriteString(`<tr><td style="color:var(--muted)">No export files found.</td></tr>`)
 	default:
 		for _, e := range entries {
 			// The path is query-escaped for the URL and HTML-escaped for display.
 			fmt.Fprintf(&rows, `<tr><td><a href="/export/file?path=%s" target="_blank">%s</a></td></tr>`,
 				url.QueryEscape(e), html.EscapeString(e))
 		}
 	}
 	return `<div class="grid2">
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Creates a tar.gz archive of all audit files, SAT results, and logs.</p>
 ` + renderSupportBundleInline() + `
 </div></div>
 <div class="card"><div class="card-head">Export Files</div><div class="card-body">
 <table><tr><th>File</th></tr>` + rows.String() + `</table>
 </div></div>
 </div>
 ` + renderUSBExportCard()
 }
 // listExportFiles returns the sorted paths of all regular (non-directory)
 // entries below exportDir, relative to exportDir.
 //
 // Surrounding whitespace in exportDir is ignored. A directory that does not
 // exist yields an empty list and a nil error; any other walk error is
 // returned with a nil list.
 func listExportFiles(exportDir string) ([]string, error) {
 	// Trim once and use the same root for both the walk and filepath.Rel;
 	// mixing the trimmed and the raw value makes Rel fail on padded input.
 	root := strings.TrimSpace(exportDir)
 	var entries []string
 	err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			return err
 		}
 		if info.IsDir() {
 			return nil
 		}
 		rel, err := filepath.Rel(root, path)
 		if err != nil {
 			return err
 		}
 		entries = append(entries, rel)
 		return nil
 	})
 	if err != nil && !os.IsNotExist(err) {
 		return nil, err
 	}
 	sort.Strings(entries)
 	return entries, nil
 }
 // renderSupportBundleInline returns the "Download Support Bundle" button, its
 // status line and the script that fetches /export/support.tar.gz and triggers
 // a browser download of the resulting blob.
 //
 // The script remembers the button's original label and restores exactly that
 // label when the request finishes, so the arrow glyph no longer changes after
 // the first download.
 func renderSupportBundleInline() string {
 	return `<button id="support-bundle-btn" class="btn btn-primary" onclick="supportBundleDownload()">&#8595; Download Support Bundle</button>
 <div id="support-bundle-status" style="margin-top:10px;font-size:13px;color:var(--muted)"></div>
 <script>
 window.supportBundleDownload = function() {
  var btn = document.getElementById('support-bundle-btn');
  var status = document.getElementById('support-bundle-status');
  // Keep the rendered label so it can be restored verbatim afterwards.
  var original = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Building...';
  status.textContent = 'Collecting logs and export data\u2026';
  status.style.color = 'var(--muted)';
  // Fallback name, used when the response carries no Content-Disposition filename.
  var filename = 'bee-support.tar.gz';
  fetch('/export/support.tar.gz')
    .then(function(r) {
      if (!r.ok) throw new Error('HTTP ' + r.status);
      var cd = r.headers.get('Content-Disposition') || '';
      var m = cd.match(/filename="?([^";]+)"?/);
      if (m) filename = m[1];
      return r.blob();
    })
    .then(function(blob) {
      // Download via a temporary object URL and a synthetic link click.
      var url = URL.createObjectURL(blob);
      var a = document.createElement('a');
      a.href = url;
      a.download = filename;
      document.body.appendChild(a);
      a.click();
      document.body.removeChild(a);
      URL.revokeObjectURL(url);
      status.textContent = 'Download started.';
      status.style.color = 'var(--ok-fg)';
    })
    .catch(function(e) {
      status.textContent = 'Error: ' + e.message;
      status.style.color = 'var(--crit-fg)';
    })
    .finally(function() {
      btn.disabled = false;
      btn.textContent = original;
    });
 };
 </script>`
 }
 // renderUSBExportCard wraps the inline USB export widget in a card whose
 // header carries a Refresh button wired to usbRefresh().
 func renderUSBExportCard() string {
 	var card strings.Builder
 	// Card opening: header with the refresh button, then the body container.
 	card.WriteString(`<div class="card" style="margin-top:16px">
  <div class="card-head">Export to USB
    <button class="btn btn-sm btn-secondary" onclick="usbRefresh()" style="margin-left:auto">&#8635; Refresh</button>
  </div>
  <div class="card-body">`)
 	card.WriteString(renderUSBExportInline())
 	card.WriteString(`</div>
 </div>`)
 	return card.String()
 }
 // renderUSBExportInline returns the USB export widget: status placeholders
 // plus a script that lists removable targets from GET /api/export/usb and
 // writes the audit JSON or support bundle through POST /api/export/usb/<type>.
 //
 // Device fields such as the filesystem label and model originate from the
 // plugged-in medium, so every field is HTML-escaped before it is written to
 // innerHTML. Non-array or error responses are reported instead of crashing
 // the render.
 func renderUSBExportInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Write audit JSON or support bundle directly to a removable USB drive.</p>
 <div id="usb-status" style="font-size:13px;color:var(--muted)">Scanning for USB devices...</div>
 <div id="usb-targets" style="margin-top:12px"></div>
 <div id="usb-msg" style="margin-top:10px;font-size:13px"></div>
 <script>
 (function(){
 // Escapes a value for safe interpolation into innerHTML.
 function usbEscapeHTML(value) {
  return String(value == null ? '' : value).replace(/[&<>"']/g, function(ch) {
    return {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[ch];
  });
 }
 // Returns the message of an Error, or the value itself as a string.
 function usbErrorText(e) {
  return e && e.message ? e.message : String(e);
 }
 function usbRefresh() {
  document.getElementById('usb-status').textContent = 'Scanning...';
  document.getElementById('usb-targets').innerHTML = '';
  document.getElementById('usb-msg').textContent = '';
  fetch('/api/export/usb').then(r => r.json().then(body => {
    if (!r.ok) throw new Error((body && body.error) || ('HTTP ' + r.status));
    return body;
  })).then(targets => {
    // Anything that is not an array is treated as "no devices".
    const list = Array.isArray(targets) ? targets : [];
    window._usbTargets = list;
    const st = document.getElementById('usb-status');
    const ct = document.getElementById('usb-targets');
    if (list.length === 0) {
      st.textContent = 'No removable USB devices found.';
      return;
    }
    st.textContent = list.length + ' device(s) found:';
    ct.innerHTML = '<table><tr><th>Device</th><th>FS</th><th>Size</th><th>Label</th><th>Model</th><th>Actions</th></tr>' +
      list.map((t, idx) => {
        const dev = usbEscapeHTML(t.device);
        const fsType = usbEscapeHTML(t.fs_type);
        const size = usbEscapeHTML(t.size);
        const label = usbEscapeHTML(t.label);
        const model = usbEscapeHTML(t.model);
        return '<tr>' +
          '<td style="font-family:monospace">'+dev+'</td>' +
          '<td>'+fsType+'</td>' +
          '<td>'+size+'</td>' +
          '<td>'+label+'</td>' +
          '<td style="font-size:12px;color:var(--muted)">'+model+'</td>' +
          '<td style="white-space:nowrap">' +
            '<button class="btn btn-sm btn-primary" onclick="usbExport(\'audit\','+idx+',this)">Audit JSON</button> ' +
            '<button class="btn btn-sm btn-secondary" onclick="usbExport(\'bundle\','+idx+',this)">Support Bundle</button>' +
            '<div class="usb-row-msg" style="margin-top:6px;font-size:12px;color:var(--muted)"></div>' +
          '</td></tr>';
      }).join('') + '</table>';
  }).catch(e => {
    document.getElementById('usb-status').textContent = 'Error: ' + usbErrorText(e);
  });
 }
 // Buttons only carry the row index; the full target object is looked up in
 // window._usbTargets and sent back to the server unchanged.
 window.usbExport = function(type, targetIndex, btn) {
  const target = (window._usbTargets || [])[targetIndex];
  if (!target) {
    const msg = document.getElementById('usb-msg');
    msg.style.color = 'var(--err,red)';
    msg.textContent = 'Error: USB target not found. Refresh and try again.';
    return;
  }
  const msg = document.getElementById('usb-msg');
  const row = btn ? btn.closest('td') : null;
  const rowMsg = row ? row.querySelector('.usb-row-msg') : null;
  const originalText = btn ? btn.textContent : '';
  if (btn) {
    btn.disabled = true;
    btn.textContent = 'Exporting...';
  }
  if (rowMsg) {
    rowMsg.style.color = 'var(--muted)';
    rowMsg.textContent = 'Working...';
  }
  msg.style.color = 'var(--muted)';
  msg.textContent = 'Exporting ' + (type === 'bundle' ? 'support bundle' : 'audit JSON') + ' to ' + (target.device||'') + '...';
  fetch('/api/export/usb/'+type, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(target)
  }).then(async r => {
    const d = await r.json();
    if (!r.ok) throw new Error(d.error || ('HTTP ' + r.status));
    return d;
  }).then(d => {
    msg.style.color = 'var(--ok,green)';
    msg.textContent = d.message || 'Done.';
    if (rowMsg) {
      rowMsg.style.color = 'var(--ok,green)';
      rowMsg.textContent = d.message || 'Done.';
    }
  }).catch(e => {
    msg.style.color = 'var(--err,red)';
    msg.textContent = 'Error: ' + usbErrorText(e);
    if (rowMsg) {
      rowMsg.style.color = 'var(--err,red)';
      rowMsg.textContent = 'Error: ' + usbErrorText(e);
    }
  }).finally(() => {
    if (btn) {
      btn.disabled = false;
      btn.textContent = originalText;
    }
  });
 };
 window.usbRefresh = usbRefresh;
 usbRefresh();
 })();
 </script>`
 }
 // renderNvidiaSelfHealInline returns the NVIDIA self-heal widget: a driver
 // restart button, a per-GPU status table with reset buttons, and an output
 // terminal. The script talks to /api/gpu/nvidia-status,
 // /api/gpu/nvidia-reset and /api/services/action.
 //
 // All GPU fields rendered through innerHTML are HTML-escaped, the GPU index
 // is coerced to a number before it is placed in the onclick handler, and the
 // optional page-level loadServices() hook is only called when it exists so a
 // missing hook cannot prevent the GPU status from reloading.
 func renderNvidiaSelfHealInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:12px">Inspect NVIDIA GPU health, restart the bee-nvidia driver service, and issue a per-GPU reset when the driver reports reset required.</p>
 <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:12px">
  <button id="nvidia-restart-btn" class="btn btn-secondary" onclick="nvidiaRestartDrivers()">Restart GPU Drivers</button>
  <button class="btn btn-sm btn-secondary" onclick="loadNvidiaSelfHeal()">&#8635; Refresh</button>
 </div>
 <div id="nvidia-self-heal-status" style="font-size:13px;color:var(--muted);margin-bottom:12px">Loading NVIDIA GPU status...</div>
 <div id="nvidia-self-heal-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div id="nvidia-self-heal-out" style="display:none;margin-top:12px">
  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
    <span id="nvidia-self-heal-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
    <span id="nvidia-self-heal-out-status" style="font-size:12px"></span>
  </div>
  <div id="nvidia-self-heal-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
 </div>
 <script>
 // Escapes a value for safe interpolation into innerHTML.
 function nvidiaEscapeHTML(value) {
  return String(value == null ? '' : value).replace(/[&<>"']/g, function(ch) {
    return {'&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'}[ch];
  });
 }
 // Shows the output box with the given label, status marker and text.
 function nvidiaSelfHealShowResult(label, status, output) {
  var out = document.getElementById('nvidia-self-heal-out');
  var term = document.getElementById('nvidia-self-heal-terminal');
  var statusEl = document.getElementById('nvidia-self-heal-out-status');
  var labelEl = document.getElementById('nvidia-self-heal-out-label');
  out.style.display = 'block';
  labelEl.textContent = label;
  term.textContent = output || '(no output)';
  term.scrollTop = term.scrollHeight;
  if (status === 'ok') {
    statusEl.textContent = '✓ done';
    statusEl.style.color = 'var(--ok-fg, #2c662d)';
  } else {
    statusEl.textContent = '✗ failed';
    statusEl.style.color = 'var(--crit-fg, #9f3a38)';
  }
 }
 function nvidiaRestartDrivers() {
  var btn = document.getElementById('nvidia-restart-btn');
  var original = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Restarting...';
  nvidiaSelfHealShowResult('restart bee-nvidia', 'ok', 'Running...');
  fetch('/api/services/action', {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({name:'bee-nvidia', action:'restart'})
  }).then(r=>r.json()).then(d => {
    nvidiaSelfHealShowResult('restart bee-nvidia', d.status || 'error', d.output || d.error || '(no output)');
    setTimeout(function() {
      // loadServices is provided by the hosting page, if at all.
      if (typeof loadServices === 'function') loadServices();
      loadNvidiaSelfHeal();
    }, 800);
  }).catch(e => {
    nvidiaSelfHealShowResult('restart bee-nvidia', 'error', 'Request failed: ' + e);
  }).finally(() => {
    btn.disabled = false;
    btn.textContent = original;
  });
 }
 function nvidiaResetGPU(index, btn) {
  var original = btn.textContent;
  btn.disabled = true;
  btn.textContent = 'Resetting...';
  nvidiaSelfHealShowResult('reset gpu ' + index, 'ok', 'Running...');
  fetch('/api/gpu/nvidia-reset', {
    method:'POST',
    headers:{'Content-Type':'application/json'},
    body:JSON.stringify({index:index})
  }).then(r=>r.json()).then(d => {
    nvidiaSelfHealShowResult('reset gpu ' + index, d.status || 'error', d.output || d.error || '(no output)');
    setTimeout(loadNvidiaSelfHeal, 1000);
  }).catch(e => {
    nvidiaSelfHealShowResult('reset gpu ' + index, 'error', 'Request failed: ' + e);
  }).finally(() => {
    btn.disabled = false;
    btn.textContent = original;
  });
 }
 function loadNvidiaSelfHeal() {
  var status = document.getElementById('nvidia-self-heal-status');
  var table = document.getElementById('nvidia-self-heal-table');
  status.textContent = 'Loading NVIDIA GPU status...';
  status.style.color = 'var(--muted)';
  table.innerHTML = '<p style="color:var(--muted);font-size:13px">Loading...</p>';
  fetch('/api/gpu/nvidia-status').then(r=>r.json()).then(gpus => {
    if (!Array.isArray(gpus) || gpus.length === 0) {
      status.textContent = 'No NVIDIA GPUs detected or nvidia-smi is unavailable.';
      table.innerHTML = '';
      return;
    }
    status.textContent = gpus.length + ' NVIDIA GPU(s) detected.';
    const rows = gpus.map(g => {
      // Numeric coercion keeps the inline onclick argument free of markup.
      const index = Number(g.index);
      const serial = g.serial || '';
      const bdf = g.bdf || '';
      const id = serial || bdf || ('gpu-' + g.index);
      const badge = g.status === 'OK' ? 'badge-ok' : g.status === 'RESET_REQUIRED' ? 'badge-err' : 'badge-warn';
      const details = [];
      if (serial) details.push('serial ' + serial);
      if (bdf) details.push('bdf ' + bdf);
      if (g.parse_failure && g.raw_line) details.push(g.raw_line);
      const detailHTML = details.map(function(d) { return nvidiaEscapeHTML(d); }).join(' | ');
      return '<tr>'
        + '<td style="white-space:nowrap">' + nvidiaEscapeHTML(g.index) + '</td>'
        + '<td>' + nvidiaEscapeHTML(g.name || 'unknown') + '</td>'
        + '<td style="font-family:monospace">' + nvidiaEscapeHTML(id) + '</td>'
        + '<td><span class="badge ' + badge + '">' + nvidiaEscapeHTML(g.status || 'UNKNOWN') + '</span>'
        + (details.length ? '<div style="margin-top:4px;font-size:12px;color:var(--muted)">' + detailHTML + '</div>' : '')
        + '</td>'
        + '<td style="white-space:nowrap"><button class="btn btn-sm btn-secondary" onclick="nvidiaResetGPU(' + index + ', this)">Reset GPU</button></td>'
        + '</tr>';
    }).join('');
    table.innerHTML = '<table><tr><th>GPU</th><th>Model</th><th>ID</th><th>Status</th><th>Action</th></tr>' + rows + '</table>';
  }).catch(e => {
    status.textContent = 'Error loading NVIDIA GPU status: ' + e;
    status.style.color = 'var(--crit-fg, #9f3a38)';
    table.innerHTML = '';
  });
 }
 loadNvidiaSelfHeal();
 </script>`
 }
 // renderTools returns the HTML body of the Tools page. It emits the System
 // Install card (install to RAM plus the inline disk installer), the Support
 // Bundle / USB export card, the Tool Check card, and the NVIDIA Self Heal,
 // Network and Services cards, each filled by the corresponding render*Inline
 // helper. Two inline scripts query /api/system/ram-status and /api/tools/check.
 //
 // Every fetch in the inline scripts reports failures in the page instead of
 // leaving a "Checking..." placeholder or a permanently disabled button, and
 // tool names and paths are HTML-escaped before they are written to innerHTML.
 func renderTools() string {
 	return `<div class="card" style="margin-bottom:16px">
  <div class="card-head">System Install</div>
  <div class="card-body">
    <div style="margin-bottom:20px">
    <div style="font-weight:600;margin-bottom:8px">Install to RAM</div>
    <p id="boot-source-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Detecting boot source...</p>
    <p id="ram-status-text" style="color:var(--muted);font-size:13px;margin-bottom:8px">Checking...</p>
    <button id="ram-install-btn" class="btn btn-primary" onclick="installToRAM()" style="display:none">&#9654; Copy to RAM</button>
    </div>
    <div style="border-top:1px solid var(--line);padding-top:20px">
    <div style="font-weight:600;margin-bottom:8px">Install to Disk</div>` +
 		renderInstallInline() + `
    </div>
  </div>
 </div>
 <script>
 fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
  const boot = document.getElementById('boot-source-text');
  const txt = document.getElementById('ram-status-text');
  const btn = document.getElementById('ram-install-btn');
  let source = d.device || d.source || 'unknown source';
  let kind = d.kind || 'unknown';
  let label = source;
  if (kind === 'ram') label = 'RAM';
  else if (kind === 'usb') label = 'USB (' + source + ')';
  else if (kind === 'cdrom') label = 'CD-ROM (' + source + ')';
  else if (kind === 'disk') label = 'disk (' + source + ')';
  else label = source;
  boot.textContent = 'Current boot source: ' + label + '.';
  txt.textContent = d.message || 'Checking...';
  if (d.status === 'ok' || d.in_ram) {
    txt.style.color = 'var(--ok, green)';
  } else if (d.status === 'failed') {
    txt.style.color = 'var(--err, #b91c1c)';
  } else {
    txt.style.color = 'var(--muted)';
  }
  if (d.can_start_task) {
    btn.style.display = '';
    btn.disabled = false;
  } else {
    btn.style.display = 'none';
  }
 }).catch(e=>{
  // Surface the failure instead of leaving the placeholders in place forever.
  const txt = document.getElementById('ram-status-text');
  document.getElementById('boot-source-text').textContent = 'Boot source unavailable.';
  txt.textContent = 'Failed to load RAM status: ' + e;
  txt.style.color = 'var(--err, #b91c1c)';
 });
 function installToRAM() {
  const btn = document.getElementById('ram-install-btn');
  const txt = document.getElementById('ram-status-text');
  btn.disabled = true;
  fetch('/api/system/install-to-ram', {method:'POST'}).then(r=>r.json().then(d=>{
    if (!r.ok) throw new Error(d.error || r.statusText);
    if (!d.task_id) throw new Error('missing task id');
    window.location.href = '/tasks#' + d.task_id;
  })).catch(e=>{
    // Re-enable the button so the operator can retry after a failed request.
    btn.disabled = false;
    txt.textContent = 'Failed to start RAM install: ' + e;
    txt.style.color = 'var(--err, #b91c1c)';
  });
 }
 </script>
 <div class="card"><div class="card-head">Support Bundle</div><div class="card-body">
 <p style="font-size:13px;color:var(--muted);margin-bottom:12px">Downloads a tar.gz archive of all audit files, SAT results, and logs.</p>
 ` + renderSupportBundleInline() + `
 <div style="border-top:1px solid var(--border);margin-top:16px;padding-top:16px">
  <div style="font-weight:600;margin-bottom:8px">Export to USB</div>
  ` + renderUSBExportInline() + `
 </div>
 </div></div>
 <div class="card"><div class="card-head">Tool Check <button class="btn btn-sm btn-secondary" onclick="checkTools()" style="margin-left:auto">&#8635; Check</button></div>
 <div class="card-body"><div id="tools-table"><p style="color:var(--muted);font-size:13px">Checking...</p></div></div></div>
 <div class="card"><div class="card-head">NVIDIA Self Heal</div><div class="card-body">` +
 		renderNvidiaSelfHealInline() + `</div></div>
 <div class="card"><div class="card-head">Network</div><div class="card-body">` +
 		renderNetworkInline() + `</div></div>
 <div class="card"><div class="card-head">Services</div><div class="card-body">` +
 		renderServicesInline() + `</div></div>
 <script>
 function toolsEscHtml(s) {
  return String(s == null ? '' : s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
 }
 function checkTools() {
  const host = document.getElementById('tools-table');
  host.innerHTML = '<p style="color:var(--muted);font-size:13px">Checking...</p>';
  fetch('/api/tools/check').then(r=>r.json()).then(tools => {
    const rows = (Array.isArray(tools) ? tools : []).map(t =>
      '<tr><td>'+toolsEscHtml(t.Name)+'</td><td><span class="badge '+(t.OK ? 'badge-ok' : 'badge-err')+'">'+(t.OK ? '&#10003; '+toolsEscHtml(t.Path) : '&#10007; missing')+'</span></td></tr>'
    ).join('');
    host.innerHTML =
      '<table><tr><th>Tool</th><th>Status</th></tr>'+rows+'</table>';
  }).catch(e => {
    host.textContent = 'Tool check failed: ' + e;
  });
 }
 checkTools();
 </script>`
 }
 // renderExportIndex builds a minimal standalone HTML page listing the files
 // returned by listExportFiles for exportDir. Each entry links to
 // /export/file?path=<entry>, with the entry query-escaped in the URL and
 // HTML-escaped in the link text. When there are no entries a single
 // placeholder item is emitted. Any error from listExportFiles is returned
 // unchanged together with an empty string.
 func renderExportIndex(exportDir string) (string, error) {
 	files, err := listExportFiles(exportDir)
 	if err != nil {
 		return "", err
 	}
 	var page strings.Builder
 	page.WriteString(`<!DOCTYPE html><html><head><meta charset="utf-8"><title>Bee Export Files</title></head><body>` +
 		`<h1>Bee Export Files</h1><ul>`)
 	if len(files) == 0 {
 		page.WriteString(`<li>No export files found.</li>`)
 	}
 	for _, name := range files {
 		page.WriteString(`<li><a href="/export/file?path=`)
 		page.WriteString(url.QueryEscape(name))
 		page.WriteString(`">`)
 		page.WriteString(html.EscapeString(name))
 		page.WriteString(`</a></li>`)
 	}
 	page.WriteString(`</ul></body></html>`)
 	return page.String(), nil
 }
--- a/audit/internal/webui/page_install_tasks.go
+++ b/audit/internal/webui/page_install_tasks.go
@@ -0,0 +1,314 @@
 package webui
 // renderInstallInline returns the disk-installer UI without a wrapping card:
 // a warning banner, the disk table loaded from /api/install/disks, a
 // type-the-device-name confirmation form, and a progress terminal that streams
 // the task log from /api/tasks/<id>/stream after POSTing to /api/install/run.
 //
 // Device names, model strings, sizes and warning texts come from the API and
 // are HTML-escaped (installEsc) before they are interpolated into innerHTML
 // or attribute values, so unusual characters in those strings cannot break or
 // inject markup.
 func renderInstallInline() string {
 	return `
    <div class="alert alert-warn" style="margin-bottom:16px">
      <strong>Warning:</strong> Installing will <strong>completely erase</strong> the selected
      disk and write the live system onto it. All existing data on the target disk will be lost.
      This operation cannot be undone.
    </div>
    <div id="install-loading" style="color:var(--muted);font-size:13px">Loading disk list…</div>
    <div id="install-disk-section" style="display:none">
      <div class="card" style="margin-bottom:0">
        <table id="install-disk-table">
          <thead><tr><th></th><th>Device</th><th>Model</th><th>Size</th><th>Status</th></tr></thead>
          <tbody id="install-disk-tbody"></tbody>
        </table>
      </div>
      <div style="margin-top:12px">
        <button class="btn btn-secondary btn-sm" onclick="installRefreshDisks()">↻ Refresh</button>
      </div>
    </div>
    <div id="install-confirm-section" style="display:none;margin-top:20px">
      <div id="install-confirm-warn" class="alert" style="background:#fff6f6;border:1px solid #e0b4b4;color:#9f3a38;font-size:13px"></div>
      <div class="form-row" style="max-width:360px">
        <label>Type the device name to confirm (e.g. /dev/sda)</label>
        <input type="text" id="install-confirm-input" placeholder="/dev/..." oninput="installCheckConfirm()" autocomplete="off" spellcheck="false">
      </div>
      <button class="btn btn-danger" id="install-start-btn" disabled onclick="installStart()">Install to Disk</button>
      <button class="btn btn-secondary" style="margin-left:8px" onclick="installDeselect()">Cancel</button>
    </div>
    <div id="install-progress-section" style="display:none;margin-top:20px">
      <div class="card-head" style="margin-bottom:8px">Installation Progress</div>
      <div id="install-terminal" class="terminal" style="max-height:500px"></div>
      <div id="install-status" style="margin-top:12px;font-size:13px"></div>
    </div>
 <style>
 #install-disk-tbody tr{cursor:pointer}
 #install-disk-tbody tr.selected td{background:rgba(33,133,208,.1)}
 #install-disk-tbody tr:hover td{background:rgba(33,133,208,.07)}
 </style>
 <script>
 var _installSelected = null;
 // installEsc HTML-escapes any value before it is placed into innerHTML or an attribute.
 function installEsc(s) {
  return String(s == null ? '' : s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
 }
 function installRefreshDisks() {
  // Reset the text too: a previous failed load replaces it with an error message.
  document.getElementById('install-loading').textContent = 'Loading disk list…';
  document.getElementById('install-loading').style.display = '';
  document.getElementById('install-disk-section').style.display = 'none';
  document.getElementById('install-confirm-section').style.display = 'none';
  _installSelected = null;
  fetch('/api/install/disks').then(function(r){ return r.json(); }).then(function(disks){
    document.getElementById('install-loading').style.display = 'none';
    var tbody = document.getElementById('install-disk-tbody');
    tbody.innerHTML = '';
    if (!disks || disks.length === 0) {
      tbody.innerHTML = '<tr><td colspan="5" style="color:var(--muted);text-align:center">No installable disks found</td></tr>';
    } else {
      disks.forEach(function(d) {
        var warnings = (d.warnings || []);
        var statusHtml;
        if (warnings.length === 0) {
          statusHtml = '<span class="badge badge-ok">OK</span>';
        } else {
          var hasSmall = warnings.some(function(w){ return w.indexOf('too small') >= 0; });
          var cls = hasSmall ? 'badge-err' : 'badge-warn';
          statusHtml = warnings.map(function(w){
            return '<span class="badge ' + cls + '" title="' + installEsc(w) + '">' +
              installEsc(w.length > 40 ? w.substring(0,38)+'…' : w) + '</span>';
          }).join(' ');
        }
        var mountedNote = (d.mounted_parts && d.mounted_parts.length > 0)
          ? ' <span style="color:var(--warn-fg);font-size:11px">(mounted)</span>' : '';
        var tr = document.createElement('tr');
        tr.dataset.device = d.device;
        tr.dataset.model = d.model || 'Unknown';
        tr.dataset.size = d.size;
        tr.dataset.warnings = JSON.stringify(warnings);
        tr.innerHTML =
          '<td><input type="radio" name="install-disk" value="' + installEsc(d.device) + '"></td>' +
          '<td><code>' + installEsc(d.device) + '</code>' + mountedNote + '</td>' +
          '<td>' + installEsc(d.model || '—') + '</td>' +
          '<td>' + installEsc(d.size) + '</td>' +
          '<td>' + statusHtml + '</td>';
        tr.addEventListener('click', function(){ installSelectDisk(this); });
        tbody.appendChild(tr);
      });
    }
    document.getElementById('install-disk-section').style.display = '';
  }).catch(function(e){
    document.getElementById('install-loading').textContent = 'Failed to load disk list: ' + e;
  });
 }
 function installSelectDisk(tr) {
  document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
  tr.classList.add('selected');
  var radio = tr.querySelector('input[type=radio]');
  if (radio) radio.checked = true;
  _installSelected = {
    device: tr.dataset.device,
    model: tr.dataset.model,
    size: tr.dataset.size,
    warnings: JSON.parse(tr.dataset.warnings || '[]')
  };
  var warnBox = document.getElementById('install-confirm-warn');
  var warnLines = '<strong>⚠ DANGER:</strong> ' + installEsc(_installSelected.device) +
    ' (' + installEsc(_installSelected.model) + ', ' + installEsc(_installSelected.size) + ')' +
    ' will be <strong>completely erased</strong> and repartitioned. All data will be lost.<br>';
  if (_installSelected.warnings.length > 0) {
    warnLines += '<br>' + _installSelected.warnings.map(function(w){ return '• ' + installEsc(w); }).join('<br>');
  }
  warnBox.innerHTML = warnLines;
  document.getElementById('install-confirm-input').value = '';
  document.getElementById('install-start-btn').disabled = true;
  document.getElementById('install-confirm-section').style.display = '';
  document.getElementById('install-progress-section').style.display = 'none';
 }
 function installDeselect() {
  _installSelected = null;
  document.querySelectorAll('#install-disk-tbody tr').forEach(function(r){ r.classList.remove('selected'); });
  document.querySelectorAll('#install-disk-tbody input[type=radio]').forEach(function(r){ r.checked = false; });
  document.getElementById('install-confirm-section').style.display = 'none';
 }
 function installCheckConfirm() {
  var val = document.getElementById('install-confirm-input').value.trim();
  var ok = _installSelected && val === _installSelected.device;
  document.getElementById('install-start-btn').disabled = !ok;
 }
 function installStart() {
  if (!_installSelected) return;
  document.getElementById('install-confirm-section').style.display = 'none';
  document.getElementById('install-disk-section').style.display = 'none';
  document.getElementById('install-loading').style.display = 'none';
  var prog = document.getElementById('install-progress-section');
  var term = document.getElementById('install-terminal');
  var status = document.getElementById('install-status');
  prog.style.display = '';
  term.textContent = '';
  status.textContent = 'Starting installation…';
  status.style.color = 'var(--muted)';
  fetch('/api/install/run', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({device: _installSelected.device})
  }).then(function(r){
    return r.json().then(function(j){
      if (!r.ok) throw new Error(j.error || r.statusText);
      return j;
    });
  }).then(function(j){
    if (!j.task_id) throw new Error('missing task id');
    installStreamLog(j.task_id);
  }).catch(function(e){
    status.textContent = 'Error: ' + e;
    status.style.color = 'var(--crit-fg)';
  });
 }
 function installStreamLog(taskId) {
  var term = document.getElementById('install-terminal');
  var status = document.getElementById('install-status');
  var es = new EventSource('/api/tasks/' + encodeURIComponent(taskId) + '/stream');
  es.onmessage = function(e) {
    term.textContent += e.data + '\n';
    term.scrollTop = term.scrollHeight;
  };
  es.addEventListener('done', function(e) {
    es.close();
    if (!e.data) {
      status.innerHTML = '<span style="color:var(--ok-fg);font-weight:700">✓ Installation complete.</span> Remove the ISO and reboot.';
      var rebootBtn = document.createElement('button');
      rebootBtn.className = 'btn btn-primary btn-sm';
      rebootBtn.style.marginLeft = '12px';
      rebootBtn.textContent = 'Reboot now';
      rebootBtn.onclick = function(){
        fetch('/api/services/action', {method:'POST',headers:{'Content-Type':'application/json'},
          body: JSON.stringify({name:'', action:'reboot'})});
      };
      status.appendChild(rebootBtn);
    } else {
      status.textContent = '✗ Installation failed: ' + e.data;
      status.style.color = 'var(--crit-fg)';
    }
  });
  es.onerror = function() {
    es.close();
    status.textContent = '✗ Stream disconnected.';
    status.style.color = 'var(--crit-fg)';
  };
 }
 installRefreshDisks();
 </script>
 `
 }
 // renderInstall wraps the inline disk-installer UI produced by
 // renderInstallInline in a standalone card titled
 // "Install Live System to Disk".
 func renderInstall() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Install Live System to Disk</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderInstallInline() + cardClose
 }
 // renderTasks returns the HTML body of the task list page: the Cancel All and
 // Kill Workers controls plus a paginated table (50 rows per page) that an
 // inline script reloads from /api/tasks every 2 seconds.
 //
 // Task ids are treated as opaque strings. They are passed to the inline
 // onclick handlers as HTML-escaped JSON string literals and URL-encoded when
 // placed in /api/tasks/<id>/... request paths, matching the encoding already
 // used for the task links. Unknown status values are HTML-escaped before they
 // are shown.
 func renderTasks() string {
 	return `<div style="display:flex;align-items:center;gap:12px;margin-bottom:16px;flex-wrap:wrap">
 <button class="btn btn-danger btn-sm" onclick="cancelAll()">Cancel All</button>
 <button class="btn btn-sm" style="background:#b45309;color:#fff" onclick="killWorkers()" title="Send SIGKILL to all running test processes (bee-gpu-burn, stress-ng, stressapptest, memtester)">Kill Workers</button>
 <span id="kill-toast" style="font-size:12px;color:var(--muted);display:none"></span>
 <span style="font-size:12px;color:var(--muted)">Open a task to view its saved logs and charts.</span>
 </div>
 <div class="card">
 <div id="tasks-table"><p style="color:var(--muted);font-size:13px;padding:16px">Loading...</p></div>
 </div>
 <script>
 var _taskRefreshTimer = null;
 var _tasksAll = [];
 var _taskPage = 1;
 var _taskPageSize = 50;
 function loadTasks() {
  fetch('/api/tasks').then(r=>r.json()).then(tasks => {
    _tasksAll = Array.isArray(tasks) ? tasks : [];
    if (_tasksAll.length === 0) {
      _taskPage = 1;
      document.getElementById('tasks-table').innerHTML = '<p style="color:var(--muted);font-size:13px;padding:16px">No tasks.</p>';
      return;
    }
    const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
    if (_taskPage > totalPages) _taskPage = totalPages;
    if (_taskPage < 1) _taskPage = 1;
    const start = (_taskPage - 1) * _taskPageSize;
    const pageTasks = _tasksAll.slice(start, start + _taskPageSize);
    const rows = pageTasks.map(t => {
      const dur = t.elapsed_sec ? formatDurSec(t.elapsed_sec) : '';
      const statusClass = {running:'badge-ok',pending:'badge-unknown',done:'badge-ok',failed:'badge-err',cancelled:'badge-unknown'}[t.status]||'badge-unknown';
      const statusLabel = {running:'&#9654; running',pending:'pending',done:'&#10003; done',failed:'&#10007; failed',cancelled:'cancelled'}[t.status]||escHtml(String(t.status));
      // JSON.stringify yields a valid JS string literal; escHtml makes it safe inside the double-quoted onclick attribute.
      const idArg = escHtml(JSON.stringify(String(t.id)));
      let actions = '<a class="btn btn-sm btn-secondary" href="/tasks/'+encodeURIComponent(t.id)+'">Open</a>';
      if (t.status === 'running' || t.status === 'pending') {
        actions += ' <button class="btn btn-sm btn-danger" onclick="cancelTask('+idArg+')">Cancel</button>';
      }
      if (t.status === 'pending') {
        actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority('+idArg+',1)" title="Increase priority">&#8679;</button>';
        actions += ' <button class="btn btn-sm btn-secondary" onclick="setPriority('+idArg+',-1)" title="Decrease priority">&#8681;</button>';
      }
      return '<tr><td><a href="/tasks/'+encodeURIComponent(t.id)+'">'+escHtml(t.name)+'</a></td>' +
        '<td><span class="badge '+statusClass+'">'+statusLabel+'</span></td>' +
        '<td style="font-size:12px;color:var(--muted)">'+fmtTime(t.created_at)+'</td>' +
        '<td style="font-size:12px;color:var(--muted)">'+dur+'</td>' +
        '<td>'+t.priority+'</td>' +
        '<td>'+actions+'</td></tr>';
    }).join('');
    const showingFrom = start + 1;
    const showingTo = Math.min(start + pageTasks.length, _tasksAll.length);
    const pager =
      '<div style="display:flex;align-items:center;justify-content:space-between;gap:12px;flex-wrap:wrap;padding:12px 14px;border-top:1px solid var(--border-lite);background:var(--surface-2)">' +
        '<div style="font-size:12px;color:var(--muted)">Showing '+showingFrom+'-'+showingTo+' of '+_tasksAll.length+' tasks</div>' +
        '<div style="display:flex;align-items:center;gap:8px">' +
          '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage-1)+')" '+(_taskPage <= 1 ? 'disabled' : '')+'>Previous</button>' +
          '<span style="font-size:12px;color:var(--muted)">Page '+_taskPage+' / '+totalPages+'</span>' +
          '<button class="btn btn-sm btn-secondary" onclick="setTaskPage('+(_taskPage+1)+')" '+(_taskPage >= totalPages ? 'disabled' : '')+'>Next</button>' +
        '</div>' +
      '</div>';
    document.getElementById('tasks-table').innerHTML =
      '<table><tr><th>Name</th><th>Status</th><th>Created</th><th>Duration</th><th>Priority</th><th>Actions</th></tr>'+rows+'</table>' + pager;
  });
 }
 function escHtml(s) { return (s||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;'); }
 function fmtTime(s) { if (!s) return ''; try { return new Date(s).toLocaleTimeString(); } catch(e){ return s; } }
 function formatDurSec(sec) {
  sec = Math.max(0, Math.round(sec||0));
  if (sec < 60) return sec+'s';
  const m = Math.floor(sec/60), ss = sec%60;
  return m+'m '+ss+'s';
 }
 function setTaskPage(page) {
  const totalPages = Math.max(1, Math.ceil(_tasksAll.length / _taskPageSize));
  _taskPage = Math.min(totalPages, Math.max(1, page));
  loadTasks();
 }
 function cancelTask(id) {
  fetch('/api/tasks/'+encodeURIComponent(id)+'/cancel',{method:'POST'}).then(()=>loadTasks());
 }
 function cancelAll() {
  fetch('/api/tasks/cancel-all',{method:'POST'}).then(()=>loadTasks());
 }
 function killWorkers() {
  if (!confirm('Send SIGKILL to all running test workers (bee-gpu-burn, stress-ng, stressapptest, memtester)?\n\nThis will also cancel all queued and running tasks.')) return;
  fetch('/api/tasks/kill-workers',{method:'POST'})
    .then(r=>r.json())
    .then(d=>{
      loadTasks();
      var toast = document.getElementById('kill-toast');
      var parts = [];
      if (d.cancelled > 0) parts.push(d.cancelled+' task'+(d.cancelled===1?'':'s')+' cancelled');
      if (d.killed > 0) parts.push(d.killed+' process'+(d.killed===1?'':'es')+' killed');
      toast.textContent = parts.length ? parts.join(', ')+'.' : 'No processes found.';
      toast.style.display = '';
      setTimeout(()=>{ toast.style.display='none'; }, 5000);
    });
 }
 function setPriority(id, delta) {
  fetch('/api/tasks/'+encodeURIComponent(id)+'/priority',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({delta:delta})})
    .then(()=>loadTasks());
 }
 loadTasks();
 _taskRefreshTimer = setInterval(loadTasks, 2000);
 </script>`
 }
--- a/audit/internal/webui/page_metrics.go
+++ b/audit/internal/webui/page_metrics.go
@@ -0,0 +1,238 @@
 package webui
 // renderMetrics returns the HTML body of the live metrics page: server load,
 // CPU and ambient temperature, power and fan charts, followed by a GPU section
 // that can show either one chart per metric or one overview chart per GPU.
 //
 // The inline script reloads every visible chart image every 3 seconds and
 // re-reads /api/metrics/latest every 5 seconds to show or hide the fan card
 // and GPU section. GPU names come from /api/gpu/nvidia. That lookup is cached
 // after a successful response but retried after a failure, so a transient
 // error at page load does not hide GPU names for the rest of the session.
 // Labels are HTML-escaped before being written into the per-GPU cards.
 func renderMetrics() string {
 	return `<p style="color:var(--muted);font-size:13px;margin-bottom:16px">Live metrics — updated every 2 seconds.</p>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Server — Load</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-load" data-chart-refresh="1" src="/api/metrics/chart/server-load.svg" style="width:100%;display:block;border-radius:6px" alt="CPU/Mem load">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Temperature — CPU</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-temp-cpu" data-chart-refresh="1" src="/api/metrics/chart/server-temp-cpu.svg" style="width:100%;display:block;border-radius:6px" alt="CPU temperature">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Temperature — Ambient Sensors</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-temp-ambient" data-chart-refresh="1" src="/api/metrics/chart/server-temp-ambient.svg" style="width:100%;display:block;border-radius:6px" alt="Ambient temperature sensors">
  </div>
 </div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">Server — Power</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-power" data-chart-refresh="1" src="/api/metrics/chart/server-power.svg" style="width:100%;display:block;border-radius:6px" alt="System power">
  </div>
 </div>
 <div id="card-server-fans" class="card" style="margin-bottom:16px;display:none">
  <div class="card-head">Server — Fan RPM</div>
  <div class="card-body" style="padding:8px">
    <img id="chart-server-fans" data-chart-refresh="1" src="/api/metrics/chart/server-fans.svg" style="width:100%;display:block;border-radius:6px" alt="Fan RPM">
  </div>
 </div>
 <section id="gpu-metrics-section" style="display:none;margin-top:24px;padding:16px 16px 4px;border:1px solid #d7e0ea;border-radius:10px;background:linear-gradient(180deg,#f7fafc 0%,#eef4f8 100%)">
  <div style="display:flex;align-items:center;justify-content:space-between;gap:16px;flex-wrap:wrap;margin-bottom:14px">
    <div>
      <div style="font-size:12px;font-weight:700;letter-spacing:.08em;text-transform:uppercase;color:#486581">GPU Metrics</div>
      <div id="gpu-metrics-summary" style="font-size:13px;color:var(--muted);margin-top:4px">Detected GPUs are rendered in a dedicated section.</div>
    </div>
    <label style="display:inline-flex;align-items:center;gap:8px;font-size:13px;color:var(--ink);font-weight:700;cursor:pointer">
      <input id="gpu-chart-toggle" type="checkbox">
      <span>One chart per GPU</span>
    </label>
  </div>
  <div id="gpu-metrics-by-metric">
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Compute Load</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-load" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-load.svg" style="width:100%;display:block;border-radius:6px" alt="GPU compute load">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Memory Load</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-memload" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-memload.svg" style="width:100%;display:block;border-radius:6px" alt="GPU memory load">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Core Clock</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-clock" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-clock.svg" style="width:100%;display:block;border-radius:6px" alt="GPU core clock">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Power</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-power" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-power.svg" style="width:100%;display:block;border-radius:6px" alt="GPU power">
      </div>
    </div>
    <div class="card" style="margin-bottom:16px">
      <div class="card-head">GPU — Temperature</div>
      <div class="card-body" style="padding:8px">
        <img id="chart-gpu-all-temp" data-chart-refresh="1" src="/api/metrics/chart/gpu-all-temp.svg" style="width:100%;display:block;border-radius:6px" alt="GPU temperature">
      </div>
    </div>
  </div>
  <div id="gpu-metrics-by-gpu" style="display:none"></div>
 </section>
 <script>
 let gpuChartKey = '';
 const gpuChartModeStorageKey = 'bee.metrics.gpuChartMode';
 let metricsNvidiaGPUsPromise = null;
 function metricsEscHtml(s) {
  return String(s == null ? '' : s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;');
 }
 function loadMetricsNvidiaGPUs() {
  if (!metricsNvidiaGPUsPromise) {
    metricsNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
      .then(function(r) {
        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
        return r.json();
      })
      .then(function(list) { return Array.isArray(list) ? list : []; })
      .catch(function() {
        // Drop the cached promise so the next layout sync retries the lookup.
        metricsNvidiaGPUsPromise = null;
        return [];
      });
  }
  return metricsNvidiaGPUsPromise;
 }
 function metricsGPUNameMap(list) {
  const out = {};
  (list || []).forEach(function(gpu) {
    const idx = Number(gpu.index);
    if (!Number.isFinite(idx) || !gpu.name) return;
    out[idx] = gpu.name;
  });
  return out;
 }
 function metricsGPUDisplayLabel(idx, names) {
  const name = names && names[idx];
  return name ? ('GPU ' + idx + ' — ' + name) : ('GPU ' + idx);
 }
 function loadGPUChartModePreference() {
  try {
    return sessionStorage.getItem(gpuChartModeStorageKey) === 'per-gpu';
  } catch (_) {
    return false;
  }
 }
 function saveGPUChartModePreference(perGPU) {
  try {
    sessionStorage.setItem(gpuChartModeStorageKey, perGPU ? 'per-gpu' : 'per-metric');
  } catch (_) {}
 }
 function refreshChartImage(el) {
  if (!el || el.dataset.loading === '1') return;
  if (el.offsetParent === null) return;
  const baseSrc = el.dataset.baseSrc || el.src.split('?')[0];
  const nextSrc = baseSrc + '?t=' + Date.now();
  const probe = new Image();
  el.dataset.baseSrc = baseSrc;
  el.dataset.loading = '1';
  probe.onload = function() {
    el.src = nextSrc;
    el.dataset.loading = '0';
  };
  probe.onerror = function() {
    el.dataset.loading = '0';
  };
  probe.src = nextSrc;
 }
 function refreshCharts() {
  document.querySelectorAll('img[data-chart-refresh="1"]').forEach(refreshChartImage);
 }
 function gpuIndices(rows) {
  const seen = {};
  const out = [];
  (rows || []).forEach(function(row) {
    const idx = Number(row.index);
    if (!Number.isFinite(idx) || seen[idx]) return;
    seen[idx] = true;
    out.push(idx);
  });
  return out.sort(function(a, b) { return a - b; });
 }
 function renderGPUOverviewCards(indices, names) {
  const host = document.getElementById('gpu-metrics-by-gpu');
  if (!host) return;
  host.innerHTML = indices.map(function(idx) {
    // The label contains the GPU model name from the API; escape it for HTML and attribute use.
    const label = metricsEscHtml(metricsGPUDisplayLabel(idx, names));
    return '<div class="card" style="margin-bottom:16px">' +
      '<div class="card-head">' + label + ' — Overview</div>' +
      '<div class="card-body" style="padding:8px">' +
      '<img id="chart-gpu-' + idx + '-overview" data-chart-refresh="1" src="/api/metrics/chart/gpu/' + idx + '-overview.svg" style="width:100%;display:block;border-radius:6px" alt="' + label + ' overview">' +
      '</div></div>';
  }).join('');
 }
 function applyGPUChartMode() {
  const perMetric = document.getElementById('gpu-metrics-by-metric');
  const perGPU = document.getElementById('gpu-metrics-by-gpu');
  const toggle = document.getElementById('gpu-chart-toggle');
  const gpuModePerGPU = !!(toggle && toggle.checked);
  if (perMetric) perMetric.style.display = gpuModePerGPU ? 'none' : '';
  if (perGPU) perGPU.style.display = gpuModePerGPU ? '' : 'none';
 }
 function syncMetricsLayout(d) {
  const fanCard = document.getElementById('card-server-fans');
  if (fanCard) fanCard.style.display = (d.fans && d.fans.length > 0) ? '' : 'none';
  const section = document.getElementById('gpu-metrics-section');
  const summary = document.getElementById('gpu-metrics-summary');
  const indices = gpuIndices(d.gpus);
  loadMetricsNvidiaGPUs().then(function(gpus) {
    const names = metricsGPUNameMap(gpus);
    if (section) section.style.display = indices.length > 0 ? '' : 'none';
    if (summary) {
      summary.textContent = indices.length > 0
        ? ('Detected GPUs: ' + indices.map(function(idx) { return metricsGPUDisplayLabel(idx, names); }).join(', '))
        : 'No GPUs detected in live metrics.';
    }
    const nextKey = indices.join(',') + '|' + indices.map(function(idx) { return names[idx] || ''; }).join(',');
    if (nextKey !== gpuChartKey) {
      renderGPUOverviewCards(indices, names);
      gpuChartKey = nextKey;
    }
    applyGPUChartMode();
  });
 }
 function loadMetricsLayout() {
  fetch('/api/metrics/latest').then(function(r) { return r.json(); }).then(syncMetricsLayout).catch(function() {});
 }
 const gpuChartToggle = document.getElementById('gpu-chart-toggle');
 if (gpuChartToggle) {
  gpuChartToggle.checked = loadGPUChartModePreference();
 }
 applyGPUChartMode();
 if (gpuChartToggle) {
  gpuChartToggle.addEventListener('change', function() {
    saveGPUChartModePreference(!!gpuChartToggle.checked);
    applyGPUChartMode();
    refreshCharts();
  });
 }
 loadMetricsLayout();
 setInterval(refreshCharts, 3000);
 setInterval(loadMetricsLayout, 5000);
 </script>`
 }
--- a/audit/internal/webui/page_network_services.go
+++ b/audit/internal/webui/page_network_services.go
@@ -0,0 +1,213 @@
 package webui
 import "html"
 // renderNetworkInline returns the network UI without a wrapping card (for embedding in Tools).
 //
 // The returned fragment is static: it takes no input and contains both markup
 // and an inline <script>. The markup consists of:
 //   - a hidden "net-pending" banner with a countdown and Confirm/Rollback buttons,
 //   - an "iface-table" placeholder that the script fills in,
 //   - a DHCP form and a static IPv4 form.
 //
 // The script calls loadNetwork once on load and then every 5 s. loadNetwork
 // fetches /api/network, rebuilds the interface table and shows or hides the
 // pending banner depending on the pending_change field of the response.
 // Clicking an interface state, running DHCP or applying a static address
 // POSTs to /api/network/toggle, /api/network/dhcp or /api/network/static and
 // starts the countdown immediately, using the response's rollback_in value or
 // NET_ROLLBACK_SECS (60) when it is absent. The Confirm and Rollback buttons
 // POST to /api/network/confirm and /api/network/rollback.
 //
 // NOTE(review): interface names, states, addresses and the default route are
 // concatenated into innerHTML and inline onclick handlers without escaping.
 // This assumes the API only returns HTML-safe values — confirm against the
 // /api/network handler.
 func renderNetworkInline() string {
 	return `<div id="net-pending" style="display:none" class="alert alert-warn">
 <strong>&#9888; Network change applied.</strong> Reverting in <span id="net-countdown">60</span>s unless confirmed.
 <button class="btn btn-primary btn-sm" style="margin-left:8px" onclick="confirmNetChange()">Confirm</button>
 <button class="btn btn-secondary btn-sm" style="margin-left:4px" onclick="rollbackNetChange()">Rollback</button>
 </div>
 <div id="iface-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div class="grid2" style="margin-top:16px">
 <div><div style="font-weight:700;font-size:13px;margin-bottom:8px">DHCP</div>
 <div class="form-row"><label>Interface (leave empty for all)</label><input type="text" id="dhcp-iface" placeholder="eth0"></div>
 <button class="btn btn-primary" onclick="runDHCP()">&#9654; Run DHCP</button>
 <div id="dhcp-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
 </div>
 <div><div style="font-weight:700;font-size:13px;margin-bottom:8px">Static IPv4</div>
 <div class="form-row"><label>Interface</label><input type="text" id="st-iface" placeholder="eth0"></div>
 <div class="form-row"><label>Address</label><input type="text" id="st-addr" placeholder="192.168.1.100"></div>
 <div class="form-row"><label>Prefix length</label><input type="text" id="st-prefix" placeholder="24"></div>
 <div class="form-row"><label>Gateway</label><input type="text" id="st-gw" placeholder="192.168.1.1"></div>
 <div class="form-row"><label>DNS (comma-separated)</label><input type="text" id="st-dns" placeholder="8.8.8.8,8.8.4.4"></div>
 <button class="btn btn-primary" onclick="setStatic()">Apply Static IP</button>
 <div id="static-out" style="margin-top:10px;font-size:12px;color:var(--ok-fg)"></div>
 </div>
 </div>
 <script>
 var _netCountdownTimer = null;
 var _netRefreshTimer = null;
 const NET_ROLLBACK_SECS = 60;
 function loadNetwork() {
  fetch('/api/network').then(r=>r.json()).then(d => {
    const rows = (d.interfaces||[]).map(i =>
      '<tr><td style="cursor:pointer" onclick="selectIface(\''+i.Name+'\')" title="Use this interface in the forms below"><span style="text-decoration:underline">'+i.Name+'</span></td>' +
      '<td style="cursor:pointer" onclick="toggleIface(\''+i.Name+'\',\''+i.State+'\')" title="Click to toggle"><span class="badge '+(i.State==='up'?'badge-ok':'badge-warn')+'">'+i.State+'</span></td>' +
      '<td>'+(i.IPv4||[]).join(', ')+'</td></tr>'
    ).join('');
    document.getElementById('iface-table').innerHTML =
      '<table><tr><th>Interface</th><th>State (click to toggle)</th><th>Addresses</th></tr>'+rows+'</table>' +
      (d.default_route ? '<p style="font-size:12px;color:var(--muted);margin-top:8px">Default route: '+d.default_route+'</p>' : '');
    if (d.pending_change) showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    else hideNetPending();
  }).catch(function() {});
 }
 function selectIface(iface) {
  document.getElementById('dhcp-iface').value = iface;
  document.getElementById('st-iface').value = iface;
 }
 function toggleIface(iface, currentState) {
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/toggle',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({iface:iface})})
    .then(r=>r.json()).then(d => {
      if (d.error) { hideNetPending(); alert('Error: '+d.error); return; }
      loadNetwork();
      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    }).catch(function() {
      setTimeout(loadNetwork, 1500);
    });
 }
 function hideNetPending() {
  const el = document.getElementById('net-pending');
  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
  _netCountdownTimer = null;
  el.style.display = 'none';
 }
 function showNetPending(secs) {
  if (!secs || secs < 1) { hideNetPending(); return; }
  const el = document.getElementById('net-pending');
  el.style.display = 'block';
  if (_netCountdownTimer) clearInterval(_netCountdownTimer);
  let remaining = secs;
  document.getElementById('net-countdown').textContent = remaining;
  _netCountdownTimer = setInterval(function() {
    remaining--;
    document.getElementById('net-countdown').textContent = remaining;
    if (remaining <= 0) { hideNetPending(); loadNetwork(); }
  }, 1000);
 }
 function confirmNetChange() {
  hideNetPending();
  fetch('/api/network/confirm',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
 }
 function rollbackNetChange() {
  hideNetPending();
  fetch('/api/network/rollback',{method:'POST'}).then(()=>loadNetwork()).catch(()=>{});
 }
 function runDHCP() {
  const iface = document.getElementById('dhcp-iface').value.trim();
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/dhcp',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({interface:iface||'all'})})
    .then(r=>r.json()).then(d => {
      document.getElementById('dhcp-out').textContent = d.output || d.error || 'Done.';
      if (d.error) { hideNetPending(); return; }
      showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
      loadNetwork();
    }).catch(function() {
      setTimeout(loadNetwork, 1500);
    });
 }
 function setStatic() {
  const dns = document.getElementById('st-dns').value.split(',').map(s=>s.trim()).filter(Boolean);
  showNetPending(NET_ROLLBACK_SECS);
  fetch('/api/network/static',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({
    interface: document.getElementById('st-iface').value,
    address: document.getElementById('st-addr').value,
    prefix: document.getElementById('st-prefix').value,
    gateway: document.getElementById('st-gw').value,
    dns: dns,
  })}).then(r=>r.json()).then(d => {
    document.getElementById('static-out').textContent = d.output || d.error || 'Done.';
    if (d.error) { hideNetPending(); return; }
    showNetPending(d.rollback_in || NET_ROLLBACK_SECS);
    loadNetwork();
  }).catch(function() {
    setTimeout(loadNetwork, 1500);
  });
 }
 loadNetwork();
 if (_netRefreshTimer) clearInterval(_netRefreshTimer);
 _netRefreshTimer = setInterval(loadNetwork, 5000);
 </script>`
 }
 // renderNetwork returns the network UI produced by renderNetworkInline wrapped
 // in a card titled "Network Interfaces".
 func renderNetwork() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Network Interfaces</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderNetworkInline() + cardClose
 }
 // renderServicesInline returns the services UI without a wrapping card.
 //
 // The fragment contains an explanatory note about bee-selfheal.timer, a
 // Refresh button, an "svc-table" placeholder, a hidden output terminal and an
 // inline <script>. The script's loadServices fetches /api/services and renders
 // one row per unit with a status badge (click to expand the unit's body) and
 // Start/Stop/Restart buttons; svcAction POSTs {name, action} to
 // /api/services/action, shows the returned output in the terminal and reloads
 // the table 800 ms later.
 //
 // Behaviour of the embedded loadServices:
 //   - a non-array response (for example JSON null) renders an empty table
 //     instead of throwing inside the promise chain;
 //   - a failed request replaces the "Loading..." placeholder with an error
 //     message; the Refresh button sits outside the table and stays usable;
 //   - the unit body is escaped for "&" before "<" and ">", so entity-like text
 //     in service output is displayed literally.
 //
 // NOTE(review): unit names are still concatenated into markup and inline
 // onclick handlers unescaped; this assumes /api/services only returns plain
 // systemd unit names — confirm against the handler.
 func renderServicesInline() string {
 	return `<p style="font-size:13px;color:var(--muted);margin-bottom:10px">` + html.EscapeString(`bee-selfheal.timer is expected to be active; the oneshot bee-selfheal.service itself is not shown as a long-running service.`) + `</p>
 <div style="display:flex;justify-content:flex-end;gap:8px;flex-wrap:wrap;margin-bottom:8px"><button class="btn btn-sm btn-secondary" onclick="loadServices()">&#8635; Refresh</button></div>
 <div id="svc-table"><p style="color:var(--muted);font-size:13px">Loading...</p></div>
 <div id="svc-out" style="display:none;margin-top:12px">
  <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:4px">
    <span id="svc-out-label" style="font-size:12px;font-weight:600;color:var(--muted)">Output</span>
    <span id="svc-out-status" style="font-size:12px"></span>
  </div>
  <div id="svc-terminal" class="terminal" style="max-height:220px;width:100%;box-sizing:border-box"></div>
 </div>
 <script>
 function loadServices() {
  fetch('/api/services').then(r=>r.json()).then(svcs => {
    const rows = (Array.isArray(svcs) ? svcs : []).map(s => {
      const st = s.state||'unknown';
      const badge = st==='active' ? 'badge-ok' : st==='failed' ? 'badge-err' : 'badge-warn';
      const id = 'svc-body-'+s.name.replace(/[^a-z0-9]/g,'-');
      const body = (s.body||'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
      return '<tr>' +
        '<td style="white-space:nowrap">'+s.name+'</td>' +
        '<td style="white-space:nowrap"><span class="badge '+badge+'" style="cursor:pointer" onclick="toggleBody(\''+id+'\')">'+st+' ▾</span>' +
        '<div id="'+id+'" style="display:none;margin-top:6px"><pre style="font-size:11px;white-space:pre-wrap;word-break:break-all;max-height:200px;overflow-y:auto;background:#1b1c1d;padding:8px;border-radius:4px;color:#b5cea8">'+body+'</pre></div>' +
        '</td>' +
        '<td style="white-space:nowrap">' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-start"   onclick="svcAction(this,\''+s.name+'\',\'start\')">Start</button> ' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-stop"    onclick="svcAction(this,\''+s.name+'\',\'stop\')">Stop</button> ' +
        '<button class="btn btn-sm btn-secondary" id="btn-'+s.name+'-restart" onclick="svcAction(this,\''+s.name+'\',\'restart\')">Restart</button>' +
        '</td></tr>';
    }).join('');
    document.getElementById('svc-table').innerHTML =
      '<table><tr><th>Unit</th><th>Status</th><th>Actions</th></tr>'+rows+'</table>';
  }).catch(function() {
    document.getElementById('svc-table').innerHTML =
      '<p style="color:var(--crit-fg, #9f3a38);font-size:13px">Failed to load services.</p>';
  });
 }
 function toggleBody(id) {
  const el = document.getElementById(id);
  if (el) el.style.display = el.style.display==='none' ? 'block' : 'none';
 }
 function svcAction(btn, name, action) {
  var label = btn.textContent;
  btn.disabled = true;
  btn.textContent = '...';
  var out = document.getElementById('svc-out');
  var term = document.getElementById('svc-terminal');
  var statusEl = document.getElementById('svc-out-status');
  var labelEl = document.getElementById('svc-out-label');
  out.style.display = 'block';
  labelEl.textContent = action + ' ' + name;
  term.textContent = 'Running...';
  statusEl.textContent = '';
  statusEl.style.color = '';
  fetch('/api/services/action',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({name,action})})
    .then(r=>r.json()).then(d => {
      term.textContent = d.output || d.error || '(no output)';
      term.scrollTop = term.scrollHeight;
      if (d.status === 'ok') {
        statusEl.textContent = '✓ done';
        statusEl.style.color = 'var(--ok-fg, #2c662d)';
      } else {
        statusEl.textContent = '✗ failed';
        statusEl.style.color = 'var(--crit-fg, #9f3a38)';
      }
      btn.textContent = label;
      btn.disabled = false;
      setTimeout(loadServices, 800);
    }).catch(e => {
      term.textContent = 'Request failed: ' + e;
      statusEl.textContent = '✗ error';
      statusEl.style.color = 'var(--crit-fg, #9f3a38)';
      btn.textContent = label;
      btn.disabled = false;
    });
 }
 loadServices();
 </script>`
 }
 // renderServices returns the services UI produced by renderServicesInline
 // wrapped in a card titled "Bee Services".
 func renderServices() string {
 	const (
 		cardOpen  = `<div class="card"><div class="card-head">Bee Services</div><div class="card-body">`
 		cardClose = `</div></div>`
 	)
 	return cardOpen + renderServicesInline() + cardClose
 }
--- a/audit/internal/webui/page_validate.go
+++ b/audit/internal/webui/page_validate.go
@@ -0,0 +1,716 @@
 package webui
 import (
 	"encoding/json"
 	"fmt"
 	"html"
 	"sort"
 	"strings"
 	"bee/audit/internal/platform"
 	"bee/audit/internal/schema"
 )
 // validateInventory carries the inventory summaries and GPU counts that
 // renderValidate reads while building the Validate page.
 type validateInventory struct {
 	// Per-subsystem summary strings. renderValidate passes each one to
 	// renderValidateCardBody for the matching card, and additionally writes
 	// NVIDIA straight into the "NVIDIA GPU Selection" card markup.
 	// NOTE(review): presumably already HTML-safe, since NVIDIA is embedded
 	// without escaping — confirm where these values are produced.
 	CPU            string
 	Memory         string
 	Storage        string
 	NVIDIA         string
 	AMD            string
 	// NvidiaGPUCount scales the per-GPU duration estimates shown by
 	// renderValidate. AMDGPUCount is presumably its AMD counterpart; its use
 	// is not visible in this part of the file.
 	NvidiaGPUCount int
 	AMDGPUCount    int
 }
 // validateFmtDur formats an estimated duration for display. Values below 120 s
 // are shown in seconds ("~90 s"); anything longer is shown in whole minutes
 // computed as (secs+29)/60, so a remainder of 31 s or more rounds up.
 func validateFmtDur(secs int) string {
 	if secs >= 120 {
 		return fmt.Sprintf("~%d min", (secs+29)/60)
 	}
 	return fmt.Sprintf("~%d s", secs)
 }
 // validateTotalValidateSec returns the estimated duration, in seconds, of a
 // full Validate-mode run for n NVIDIA GPUs: the CPU, memory, interconnect and
 // bandwidth estimates plus the per-GPU validate estimate multiplied by n.
 // A negative n is treated as zero.
 func validateTotalValidateSec(n int) int {
 	gpus := n
 	if gpus < 0 {
 		gpus = 0
 	}
 	sum := gpus * platform.SATEstimatedNvidiaGPUValidatePerGPUSec
 	sum += platform.SATEstimatedCPUValidateSec
 	sum += platform.SATEstimatedMemoryValidateSec
 	sum += platform.SATEstimatedNvidiaInterconnectSec
 	sum += platform.SATEstimatedNvidiaBandwidthSec
 	return sum
 }
 // validateTotalStressSec returns the estimated duration, in seconds, of a full
 // Stress-mode run for n NVIDIA GPUs: the CPU, memory, pulse-test, interconnect
 // and bandwidth estimates plus the three per-GPU estimates (GPU stress,
 // targeted stress, targeted power) each multiplied by n. A negative n is
 // treated as zero.
 func validateTotalStressSec(n int) int {
 	gpus := n
 	if gpus < 0 {
 		gpus = 0
 	}
 	sum := gpus * platform.SATEstimatedNvidiaGPUStressPerGPUSec
 	sum += gpus * platform.SATEstimatedNvidiaTargetedStressPerGPUSec
 	sum += gpus * platform.SATEstimatedNvidiaTargetedPowerPerGPUSec
 	sum += platform.SATEstimatedCPUStressSec
 	sum += platform.SATEstimatedMemoryStressSec
 	sum += platform.SATEstimatedNvidiaPulseTestSec
 	sum += platform.SATEstimatedNvidiaInterconnectSec
 	sum += platform.SATEstimatedNvidiaBandwidthSec
 	return sum
 }
 func renderValidate(opts HandlerOptions) string {
 	inv := loadValidateInventory(opts)
 	n := inv.NvidiaGPUCount
 	validateTotalStr := validateFmtDur(validateTotalValidateSec(n))
 	stressTotalStr := validateFmtDur(validateTotalStressSec(n))
 	gpuNote := ""
 	if n > 0 {
 		gpuNote = fmt.Sprintf(" (%d GPU)", n)
 	}
 	return `<div class="alert alert-info" style="margin-bottom:16px"><strong>Non-destructive:</strong> Validate tests collect diagnostics only. They do not write to disks, do not run sustained load, and do not increment hardware wear counters.</div>
 <p style="color:var(--muted);font-size:13px;margin-bottom:16px">Tasks continue in the background — view progress in <a href="/tasks">Tasks</a>.</p>
 	<div class="card" style="margin-bottom:16px">
 	  <div class="card-head">Validate Profile</div>
 	  <div class="card-body validate-profile-body">
 	    <div class="validate-profile-col">
 	      <div class="form-row" style="margin:12px 0 0"><label>Mode</label></div>
 	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-validate" value="validate" checked onchange="satModeChanged()"><span>Validate — quick non-destructive check</span></label>
 	      <label class="cb-row"><input type="radio" name="sat-mode" id="sat-mode-stress" value="stress" onchange="satModeChanged()"><span>Stress — thorough load test (` + stressTotalStr + gpuNote + `)</span></label>
 	    </div>
 	    <div class="validate-profile-col validate-profile-action">
 	      <p style="color:var(--muted);font-size:12px;margin:0 0 10px">Runs validate modules sequentially. Validate: ` + validateTotalStr + gpuNote + `; Stress: ` + stressTotalStr + gpuNote + `. Estimates are based on real log data and scale with GPU count.</p>
 	      <button type="button" class="btn btn-primary" onclick="runAllSAT()">Validate one by one</button>
 	      <div style="margin-top:12px">
 	        <span id="sat-all-status" style="font-size:12px;color:var(--muted)"></span>
 	      </div>
 	    </div>
 	  </div>
 	</div>
 <div class="grid3">
 ` + renderSATCard("cpu", "CPU", "runSAT('cpu')", "", renderValidateCardBody(
 		inv.CPU,
 		`Collects CPU inventory and temperatures, then runs a bounded CPU stress pass.`,
 		`<code>lscpu</code>, <code>sensors</code>, <code>stress-ng</code>`,
 		validateFmtDur(platform.SATEstimatedCPUValidateSec)+` in Validate (stress-ng 60 s). `+validateFmtDur(platform.SATEstimatedCPUStressSec)+` in Stress (stress-ng 30 min).`,
 	)) +
 		renderSATCard("memory", "Memory", "runSAT('memory')", "", renderValidateCardBody(
 			inv.Memory,
 			`Runs a RAM validation pass and records memory state around the test.`,
 			`<code>free</code>, <code>memtester</code>`,
 			validateFmtDur(platform.SATEstimatedMemoryValidateSec)+` in Validate (256 MB × 1 pass). `+validateFmtDur(platform.SATEstimatedMemoryStressSec)+` in Stress (512 MB × 1 pass).`,
 		)) +
 		renderSATCard("storage", "Storage", "runSAT('storage')", "", renderValidateCardBody(
 			inv.Storage,
 			`Scans all storage devices and runs the matching health or self-test path for each device type.`,
 			`<code>lsblk</code>; NVMe: <code>nvme</code>; SATA/SAS: <code>smartctl</code>`,
 			`Seconds in Validate (NVMe: instant device query; SATA/SAS: short self-test). Up to ~1 h per device in Stress (extended self-test, device-dependent).`,
 		)) +
 		`</div>
 <div style="height:1px;background:var(--border);margin:16px 0"></div>
 <div class="card" style="margin-bottom:16px">
  <div class="card-head">NVIDIA GPU Selection</div>
  <div class="card-body">
    <p style="font-size:12px;color:var(--muted);margin:0 0 8px">` + inv.NVIDIA + `</p>
    <p style="font-size:12px;color:var(--muted);margin:0 0 10px">All NVIDIA validate tasks use only the GPUs selected here. The same selection is used by Validate one by one.</p>
    <div style="display:flex;gap:8px;flex-wrap:wrap;margin-bottom:8px">
      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectAllGPUs()">Select All</button>
      <button class="btn btn-sm btn-secondary" type="button" onclick="satSelectNoGPUs()">Clear</button>
    </div>
    <div id="sat-gpu-list" style="border:1px solid var(--border);border-radius:4px;padding:12px;min-height:88px">
      <p style="color:var(--muted);font-size:13px">Loading NVIDIA GPUs...</p>
    </div>
    <p id="sat-gpu-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 0">Select at least one NVIDIA GPU to enable NVIDIA validate tasks.</p>
  </div>
 </div>
 <div class="grid3">
 ` + renderSATCard("nvidia", "NVIDIA GPU", "runNvidiaValidateSet('nvidia')", "", renderValidateCardBody(
 		inv.NVIDIA,
 		`Runs NVIDIA diagnostics and board inventory checks.`,
 		`<code>nvidia-smi</code>, <code>dmidecode</code>, <code>dcgmi diag</code>`,
 		func() string {
 			perV := platform.SATEstimatedNvidiaGPUValidatePerGPUSec
 			perS := platform.SATEstimatedNvidiaGPUStressPerGPUSec
 			if n > 0 {
 				return fmt.Sprintf("Validate: %s/GPU × %d = %s (Level 2, sequential). Stress: %s/GPU × %d = %s (Level 3, sequential).",
 					validateFmtDur(perV), n, validateFmtDur(perV*n),
 					validateFmtDur(perS), n, validateFmtDur(perS*n))
 			}
 			return fmt.Sprintf("Validate: %s/GPU (Level 2, sequential). Stress: %s/GPU (Level 3, sequential).",
 				validateFmtDur(perV), validateFmtDur(perS))
 		}(),
 	)) +
 		`<div id="sat-card-nvidia-targeted-stress">` +
 		renderSATCard("nvidia-targeted-stress", "NVIDIA GPU Targeted Stress", "runNvidiaValidateSet('nvidia-targeted-stress')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Runs a controlled NVIDIA DCGM load to check stability under moderate stress.`,
 			`<code>dcgmi diag targeted_stress</code>`,
 			func() string {
 				per := platform.SATEstimatedNvidiaTargetedStressPerGPUSec
 				s := "Skipped in Validate. "
 				if n > 0 {
 					s += fmt.Sprintf("Stress: %s/GPU × %d = %s sequential.", validateFmtDur(per), n, validateFmtDur(per*n))
 				} else {
 					s += fmt.Sprintf("Stress: %s/GPU sequential.", validateFmtDur(per))
 				}
 				return s + `<p id="sat-ts-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`
 			}(),
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-targeted-power">` +
 		renderSATCard("nvidia-targeted-power", "NVIDIA Targeted Power", "runNvidiaValidateSet('nvidia-targeted-power')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Checks that the GPU can sustain its declared power delivery envelope. Pass/fail determined by DCGM.`,
 			`<code>dcgmi diag targeted_power</code>`,
 			func() string {
 				per := platform.SATEstimatedNvidiaTargetedPowerPerGPUSec
 				s := "Skipped in Validate. "
 				if n > 0 {
 					s += fmt.Sprintf("Stress: %s/GPU × %d = %s sequential.", validateFmtDur(per), n, validateFmtDur(per*n))
 				} else {
 					s += fmt.Sprintf("Stress: %s/GPU sequential.", validateFmtDur(per))
 				}
 				return s + `<p id="sat-tp-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`
 			}(),
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-pulse">` +
 		renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`,
 			`<code>dcgmi diag pulse_test</code>`,
 			`Skipped in Validate. Stress: `+validateFmtDur(platform.SATEstimatedNvidiaPulseTestSec)+` (all GPUs simultaneously; measured on 8-GPU system).`+`<p id="sat-pt-mode-hint" style="color:var(--warn-fg);font-size:12px;margin:8px 0 0">Only runs in Stress mode. Switch mode above to enable in Run All.</p>`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-interconnect">` +
 		renderSATCard("nvidia-interconnect", "NVIDIA Interconnect (NCCL)", "runNvidiaFabricValidate('nvidia-interconnect')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Verifies NVLink/NVSwitch fabric bandwidth using NCCL all_reduce_perf across all selected GPUs. Pass/fail based on achieved bandwidth vs. theoretical.`,
 			`<code>all_reduce_perf</code> (NCCL tests)`,
 			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaInterconnectSec)+` (all GPUs simultaneously, requires ≥2).`,
 		)) +
 		`</div>` +
 		`<div id="sat-card-nvidia-bandwidth">` +
 		renderSATCard("nvidia-bandwidth", "NVIDIA Bandwidth (NVBandwidth)", "runNvidiaFabricValidate('nvidia-bandwidth')", "", renderValidateCardBody(
 			inv.NVIDIA,
 			`Validates GPU memory copy and peer-to-peer bandwidth paths using NVBandwidth.`,
 			`<code>nvbandwidth</code>`,
 			`Validate and Stress: `+validateFmtDur(platform.SATEstimatedNvidiaBandwidthSec)+` (all GPUs simultaneously; nvbandwidth runs all built-in tests without a time limit — duration set by the tool).`,
 		)) +
 		`</div>` +
 		`</div>
 <div class="grid3" style="margin-top:16px">
 ` + renderSATCard("amd", "AMD GPU", "runAMDValidateSet()", "", renderValidateCardBody(
 		inv.AMD,
 		`Runs the selected AMD checks only. GPU Validate collects inventory; MEM Integrity uses the RVS MEM module; MEM Bandwidth uses rocm-bandwidth-test and the RVS BABEL module.`,
 		`GPU Validate: <code>rocm-smi</code>, <code>dmidecode</code>; MEM Integrity: <code>rvs mem</code>; MEM Bandwidth: <code>rocm-bandwidth-test</code>, <code>rvs babel</code>`,
 		`<div style="display:flex;flex-direction:column;gap:4px"><label class="cb-row"><input type="checkbox" id="sat-amd-target" checked><span>GPU Validate</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-mem-target" checked><span>MEM Integrity</span></label><label class="cb-row"><input type="checkbox" id="sat-amd-bandwidth-target" checked><span>MEM Bandwidth</span></label></div>`,
 	)) +
 		`</div>
 <div id="sat-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Test Output <span id="sat-title"></span></div>
  <div class="card-body"><div id="sat-terminal" class="terminal"></div></div>
 </div>
 <style>
 .validate-profile-body { display:grid; grid-template-columns:1fr 1fr 1fr; gap:24px; align-items:stretch; }
 .validate-profile-col { min-width:0; display:flex; flex-direction:column; }
 .validate-profile-action { display:flex; flex-direction:column; align-items:center; justify-content:center; }
 .validate-card-body { padding:0; }
 .validate-card-section { padding:12px 16px 0; }
 .validate-card-section:last-child { padding-bottom:16px; }
 .sat-gpu-row { display:flex; align-items:flex-start; gap:8px; padding:6px 0; cursor:pointer; font-size:13px; }
 .sat-gpu-row input[type=checkbox] { width:16px; height:16px; margin-top:2px; flex-shrink:0; }
@media(max-width:900px){ .validate-profile-body { grid-template-columns:1fr; } }
 </style>
 <script>
 let satES = null;
 function satStressMode() {
  return document.querySelector('input[name="sat-mode"]:checked')?.value === 'stress';
 }
 function satModeChanged() {
  const stress = satStressMode();
  [
    {card: 'sat-card-nvidia-targeted-stress', hint: 'sat-ts-mode-hint'},
    {card: 'sat-card-nvidia-targeted-power',  hint: 'sat-tp-mode-hint'},
    {card: 'sat-card-nvidia-pulse',           hint: 'sat-pt-mode-hint'},
  ].forEach(function(item) {
    const card = document.getElementById(item.card);
    if (card) {
      card.style.opacity = stress ? '1' : '0.5';
      const hint = document.getElementById(item.hint);
      if (hint) hint.style.display = stress ? 'none' : '';
    }
  });
 }
 function satLabels() {
  return {nvidia:'Validate GPU', 'nvidia-targeted-stress':'NVIDIA Targeted Stress (dcgmi diag targeted_stress)', 'nvidia-targeted-power':'NVIDIA Targeted Power (dcgmi diag targeted_power)', 'nvidia-pulse':'NVIDIA PSU Pulse Test (dcgmi diag pulse_test)', 'nvidia-interconnect':'NVIDIA Interconnect (NCCL all_reduce_perf)', 'nvidia-bandwidth':'NVIDIA Bandwidth (NVBandwidth)', memory:'Validate Memory', storage:'Validate Storage', cpu:'Validate CPU', amd:'Validate AMD GPU', 'amd-mem':'AMD GPU MEM Integrity', 'amd-bandwidth':'AMD GPU MEM Bandwidth'};
 }
 let satNvidiaGPUsPromise = null;
 function loadSatNvidiaGPUs() {
  if (!satNvidiaGPUsPromise) {
    satNvidiaGPUsPromise = fetch('/api/gpu/nvidia')
      .then(r => {
        if (!r.ok) throw new Error('Failed to load NVIDIA GPUs.');
        return r.json();
      })
      .then(list => Array.isArray(list) ? list : []);
  }
  return satNvidiaGPUsPromise;
 }
 function satSelectedGPUIndices() {
  return Array.from(document.querySelectorAll('.sat-nvidia-checkbox'))
    .filter(function(el) { return el.checked && !el.disabled; })
    .map(function(el) { return parseInt(el.value, 10); })
    .filter(function(v) { return !Number.isNaN(v); })
    .sort(function(a, b) { return a - b; });
 }
 function satUpdateGPUSelectionNote() {
  const note = document.getElementById('sat-gpu-selection-note');
  if (!note) return;
  const selected = satSelectedGPUIndices();
  if (!selected.length) {
    note.textContent = 'Select at least one NVIDIA GPU to enable NVIDIA validate tasks.';
    return;
  }
  note.textContent = 'Selected GPUs: ' + selected.join(', ') + '. Multi-GPU tests will use all selected GPUs.';
 }
 function satRenderGPUList(gpus) {
  const root = document.getElementById('sat-gpu-list');
  if (!root) return;
  if (!gpus || !gpus.length) {
    root.innerHTML = '<p style="color:var(--muted);font-size:13px">No NVIDIA GPUs detected.</p>';
    satUpdateGPUSelectionNote();
    return;
  }
  root.innerHTML = gpus.map(function(gpu) {
    const mem = gpu.memory_mb > 0 ? ' · ' + gpu.memory_mb + ' MiB' : '';
    return '<label class="sat-gpu-row">'
      + '<input class="sat-nvidia-checkbox" type="checkbox" value="' + gpu.index + '" checked onchange="satUpdateGPUSelectionNote()">'
      + '<span><strong>GPU ' + gpu.index + '</strong> — ' + gpu.name + mem + '</span>'
      + '</label>';
  }).join('');
  satUpdateGPUSelectionNote();
 }
 function satSelectAllGPUs() {
  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = true; });
  satUpdateGPUSelectionNote();
 }
 function satSelectNoGPUs() {
  document.querySelectorAll('.sat-nvidia-checkbox').forEach(function(el) { el.checked = false; });
  satUpdateGPUSelectionNote();
 }
 function satLoadGPUs() {
  loadSatNvidiaGPUs().then(function(gpus) {
    satRenderGPUList(gpus);
  }).catch(function(err) {
    const root = document.getElementById('sat-gpu-list');
    if (root) {
      root.innerHTML = '<p style="color:var(--crit-fg);font-size:13px">Error: ' + err.message + '</p>';
    }
    satUpdateGPUSelectionNote();
  });
 }
 function satGPUDisplayName(gpu) {
  const idx = (gpu && Number.isFinite(Number(gpu.index))) ? Number(gpu.index) : 0;
  const name = gpu && gpu.name ? gpu.name : ('GPU ' + idx);
  return 'GPU ' + idx + ' — ' + name;
 }
 function satRequestBody(target, overrides) {
  const body = {};
  const labels = satLabels();
  body.display_name = labels[target] || ('Validate ' + target);
  body.stress_mode = satStressMode();
  if (target === 'cpu') body.duration = satStressMode() ? 1800 : 60;
  if (overrides) {
    Object.keys(overrides).forEach(key => { body[key] = overrides[key]; });
  }
  return body;
 }
 function enqueueSATTarget(target, overrides) {
  return fetch('/api/sat/'+target+'/run', {method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify(satRequestBody(target, overrides))})
    .then(r => r.json());
 }
 function streamSATTask(taskId, title, resetTerminal) {
  if (satES) { satES.close(); satES = null; }
  document.getElementById('sat-output').style.display='block';
  document.getElementById('sat-title').textContent = '— ' + title;
  const term = document.getElementById('sat-terminal');
  if (resetTerminal) {
    term.textContent = '';
  }
  term.textContent += 'Task ' + taskId + ' queued. Streaming log...\n';
  return new Promise(function(resolve) {
    satES = new EventSource('/api/tasks/' + taskId + '/stream');
    satES.onmessage = function(e) { term.textContent += e.data + '\n'; term.scrollTop = term.scrollHeight; };
    satES.addEventListener('done', function(e) {
      satES.close();
      satES = null;
      term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
      term.scrollTop = term.scrollHeight;
      resolve({ok: !e.data, error: e.data || ''});
    });
    satES.onerror = function() {
      if (satES) {
        satES.close();
        satES = null;
      }
      term.textContent += '\nERROR: stream disconnected.\n';
      term.scrollTop = term.scrollHeight;
      resolve({ok: false, error: 'stream disconnected'});
    };
  });
 }
 function selectedAMDValidateTargets() {
  const targets = [];
  const gpu = document.getElementById('sat-amd-target');
  const mem = document.getElementById('sat-amd-mem-target');
  const bw = document.getElementById('sat-amd-bandwidth-target');
  if (gpu && gpu.checked && !gpu.disabled) targets.push('amd');
  if (mem && mem.checked && !mem.disabled) targets.push('amd-mem');
  if (bw && bw.checked && !bw.disabled) targets.push('amd-bandwidth');
  return targets;
 }
 function runSAT(target) {
  return runSATWithOverrides(target, null);
 }
 function runSATWithOverrides(target, overrides) {
  const title = (overrides && overrides.display_name) || target;
  const term = document.getElementById('sat-terminal');
  document.getElementById('sat-output').style.display='block';
  document.getElementById('sat-title').textContent = '— ' + title;
  term.textContent = 'Enqueuing ' + title + ' test...\n';
  return enqueueSATTarget(target, overrides)
    .then(d => streamSATTask(d.task_id, title, false));
 }
 const nvidiaPerGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power'];
 const nvidiaAllGPUTargets = ['nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'];
 function satAllGPUIndicesForMulti() {
  return Promise.resolve(satSelectedGPUIndices());
 }
 function expandSATTarget(target) {
  if (nvidiaAllGPUTargets.indexOf(target) >= 0) {
    return satAllGPUIndicesForMulti().then(function(indices) {
      if (!indices.length) return Promise.reject(new Error('No NVIDIA GPUs available.'));
      return [{target: target, overrides: {gpu_indices: indices, display_name: satLabels()[target] || target}}];
    });
  }
  if (nvidiaPerGPUTargets.indexOf(target) < 0) {
    return Promise.resolve([{target: target}]);
  }
  const selected = satSelectedGPUIndices();
  if (!selected.length) {
    return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
  }
  return loadSatNvidiaGPUs().then(gpus => gpus.filter(gpu => selected.indexOf(Number(gpu.index)) >= 0).map(gpu => ({
    target: target,
    overrides: {
      gpu_indices: [Number(gpu.index)],
      display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(gpu) + ')'
    },
    label: satGPUDisplayName(gpu),
  })));
 }
 function runNvidiaFabricValidate(target) {
  satAllGPUIndicesForMulti().then(function(indices) {
    if (!indices.length) { alert('No NVIDIA GPUs available.'); return; }
    runSATWithOverrides(target, {gpu_indices: indices, display_name: satLabels()[target] || target});
  });
 }
 // Runs a per-GPU validate target on each selected NVIDIA GPU.
 // With a single GPU the run is delegated to runSATWithOverrides; with several
 // GPUs each one is enqueued and streamed in turn, strictly one after another.
 // The returned promise rejects when no selected GPU is found.
 function runNvidiaValidateSet(target) {
   return loadSatNvidiaGPUs().then(function(gpus) {
     const wanted = satSelectedGPUIndices();
     const queue = gpus.filter(function(gpu) {
       return wanted.indexOf(Number(gpu.index)) >= 0;
     });
     if (!queue.length) {
       throw new Error('Select at least one NVIDIA GPU.');
     }
     if (queue.length === 1) {
       const only = queue[0];
       return runSATWithOverrides(target, {
         gpu_indices: [Number(only.index)],
         display_name: (satLabels()[target] || ('Validate ' + target)) + ' (' + satGPUDisplayName(only) + ')',
       });
     }
     // Several GPUs: reveal the shared output panel and run them sequentially.
     document.getElementById('sat-output').style.display = 'block';
     document.getElementById('sat-title').textContent = '— ' + target;
     const term = document.getElementById('sat-terminal');
     term.textContent = 'Running ' + target + ' one GPU at a time...\n';
     const labelBase = satLabels()[target] || ('Validate ' + target);
     function step(pos) {
       if (pos >= queue.length) {
         return Promise.resolve();
       }
       const gpu = queue[pos];
       const gpuLabel = satGPUDisplayName(gpu);
       term.textContent += '\n[' + (pos + 1) + '/' + queue.length + '] ' + gpuLabel + '\n';
       const overrides = {
         gpu_indices: [Number(gpu.index)],
         display_name: labelBase + ' (' + gpuLabel + ')',
       };
       return enqueueSATTarget(target, overrides)
         .then(function(d) {
           return streamSATTask(d.task_id, labelBase + ' (' + gpuLabel + ')', false);
         })
         .then(function() {
           return step(pos + 1);
         });
     }
     return step(0);
   });
 }
 // Runs the selected AMD validate targets.
 // Nothing selected -> no-op; one target -> delegated to runSAT; several
 // targets -> each is enqueued and streamed in turn into the shared terminal.
 function runAMDValidateSet() {
   const targets = selectedAMDValidateTargets();
   if (!targets.length) return;
   if (targets.length === 1) return runSAT(targets[0]);
   document.getElementById('sat-output').style.display='block';
   document.getElementById('sat-title').textContent = '— amd';
   const term = document.getElementById('sat-terminal');
   term.textContent = 'Running AMD validate set one by one...\n';
   const labels = satLabels();
   const runNext = (idx) => {
     if (idx >= targets.length) return Promise.resolve();
     const target = targets[idx];
     // Fall back to the raw target name so a missing label never renders as
     // "undefined" (same fallback the NVIDIA helpers use).
     const label = labels[target] || target;
     term.textContent += '\n[' + (idx + 1) + '/' + targets.length + '] ' + label + '\n';
     return enqueueSATTarget(target)
       .then(d => {
         return streamSATTask(d.task_id, label, false);
       }).then(function() {
         return runNext(idx + 1);
       });
   };
   return runNext(0);
 }
 // Enqueues every enabled SAT target, one request at a time, and reports
 // progress in the #sat-all-status element.
 // Stress-only targets are skipped unless stress mode is on, and targets whose
 // card button is disabled are skipped as well.
 function runAllSAT() {
   const cycles = 1;
   const status = document.getElementById('sat-all-status');
   status.textContent = 'Enqueuing...';
   const stressOnlyTargets = ['nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse'];
   const baseTargets = ['nvidia','nvidia-targeted-stress','nvidia-targeted-power','nvidia-pulse','nvidia-interconnect','nvidia-bandwidth','memory','storage','cpu'].concat(selectedAMDValidateTargets());
   const activeTargets = baseTargets.filter(target => {
     if (stressOnlyTargets.indexOf(target) >= 0 && !satStressMode()) return false;
     const btn = document.getElementById('sat-btn-' + target);
     return !(btn && btn.disabled);
   });
   Promise.all(activeTargets.map(expandSATTarget)).then(groups => {
     // Flatten the per-target descriptor groups, repeated once per cycle.
     const expanded = [];
     for (let cycle = 0; cycle < cycles; cycle++) {
       groups.forEach(group => group.forEach(item => expanded.push(item)));
     }
     const total = expanded.length;
     if (!total) {
       status.textContent = 'No tasks selected.';
       return;
     }
     // Enqueue strictly in order: the next request starts only after the
     // previous one has resolved.
     const runNext = (idx) => {
       if (idx >= expanded.length) { status.textContent = 'Completed ' + total + ' task(s).'; return Promise.resolve(); }
       const item = expanded[idx];
       status.textContent = 'Running ' + (idx + 1) + '/' + total + '...';
       return enqueueSATTarget(item.target, item.overrides)
         .then(() => runNext(idx + 1));
     };
     return runNext(0);
   }).catch(err => {
     // A rejection value is not guaranteed to be an Error; avoid throwing
     // inside the handler when it has no message property.
     status.textContent = 'Error: ' + ((err && err.message) || err);
   });
 }
 </script>
 <script>
 // On page load: ask the backend which GPU vendors are present and grey out
 // the validate cards for the vendors that are missing, then load the GPU list.
 fetch('/api/gpu/presence').then(r=>r.json()).then(gp => {
    if (!gp.nvidia) {
        ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth'].forEach(function(cardId) {
            disableSATCard(cardId, 'No NVIDIA GPU detected');
        });
    }
    if (!gp.amd) {
        disableSATCard('amd', 'No AMD GPU detected');
        disableSATAMDOptions('No AMD GPU detected');
    }
 });
 satLoadGPUs();
 // Disables and unchecks every AMD validate option checkbox that exists on the
 // page, using reason as the tooltip.
 function disableSATAMDOptions(reason) {
    const optionIds = ['sat-amd-target', 'sat-amd-mem-target', 'sat-amd-bandwidth-target'];
    for (let i = 0; i < optionIds.length; i++) {
        const box = document.getElementById(optionIds[i]);
        if (box) {
            box.disabled = true;
            box.checked = false;
            box.title = reason;
        }
    }
 }
 // Disables the Run button of the SAT card with the given id and shows reason
 // both as the button tooltip and as a muted note inside the card.
 // Does nothing when the button does not exist.
 function disableSATCard(id, reason) {
    const btn = document.getElementById('sat-btn-' + id);
    if (!btn) return;
    btn.disabled = true;
    btn.title = reason;
    btn.style.opacity = '0.4';
    const card = btn.closest('.card');
    if (!card) return;
    // Reuse an existing note so repeated calls only update its text.
    let note = card.querySelector('.sat-unavail');
    if (!note) {
        note = document.createElement('p');
        note.className = 'sat-unavail';
        note.style.cssText = 'color:var(--muted);font-size:12px;margin:0 0 8px';
        const body = card.querySelector('.card-body');
        if (body) {
            body.insertBefore(note, body.firstChild);
        }
    }
    note.textContent = reason;
 }
 </script>`
 }
 // loadValidateInventory builds the per-category device summaries from the
 // audit snapshot at opts.AuditPath. If the snapshot cannot be loaded or
 // decoded, every summary reads "Audit snapshot not loaded." and the GPU
 // counts keep their zero values. Devices explicitly marked as not present are
 // ignored.
 func loadValidateInventory(opts HandlerOptions) validateInventory {
 	const notLoaded = "Audit snapshot not loaded."
 	inventory := validateInventory{
 		CPU:     notLoaded,
 		Memory:  notLoaded,
 		Storage: notLoaded,
 		NVIDIA:  notLoaded,
 		AMD:     notLoaded,
 	}
 	raw, err := loadSnapshot(opts.AuditPath)
 	if err != nil {
 		return inventory
 	}
 	var snapshot schema.HardwareIngestRequest
 	if err := json.Unmarshal(raw, &snapshot); err != nil {
 		return inventory
 	}

 	// tally holds the number of present devices and a per-model breakdown.
 	type tally struct {
 		total  int
 		models map[string]int
 	}
 	newTally := func() *tally { return &tally{models: map[string]int{}} }
 	// record counts one device under the first non-empty candidate name.
 	record := func(t *tally, candidates ...string) {
 		t.total++
 		addValidateModel(t.models, validateFirstNonEmpty(candidates...))
 	}

 	cpus := newTally()
 	for _, cpu := range snapshot.Hardware.CPUs {
 		if present := cpu.Present; present != nil && !*present {
 			continue
 		}
 		record(cpus, validateTrimPtr(cpu.Model), validateTrimPtr(cpu.Manufacturer), "unknown")
 	}

 	dimms := newTally()
 	for _, dimm := range snapshot.Hardware.Memory {
 		if present := dimm.Present; present != nil && !*present {
 			continue
 		}
 		record(dimms, validateTrimPtr(dimm.PartNumber), validateTrimPtr(dimm.Type), validateTrimPtr(dimm.Manufacturer), "unknown")
 	}

 	disks := newTally()
 	for _, disk := range snapshot.Hardware.Storage {
 		if present := disk.Present; present != nil && !*present {
 			continue
 		}
 		record(disks, validateTrimPtr(disk.Model), validateTrimPtr(disk.Manufacturer), "unknown")
 	}

 	// The two vendor checks are independent: a device matching both is
 	// counted in both tallies.
 	nvidia, amd := newTally(), newTally()
 	for _, pci := range snapshot.Hardware.PCIeDevices {
 		if present := pci.Present; present != nil && !*present {
 			continue
 		}
 		if validateIsVendorGPU(pci, "nvidia") {
 			record(nvidia, validateTrimPtr(pci.Model), validateTrimPtr(pci.Manufacturer), "unknown")
 		}
 		if validateIsVendorGPU(pci, "amd") {
 			record(amd, validateTrimPtr(pci.Model), validateTrimPtr(pci.Manufacturer), "unknown")
 		}
 	}

 	inventory.CPU = formatValidateDeviceSummary(cpus.total, cpus.models, "CPU")
 	inventory.Memory = formatValidateDeviceSummary(dimms.total, dimms.models, "module")
 	inventory.Storage = formatValidateDeviceSummary(disks.total, disks.models, "device")
 	inventory.NVIDIA = formatValidateDeviceSummary(nvidia.total, nvidia.models, "GPU")
 	inventory.AMD = formatValidateDeviceSummary(amd.total, amd.models, "GPU")
 	inventory.NvidiaGPUCount = nvidia.total
 	inventory.AMDGPUCount = amd.total
 	return inventory
 }
 // renderValidateCardBody renders the four stacked sections of a validate
 // card: devices and settings in muted text, description and commands in the
 // default colour. The arguments are inserted verbatim (no escaping).
 func renderValidateCardBody(devices, description, commands, settings string) string {
 	const (
 		mutedOpen = `<div class="validate-card-section"><div style="font-size:13px;color:var(--muted)">`
 		plainOpen = `<div class="validate-card-section"><div style="font-size:13px">`
 		sectionEnd = `</div></div>`
 	)
 	return mutedOpen + devices + sectionEnd +
 		plainOpen + description + sectionEnd +
 		plainOpen + commands + sectionEnd +
 		mutedOpen + settings + sectionEnd
 }
 // formatValidateDeviceSummary renders a one-line, HTML-escaped device summary.
 // total is the device count, models maps model name to count and unit is the
 // singular noun ("GPU", "module", ...). Model names are listed in sorted
 // order; a single model collapses to "<n> x <model> <unit(s)>".
 func formatValidateDeviceSummary(total int, models map[string]int, unit string) string {
 	if total == 0 {
 		return "0 " + unit + "s detected."
 	}
 	names := make([]string, 0, len(models))
 	for name := range models {
 		names = append(names, name)
 	}
 	sort.Strings(names)
 	entries := make([]string, len(names))
 	for i, name := range names {
 		entries[i] = fmt.Sprintf("%d x %s", models[name], html.EscapeString(name))
 	}
 	noun := unit
 	if total != 1 {
 		noun = unit + "s"
 	}
 	switch len(entries) {
 	case 1:
 		return entries[0] + " " + noun
 	default:
 		return fmt.Sprintf("%d %s: %s", total, noun, strings.Join(entries, ", "))
 	}
 }
 // addValidateModel increments the counter for name in counts. The name is
 // trimmed first; a blank name is counted under "unknown".
 func addValidateModel(counts map[string]int, name string) {
 	key := strings.TrimSpace(name)
 	if key == "" {
 		key = "unknown"
 	}
 	counts[key] = counts[key] + 1
 }
 // validateTrimPtr returns the whitespace-trimmed string value points to, or
 // "" for a nil pointer.
 func validateTrimPtr(value *string) string {
 	if value != nil {
 		return strings.TrimSpace(*value)
 	}
 	return ""
 }
 // validateFirstNonEmpty returns the first argument that is non-blank after
 // trimming (the trimmed form is returned), or "" when there is none.
 func validateFirstNonEmpty(values ...string) string {
 	for i := range values {
 		if trimmed := strings.TrimSpace(values[i]); trimmed != "" {
 			return trimmed
 		}
 	}
 	return ""
 }
 // validateIsVendorGPU reports whether dev looks like a GPU of the given
 // vendor. Supported vendors are "nvidia" and "amd"; any other value yields
 // false. Matching is case-insensitive on the model, manufacturer and device
 // class strings. Anything mentioning ASPEED in its model or manufacturer is
 // never treated as a GPU.
 func validateIsVendorGPU(dev schema.HardwarePCIeDevice, vendor string) bool {
 	model := strings.ToLower(validateTrimPtr(dev.Model))
 	manufacturer := strings.ToLower(validateTrimPtr(dev.Manufacturer))
 	class := strings.ToLower(validateTrimPtr(dev.DeviceClass))
 	if strings.Contains(model, "aspeed") || strings.Contains(manufacturer, "aspeed") {
 		return false
 	}
 	switch vendor {
 	case "nvidia":
 		return strings.Contains(model, "nvidia") || strings.Contains(manufacturer, "nvidia")
 	case "amd":
 		isGPUClass := class == "processingaccelerator" || class == "displaycontroller" || class == "videocontroller"
 		// "ati" must match as a whole word: as a plain substring it is also
 		// found in "corporation", which made every "... Corporation" display
 		// controller (Intel, NVIDIA, Matrox) count as an AMD GPU.
 		hasATIWord := false
 		words := strings.FieldsFunc(manufacturer, func(r rune) bool {
 			return (r < 'a' || r > 'z') && (r < '0' || r > '9')
 		})
 		for _, word := range words {
 			if word == "ati" {
 				hasATIWord = true
 				break
 			}
 		}
 		isAMDVendor := strings.Contains(manufacturer, "advanced micro devices") || strings.Contains(manufacturer, "amd") || hasATIWord
 		isAMDModel := strings.Contains(model, "instinct") || strings.Contains(model, "radeon") || strings.Contains(model, "amd")
 		return isGPUClass && (isAMDVendor || isAMDModel)
 	default:
 		return false
 	}
 }
 // renderSATCard renders one validate card: a header with label and a Run
 // button (element id "sat-btn-<id>", onclick runAction), optionally followed
 // by headerActions, and the given body. headerActions is appended only when
 // it is not blank. All arguments are inserted verbatim (no escaping).
 func renderSATCard(id, label, runAction, headerActions, body string) string {
 	buttons := fmt.Sprintf(`<button id="sat-btn-%s" class="btn btn-primary btn-sm" onclick="%s">Run</button>`, id, runAction)
 	if hasExtra := strings.TrimSpace(headerActions) != ""; hasExtra {
 		buttons += headerActions
 	}
 	return `<div class="card"><div class="card-head card-head-actions"><span>` + label +
 		`</span><div class="card-head-buttons">` + buttons +
 		`</div></div><div class="card-body validate-card-body">` + body + `</div></div>`
 }
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -271,6 +271,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
 	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/autotune/run", h.handleAPIBenchmarkAutotuneRun())
 	mux.HandleFunc("GET /api/bee-bench/nvidia/autotune/status", h.handleAPIBenchmarkAutotuneStatus)
 	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)
 	// Tasks
@@ -687,41 +689,22 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (dat
 	case path == "server-power":
 		title = "System Power"
-		// Use per-PSU stacked chart when PSU SDR data is available.
+		power := make([]float64, len(samples))
-		// Collect the union of PSU slots seen across all samples.
+		label := "Power W"
-		psuSlots := psuSlotsFromSamples(samples)
+		for i, s := range samples {
-		if len(psuSlots) > 1 {
+			power[i] = s.PowerW
-			// Build one dataset per PSU slot.
+			if strings.TrimSpace(s.PowerSource) != "" {
-			psuDatasets := make([][]float64, len(psuSlots))
+				label = fmt.Sprintf("Power W · %s", s.PowerSource)
-			psuNames := make([]string, len(psuSlots))
+				if strings.TrimSpace(s.PowerMode) != "" {
-			for si, slot := range psuSlots {
+					label += fmt.Sprintf(" (%s)", s.PowerMode)
 				ds := make([]float64, len(samples))
 				for i, s := range samples {
 					for _, psu := range s.PSUs {
 						if psu.Slot == slot {
 							ds[i] = psu.PowerW
 							break
 						}
 					}
 				}
 				psuDatasets[si] = normalizePowerSeries(ds)
 				psuNames[si] = fmt.Sprintf("PSU %d", slot)
 			}
 			datasets = psuDatasets
 			names = psuNames
 			stacked = true
 			yMax = autoMax120(psuStackedTotal(psuDatasets))
 		} else {
 			power := make([]float64, len(samples))
 			for i, s := range samples {
 				power[i] = s.PowerW
 			}
 			power = normalizePowerSeries(power)
 			datasets = [][]float64{power}
 			names = []string{"Power W"}
 			yMin = floatPtr(0)
 			yMax = autoMax120(power)
 		}
 		power = normalizePowerSeries(power)
 		datasets = [][]float64{power}
 		names = []string{label}
 		yMin = floatPtr(0)
 		yMax = autoMax120(power)
 	case path == "server-fans":
 		title = "Fan RPM"
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -420,6 +420,49 @@ func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
 	}
 }
 // TestChartDataFromSamplesServerPowerUsesResolvedSystemPower checks that the
 // "server-power" chart is a single, non-stacked series labelled with the
 // power source and mode, even when the samples carry per-PSU readings.
 func TestChartDataFromSamplesServerPowerUsesResolvedSystemPower(t *testing.T) {
 	base := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
 	// newSample builds a two-PSU sample with a resolved total power value.
 	newSample := func(at time.Time, psu1W, psu2W, totalW float64) platform.LiveMetricSample {
 		return platform.LiveMetricSample{
 			Timestamp: at,
 			PSUs: []platform.PSUReading{
 				{Slot: 1, PowerW: psu1W},
 				{Slot: 2, PowerW: psu2W},
 			},
 			PowerW:      totalW,
 			PowerSource: "sdr_psu_input",
 			PowerMode:   "autotuned",
 		}
 	}
 	samples := []platform.LiveMetricSample{
 		newSample(base, 120, 130, 250),
 		newSample(base.Add(time.Minute), 140, 135, 275),
 	}
 	datasets, names, _, title, _, _, stacked, ok := chartDataFromSamples("server-power", samples)
 	switch {
 	case !ok:
 		t.Fatal("expected server-power chart data")
 	case title != "System Power":
 		t.Fatalf("title=%q", title)
 	case stacked:
 		t.Fatal("server-power should use resolved system power, not stacked PSU inputs")
 	case len(datasets) != 1 || len(names) != 1:
 		t.Fatalf("datasets=%d names=%d want 1/1", len(datasets), len(names))
 	case names[0] != "Power W · sdr_psu_input (autotuned)":
 		t.Fatalf("names=%v", names)
 	}
 }
 func TestNormalizeFanSeriesHoldsLastPositive(t *testing.T) {
 	got := normalizeFanSeries([]float64{4200, 0, 0, 4300, 0})
 	want := []float64{4200, 4200, 4200, 4300, 4300}
@@ -650,9 +693,12 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`/api/gpu/nvidia`,
 		`/api/bee-bench/nvidia/perf/run`,
 		`/api/bee-bench/nvidia/power/run`,
 		`/api/bee-bench/nvidia/autotune/run`,
 		`/api/bee-bench/nvidia/autotune/status`,
 		`benchmark-run-nccl`,
 		`Run Performance Benchmark`,
 		`Run Power / Thermal Fit`,
 		`Autotune`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -34,6 +34,7 @@ var taskNames = map[string]string{
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
 	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
 	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
 	"nvidia-bench-autotune":  "NVIDIA Bee Bench Power Source Autotune",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -125,6 +126,7 @@ type taskParams struct {
 	Loader             string   `json:"loader,omitempty"`
 	BurnProfile        string   `json:"burn_profile,omitempty"`
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
 	BenchmarkKind      string   `json:"benchmark_kind,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
 	RampStep           int      `json:"ramp_step,omitempty"`
@@ -686,6 +688,15 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-autotune":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
 			Profile: t.params.BenchmarkProfile,
 			SizeMB:  t.params.SizeMB,
 		}, t.params.BenchmarkKind, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -32,7 +32,7 @@ lb config noauto \
    --memtest memtest86+ \
    --iso-volume "EASY_BEE_${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
    --iso-application "EASY-BEE-${BEE_GPU_VENDOR_UPPER:-NVIDIA}" \
-    --bootappend-live "boot=live components video=1920x1080 console=tty0 console=ttyS0,115200n8 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
+    --bootappend-live "boot=live components video=1920x1080 console=ttyS0,115200n8 console=tty0 loglevel=3 systemd.show_status=1 username=bee user-fullname=Bee modprobe.blacklist=nouveau,snd_hda_intel,snd_hda_codec_realtek,snd_hda_codec_generic,soundcore" \
    --debootstrap-options "--include=ca-certificates" \
    --apt-recommends false \
    --chroot-squashfs-compression-type zstd \
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -542,6 +542,186 @@ label memtest
 EOF
 }
 # extract_live_grub_entry CFG
 # Reads the first "linux /live/..." and "initrd /live/..." lines from the
 # GRUB config CFG and exports the pieces through global variables:
 #   grub_kernel  - kernel path (second field of the linux line)
 #   grub_append  - everything after the kernel path on the linux line
 #   grub_initrd  - initrd path (second field of the initrd line)
 # Returns 1 when either line or any of the three pieces is missing.
 extract_live_grub_entry() {
    cfg="$1"
    live_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
    live_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
    [ -n "$live_linux" ] || return 1
    [ -n "$live_initrd" ] || return 1
    grub_kernel="$(printf '%s\n' "$live_linux" | awk '{print $2}')"
    # Strip "linux <kernel>" with a whitespace-aware pattern. Splitting on
    # single spaces (cut -d' ' -f3-) breaks on indented or tab-separated lines:
    # the result then still contains "linux <kernel>" or loses arguments.
    grub_append="$(printf '%s\n' "$live_linux" | awk '{ sub(/^[[:space:]]*linux[[:space:]]+[^[:space:]]+[[:space:]]*/, ""); print }')"
    grub_initrd="$(printf '%s\n' "$live_initrd" | awk '{print $2}')"
    [ -n "$grub_kernel" ] || return 1
    [ -n "$grub_append" ] || return 1
    [ -n "$grub_initrd" ] || return 1
    return 0
 }
 # extract_live_isolinux_entry CFG
 # Picks the first "linux /live/...", "initrd /live/..." and "append ..." lines
 # out of the isolinux config CFG and publishes, as global variables:
 #   isolinux_kernel       - kernel path from the linux line
 #   isolinux_initrd_path  - initrd path from the initrd line
 #   isolinux_append       - the append line without its "append" keyword
 # Returns 1 as soon as any required line or path turns out to be empty.
 extract_live_isolinux_entry() {
    cfg="$1"
    isolinux_linux="$(awk '/^[[:space:]]*linux[[:space:]]+\/live\// { print; exit }' "$cfg")"
    isolinux_initrd="$(awk '/^[[:space:]]*initrd[[:space:]]+\/live\// { print; exit }' "$cfg")"
    isolinux_append="$(awk '/^[[:space:]]*append[[:space:]]+/ { sub(/^[[:space:]]*append[[:space:]]+/, ""); print; exit }' "$cfg")"
    if [ -z "$isolinux_linux" ] || [ -z "$isolinux_initrd" ] || [ -z "$isolinux_append" ]; then
        return 1
    fi
    isolinux_kernel="$(printf '%s\n' "$isolinux_linux" | awk '{print $2}')"
    isolinux_initrd_path="$(printf '%s\n' "$isolinux_initrd" | awk '{print $2}')"
    if [ -z "$isolinux_kernel" ] || [ -z "$isolinux_initrd_path" ]; then
        return 1
    fi
    return 0
 }
 # write_canonical_grub_cfg CFG KERNEL APPEND_LIVE INITRD
 # Overwrites CFG with the canonical EASY-BEE GRUB menu. Every live entry boots
 # KERNEL with APPEND_LIVE followed by entry-specific arguments and uses INITRD.
 # The heredoc delimiter is unquoted, so ${kernel}, ${append_live} and
 # ${initrd} are expanded by the shell, while \${grub_platform} is written
 # literally into the generated file.
 write_canonical_grub_cfg() {
    cfg="$1"
    kernel="$2"
    append_live="$3"
    initrd="$4"
    cat > "$cfg" <<EOF
 source /boot/grub/config.cfg
 echo ""
 echo "  ███████╗ █████╗ ███████╗██╗   ██╗      ██████╗ ███████╗███████╗"
 echo "  ██╔════╝██╔══██╗██╔════╝╚██╗ ██╔╝      ██╔══██╗██╔════╝██╔════╝"
 echo "  █████╗  ███████║███████╗ ╚████╔╝ █████╗██████╔╝█████╗  █████╗"
 echo "  ██╔══╝  ██╔══██║╚════██║  ╚██╔╝  ╚════╝██╔══██╗██╔══╝  ██╔══╝"
 echo "  ███████╗██║  ██║███████║   ██║         ██████╔╝███████╗███████╗"
 echo "  ╚══════╝╚═╝  ╚═╝╚══════╝   ╚═╝         ╚═════╝ ╚══════╝╚══════╝"
 echo "  Hardware Audit LiveCD"
 echo ""
 menuentry "EASY-BEE" {
    linux   ${kernel} ${append_live} nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  ${initrd}
 }
 menuentry "EASY-BEE — load to RAM (toram)" {
    linux   ${kernel} ${append_live} toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
    initrd  ${initrd}
 }
 submenu "EASY-BEE (advanced options) -->" {
    menuentry "EASY-BEE — GSP=off" {
        linux   ${kernel} ${append_live} nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  ${initrd}
    }
    menuentry "EASY-BEE — KMS (no nomodeset)" {
        linux   ${kernel} ${append_live} bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  ${initrd}
    }
    menuentry "EASY-BEE — KMS + GSP=off" {
        linux   ${kernel} ${append_live} bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
        initrd  ${initrd}
    }
    menuentry "EASY-BEE — fail-safe" {
        linux   ${kernel} ${append_live} nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
        initrd  ${initrd}
    }
 }
 if [ "\${grub_platform}" = "efi" ]; then
    menuentry "Memory Test (memtest86+)" {
        chainloader /boot/memtest86+x64.efi
    }
 else
    menuentry "Memory Test (memtest86+)" {
        linux16 /boot/memtest86+x64.bin
    }
 fi
 if [ "\${grub_platform}" = "efi" ]; then
    menuentry "UEFI Firmware Settings" {
        fwsetup
    }
 fi
 EOF
 }
 # write_canonical_isolinux_cfg CFG KERNEL INITRD APPEND_LIVE
 # Overwrites CFG with the canonical EASY-BEE isolinux menu. Each live label
 # boots KERNEL with INITRD and an append line made of APPEND_LIVE plus
 # label-specific arguments.
 # Note the argument order differs from write_canonical_grub_cfg: INITRD comes
 # before APPEND_LIVE here.
 # @FLAVOUR@ is written out as-is; this function does not substitute it.
 write_canonical_isolinux_cfg() {
    cfg="$1"
    kernel="$2"
    initrd="$3"
    append_live="$4"
    cat > "$cfg" <<EOF
 label live-@FLAVOUR@-normal
    menu label ^EASY-BEE
    menu default
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-toram
    menu label EASY-BEE (^load to RAM)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} toram nomodeset bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-gsp-off
    menu label EASY-BEE (^NVIDIA GSP=off)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms
    menu label EASY-BEE (^KMS, no nomodeset)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} bee.nvidia.mode=normal net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-kms-gsp-off
    menu label EASY-BEE (KMS, ^GSP=off)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} bee.nvidia.mode=gsp-off net.ifnames=0 biosdevname=0 mitigations=off transparent_hugepage=always numa_balancing=disable pcie_aspm=off intel_idle.max_cstate=1 processor.max_cstate=1 nowatchdog nosoftlockup
 label live-@FLAVOUR@-failsafe
    menu label EASY-BEE (^fail-safe)
    linux ${kernel}
    initrd ${initrd}
    append ${append_live} nomodeset bee.nvidia.mode=gsp-off noapic noapm nodma nomce nolapic nosmp vga=normal net.ifnames=0 biosdevname=0
 label memtest
    menu label ^Memory Test (memtest86+)
    linux /boot/memtest86+x64.bin
 EOF
 }
 # enforce_live_build_bootloader_assets LB_DIR
 # Rewrites the bootloader configs under LB_DIR/binary with the canonical
 # EASY-BEE menus. Each config is handled only if the file exists; when the
 # live entry cannot be extracted from it, a warning goes to stderr and the
 # file is left untouched.
 enforce_live_build_bootloader_assets() {
    lb_dir="$1"
    grub_cfg="$lb_dir/binary/boot/grub/grub.cfg"
    grub_dir="$lb_dir/binary/boot/grub"
    isolinux_cfg="$lb_dir/binary/isolinux/live.cfg"
    if [ -f "$grub_cfg" ]; then
        # extract_live_grub_entry fills grub_kernel/grub_append/grub_initrd.
        if extract_live_grub_entry "$grub_cfg"; then
            # Refresh config.cfg, theme.cfg and the live-theme directory from
            # the builder's grub-efi assets before rewriting grub.cfg.
            mkdir -p "$grub_dir/live-theme"
            cp "${BUILDER_DIR}/config/bootloaders/grub-efi/config.cfg" "$grub_dir/config.cfg"
            cp "${BUILDER_DIR}/config/bootloaders/grub-efi/theme.cfg" "$grub_dir/theme.cfg"
            cp -R "${BUILDER_DIR}/config/bootloaders/grub-efi/live-theme/." "$grub_dir/live-theme/"
            write_canonical_grub_cfg "$grub_cfg" "$grub_kernel" "$grub_append" "$grub_initrd"
            echo "bootloader sync: rewrote binary/boot/grub/grub.cfg with canonical EASY-BEE menu"
        else
            echo "bootloader sync: WARNING: could not extract live entry from $grub_cfg" >&2
        fi
    fi
    if [ -f "$isolinux_cfg" ]; then
        # extract_live_isolinux_entry fills isolinux_kernel,
        # isolinux_initrd_path and isolinux_append.
        if extract_live_isolinux_entry "$isolinux_cfg"; then
            write_canonical_isolinux_cfg "$isolinux_cfg" "$isolinux_kernel" "$isolinux_initrd_path" "$isolinux_append"
            echo "bootloader sync: rewrote binary/isolinux/live.cfg with canonical EASY-BEE menu"
        else
            echo "bootloader sync: WARNING: could not extract live entry from $isolinux_cfg" >&2
        fi
    fi
 }
 copy_memtest_from_deb() {
    deb="$1"
    dst_boot="$2"
@@ -1229,6 +1409,11 @@ run_step_sh "live-build clean" "80-lb-clean" "lb clean --all 2>&1 | tail -3"
 run_step_sh "live-build config" "81-lb-config" "lb config 2>&1 | tail -5"
 dump_memtest_debug "pre-build" "${LB_DIR}"
 run_step_sh "live-build build" "90-lb-build" "lb build 2>&1"
 echo "=== enforcing canonical bootloader assets ==="
 enforce_live_build_bootloader_assets "${LB_DIR}"
 run_step_sh "rebuild live-build checksums after bootloader sync" "91b-lb-checksums" "lb binary_checksums 2>&1"
 run_step_sh "rebuild ISO after bootloader sync" "91c-lb-binary-iso" "rm -f '${LB_DIR}/live-image-amd64.hybrid.iso' && lb binary_iso 2>&1"
 run_step_sh "rebuild zsync after bootloader sync" "91d-lb-zsync" "lb binary_zsync 2>&1"
 # --- persist deb package cache back to shared location ---
 # This allows the second variant to reuse all downloaded packages.
--- a/iso/builder/config/bootloaders/grub-efi/theme.cfg
+++ b/iso/builder/config/bootloaders/grub-efi/theme.cfg
@@ -1,7 +1,7 @@
 set color_normal=light-gray/black
 set color_highlight=yellow/black
-if [ -e /boot/grub/splash.png ]; then
+if [ -e /boot/grub/live-theme/theme.txt ]; then
    set theme=/boot/grub/live-theme/theme.txt
 else
    set menu_color_normal=yellow/black
--- a/iso/overlay/etc/systemd/system/bee-web.service
+++ b/iso/overlay/etc/systemd/system/bee-web.service
@@ -10,7 +10,8 @@ RestartSec=3
 StandardOutput=journal
 StandardError=journal
 LimitMEMLOCK=infinity
-MemoryMax=3G
+# No MemoryMax: bee-web spawns GPU test subprocesses (dcgmproftester etc.)
 # that legitimately use several GB; a cgroup limit kills them via OOM.
 # Keep the web server responsive during GPU/CPU stress (children inherit nice+10
 # via Setpriority in runCmdJob, but the bee-web parent stays at 0).
 Nice=0
--- a/scripts/deploy.sh
+++ b/scripts/deploy.sh
@@ -0,0 +1,64 @@
 #!/usr/bin/env bash
 # Build the bee binary for linux/amd64, copy it to a remote host over SSH,
 # install it as ${REMOTE_BIN} and restart the services listed in ${SERVICES}.
 #
 # Usage: scripts/deploy.sh [HOST]
 #   HOST  address of the target machine; prompted for when omitted.
 #
 # Exits non-zero if the host is empty, the build fails, the copy fails, or
 # any remote command (including the final `systemctl status`) fails.
 set -euo pipefail
 REMOTE_USER="bee"
 REMOTE_BIN="/usr/local/bin/bee"
 LOCAL_BIN="audit/bee"
 SERVICES="bee-audit bee-web"
 # The paths used below (audit/, ${LOCAL_BIN}) are relative to the repository
 # root, which is the parent of the directory holding this script. Switch
 # there so the script works no matter where it is invoked from.
 cd "$(dirname "${BASH_SOURCE[0]}")/.."
 # --- IP ---
 if [[ $# -ge 1 ]]; then
     HOST="$1"
 else
     read -rp "IP адрес хоста: " HOST
 fi
 [[ -z "$HOST" ]] && { echo "Ошибка: IP не указан" >&2; exit 1; }
 # --- SSH options ---
 # NOTE(review): host key checking is disabled here; presumably the targets
 # regenerate their host keys on every boot. Confirm this is acceptable.
 SSH_OPTS=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)
 # Probe with BatchMode to find out whether key-based login already works;
 # only fall back to a password when it does not.
 SSH_PASS=""
 if ! ssh "${SSH_OPTS[@]}" -o BatchMode=yes "${REMOTE_USER}@${HOST}" true 2>/dev/null; then
     if command -v sshpass &>/dev/null; then
         read -rsp "Пароль для ${REMOTE_USER}@${HOST}: " SSH_PASS
         echo
         # Hand the password to sshpass through the environment (-e reads
         # $SSHPASS) instead of -p, which would place it on the command line
         # where it is visible in the local process list.
         export SSHPASS="$SSH_PASS"
         SSH_CMD=(sshpass -e ssh "${SSH_OPTS[@]}")
         SCP_CMD=(sshpass -e scp "${SSH_OPTS[@]}")
     else
         echo "sshpass не установлен. Введите пароль вручную при запросе (или установите SSH-ключ)."
         SSH_CMD=(ssh "${SSH_OPTS[@]}")
         SCP_CMD=(scp "${SSH_OPTS[@]}")
     fi
 else
     SSH_CMD=(ssh "${SSH_OPTS[@]}")
     SCP_CMD=(scp "${SSH_OPTS[@]}")
 fi
 REMOTE="${REMOTE_USER}@${HOST}"
 # --- Build ---
 echo "==> Сборка бинарника..."
 # Build in a subshell so the `cd audit` does not leak into the rest of the
 # script; the resulting binary lands at ${LOCAL_BIN}.
 (
     cd audit
     # Fall back to "dev" when the version helper is missing or fails.
     VERSION=$(sh ./scripts/resolve-version.sh 2>/dev/null || echo "dev")
     CGO_ENABLED=0 GOOS=linux GOARCH=amd64 \
         go build -ldflags "-X main.Version=${VERSION}" -o bee ./cmd/bee
 )
 echo "    OK: $(ls -lh "${LOCAL_BIN}" | awk '{print $5, $9}')"
 # --- Deploy ---
 echo "==> Копирование на ${REMOTE}..."
 "${SCP_CMD[@]}" "${LOCAL_BIN}" "${REMOTE}:/tmp/bee-new"
 echo "==> Замена бинарника и перезапуск сервисов..."
 # The heredoc delimiter is unquoted on purpose: ${REMOTE_BIN} and ${SERVICES}
 # are expanded locally before the script is sent to the remote bash.
 "${SSH_CMD[@]}" "$REMOTE" bash -s <<EOF
 set -e
 sudo mv /tmp/bee-new ${REMOTE_BIN}
 sudo chmod +x ${REMOTE_BIN}
 sudo systemctl restart ${SERVICES}
 sleep 2
 systemctl status ${SERVICES} --no-pager -l
 EOF
 echo "==> Готово."
Author	SHA1	Message	Date
Michael Chus	c69bf07b27	Commit remaining workspace changes	2026-04-20 07:02:31 +03:00
Michael Chus	b3cf8e3893	Globalize autotuned system power source	2026-04-20 07:02:12 +03:00
Michael Chus	17118298bd	audit: switch power benchmark load to dcgmproftester	2026-04-20 06:57:14 +03:00
Michael Chus	65bcc9ce81	refactor(webui): split pages into task modules	2026-04-20 06:56:52 +03:00
Michael Chus	0cdfbc5875	fix(iso): restore boot UX and boot logs	2026-04-19 23:08:09 +03:00
Michael Chus	cf9b54b600	Use last ramp-step SDR snapshot for PSU loaded power; add deploy script - benchmark.go: retain sdrLastStep from final ramp step instead of re-sampling after test when GPUs are already idle - scripts/deploy.sh: build+deploy bee binary to remote host over SSH Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 21:26:44 +03:00
Michael Chus	0bfb3fe954	Use PSU SDR sum for system power chart when available DCMI reports only the managed power domain (~CPU+MB), missing GPU draw. PSU AC input sensors cover full wall power. When samplePSUPower returns data, sum the slots for PowerW; fall back to DCMI otherwise. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 19:10:01 +03:00
Michael Chus	3053cb0710	Fix PSU slot regex: match MSI underscore format PSU1_POWER_IN \b does not fire between a digit and '_' because '_' is \w in RE2. The pattern \bpsu?\s*([0-9]+)\b never matched PSU1_POWER_IN style sensors, so parsePSUSDR (and PSUSlotsFromSDR / samplePSUPower) returned empty results for MSI servers — causing all power graphs to fall back to DCMI which reports ~half actual draw. Added an explicit underscore-terminated pattern first in the list and tests covering the MSI format. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 19:03:02 +03:00
Michael Chus	2038489961	Remove MemoryMax=3G from bee-web.service to fix OOM kill during GPU tests dcgmproftester and other GPU test subprocesses run inside the bee-web cgroup and exceed 3G with 8 GPUs. OOM killer terminates the whole service. No memory cap is appropriate on a LiveCD where GPU tests legitimately use several GB. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 18:52:41 +03:00
Michael Chus	e35484013e	Use SDR PSU AC input for single-card calibration server power Same fix as ramp steps: take sdrSingle snapshot after calibration and prefer PSUInW over DCMI for singleIPMILoadedW. DCMI kept as fallback. Log message indicates source. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 18:44:13 +03:00
Michael Chus	2cdf034bb0	Use SDR PSU AC input for per-step server power in power ramp When sdrStep.PSUInW is available, prefer it over DCMI for ramp.ServerLoadedW and ServerDeltaW. DCMI on this platform (MSI 4-PSU) reports ~half actual draw; SDR sums all PSU_POWER_IN sensors correctly. Delta is now SDR-to-SDR (sdrStep.PSUInW - sdrIdle.PSUInW) for consistency. DCMI path kept as fallback when SDR has no PSU data. Log message now indicates the source (SDR PSU AC input vs DCMI). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 18:43:36 +03:00
Michael Chus	b89580c24d	Fix PSU power chart: use name-based SDR matching instead of entity ID MSI servers place PSU_POWER_IN/OUT sensors on entity 3.0, not 10.N (the IPMI "Power Supply" entity). The old parser filtered by entity ID and found nothing, so the dashboard fell back to DCMI which reports roughly half the actual draw. Now delegates to collector.PSUSlotsFromSDR — the same name-based matching already used in the Power Fit benchmark. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 18:39:21 +03:00
Michael Chus	df1385d3d6	Fix dcgmproftester parallel mode: use staggered script for all multi-GPU runs A single dcgmproftester process without -i only loads GPU 0 regardless of CUDA_VISIBLE_DEVICES. Now always routes multi-GPU runs through bee-dcgmproftester-staggered (--stagger-seconds 0 for parallel mode), which spawns one process per GPU so all GPUs are loaded simultaneously. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-19 18:31:34 +03:00