Use MEPHI mirror, disable security repo, fix memtest in ISO build

- Switch all lb mirrors to mirror.mephi.ru/debian/ for faster, more reliable downloads
- Disable the security repo (--security false) — not needed for a LiveCD
- Pin MEMTEST_VERSION=6.10-4 in VERSIONS and export it to the hook environment
- Set BEE_REQUIRE_MEMTEST=1 in build-in-container.sh — a missing memtest is now fatal
- Fix 9100-memtest.hook.binary: add an apt-get download fallback for when lb binary_memtest has already purged the package cache; handle both 5.x (memtest86+x64.bin) and 6.x (memtest86+.bin) BIOS binary naming

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Fix optional step log dir missing after memtest recovery
2026-04-15 09:57:29 +03:00 · 2026-04-15 07:28:36 +03:00 · 2026-04-15 07:16:18 +03:00 · 2026-04-14 23:47:57 +03:00 · 2026-04-14 23:00:15 +03:00 · 2026-04-14 22:39:25 +03:00
34 changed files with 3329 additions and 1002 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .DS_Store
 dist/
 iso/out/
+build-cache/
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -30,7 +30,9 @@ var (
 	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
 	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
 	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
-	DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
+	DefaultBeeBenchBaseDir  = DefaultExportDir + "/bee-bench"
+	DefaultBeeBenchPerfDir  = DefaultBeeBenchBaseDir + "/perf"
+	DefaultBeeBenchPowerDir = DefaultBeeBenchBaseDir + "/power"
 )

 type App struct {
@@ -84,6 +86,7 @@ type installer interface {
 	InstallToDisk(ctx context.Context, device string, logFile string) error
 	IsLiveMediaInRAM() bool
 	LiveBootSource() platform.LiveBootSource
+	LiveMediaRAMState() platform.LiveMediaRAMState
 	RunInstallToRAM(ctx context.Context, logFunc func(string)) error
 }

@@ -108,6 +111,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource {
 	return a.installer.LiveBootSource()
 }

+func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState {
+	return a.installer.LiveMediaRAMState()
+}
+
 func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 	return a.installer.RunInstallToRAM(ctx, logFunc)
 }
@@ -117,6 +124,7 @@ type satRunner interface {
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
+	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
@@ -562,11 +570,18 @@ func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOp

 func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
-		baseDir = DefaultBenchmarkBaseDir
+		baseDir = DefaultBeeBenchPerfDir
 	}
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }

+func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultBeeBenchPowerDir
+	}
+	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
+}
+
 func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -122,6 +122,7 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
@@ -154,6 +155,13 @@ func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts plat
 	return f.runNvidiaFn(baseDir)
 }

+func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
+	if f.runNvidiaPowerBenchFn != nil {
+		return f.runNvidiaPowerBenchFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -2,25 +2,15 @@ package platform

 import (
 	"fmt"
-	"os"
-	"path/filepath"
-	"regexp"
 	"strings"
 	"time"
 )

 func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
-	return renderBenchmarkReportWithCharts(result, nil)
+	return renderBenchmarkReportWithCharts(result)
 }

-type benchmarkReportChart struct {
-	Title   string
-	Content string
-}
-
-var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)
-
-func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
+func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 	var b strings.Builder

 	// ── Header ────────────────────────────────────────────────────────────────
@@ -58,7 +48,7 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		fmt.Fprintf(&b, "**GPU(s):** %s  \n", strings.Join(parts, ", "))
 	}
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
-	fmt.Fprintf(&b, "**App version:** %s  \n", result.BenchmarkVersion)
+	fmt.Fprintf(&b, "**Benchmark version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
 	if result.RampStep > 0 && result.RampTotal > 0 {
 		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
@@ -91,10 +81,28 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		b.WriteString("\n")
 	}

+	// ── Methodology ───────────────────────────────────────────────────────────
+	b.WriteString("## Methodology\n\n")
+	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline -> warmup -> steady-state -> interconnect phases.\n", result.BenchmarkProfile)
+	b.WriteString("- Single-GPU compute score comes from `bee-gpu-burn` on the cuBLASLt path when available.\n")
+	b.WriteString("- Thermal and power limits are inferred from NVIDIA clock-event counters plus sustained telemetry.\n")
+	b.WriteString("- `result.json` is the canonical machine-readable source for the run.\n\n")
+	b.WriteString("**Compute score** is derived from two phases:\n\n")
+	b.WriteString("- **Synthetic** — each precision type (int8, fp8, fp16, fp32, fp64, fp4) runs alone for a dedicated window. ")
+	b.WriteString("Measures peak throughput with the full GPU dedicated to one kernel type. ")
+	b.WriteString("Each result is normalised to fp32-equivalent TOPS using precision weights: ")
+	b.WriteString("fp64 ×2.0 · fp32 ×1.0 · fp16 ×0.5 · int8 ×0.25 · fp8 ×0.25 · fp4 ×0.125.\n")
+	b.WriteString("- **Mixed** — all precision types run simultaneously (combined phase). ")
+	b.WriteString("Reflects real inference workloads where fp8 matrix ops, fp16 attention and fp32 accumulation compete for bandwidth and SM scheduler slots.\n\n")
+	b.WriteString("**Formula:** `Compute = Synthetic × (1 + MixedEfficiency × 0.3)`\n\n")
+	b.WriteString("where `MixedEfficiency = Mixed / Synthetic`. A GPU that sustains 90 % throughput under mixed load ")
+	b.WriteString("receives a +27 % bonus over its synthetic score; one that drops to 60 % receives +18 %.\n\n")
+	b.WriteString("**Composite score** = `Compute × quality_factor` where quality factors in power sustain, thermal sustain, stability, and interconnect.\n\n")
+
 	// ── Scorecard table ───────────────────────────────────────────────────────
 	b.WriteString("## Scorecard\n\n")
-	b.WriteString("| GPU | Status | Composite | Compute | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
-	b.WriteString("|-----|--------|-----------|---------|-------------|---------------|-----------------|-----------|-------------|\n")
+	b.WriteString("| GPU | Status | Composite | Compute | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
+	b.WriteString("|-----|--------|-----------|---------|-----------|-------|------------|-------------|---------------|-----------------|-----------|-------------|\n")
 	for _, gpu := range result.GPUs {
 		name := strings.TrimSpace(gpu.Name)
 		if name == "" {
@@ -108,11 +116,26 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		if gpu.Scores.TOPSPerSMPerGHz > 0 {
 			topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz)
 		}
-		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %.1f | %.1f | %.1f | %s |\n",
+		synthetic := "-"
+		if gpu.Scores.SyntheticScore > 0 {
+			synthetic = fmt.Sprintf("%.2f", gpu.Scores.SyntheticScore)
+		}
+		mixed := "-"
+		if gpu.Scores.MixedScore > 0 {
+			mixed = fmt.Sprintf("%.2f", gpu.Scores.MixedScore)
+		}
+		mixedEff := "-"
+		if gpu.Scores.MixedEfficiency > 0 {
+			mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
+		}
+		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %s | %s | %s | %.1f | %.1f | %.1f | %s |\n",
 			gpu.Index, name,
 			gpu.Status,
 			gpu.Scores.CompositeScore,
 			gpu.Scores.ComputeScore,
+			synthetic,
+			mixed,
+			mixedEff,
 			topsPerSM,
 			gpu.Scores.PowerSustainScore,
 			gpu.Scores.ThermalSustainScore,
@@ -147,6 +170,16 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		if gpu.PowerLimitW > 0 {
 			fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
 		}
+		if gpu.PowerLimitDerated {
+			fmt.Fprintf(&b, "- **Power limit derating:** active after %d targeted_power attempt(s)\n", gpu.PowerCalibrationTries)
+		}
+		if gpu.CalibratedPeakPowerW > 0 {
+			if gpu.CalibratedPeakTempC > 0 {
+				fmt.Fprintf(&b, "- **Power calibration (`dcgmi targeted_power`):** %.0f W p95 at %.1f °C p95\n", gpu.CalibratedPeakPowerW, gpu.CalibratedPeakTempC)
+			} else {
+				fmt.Fprintf(&b, "- **Power calibration (`dcgmi targeted_power`):** %.0f W p95\n", gpu.CalibratedPeakPowerW)
+			}
+		}
 		if gpu.LockedGraphicsClockMHz > 0 {
 			fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
 		}
@@ -162,6 +195,38 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
 		b.WriteString("\n")

+		// Per-precision stability phases.
+		if len(gpu.PrecisionSteady) > 0 {
+			b.WriteString("**Per-precision stability:**\n\n")
+			b.WriteString("| Precision | Status | Clock CV | Power CV | Clock Drift | ECC corr | ECC uncorr |\n|-----------|--------|----------|----------|-------------|----------|------------|\n")
+			for _, p := range gpu.PrecisionSteady {
+				eccCorr := "—"
+				eccUncorr := "—"
+				if !p.ECC.IsZero() {
+					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
+					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
+				}
+				status := p.Status
+				if strings.TrimSpace(status) == "" {
+					status = "OK"
+				}
+				fmt.Fprintf(&b, "| %s | %s | %.1f%% | %.1f%% | %.1f%% | %s | %s |\n",
+					p.Precision, status, p.Steady.ClockCVPct, p.Steady.PowerCVPct, p.Steady.ClockDriftPct,
+					eccCorr, eccUncorr)
+			}
+			b.WriteString("\n")
+		} else {
+			// Legacy: show combined-window variance.
+			fmt.Fprintf(&b, "**Clock/power variance (combined window):** clock CV %.1f%% · power CV %.1f%% · clock drift %.1f%%\n\n",
+				gpu.Steady.ClockCVPct, gpu.Steady.PowerCVPct, gpu.Steady.ClockDriftPct)
+		}
+
+		// ECC summary
+		if !gpu.ECC.IsZero() {
+			fmt.Fprintf(&b, "**ECC errors (total):** corrected=%d uncorrected=%d\n\n",
+				gpu.ECC.Corrected, gpu.ECC.Uncorrected)
+		}
+
 		// Throttle
 		throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec)
 		if throttle != "none" {
@@ -171,12 +236,14 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		// Precision results
 		if len(gpu.PrecisionResults) > 0 {
 			b.WriteString("**Precision results:**\n\n")
-			b.WriteString("| Precision | TOPS | Lanes | Iterations |\n|-----------|------|-------|------------|\n")
+			b.WriteString("| Precision | TOPS (raw) | Weight | TOPS (fp32-eq) | Lanes | Iterations |\n|-----------|------------|--------|----------------|-------|------------|\n")
 			for _, p := range gpu.PrecisionResults {
 				if p.Supported {
-					fmt.Fprintf(&b, "| %s | %.2f | %d | %d |\n", p.Name, p.TeraOpsPerSec, p.Lanes, p.Iterations)
+					weightStr := fmt.Sprintf("×%.3g", p.Weight)
+					fmt.Fprintf(&b, "| %s | %.2f | %s | %.2f | %d | %d |\n",
+						p.Name, p.TeraOpsPerSec, weightStr, p.WeightedTeraOpsPerSec, p.Lanes, p.Iterations)
 				} else {
-					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — |\n", p.Name)
+					fmt.Fprintf(&b, "| %s | — (unsupported) | — | — | — | — |\n", p.Name)
 				}
 			}
 			b.WriteString("\n")
@@ -237,61 +304,41 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benc
 		}
 	}

-	// ── Terminal charts (steady-state only) ───────────────────────────────────
-	if len(charts) > 0 {
-		b.WriteString("## Steady-State Charts\n\n")
-		for _, chart := range charts {
-			content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
-			if content == "" {
-				continue
+	// ── Cooling ───────────────────────────────────────────────────────────────
+	if cooling := result.Cooling; cooling != nil {
+		b.WriteString("## Cooling\n\n")
+		if cooling.Available {
+			b.WriteString("| Metric | Value |\n|--------|-------|\n")
+			fmt.Fprintf(&b, "| Average fan speed | %.0f RPM |\n", cooling.AvgFanRPM)
+			if cooling.FanDutyCycleAvailable {
+				fmt.Fprintf(&b, "| Average fan duty cycle | %.1f%% |\n", cooling.AvgFanDutyCyclePct)
+				fmt.Fprintf(&b, "| P95 fan duty cycle | %.1f%% |\n", cooling.P95FanDutyCyclePct)
+			} else {
+				b.WriteString("| Average fan duty cycle | N/A |\n")
+				b.WriteString("| P95 fan duty cycle | N/A |\n")
 			}
-			fmt.Fprintf(&b, "### %s\n\n```\n%s\n```\n\n", chart.Title, content)
+			b.WriteString("\n")
+		} else {
+			b.WriteString("Cooling telemetry unavailable.\n\n")
+		}
+		for _, note := range cooling.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(cooling.Notes) > 0 {
+			b.WriteString("\n")
 		}
 	}

-	// ── Methodology ───────────────────────────────────────────────────────────
-	b.WriteString("## Methodology\n\n")
-	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile)
-	b.WriteString("- Single-GPU compute score from bee-gpu-burn cuBLASLt when available.\n")
-	b.WriteString("- Thermal and power limitations inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
-	b.WriteString("- `result.json` is the canonical machine-readable source for this benchmark run.\n\n")
-
 	// ── Raw files ─────────────────────────────────────────────────────────────
 	b.WriteString("## Raw Files\n\n")
 	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
-	b.WriteString("- `gpu-*-baseline-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-warmup.log`\n")
-	b.WriteString("- `gpu-*-steady.log`\n")
-	b.WriteString("- `gpu-*-steady-metrics.csv/html/term.txt`\n")
-	b.WriteString("- `gpu-*-cooldown-metrics.csv/html/term.txt`\n")
+	b.WriteString("- `gpu-metrics.csv`\n- `gpu-metrics.html`\n- `gpu-burn.log`\n")
 	if result.Interconnect != nil {
 		b.WriteString("- `nccl-all-reduce.log`\n")
 	}
 	return b.String()
 }

-// loadBenchmarkReportCharts loads only steady-state terminal charts (baseline and
-// cooldown charts are not useful for human review).
-func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
-	var charts []benchmarkReportChart
-	for _, idx := range gpuIndices {
-		path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-steady-metrics-term.txt", idx))
-		raw, err := os.ReadFile(path)
-		if err != nil || len(raw) == 0 {
-			continue
-		}
-		charts = append(charts, benchmarkReportChart{
-			Title:   fmt.Sprintf("GPU %d — Steady State", idx),
-			Content: string(raw),
-		})
-	}
-	return charts
-}
-
-func stripANSIEscapeSequences(raw string) string {
-	return ansiEscapePattern.ReplaceAllString(raw, "")
-}
-
 // formatThrottleLine renders throttle counters as human-readable percentages of
 // the steady-state window.  Only non-zero counters are shown.  When the steady
 // duration is unknown (0), raw seconds are shown instead.
@@ -331,6 +378,7 @@ func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64)
 func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
 	var b strings.Builder
 	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
+	fmt.Fprintf(&b, "benchmark_version=%s\n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
 	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -16,17 +16,17 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 		{
 			name:    "default",
 			profile: "",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 45, SteadySec: 480, NCCLSec: 180, CooldownSec: 0},
 		},
 		{
 			name:    "stability",
 			profile: "stability",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 120, SteadySec: 3600, NCCLSec: 300, CooldownSec: 0},
 		},
 		{
 			name:    "overnight",
 			profile: "overnight",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 180, SteadySec: 27000, NCCLSec: 600, CooldownSec: 0},
 		},
 	}

@@ -41,6 +41,129 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 	}
 }

+func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
+	t.Parallel()
+
+	labels, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, SteadySec: 480},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if len(labels) != 7 || len(phases) != 7 {
+		t.Fatalf("labels=%d phases=%d want 7", len(labels), len(phases))
+	}
+	if basePhaseSec != 60 {
+		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
+	}
+	if mixedPhaseSec != 300 {
+		t.Fatalf("mixedPhaseSec=%d want 300", mixedPhaseSec)
+	}
+	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 {
+		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
+	}
+	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,60,60,300" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanStability(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, SteadySec: 3600},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 300 {
+		t.Fatalf("basePhaseSec=%d want 300", basePhaseSec)
+	}
+	if mixedPhaseSec != 3600 {
+		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,300,300,3600" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, SteadySec: 27000},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 3600 {
+		t.Fatalf("basePhaseSec=%d want 3600", basePhaseSec)
+	}
+	if mixedPhaseSec != 14400 {
+		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,3600,3600,14400" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) {
+	t.Parallel()
+
+	phases := []benchmarkPlannedPhase{
+		{PlanLabel: "fp8", MetricStage: "fp8", DurationSec: 10},
+		{PlanLabel: "fp16", MetricStage: "fp16", DurationSec: 10},
+		{PlanLabel: "mixed", MetricStage: "mixed", DurationSec: 50},
+	}
+	rows := []GPUMetricRow{
+		{ElapsedSec: 5},
+		{ElapsedSec: 15},
+		{ElapsedSec: 25},
+		{ElapsedSec: 65},
+	}
+	got := splitBenchmarkRowsByPlannedPhase(rows, phases)
+	if len(got["fp8"]) != 1 {
+		t.Fatalf("fp8 rows=%d want 1", len(got["fp8"]))
+	}
+	if len(got["fp16"]) != 1 {
+		t.Fatalf("fp16 rows=%d want 1", len(got["fp16"]))
+	}
+	if len(got["mixed"]) != 2 {
+		t.Fatalf("mixed rows=%d want 2", len(got["mixed"]))
+	}
+}
+
+func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) {
+	t.Parallel()
+
+	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64" {
+		t.Fatalf("supported=%v", got)
+	}
+	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64,fp4" {
+		t.Fatalf("supported=%v", got)
+	}
+}
+
+func TestBenchmarkPlannedPhaseStatus(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name       string
+		raw        string
+		wantStatus string
+	}{
+		{name: "ok", raw: "status=OK\n", wantStatus: "OK"},
+		{name: "failed", raw: "phase_error=fp16\n", wantStatus: "FAILED"},
+		{name: "unsupported", raw: "cublasLt_profiles=unsupported\nphase_error=fp4\n", wantStatus: "UNSUPPORTED"},
+	}
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := benchmarkPlannedPhaseStatus([]byte(tc.raw))
+			if got != tc.wantStatus {
+				t.Fatalf("status=%q want %q", got, tc.wantStatus)
+			}
+		})
+	}
+}
+
 func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	t.Parallel()

@@ -65,8 +188,10 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 		"[gpu 0] compute_capability=9.0",
 		"[gpu 0] backend=cublasLt",
 		"[gpu 0] duration_s=10",
+		"[gpu 0] int8_tensor[0]=READY dim=16384x16384x8192 block=128 stream=0",
 		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
 		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
+		"[gpu 0] int8_tensor_iterations=80",
 		"[gpu 0] fp16_tensor_iterations=200",
 		"[gpu 0] fp8_e4m3_iterations=50",
 		"[gpu 0] status=OK",
@@ -79,15 +204,24 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 	if got.ComputeCapability != "9.0" {
 		t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
 	}
-	if len(got.Profiles) != 2 {
-		t.Fatalf("profiles=%d want 2", len(got.Profiles))
+	if len(got.Profiles) != 3 {
+		t.Fatalf("profiles=%d want 3", len(got.Profiles))
 	}
 	if got.Profiles[0].TeraOpsPerSec <= 0 {
 		t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
 	}
+	if got.Profiles[0].Category != "fp16_bf16" {
+		t.Fatalf("profile[0] category=%q want fp16_bf16", got.Profiles[0].Category)
+	}
 	if got.Profiles[1].Category != "fp8" {
 		t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
 	}
+	if got.Profiles[2].Category != "int8" {
+		t.Fatalf("profile[2] category=%q want int8", got.Profiles[2].Category)
+	}
+	if got.Profiles[2].Weight != 0.25 {
+		t.Fatalf("profile[2] weight=%f want 0.25", got.Profiles[2].Weight)
+	}
 }

 func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
@@ -131,6 +265,13 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 				DegradationReasons: []string{"power_capped"},
 			},
 		},
+		Cooling: &BenchmarkCoolingSummary{
+			Available:             true,
+			AvgFanRPM:             9200,
+			FanDutyCycleAvailable: true,
+			AvgFanDutyCyclePct:    47.5,
+			P95FanDutyCyclePct:    62.0,
+		},
 	}

 	report := renderBenchmarkReport(result)
@@ -140,6 +281,9 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 		"1176.00",
 		"fp16_tensor",
 		"700.00",
+		"Cooling",
+		"Average fan duty cycle",
+		"47.5%",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
@@ -147,36 +291,27 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 	}
 }

-func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
+func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) {
 	t.Parallel()

-	report := renderBenchmarkReportWithCharts(NvidiaBenchmarkResult{
+	report := renderBenchmarkReport(NvidiaBenchmarkResult{
 		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
 		OverallStatus:      "OK",
 		SelectedGPUIndices: []int{0},
 		Normalization: BenchmarkNormalization{
 			Status: "full",
 		},
-	}, []benchmarkReportChart{
-		{
-			Title:   "GPU 0 Steady State",
-			Content: "\x1b[31mGPU 0 chart\x1b[0m\n 42┤───",
-		},
 	})

 	for _, needle := range []string{
-		"Steady-State Charts",
-		"GPU 0 Steady State",
-		"GPU 0 chart",
-		"42┤───",
+		"gpu-metrics.csv",
+		"gpu-metrics.html",
+		"gpu-burn.log",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
 		}
 	}
-	if strings.Contains(report, "\x1b[31m") {
-		t.Fatalf("report should not contain ANSI escapes\n%s", report)
-	}
 }

 func TestEnrichGPUInfoWithMaxClocks(t *testing.T) {
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -25,6 +25,17 @@ type BenchmarkCPULoad struct {
 	Note   string `json:"note,omitempty"`
 }

+// BenchmarkCoolingSummary captures fan telemetry averaged across the full
+// benchmark run.
+type BenchmarkCoolingSummary struct {
+	Available             bool     `json:"available"`
+	AvgFanRPM             float64  `json:"avg_fan_rpm,omitempty"`
+	FanDutyCycleAvailable bool     `json:"fan_duty_cycle_available,omitempty"`
+	AvgFanDutyCyclePct    float64  `json:"avg_fan_duty_cycle_pct,omitempty"`
+	P95FanDutyCyclePct    float64  `json:"p95_fan_duty_cycle_pct,omitempty"`
+	Notes                 []string `json:"notes,omitempty"`
+}
+
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
@@ -43,7 +54,6 @@ type NvidiaBenchmarkOptions struct {
 	RampRunID         string // shared identifier across all steps of the same ramp-up run
 }

-
 type NvidiaBenchmarkResult struct {
 	BenchmarkVersion   string                       `json:"benchmark_version"`
 	GeneratedAt        time.Time                    `json:"generated_at"`
@@ -62,6 +72,7 @@ type NvidiaBenchmarkResult struct {
 	Normalization      BenchmarkNormalization       `json:"normalization"`
 	HostConfig         *BenchmarkHostConfig         `json:"host_config,omitempty"`
 	CPULoad            *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
+	Cooling            *BenchmarkCoolingSummary     `json:"cooling,omitempty"`
 	GPUs               []BenchmarkGPUResult         `json:"gpus"`
 	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
 	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
@@ -84,35 +95,45 @@ type BenchmarkNormalizationGPU struct {
 }

 type BenchmarkGPUResult struct {
-	Index                  int                        `json:"index"`
-	UUID                   string                     `json:"uuid,omitempty"`
-	Name                   string                     `json:"name,omitempty"`
-	BusID                  string                     `json:"bus_id,omitempty"`
-	VBIOS                  string                     `json:"vbios,omitempty"`
-	ComputeCapability      string                     `json:"compute_capability,omitempty"`
-	Backend                string                     `json:"backend,omitempty"`
-	Status                 string                     `json:"status"`
-	PowerLimitW            float64                    `json:"power_limit_w,omitempty"`
-	MultiprocessorCount    int                        `json:"multiprocessor_count,omitempty"`
-	DefaultPowerLimitW     float64                    `json:"default_power_limit_w,omitempty"`
+	Index               int     `json:"index"`
+	UUID                string  `json:"uuid,omitempty"`
+	Name                string  `json:"name,omitempty"`
+	BusID               string  `json:"bus_id,omitempty"`
+	VBIOS               string  `json:"vbios,omitempty"`
+	ComputeCapability   string  `json:"compute_capability,omitempty"`
+	Backend             string  `json:"backend,omitempty"`
+	Status              string  `json:"status"`
+	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
+	PowerLimitDerated   bool    `json:"power_limit_derated,omitempty"`
+	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
+	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
 	// CalibratedPeakPowerW is the p95 power measured during a short
 	// dcgmi targeted_power calibration run before the main benchmark.
 	// Used as the reference denominator for PowerSustainScore instead of
 	// the hardware default limit, which bee-gpu-burn cannot reach.
-	CalibratedPeakPowerW   float64                    `json:"calibrated_peak_power_w,omitempty"`
-	MaxGraphicsClockMHz    float64                    `json:"max_graphics_clock_mhz,omitempty"`
-	BaseGraphicsClockMHz   float64                    `json:"base_graphics_clock_mhz,omitempty"`
-	MaxMemoryClockMHz      float64                    `json:"max_memory_clock_mhz,omitempty"`
-	LockedGraphicsClockMHz float64                    `json:"locked_graphics_clock_mhz,omitempty"`
-	LockedMemoryClockMHz   float64                    `json:"locked_memory_clock_mhz,omitempty"`
-	Baseline               BenchmarkTelemetrySummary  `json:"baseline"`
-	Steady                 BenchmarkTelemetrySummary  `json:"steady"`
-	Cooldown               BenchmarkTelemetrySummary  `json:"cooldown"`
-	Throttle               BenchmarkThrottleCounters  `json:"throttle_counters"`
-	PrecisionResults       []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
-	Scores                 BenchmarkScorecard         `json:"scores"`
-	DegradationReasons     []string                   `json:"degradation_reasons,omitempty"`
-	Notes                  []string                   `json:"notes,omitempty"`
+	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
+	CalibratedPeakTempC    float64                         `json:"calibrated_peak_temp_c,omitempty"`
+	PowerCalibrationTries  int                             `json:"power_calibration_tries,omitempty"`
+	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
+	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
+	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
+	LockedGraphicsClockMHz float64                         `json:"locked_graphics_clock_mhz,omitempty"`
+	LockedMemoryClockMHz   float64                         `json:"locked_memory_clock_mhz,omitempty"`
+	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
+	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
+	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
+	PrecisionFailures      []string                        `json:"precision_failures,omitempty"`
+	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
+	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
+	// ECC error delta accumulated over the full benchmark (all phases combined).
+	ECC                BenchmarkECCCounters       `json:"ecc,omitempty"`
+	PrecisionResults   []BenchmarkPrecisionResult `json:"precision_results,omitempty"`
+	Scores             BenchmarkScorecard         `json:"scores"`
+	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
+	Notes              []string                   `json:"notes,omitempty"`
+	// CoolingWarning is non-empty when a thermal throttle event occurred with
+	// a clock drop ≥20% while server fans were not at 100% duty cycle.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
 }

 type BenchmarkTelemetrySummary struct {
@@ -142,6 +163,18 @@ type BenchmarkThrottleCounters struct {
 	HWPowerBrakeSlowdownUS uint64 `json:"hw_power_brake_slowdown_us"`
 }

+// BenchmarkECCCounters holds ECC error counts sampled at a point in time.
+// Corrected = single-bit errors fixed by ECC (DRAM degradation).
+// Uncorrected = double-bit errors that could not be corrected (serious fault).
+// Both are volatile (since last driver reset), not persistent.
+type BenchmarkECCCounters struct {
+	Corrected   uint64 `json:"corrected"`
+	Uncorrected uint64 `json:"uncorrected"`
+}
+
+func (e BenchmarkECCCounters) Total() uint64 { return e.Corrected + e.Uncorrected }            // Total is the sum of corrected and uncorrected errors.
+func (e BenchmarkECCCounters) IsZero() bool  { return e.Corrected == 0 && e.Uncorrected == 0 } // IsZero reports whether both counters are zero.
+
 type BenchmarkPrecisionResult struct {
 	Name          string  `json:"name"`
 	Category      string  `json:"category"`
@@ -152,19 +185,31 @@ type BenchmarkPrecisionResult struct {
 	K             uint64  `json:"k,omitempty"`
 	Iterations    uint64  `json:"iterations,omitempty"`
 	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
-	Notes         string  `json:"notes,omitempty"`
+	// Weight is the fp32-equivalence factor for this precision category.
+	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, int8/fp8 = 0.25, fp4 = 0.125.
+	// WeightedTOPS = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
+	Weight                float64 `json:"weight,omitempty"`
+	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
+	Notes                 string  `json:"notes,omitempty"`
 }

 type BenchmarkScorecard struct {
-	ComputeScore        float64 `json:"compute_score"`
+	ComputeScore float64 `json:"compute_score"`
+	// SyntheticScore is the sum of fp32-equivalent TOPS from per-precision
+	// steady phases (each precision ran alone, full GPU dedicated).
+	SyntheticScore float64 `json:"synthetic_score,omitempty"`
+	// MixedScore is the sum of fp32-equivalent TOPS from the combined phase
+	// (all precisions competing simultaneously — closer to real workloads).
+	MixedScore float64 `json:"mixed_score,omitempty"`
+	// MixedEfficiency = MixedScore / SyntheticScore. Measures how well the GPU
+	// sustains throughput under concurrent mixed-precision load.
+	MixedEfficiency     float64 `json:"mixed_efficiency,omitempty"`
 	PowerSustainScore   float64 `json:"power_sustain_score"`
 	ThermalSustainScore float64 `json:"thermal_sustain_score"`
 	StabilityScore      float64 `json:"stability_score"`
 	InterconnectScore   float64 `json:"interconnect_score"`
 	CompositeScore      float64 `json:"composite_score"`
 	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
-	// Comparable across throttle levels and GPU generations. Low value at normal
-	// clocks indicates silicon degradation.
 	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
 }

@@ -182,6 +227,22 @@ type BenchmarkServerPower struct {
 	Notes           []string `json:"notes,omitempty"`
 }

+// BenchmarkPrecisionSteadyPhase holds per-precision-category telemetry collected
+// during a dedicated steady window that runs one precision category on its own
+// (not "single precision" in the fp32 sense). Because only one kernel
+// type runs at a time the PowerCVPct here is a genuine stability signal.
+type BenchmarkPrecisionSteadyPhase struct {
+	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
+	Status                string                    `json:"status,omitempty"`
+	Steady                BenchmarkTelemetrySummary `json:"steady"`
+	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
+	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
+	// ECC errors accumulated during this precision phase only. Non-zero corrected = stress-induced
+	// DRAM errors for this kernel type. Any uncorrected = serious fault triggered by this precision workload.
+	ECC   BenchmarkECCCounters `json:"ecc,omitempty"` // NOTE: encoding/json's omitempty never omits a struct value, so "ecc" is always written
+	Notes string               `json:"notes,omitempty"`
+}
+
 type BenchmarkInterconnectResult struct {
 	Status             string   `json:"status"`
 	Attempted          bool     `json:"attempted"`
@@ -193,3 +254,45 @@ type BenchmarkInterconnectResult struct {
 	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
 	Notes              []string `json:"notes,omitempty"`
 }
+
+type NvidiaPowerBenchResult struct { // JSON-serialisable result record; the field tags define the wire names.
+	BenchmarkVersion     string                 `json:"benchmark_version"`
+	GeneratedAt          time.Time              `json:"generated_at"`
+	Hostname             string                 `json:"hostname,omitempty"`
+	ServerModel          string                 `json:"server_model,omitempty"`
+	BenchmarkProfile     string                 `json:"benchmark_profile"`
+	SelectedGPUIndices   []int                  `json:"selected_gpu_indices"` // no omitempty: a nil slice is encoded as null
+	RecommendedSlotOrder []int                  `json:"recommended_slot_order,omitempty"`
+	RampSteps            []NvidiaPowerBenchStep `json:"ramp_steps,omitempty"`
+	OverallStatus        string                 `json:"overall_status"`
+	Findings             []string               `json:"findings,omitempty"`
+	GPUs                 []NvidiaPowerBenchGPU  `json:"gpus"` // no omitempty: a nil slice is encoded as null
+}
+
+type NvidiaPowerBenchGPU struct { // element type of NvidiaPowerBenchResult.GPUs
+	Index               int      `json:"index"`
+	Name                string   `json:"name,omitempty"`
+	BusID               string   `json:"bus_id,omitempty"`
+	DefaultPowerLimitW  float64  `json:"default_power_limit_w,omitempty"`
+	AppliedPowerLimitW  float64  `json:"applied_power_limit_w,omitempty"`
+	MaxObservedPowerW   float64  `json:"max_observed_power_w,omitempty"`
+	MaxObservedTempC    float64  `json:"max_observed_temp_c,omitempty"`
+	CalibrationAttempts int      `json:"calibration_attempts,omitempty"`
+	Derated             bool     `json:"derated,omitempty"` // omitted from the JSON when false
+	Status              string   `json:"status"`            // always present in the JSON, even when empty
+	Notes               []string `json:"notes,omitempty"`
+	// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
+}
+
+type NvidiaPowerBenchStep struct { // element type of NvidiaPowerBenchResult.RampSteps
+	StepIndex              int      `json:"step_index"`
+	GPUIndices             []int    `json:"gpu_indices"` // no omitempty: a nil slice is encoded as null
+	TotalObservedPowerW    float64  `json:"total_observed_power_w,omitempty"`
+	AvgObservedPowerW      float64  `json:"avg_observed_power_w,omitempty"`
+	MinPowerRealizationPct float64  `json:"min_power_realization_pct,omitempty"`
+	AvgPowerRealizationPct float64  `json:"avg_power_realization_pct,omitempty"`
+	DeratedGPUCount        int      `json:"derated_gpu_count,omitempty"`
+	Status                 string   `json:"status"` // always present in the JSON, even when empty
+	Notes                  []string `json:"notes,omitempty"`
+}
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,14 +13,20 @@ import (

 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
-	ElapsedSec  float64 `json:"elapsed_sec"`
-	GPUIndex    int     `json:"index"`
-	TempC       float64 `json:"temp_c"`
-	UsagePct    float64 `json:"usage_pct"`
-	MemUsagePct float64 `json:"mem_usage_pct"`
-	PowerW      float64 `json:"power_w"`
-	ClockMHz    float64 `json:"clock_mhz"`
-	MemClockMHz float64 `json:"mem_clock_mhz"`
+	Stage                 string  `json:"stage,omitempty"`           // stage label; a blank label is charted as "run"
+	StageStartSec         float64 `json:"stage_start_sec,omitempty"` // stage window start; the window is ignored when StageEndSec <= StageStartSec
+	StageEndSec           float64 `json:"stage_end_sec,omitempty"`   // stage window end; the chart falls back to ElapsedSec when the window is not valid
+	ElapsedSec            float64 `json:"elapsed_sec"`
+	GPUIndex              int     `json:"index"`
+	TempC                 float64 `json:"temp_c"`
+	UsagePct              float64 `json:"usage_pct"`
+	MemUsagePct           float64 `json:"mem_usage_pct"`
+	PowerW                float64 `json:"power_w"`
+	ClockMHz              float64 `json:"clock_mhz"`
+	MemClockMHz           float64 `json:"mem_clock_mhz"`
+	FanAvgRPM             float64 `json:"fan_avg_rpm,omitempty"`
+	FanDutyCyclePct       float64 `json:"fan_duty_cycle_pct,omitempty"`
+	FanDutyCycleAvailable bool    `json:"fan_duty_cycle_available,omitempty"` // written to the CSV export as 1 (true) or 0 (false)
 }

 // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
@@ -141,14 +147,24 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
-	b.WriteString("elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available\n")
 	for _, r := range rows {
-		fmt.Fprintf(&b, "%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
-			r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
+		dutyAvail := 0 // CSV has no boolean type; availability is emitted as 0/1
+		if r.FanDutyCycleAvailable {
+			dutyAvail = 1
+		}
+		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f,%.0f,%.1f,%d\n", // stage is always quoted with inner quotes doubled (RFC 4180), not Go-escaped
+			`"`+strings.ReplaceAll(strings.TrimSpace(r.Stage), `"`, `""`)+`"`, r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz, r.FanAvgRPM, r.FanDutyCyclePct, dutyAvail)
 	}
 	return os.WriteFile(path, b.Bytes(), 0644)
 }

+type gpuMetricStageSpan struct { // one contiguous stage interval drawn as a chart background band
+	Name  string  // stage label shown in the legend and on the band
+	Start float64 // band start, taken from StageStartSec or ElapsedSec of the rows
+	End   float64 // band end, taken from StageEndSec or ElapsedSec of the rows
+}
+
 // WriteGPUMetricsHTML writes a standalone HTML file with one SVG chart per GPU.
 func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 	// Group by GPU index preserving order.
@@ -163,9 +179,25 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
 	}

+	stageSpans := buildGPUMetricStageSpans(rows)
+	stageColorByName := make(map[string]string, len(stageSpans))
+	for i, span := range stageSpans {
+		stageColorByName[span.Name] = gpuMetricStagePalette[i%len(gpuMetricStagePalette)]
+	}
+
+	var legend strings.Builder
+	if len(stageSpans) > 0 {
+		legend.WriteString(`<div class="stage-legend">`)
+		for _, span := range stageSpans {
+			fmt.Fprintf(&legend, `<span class="stage-chip"><span class="stage-swatch" style="background:%s"></span>%s</span>`,
+				stageColorByName[span.Name], gpuHTMLEscape(span.Name))
+		}
+		legend.WriteString(`</div>`)
+	}
+
 	var svgs strings.Builder
 	for _, gpuIdx := range order {
-		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx))
+		svgs.WriteString(drawGPUChartSVG(gpuMap[gpuIdx], gpuIdx, stageSpans, stageColorByName))
 		svgs.WriteString("\n")
 	}

@@ -175,21 +207,39 @@ func WriteGPUMetricsHTML(path string, rows []GPUMetricRow) error {
 <meta charset="utf-8">
 <title>GPU Stress Test Metrics</title>
 <style>
-body { font-family: sans-serif; background: #f0f0f0; margin: 0; padding: 20px; }
-h1 { text-align: center; color: #333; margin: 0 0 8px; }
-p  { text-align: center; color: #888; font-size: 13px; margin: 0 0 24px; }
+:root{--bg:#fff;--surface:#fff;--surface-2:#f9fafb;--border:rgba(34,36,38,.15);--border-lite:rgba(34,36,38,.1);--ink:rgba(0,0,0,.87);--muted:rgba(0,0,0,.6)}
+*{box-sizing:border-box}
+body{font:14px/1.5 Lato,"Helvetica Neue",Arial,Helvetica,sans-serif;background:var(--bg);color:var(--ink);margin:0}
+.page{padding:24px}
+.card{background:var(--surface);border:1px solid var(--border);border-radius:4px;box-shadow:0 1px 2px rgba(34,36,38,.15);overflow:hidden}
+.card-head{padding:11px 16px;background:var(--surface-2);border-bottom:1px solid var(--border);font-weight:700;font-size:13px}
+.card-body{padding:16px}
+h1{font-size:22px;margin:0 0 6px}
+p{color:var(--muted);font-size:13px;margin:0 0 16px}
+.stage-legend{display:flex;flex-wrap:wrap;gap:10px;margin:0 0 16px}
+.stage-chip{display:inline-flex;align-items:center;gap:8px;padding:4px 10px;border-radius:999px;background:var(--surface-2);border:1px solid var(--border-lite);font-size:12px}
+.stage-swatch{display:inline-block;width:12px;height:12px;border-radius:999px}
+.chart-block{margin-top:16px}
 </style>
 </head><body>
+<div class="page">
+<div class="card">
+<div class="card-head">GPU Stress Test Metrics</div>
+<div class="card-body">
 <h1>GPU Stress Test Metrics</h1>
 <p>Generated %s</p>
 %s
-</body></html>`, ts, svgs.String())
+<div class="chart-block">%s</div>
+</div>
+</div>
+</div>
+</body></html>`, ts, legend.String(), svgs.String())

 	return os.WriteFile(path, []byte(html), 0644)
 }

 // drawGPUChartSVG generates a self-contained SVG chart for one GPU.
-func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
+func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int, stageSpans []gpuMetricStageSpan, stageColorByName map[string]string) string {
 	// Layout
 	const W, H = 960, 520
 	const plotX1 = 120 // usage axis / chart left border
@@ -284,6 +334,23 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	}
 	b.WriteString("</g>\n")

+	// Stage backgrounds
+	for _, span := range stageSpans {
+		x1 := xv(span.Start)
+		x2 := xv(span.End)
+		if x2 < x1 {
+			x1, x2 = x2, x1
+		}
+		if x2-x1 < 1 {
+			x2 = x1 + 1
+		}
+		color := stageColorByName[span.Name]
+		fmt.Fprintf(&b, `<rect x="%.1f" y="%d" width="%.1f" height="%d" fill="%s" fill-opacity="0.18"/>`+"\n",
+			x1, plotY1, x2-x1, PH, color)
+		fmt.Fprintf(&b, `<text x="%.1f" y="%d" font-family="sans-serif" font-size="10" fill="#444" text-anchor="middle">%s</text>`+"\n",
+			x1+(x2-x1)/2, plotY1+12, gpuHTMLEscape(span.Name))
+	}
+
 	// Chart border
 	fmt.Fprintf(&b, `<rect x="%d" y="%d" width="%d" height="%d"`+
 		` fill="none" stroke="#333" stroke-width="1"/>`+"\n",
@@ -382,221 +449,6 @@ func drawGPUChartSVG(rows []GPUMetricRow, gpuIdx int) string {
 	return b.String()
 }

-const (
-	ansiAmber  = "\033[38;5;214m"
-	ansiReset  = "\033[0m"
-)
-
-const (
-	termChartWidth  = 70
-	termChartHeight = 12
-)
-
-// RenderGPUTerminalChart returns ANSI line charts (asciigraph-style) per GPU.
-// Used in SAT stress-test logs.
-func RenderGPUTerminalChart(rows []GPUMetricRow) string {
-	seen := make(map[int]bool)
-	var order []int
-	gpuMap := make(map[int][]GPUMetricRow)
-	for _, r := range rows {
-		if !seen[r.GPUIndex] {
-			seen[r.GPUIndex] = true
-			order = append(order, r.GPUIndex)
-		}
-		gpuMap[r.GPUIndex] = append(gpuMap[r.GPUIndex], r)
-	}
-
-	type seriesDef struct {
-		caption string
-		color   string
-		fn      func(GPUMetricRow) float64
-	}
-	defs := []seriesDef{
-		{"Temperature (°C)", ansiAmber, func(r GPUMetricRow) float64 { return r.TempC }},
-		{"GPU Usage (%)", ansiAmber, func(r GPUMetricRow) float64 { return r.UsagePct }},
-		{"Power (W)", ansiAmber, func(r GPUMetricRow) float64 { return r.PowerW }},
-		{"Clock (MHz)", ansiAmber, func(r GPUMetricRow) float64 { return r.ClockMHz }},
-	}
-
-	var b strings.Builder
-	for _, gpuIdx := range order {
-		gr := gpuMap[gpuIdx]
-		if len(gr) == 0 {
-			continue
-		}
-		tMax := gr[len(gr)-1].ElapsedSec - gr[0].ElapsedSec
-		fmt.Fprintf(&b, "GPU %d — Stress Test Metrics  (%.0f seconds)\n\n", gpuIdx, tMax)
-		for _, d := range defs {
-			b.WriteString(renderLineChart(extractGPUField(gr, d.fn), d.color, d.caption,
-				termChartHeight, termChartWidth))
-			b.WriteRune('\n')
-		}
-	}
-
-	return strings.TrimRight(b.String(), "\n")
-}
-
-// renderLineChart draws a single time-series line chart using box-drawing characters.
-// Produces output in the style of asciigraph: ╭─╮ │ ╰─╯ with a Y axis and caption.
-func renderLineChart(vals []float64, color, caption string, height, width int) string {
-	if len(vals) == 0 {
-		return caption + "\n"
-	}
-
-	mn, mx := gpuMinMax(vals)
-	if mn == mx {
-		mx = mn + 1
-	}
-
-	// Use the smaller of width or len(vals) to avoid stretching sparse data.
-	w := width
-	if len(vals) < w {
-		w = len(vals)
-	}
-	data := gpuDownsample(vals, w)
-
-	// row[i] = display row index: 0 = top = max value, height = bottom = min value.
-	row := make([]int, w)
-	for i, v := range data {
-		r := int(math.Round((mx - v) / (mx - mn) * float64(height)))
-		if r < 0 {
-			r = 0
-		}
-		if r > height {
-			r = height
-		}
-		row[i] = r
-	}
-
-	// Fill the character grid.
-	grid := make([][]rune, height+1)
-	for i := range grid {
-		grid[i] = make([]rune, w)
-		for j := range grid[i] {
-			grid[i][j] = ' '
-		}
-	}
-	for x := 0; x < w; x++ {
-		r := row[x]
-		if x == 0 {
-			grid[r][0] = '─'
-			continue
-		}
-		p := row[x-1]
-		switch {
-		case r == p:
-			grid[r][x] = '─'
-		case r < p: // value went up (row index decreased toward top)
-			grid[r][x] = '╭'
-			grid[p][x] = '╯'
-			for y := r + 1; y < p; y++ {
-				grid[y][x] = '│'
-			}
-		default: // r > p, value went down
-			grid[p][x] = '╮'
-			grid[r][x] = '╰'
-			for y := p + 1; y < r; y++ {
-				grid[y][x] = '│'
-			}
-		}
-	}
-
-	// Y axis tick labels.
-	ticks := gpuNiceTicks(mn, mx, height/2)
-	tickAtRow := make(map[int]string)
-	labelWidth := 4
-	for _, t := range ticks {
-		r := int(math.Round((mx - t) / (mx - mn) * float64(height)))
-		if r < 0 || r > height {
-			continue
-		}
-		s := gpuFormatTick(t)
-		tickAtRow[r] = s
-		if len(s) > labelWidth {
-			labelWidth = len(s)
-		}
-	}
-
-	var b strings.Builder
-	for r := 0; r <= height; r++ {
-		label := tickAtRow[r]
-		fmt.Fprintf(&b, "%*s", labelWidth, label)
-		switch {
-		case label != "":
-			b.WriteRune('┤')
-		case r == height:
-			b.WriteRune('┼')
-		default:
-			b.WriteRune('│')
-		}
-		b.WriteString(color)
-		b.WriteString(string(grid[r]))
-		b.WriteString(ansiReset)
-		b.WriteRune('\n')
-	}
-
-	// Bottom axis.
-	b.WriteString(strings.Repeat(" ", labelWidth))
-	b.WriteRune('└')
-	b.WriteString(strings.Repeat("─", w))
-	b.WriteRune('\n')
-
-	// Caption centered under the chart.
-	if caption != "" {
-		total := labelWidth + 1 + w
-		if pad := (total - len(caption)) / 2; pad > 0 {
-			b.WriteString(strings.Repeat(" ", pad))
-		}
-		b.WriteString(caption)
-		b.WriteRune('\n')
-	}
-
-	return b.String()
-}
-
-func extractGPUField(rows []GPUMetricRow, fn func(GPUMetricRow) float64) []float64 {
-	v := make([]float64, len(rows))
-	for i, r := range rows {
-		v[i] = fn(r)
-	}
-	return v
-}
-
-// gpuDownsample averages vals into w buckets (or nearest-neighbor upsamples if len(vals) < w).
-func gpuDownsample(vals []float64, w int) []float64 {
-	n := len(vals)
-	if n == 0 {
-		return make([]float64, w)
-	}
-	result := make([]float64, w)
-	if n >= w {
-		counts := make([]int, w)
-		for i, v := range vals {
-			bucket := i * w / n
-			if bucket >= w {
-				bucket = w - 1
-			}
-			result[bucket] += v
-			counts[bucket]++
-		}
-		for i := range result {
-			if counts[i] > 0 {
-				result[i] /= float64(counts[i])
-			}
-		}
-	} else {
-		// Nearest-neighbour upsample.
-		for i := range result {
-			src := i * (n - 1) / (w - 1)
-			if src >= n {
-				src = n - 1
-			}
-			result[i] = vals[src]
-		}
-	}
-	return result
-}
-
 func gpuMinMax(vals []float64) (float64, float64) {
 	if len(vals) == 0 {
 		return 0, 1
@@ -641,3 +493,57 @@ func gpuFormatTick(v float64) string {
 	}
 	return strconv.FormatFloat(v, 'f', 1, 64)
 }
+
+var gpuMetricStagePalette = []string{ // stage band colors; picked by span index modulo the palette length
+	"#d95c5c",
+	"#2185d0",
+	"#21ba45",
+	"#f2c037",
+	"#6435c9",
+	"#00b5ad",
+	"#a5673f",
+}
+
+// buildGPUMetricStageSpans merges consecutive rows sharing a stage name ("run" when blank) into spans that are never zero-width.
+func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
+	var spans []gpuMetricStageSpan
+	for _, row := range rows {
+		label := strings.TrimSpace(row.Stage)
+		if label == "" {
+			label = "run"
+		}
+		from, to := row.StageStartSec, row.StageEndSec
+		if to <= from { // no usable stage window on this row: treat the sample time as a point
+			from, to = row.ElapsedSec, row.ElapsedSec
+		}
+		if n := len(spans); n > 0 && spans[n-1].Name == label {
+			cur := &spans[n-1] // same stage as the previous row: widen the open span
+			if from < cur.Start {
+				cur.Start = from
+			}
+			if to > cur.End {
+				cur.End = to
+			}
+			continue
+		}
+		spans = append(spans, gpuMetricStageSpan{Name: label, Start: from, End: to})
+	}
+	for i := range spans {
+		if spans[i].End <= spans[i].Start {
+			spans[i].End = spans[i].Start + 1
+		}
+	}
+	return spans
+}
+
+// gpuHTMLReplacer rewrites the five characters that are special in HTML/SVG
+// text and attribute values into entities. It is built once at package level
+// and substitutes in a single pass over the input, so the "&" introduced by
+// one replacement is never escaped a second time.
+var gpuHTMLReplacer = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;", "'", "&#39;")
+
+// gpuHTMLEscape returns s with &, <, >, " and ' replaced by HTML entities;
+// all other characters are passed through unchanged.
+func gpuHTMLEscape(s string) string {
+	return gpuHTMLReplacer.Replace(s)
+}
--- a/audit/internal/platform/gpu_metrics_test.go
+++ b/audit/internal/platform/gpu_metrics_test.go
@@ -0,0 +1,65 @@
+package platform
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestWriteGPUMetricsCSVIncludesStageColumn(t *testing.T) { // writes one row and checks the exact header and data line of the CSV
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.csv")
+	rows := []GPUMetricRow{
+		{Stage: "warmup", ElapsedSec: 1, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 80, PowerW: 420, ClockMHz: 1800, MemClockMHz: 1200, FanAvgRPM: 9000, FanDutyCyclePct: 85.5, FanDutyCycleAvailable: true},
+	}
+	if err := WriteGPUMetricsCSV(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsCSV: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{ // full header and full row, so the fan columns and their order are pinned as well
+		"stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available\n",
+		`"warmup",1.0,0,71.0,99.0,80.0,420.0,1800,1200,9000,85.5,1` + "\n",
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("csv missing %q\n%s", needle, text)
+		}
+	}
+}
+
+func TestWriteGPUMetricsHTMLShowsStageLegendAndLabels(t *testing.T) { // renders a two-stage run and looks for the legend class and stage names
+	t.Parallel()
+
+	dir := t.TempDir()
+	path := filepath.Join(dir, "gpu-metrics.html")
+	rows := []GPUMetricRow{ // two consecutive stages on a single GPU
+		{Stage: "baseline", ElapsedSec: 1, GPUIndex: 0, TempC: 50, UsagePct: 10, MemUsagePct: 5, PowerW: 100, ClockMHz: 500, MemClockMHz: 400},
+		{Stage: "baseline", ElapsedSec: 2, GPUIndex: 0, TempC: 51, UsagePct: 11, MemUsagePct: 5, PowerW: 101, ClockMHz: 510, MemClockMHz: 400},
+		{Stage: "steady-fp16", ElapsedSec: 3, GPUIndex: 0, TempC: 70, UsagePct: 98, MemUsagePct: 75, PowerW: 390, ClockMHz: 1700, MemClockMHz: 1100},
+		{Stage: "steady-fp16", ElapsedSec: 4, GPUIndex: 0, TempC: 71, UsagePct: 99, MemUsagePct: 76, PowerW: 395, ClockMHz: 1710, MemClockMHz: 1110},
+	}
+	if err := WriteGPUMetricsHTML(path, rows); err != nil {
+		t.Fatalf("WriteGPUMetricsHTML: %v", err)
+	}
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("ReadFile: %v", err)
+	}
+	text := string(raw)
+	for _, needle := range []string{ // NOTE(review): a stage name matches whether it comes from the legend or a chart label
+		"stage-legend",
+		"baseline",
+		"steady-fp16",
+		"GPU Stress Test Metrics",
+	} {
+		if !strings.Contains(text, needle) {
+			t.Fatalf("html missing %q\n%s", needle, text)
+		}
+	}
+}
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -11,20 +11,10 @@ import (
 	"strings"
 )

+const installToRAMDir = "/dev/shm/bee-live" // directory that receives the RAM copy of the live squashfs images
+
 func (s *System) IsLiveMediaInRAM() bool {
-	fsType := mountFSType("/run/live/medium")
-	if fsType == "" {
-		// No medium mount at all — fall back to toram kernel parameter.
-		return toramActive()
-	}
-	if strings.EqualFold(fsType, "tmpfs") {
-		return true
-	}
-	// When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind
-	// mount of /run/live/medium fails (common for CD-ROM boots), the medium
-	// fstype still shows the CD-ROM type. Check whether the RAM copy exists.
-	files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs")
-	return len(files) > 0
+	return s.LiveMediaRAMState().InRAM
 }

 func (s *System) LiveBootSource() LiveBootSource {
@@ -56,14 +46,95 @@ func (s *System) LiveBootSource() LiveBootSource {
 	return status
 }

-func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+func (s *System) LiveMediaRAMState() LiveMediaRAMState { // gathers the live facts and hands them to evaluateLiveMediaRAMState
+	return evaluateLiveMediaRAMState(
+		s.LiveBootSource(),
+		toramActive(),
+		globPaths("/run/live/medium/live/*.squashfs"),           // squashfs images on the boot medium (the expected set)
+		globPaths(filepath.Join(installToRAMDir, "*.squashfs")), // squashfs images present in the RAM copy directory
+	)
+}
+
+// evaluateLiveMediaRAMState classifies the Copy-to-RAM situation from facts the
+// caller has already gathered: status is the detected boot source, toram the
+// toram flag, and the slices list the squashfs paths found on the boot medium
+// and in the RAM copy directory. CanStartCopy is set for every non-RAM outcome.
+func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
+	state := LiveMediaRAMState{
+		LiveBootSource: status,
+		ToramActive:    toram,
+		CopyPresent:    len(copiedSquashfs) > 0,
+	}
+	if status.InRAM {
+		state.State, state.Status = "in_ram", "ok"
+		state.CopyComplete = true
+		state.Message = "Running from RAM — installation media can be safely disconnected."
+		return state
+	}
+
+	// Not running from RAM: a copy may be started whichever case applies below.
+	state.CanStartCopy = true
+	wanted := pathBaseSet(sourceSquashfs)
+	state.CopyComplete = len(wanted) > 0 && setContainsAll(pathBaseSet(copiedSquashfs), wanted)
+
+	switch {
+	case state.CopyComplete:
+		// Every source image has a same-named copy, yet the boot source is not RAM.
+		state.State, state.Status = "partial", "partial"
+		state.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source."
+	case state.CopyPresent:
+		// Copies exist but do not cover the source image set.
+		state.State, state.Status = "partial", "partial"
+		state.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled."
+	case toram:
+		// Nothing was copied although toram was requested.
+		state.State, state.Status = "toram_failed", "failed"
+		state.Message = "toram boot parameter is set but the live medium is not mounted from RAM."
+	default:
+		// Nothing was copied and toram was not requested.
+		state.State, state.Status = "not_in_ram", "warning"
+		state.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance."
+	}
+	return state
+}
+
+func globPaths(pattern string) []string { // returns the files matching pattern, or nil when there are none
+	matches, _ := filepath.Glob(pattern) // Glob fails only on a malformed pattern; that case is treated as "no matches"
+	return matches
+}
+
+// pathBaseSet collects the whitespace-trimmed, non-empty base names of paths into a set.
+func pathBaseSet(paths []string) map[string]struct{} {
+	set := make(map[string]struct{}, len(paths))
+	for _, p := range paths {
+		if name := strings.TrimSpace(filepath.Base(p)); name != "" {
+			set[name] = struct{}{}
+		}
+	}
+	return set
+}
+
+// setContainsAll reports whether every key of want is also a key of have.
+// An empty want has nothing to match against and therefore yields false.
+func setContainsAll(have, want map[string]struct{}) bool {
+	missing := len(want)
+	for name := range want {
+		if _, found := have[name]; found {
+			missing--
+		}
+	}
+	return len(want) > 0 && missing == 0
+}
+
+func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
 	log := func(msg string) {
 		if logFunc != nil {
 			logFunc(msg)
 		}
 	}

-	if s.IsLiveMediaInRAM() {
+	state := s.LiveMediaRAMState()
+	if state.InRAM {
 		log("Already running from RAM — installation media can be safely disconnected.")
 		return nil
 	}
@@ -88,10 +159,21 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 			humanBytes(needed+headroom), humanBytes(free))
 	}

-	dstDir := "/dev/shm/bee-live"
+	dstDir := installToRAMDir
+	if state.CopyPresent {
+		log("Removing stale partial RAM copy before retry...")
+	}
+	_ = os.RemoveAll(dstDir)
 	if err := os.MkdirAll(dstDir, 0755); err != nil {
 		return fmt.Errorf("create tmpfs dir: %v", err)
 	}
+	defer func() {
+		if retErr == nil {
+			return
+		}
+		_ = os.RemoveAll(dstDir)
+		log("Removed incomplete RAM copy.")
+	}()

 	for _, sf := range squashfsFiles {
 		if err := ctx.Err(); err != nil {
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -58,3 +58,46 @@ func TestDescribeLiveBootSource(t *testing.T) {
 		t.Fatalf("got %q want /run/live/medium", got)
 	}
 }
+
+func TestEvaluateLiveMediaRAMState(t *testing.T) { // NOTE(review): the "not_in_ram" default and the complete-copy branch are not exercised here
+	t.Parallel()
+
+	t.Run("in_ram", func(t *testing.T) {
+		state := evaluateLiveMediaRAMState(
+			LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"},
+			false,
+			nil,
+			nil,
+		)
+		if state.State != "in_ram" || state.Status != "ok" || state.CanStartCopy {
+			t.Fatalf("state=%+v", state)
+		}
+	})
+
+	t.Run("partial_copy_after_cancel", func(t *testing.T) {
+		state := evaluateLiveMediaRAMState(
+			LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
+			false,
+			[]string{"/run/live/medium/live/filesystem.squashfs", "/run/live/medium/live/firmware.squashfs"}, // two images expected
+			[]string{"/dev/shm/bee-live/filesystem.squashfs"},                                                // only one of them copied
+		)
+		if state.State != "partial" || state.Status != "partial" || !state.CanStartCopy {
+			t.Fatalf("state=%+v", state)
+		}
+		if state.CopyComplete {
+			t.Fatalf("CopyComplete=%v want false", state.CopyComplete)
+		}
+	})
+
+	t.Run("toram_failed", func(t *testing.T) {
+		state := evaluateLiveMediaRAMState(
+			LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"},
+			true, // toram requested, but nothing is in RAM
+			nil,
+			nil,
+		)
+		if state.State != "toram_failed" || state.Status != "failed" || !state.CanStartCopy {
+			t.Fatalf("state=%+v", state)
+		}
+	})
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -171,25 +171,28 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }

-// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM.
-// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted),
-// "failed" = toram was requested but medium is not in RAM (copy failed or in progress).
+// collectToRAMHealth evaluates whether the live system is fully running from RAM.
+// Status values: "ok" = fully in RAM, "warning" = not copied, "partial" = stale or
+// incomplete RAM copy exists but runtime still depends on the boot medium,
+// "failed" = toram was requested but medium is not in RAM.
 func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
-	inRAM := s.IsLiveMediaInRAM()
-	active := toramActive()
-	switch {
-	case inRAM:
-		health.ToRAMStatus = "ok"
-	case active:
-		// toram was requested but medium is not yet/no longer in RAM
-		health.ToRAMStatus = "failed"
+	state := s.LiveMediaRAMState()
+	health.ToRAMStatus = state.Status
+	switch state.Status { // any status without a case here (e.g. "warning") is recorded in ToRAMStatus only, with no issue appended
+	case "ok":
+		return
+	case "failed":
 		health.Issues = append(health.Issues, schema.RuntimeIssue{
 			Code:        "toram_copy_failed",
 			Severity:    "warning",
-			Description: "toram boot parameter is set but the live medium is not mounted from RAM.",
+			Description: state.Message,
+		})
+	case "partial":
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_partial",
+			Severity:    "warning",
+			Description: state.Message,
 		})
-	default:
-		health.ToRAMStatus = "warning"
 	}
 }

@@ -211,13 +214,13 @@ func findUSBExportMount() string {

 	// fs types that are expected on USB export drives
 	exportFSTypes := map[string]bool{
-		"vfat":  true,
-		"exfat": true,
-		"ext2":  true,
-		"ext3":  true,
-		"ext4":  true,
-		"ntfs":  true,
-		"ntfs3": true,
+		"vfat":    true,
+		"exfat":   true,
+		"ext2":    true,
+		"ext3":    true,
+		"ext4":    true,
+		"ntfs":    true,
+		"ntfs3":   true,
 		"fuseblk": true,
 	}

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -108,15 +108,15 @@ type nvidiaGPUHealth struct {
 }

 type nvidiaGPUStatusFile struct {
-	Index       int
-	Name        string
-	RunStatus   string
-	Reason      string
-	Health      string
-	HealthRaw   string
-	Observed    bool
-	Selected    bool
-	FailingJob  string
+	Index      int
+	Name       string
+	RunStatus  string
+	Reason     string
+	Health     string
+	HealthRaw  string
+	Observed   bool
+	Selected   bool
+	FailingJob string
 }

 // AMDGPUInfo holds basic info about an AMD GPU from rocm-smi.
@@ -410,13 +410,13 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 	return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
 		satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
 		satJob{name: "02-dcgmi-version.log", cmd: []string{"dcgmi", "-v"}},
-			satJob{
-				name:       "03-dcgmproftester.log",
-				cmd:        profCmd,
-				env:        profEnv,
-				collectGPU: true,
-				gpuIndices: selected,
-			},
+		satJob{
+			name:       "03-dcgmproftester.log",
+			cmd:        profCmd,
+			env:        profEnv,
+			collectGPU: true,
+			gpuIndices: selected,
+		},
 		satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
 	), logFunc)
 }
@@ -552,9 +552,13 @@ func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, si
 	if passes <= 0 {
 		passes = 1
 	}
+	// Bound memtester with a hard wall-clock timeout, in seconds, enforced by
+	// timeout(1): 150 s (~2.5 min) per 100 MB per pass, plus a fixed 120 s buffer.
+	// Without this, a stuck memory controller can cause memtester to spin forever on a single subtest.
+	timeoutSec := sizeMB*passes*150/100 + 120
 	return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
 		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
-		{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
+		{name: "02-memtester.log", cmd: []string{"timeout", fmt.Sprintf("%d", timeoutSec), "memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
 		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
 	}, logFunc)
 }
@@ -1382,8 +1386,6 @@ func runSATCommandWithMetrics(ctx context.Context, verboseLog, name string, cmd
 	if len(metricRows) > 0 {
 		_ = WriteGPUMetricsCSV(filepath.Join(runDir, "gpu-metrics.csv"), metricRows)
 		_ = WriteGPUMetricsHTML(filepath.Join(runDir, "gpu-metrics.html"), metricRows)
-		chart := RenderGPUTerminalChart(metricRows)
-		_ = os.WriteFile(filepath.Join(runDir, "gpu-metrics-term.txt"), []byte(chart), 0644)
 	}

 	return out, err
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -426,6 +426,101 @@ func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
 	return fans, nil
 }

+// sampleFanDutyCyclePct runs `sensors -j` and returns the average fan duty cycle, in
+// percent, across all exposed PWM controls; the bool is false when none can be read.
+func sampleFanDutyCyclePct() (float64, bool) {
+	out, err := exec.Command("sensors", "-j").Output()
+	if err != nil || len(out) == 0 {
+		return 0, false
+	}
+	return parseFanDutyCyclePctSensorsJSON(out)
+}
+
+func parseFanDutyCyclePctSensorsJSON(raw []byte) (float64, bool) {
+	var doc map[string]map[string]any
+	if err := json.Unmarshal(raw, &doc); err != nil {
+		return 0, false
+	}
+	var samples []float64
+	for _, features := range doc {
+		for name, feature := range features {
+			if strings.EqualFold(name, "Adapter") {
+				continue
+			}
+			featureMap, ok := feature.(map[string]any)
+			if !ok {
+				continue
+			}
+			if duty, ok := firstFanDutyValue(name, featureMap); ok {
+				samples = append(samples, duty)
+			}
+		}
+	}
+	if len(samples) == 0 {
+		return 0, false
+	}
+	return benchmarkMean(samples), true
+}
+
+func firstFanDutyValue(featureName string, feature map[string]any) (float64, bool) {
+	featureName = strings.ToLower(strings.TrimSpace(featureName))
+	if strings.Contains(featureName, "enable") || strings.Contains(featureName, "mode") || strings.Contains(featureName, "alarm") {
+		return 0, false
+	}
+	if strings.Contains(featureName, "pwm") {
+		for _, key := range []string{"input", "value", "current"} {
+			if value, ok := feature[key]; ok {
+				if duty, parsed := parseFanDutyValue(value); parsed {
+					return duty, true
+				}
+			}
+		}
+	}
+	keys := make([]string, 0, len(feature))
+	for key := range feature {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+	for _, key := range keys {
+		lower := strings.ToLower(key)
+		if !strings.Contains(lower, "pwm") {
+			continue
+		}
+		if strings.Contains(lower, "enable") || strings.Contains(lower, "mode") || strings.Contains(lower, "alarm") {
+			continue
+		}
+		if duty, parsed := parseFanDutyValue(feature[key]); parsed {
+			return duty, true
+		}
+	}
+	return 0, false
+}
+
+func parseFanDutyValue(value any) (float64, bool) {
+	switch v := value.(type) {
+	case float64:
+		return normalizePWMAsDutyPct(v)
+	case string:
+		if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil {
+			return normalizePWMAsDutyPct(f)
+		}
+	}
+	return 0, false
+}
+
+func normalizePWMAsDutyPct(raw float64) (float64, bool) {
+	if raw < 0 {
+		return 0, false
+	}
+	if raw <= 100 {
+		return raw, true
+	}
+	if raw <= 255 {
+		return raw / 255.0 * 100.0, true
+	}
+	return 0, false
+}
+
 func firstFanInputValue(feature map[string]any) (float64, bool) {
 	keys := make([]string, 0, len(feature))
 	for key := range feature {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -29,6 +29,27 @@ func TestFirstFanInputValue(t *testing.T) {
 	}
 }

+func TestParseFanDutyCyclePctSensorsJSON(t *testing.T) {
+	raw := []byte(`{
+		"chip0": {
+			"fan1": {"input": 9000},
+			"pwm1": {"input": 128},
+			"pwm1_enable": {"input": 1}
+		},
+		"chip1": {
+			"pwm2": {"input": 64}
+		}
+	}`)
+
+	got, ok := parseFanDutyCyclePctSensorsJSON(raw)
+	if !ok {
+		t.Fatalf("expected duty cycle telemetry to be parsed")
+	}
+	if got < 57 || got > 58 {
+		t.Fatalf("got=%v want ~57.1", got)
+	}
+}
+
 func TestParseDCMIPowerReading(t *testing.T) {
 	raw := `
 Instantaneous power reading:                   512 Watts
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -9,6 +9,17 @@ type LiveBootSource struct {
 	Device string `json:"device,omitempty"`
 }

+type LiveMediaRAMState struct {
+	LiveBootSource
+	State        string `json:"state"`
+	Status       string `json:"status"`
+	ToramActive  bool   `json:"toram_active,omitempty"`
+	CopyPresent  bool   `json:"copy_present,omitempty"`
+	CopyComplete bool   `json:"copy_complete,omitempty"`
+	CanStartCopy bool   `json:"can_start_copy,omitempty"`
+	Message      string `json:"message,omitempty"`
+}
+
 type InterfaceInfo struct {
 	Name  string
 	State string
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -15,17 +15,17 @@ type HardwareIngestRequest struct {
 }

 type RuntimeHealth struct {
-	Status        string                 `json:"status"`
-	CheckedAt     string                 `json:"checked_at"`
-	ExportDir     string                 `json:"export_dir,omitempty"`
-	DriverReady   bool                   `json:"driver_ready,omitempty"`
-	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
-	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
-	NetworkStatus string                 `json:"network_status,omitempty"`
-	// ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed)
-	ToRAMStatus   string `json:"toram_status,omitempty"`
+	Status        string `json:"status"`
+	CheckedAt     string `json:"checked_at"`
+	ExportDir     string `json:"export_dir,omitempty"`
+	DriverReady   bool   `json:"driver_ready,omitempty"`
+	CUDAReady     bool   `json:"cuda_ready,omitempty"`
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
+	NetworkStatus string `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
+	ToRAMStatus string `json:"toram_status,omitempty"`
 	// USBExportPath: mount point of the first writable USB drive found, empty if none.
-	USBExportPath string `json:"usb_export_path,omitempty"`
+	USBExportPath string                 `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -36,6 +36,16 @@ var apiListNvidiaGPUStatuses = func(a *app.App) ([]platform.NvidiaGPUStatus, err
 	return a.ListNvidiaGPUStatuses()
 }

+const (
+	taskPriorityBenchmark      = 10
+	taskPriorityBurn           = 20
+	taskPriorityValidateStress = 30
+	taskPriorityValidate       = 40
+	taskPriorityAudit          = 50
+	taskPriorityInstallToRAM   = 60
+	taskPriorityInstall        = 70
+)
+
 // ── Job ID counter ────────────────────────────────────────────────────────────

 var jobCounter atomic.Uint64
@@ -100,7 +110,7 @@ func writeTaskRunResponse(w http.ResponseWriter, tasks []*Task) {

 func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	switch strings.TrimSpace(target) {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute",
 		"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
 		"nvidia-bandwidth", "nvidia-stress":
 		return true
@@ -109,6 +119,30 @@ func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	}
 }

+func defaultTaskPriority(target string, params taskParams) int {
+	switch strings.TrimSpace(target) {
+	case "install":
+		return taskPriorityInstall
+	case "install-to-ram":
+		return taskPriorityInstallToRAM
+	case "audit":
+		return taskPriorityAudit
+	case "nvidia-bench-perf", "nvidia-bench-power":
+		return taskPriorityBenchmark
+	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
+		return taskPriorityBurn
+	case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse",
+		"nvidia-interconnect", "nvidia-bandwidth", "memory", "storage", "cpu",
+		"amd", "amd-mem", "amd-bandwidth":
+		if params.StressMode {
+			return taskPriorityValidateStress
+		}
+		return taskPriorityValidate
+	default:
+		return 0
+	}
+}
+
 func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) {
 	if len(gpus) == 0 {
 		return nil, fmt.Errorf("no NVIDIA GPUs detected")
@@ -458,6 +492,7 @@ func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
 		ID:        newJobID("audit"),
 		Name:      "Audit",
 		Target:    "audit",
+		Priority:  defaultTaskPriority("audit", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -491,13 +526,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			return
 		}

-			var body struct {
-				Duration           int      `json:"duration"`
-				StressMode         bool     `json:"stress_mode"`
-				GPUIndices         []int    `json:"gpu_indices"`
-				ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
-				StaggerGPUStart    bool     `json:"stagger_gpu_start"`
-				Loader             string   `json:"loader"`
+		var body struct {
+			Duration           int      `json:"duration"`
+			StressMode         bool     `json:"stress_mode"`
+			GPUIndices         []int    `json:"gpu_indices"`
+			ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
+			StaggerGPUStart    bool     `json:"stagger_gpu_start"`
+			ParallelGPUs       bool     `json:"parallel_gpus"`
+			Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
 			PlatformComponents []string `json:"platform_components"`
@@ -513,18 +549,153 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		if strings.TrimSpace(body.DisplayName) != "" {
 			name = body.DisplayName
 		}
-			params := taskParams{
-				Duration:           body.Duration,
-				StressMode:         body.StressMode,
-				GPUIndices:         body.GPUIndices,
-				ExcludeGPUIndices:  body.ExcludeGPUIndices,
-				StaggerGPUStart:    body.StaggerGPUStart,
-				Loader:             body.Loader,
+		params := taskParams{
+			Duration:           body.Duration,
+			StressMode:         body.StressMode,
+			GPUIndices:         body.GPUIndices,
+			ExcludeGPUIndices:  body.ExcludeGPUIndices,
+			StaggerGPUStart:    body.StaggerGPUStart,
+			ParallelGPUs:       body.ParallelGPUs,
+			Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
 			PlatformComponents: body.PlatformComponents,
 		}
-		tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target)
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "sat-"+target)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		for _, t := range tasks {
+			globalQueue.enqueue(t)
+		}
+		writeTaskRunResponse(w, tasks)
+	}
+}
+
+func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if h.opts.App == nil {
+			writeError(w, http.StatusServiceUnavailable, "app not configured")
+			return
+		}
+
+		var body struct {
+			Profile           string `json:"profile"`
+			SizeMB            int    `json:"size_mb"`
+			GPUIndices        []int  `json:"gpu_indices"`
+			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+			RunNCCL           *bool  `json:"run_nccl"`
+			ParallelGPUs      *bool  `json:"parallel_gpus"`
+			RampUp            *bool  `json:"ramp_up"`
+			DisplayName       string `json:"display_name"`
+		}
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
+		}
+
+		runNCCL := true
+		if body.RunNCCL != nil {
+			runNCCL = *body.RunNCCL
+		}
+		parallelGPUs := false
+		if body.ParallelGPUs != nil {
+			parallelGPUs = *body.ParallelGPUs
+		}
+		rampUp := false
+		if body.RampUp != nil {
+			rampUp = *body.RampUp
+		}
+		// Build a descriptive base name that includes profile and mode so the task
+		// list is self-explanatory without opening individual task detail pages.
+		profile := strings.TrimSpace(body.Profile)
+		if profile == "" {
+			profile = "standard"
+		}
+		name := taskDisplayName(target, "", "")
+		if strings.TrimSpace(body.DisplayName) != "" {
+			name = body.DisplayName
+		}
+		// Append profile tag.
+		name = fmt.Sprintf("%s · %s", name, profile)
+
+		if target == "nvidia-bench-power" && parallelGPUs {
+			writeError(w, http.StatusBadRequest, "power / thermal fit benchmark uses sequential or ramp-up modes only")
+			return
+		}
+
+		if rampUp && len(body.GPUIndices) > 1 {
+			// Ramp-up mode: resolve GPU list, then create one task per prefix
+			// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1]; each task drives its GPUs in parallel.
+			gpus, err := apiListNvidiaGPUs(h.opts.App)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			if len(resolved) < 2 {
+				// Fall through to normal single-task path.
+				rampUp = false
+			} else {
+				now := time.Now()
+				rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
+				var allTasks []*Task
+				for step := 1; step <= len(resolved); step++ {
+					subset := resolved[:step]
+					stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
+					t := &Task{
+						ID:        newJobID("bee-bench-nvidia"),
+						Name:      stepName,
+						Target:    target,
+						Priority:  defaultTaskPriority(target, taskParams{}),
+						Status:    TaskPending,
+						CreatedAt: now,
+						params: taskParams{
+							GPUIndices:       append([]int(nil), subset...),
+							SizeMB:           body.SizeMB,
+							BenchmarkProfile: body.Profile,
+							RunNCCL:          runNCCL && step == len(resolved),
+							ParallelGPUs:     true,
+							RampStep:         step,
+							RampTotal:        len(resolved),
+							RampRunID:        rampRunID,
+							DisplayName:      stepName,
+						},
+					}
+					allTasks = append(allTasks, t)
+				}
+				for _, t := range allTasks {
+					globalQueue.enqueue(t)
+				}
+				writeTaskRunResponse(w, allTasks)
+				return
+			}
+		}
+
+		// For non-ramp tasks append mode tag.
+		if parallelGPUs {
+			name = fmt.Sprintf("%s · parallel", name)
+		} else {
+			name = fmt.Sprintf("%s · sequential", name)
+		}
+
+		params := taskParams{
+			GPUIndices:        body.GPUIndices,
+			ExcludeGPUIndices: body.ExcludeGPUIndices,
+			SizeMB:            body.SizeMB,
+			BenchmarkProfile:  body.Profile,
+			RunNCCL:           runNCCL,
+			ParallelGPUs:      parallelGPUs,
+			DisplayName:       body.DisplayName,
+		}
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "bee-bench-nvidia")
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
 			return
@@ -537,129 +708,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 }

 func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
-	if h.opts.App == nil {
-		writeError(w, http.StatusServiceUnavailable, "app not configured")
-		return
-	}
-
-	var body struct {
-		Profile           string `json:"profile"`
-		SizeMB            int    `json:"size_mb"`
-		GPUIndices        []int  `json:"gpu_indices"`
-		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
-		RunNCCL           *bool  `json:"run_nccl"`
-		ParallelGPUs      *bool  `json:"parallel_gpus"`
-		RampUp            *bool  `json:"ramp_up"`
-		DisplayName       string `json:"display_name"`
-	}
-	if r.Body != nil {
-		if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
-			writeError(w, http.StatusBadRequest, "invalid request body")
-			return
-		}
-	}
-
-	runNCCL := true
-	if body.RunNCCL != nil {
-		runNCCL = *body.RunNCCL
-	}
-	parallelGPUs := false
-	if body.ParallelGPUs != nil {
-		parallelGPUs = *body.ParallelGPUs
-	}
-	rampUp := false
-	if body.RampUp != nil {
-		rampUp = *body.RampUp
-	}
-	// Build a descriptive base name that includes profile and mode so the task
-	// list is self-explanatory without opening individual task detail pages.
-	profile := strings.TrimSpace(body.Profile)
-	if profile == "" {
-		profile = "standard"
-	}
-	name := taskDisplayName("nvidia-benchmark", "", "")
-	if strings.TrimSpace(body.DisplayName) != "" {
-		name = body.DisplayName
-	}
-	// Append profile tag.
-	name = fmt.Sprintf("%s · %s", name, profile)
-
-	if rampUp && len(body.GPUIndices) > 1 {
-		// Ramp-up mode: resolve GPU list, then create one task per prefix
-		// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1], each running in parallel.
-		gpus, err := apiListNvidiaGPUs(h.opts.App)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, err.Error())
-			return
-		}
-		resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, err.Error())
-			return
-		}
-		if len(resolved) < 2 {
-			// Fall through to normal single-task path.
-			rampUp = false
-		} else {
-			now := time.Now()
-			rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
-			var allTasks []*Task
-			for step := 1; step <= len(resolved); step++ {
-				subset := resolved[:step]
-				stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
-				t := &Task{
-					ID:        newJobID("benchmark-nvidia"),
-					Name:      stepName,
-					Target:    "nvidia-benchmark",
-					Priority:  15,
-					Status:    TaskPending,
-					CreatedAt: now,
-					params: taskParams{
-						GPUIndices:       append([]int(nil), subset...),
-						SizeMB:           body.SizeMB,
-						BenchmarkProfile: body.Profile,
-						RunNCCL:          runNCCL && step == len(resolved),
-						ParallelGPUs:     true,
-						RampStep:         step,
-						RampTotal:        len(resolved),
-						RampRunID:        rampRunID,
-						DisplayName:      stepName,
-					},
-				}
-				allTasks = append(allTasks, t)
-			}
-			for _, t := range allTasks {
-				globalQueue.enqueue(t)
-			}
-			writeTaskRunResponse(w, allTasks)
-			return
-		}
-	}
-
-	// For non-ramp tasks append mode tag.
-	if parallelGPUs {
-		name = fmt.Sprintf("%s · parallel", name)
-	} else {
-		name = fmt.Sprintf("%s · sequential", name)
-	}
-
-	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
-		GPUIndices:        body.GPUIndices,
-		ExcludeGPUIndices: body.ExcludeGPUIndices,
-		SizeMB:            body.SizeMB,
-		BenchmarkProfile:  body.Profile,
-		RunNCCL:           runNCCL,
-		ParallelGPUs:      parallelGPUs,
-		DisplayName:       body.DisplayName,
-	}, name, h.opts.App, "benchmark-nvidia")
-	if err != nil {
-		writeError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-	for _, t := range tasks {
-		globalQueue.enqueue(t)
-	}
-	writeTaskRunResponse(w, tasks)
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf").ServeHTTP(w, r)
 }

 func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
@@ -1034,25 +1083,62 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	status := h.opts.App.LiveBootSource()
+	status := h.currentRAMStatus()
 	w.Header().Set("Content-Type", "application/json")
 	_ = json.NewEncoder(w).Encode(status)
 }

+type ramStatusResponse struct {
+	platform.LiveMediaRAMState
+	InstallTaskActive bool   `json:"install_task_active,omitempty"`
+	CopyTaskActive    bool   `json:"copy_task_active,omitempty"`
+	CanStartTask      bool   `json:"can_start_task,omitempty"`
+	BlockedReason     string `json:"blocked_reason,omitempty"`
+}
+
+func (h *handler) currentRAMStatus() ramStatusResponse {
+	state := h.opts.App.LiveMediaRAMState()
+	resp := ramStatusResponse{LiveMediaRAMState: state}
+	if globalQueue.hasActiveTarget("install") {
+		resp.InstallTaskActive = true
+		resp.BlockedReason = "install to disk is already running"
+		return resp
+	}
+	if globalQueue.hasActiveTarget("install-to-ram") {
+		resp.CopyTaskActive = true
+		resp.BlockedReason = "install to RAM task is already pending or running"
+		return resp
+	}
+	if state.InRAM {
+		resp.BlockedReason = "system is already running from RAM"
+		return resp
+	}
+	resp.CanStartTask = state.CanStartCopy
+	if !resp.CanStartTask && resp.BlockedReason == "" {
+		resp.BlockedReason = state.Message
+	}
+	return resp
+}
+
 func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	if globalQueue.hasActiveTarget("install") {
-		writeError(w, http.StatusConflict, "install to disk is already running")
+	status := h.currentRAMStatus()
+	if !status.CanStartTask {
+		msg := strings.TrimSpace(status.BlockedReason)
+		if msg == "" {
+			msg = "install to RAM is not available"
+		}
+		writeError(w, http.StatusConflict, msg)
 		return
 	}
 	t := &Task{
 		ID:        newJobID("install-to-ram"),
 		Name:      "Install to RAM",
 		Target:    "install-to-ram",
-		Priority:  10,
+		Priority:  defaultTaskPriority("install-to-ram", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -1167,7 +1253,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
 		ID:        newJobID("install"),
 		Name:      "Install to Disk",
 		Target:    "install",
-		Priority:  20,
+		Priority:  defaultTaskPriority("install", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 		params: taskParams{
@@ -1443,6 +1529,11 @@ func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Reques
 	writeJSON(w, map[string]string{"status": "rolled back"})
 }

+func (h *handler) handleAPIBenchmarkResults(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	fmt.Fprint(w, renderBenchmarkResultsCard(h.opts.ExportDir))
+}
+
 func (h *handler) rollbackPendingNetworkChange() error {
 	h.pendingNetMu.Lock()
 	pnc := h.pendingNet
@@ -1459,4 +1550,3 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	}
 	return nil
 }
-
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -39,6 +39,9 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
 		t.Fatalf("burn profile=%q want smoke", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("priority=%d want %d", got, taskPriorityValidate)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
@@ -61,7 +64,7 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -75,8 +78,8 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
 	}
 	task := globalQueue.tasks[0]
-	if task.Target != "nvidia-benchmark" {
-		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
+	if task.Target != "nvidia-bench-perf" {
+		t.Fatalf("target=%q want nvidia-bench-perf", task.Target)
 	}
 	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
 		t.Fatalf("gpu indices=%v want [1 3]", got)
@@ -84,6 +87,9 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	if task.params.RunNCCL {
 		t.Fatal("RunNCCL should reflect explicit false from request")
 	}
+	if task.Priority != taskPriorityBenchmark {
+		t.Fatalf("priority=%d want %d", task.Priority, taskPriorityBenchmark)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
@@ -107,7 +113,7 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -133,6 +139,56 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+}
+
+func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T) {
+	globalQueue.mu.Lock()
+	originalTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		globalQueue.tasks = originalTasks
+		globalQueue.mu.Unlock()
+	})
+	prevList := apiListNvidiaGPUs
+	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
+		return []platform.NvidiaGPU{
+			{Index: 0, Name: "NVIDIA H100 PCIe"},
+			{Index: 1, Name: "NVIDIA H100 PCIe"},
+			{Index: 2, Name: "NVIDIA H100 PCIe"},
+		}, nil
+	}
+	t.Cleanup(func() { apiListNvidiaGPUs = prevList })
+
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/power/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"ramp_up":true}`))
+	rec := httptest.NewRecorder()
+
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power").ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if len(globalQueue.tasks) != 3 {
+		t.Fatalf("tasks=%d want 3", len(globalQueue.tasks))
+	}
+	for i, task := range globalQueue.tasks {
+		if task.Target != "nvidia-bench-power" {
+			t.Fatalf("task[%d] target=%q", i, task.Target)
+		}
+		if task.Priority != taskPriorityBenchmark {
+			t.Fatalf("task[%d] priority=%d want %d", i, task.Priority, taskPriorityBenchmark)
+		}
+	}
 }

 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
@@ -175,6 +231,41 @@ func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityValidate)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityValidate)
+	}
+}
+
+func TestDefaultTaskPriorityOrder(t *testing.T) {
+	got := []int{
+		defaultTaskPriority("install-to-ram", taskParams{}),
+		defaultTaskPriority("audit", taskParams{}),
+		defaultTaskPriority("cpu", taskParams{}),
+		defaultTaskPriority("cpu", taskParams{StressMode: true}),
+		defaultTaskPriority("nvidia-stress", taskParams{}),
+		defaultTaskPriority("nvidia-bench-perf", taskParams{}),
+		defaultTaskPriority("nvidia-bench-power", taskParams{}),
+	}
+	want := []int{
+		taskPriorityInstallToRAM,
+		taskPriorityAudit,
+		taskPriorityValidate,
+		taskPriorityValidateStress,
+		taskPriorityBurn,
+		taskPriorityBenchmark,
+		taskPriorityBenchmark,
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Fatalf("priority[%d]=%d want %d", i, got[i], want[i])
+		}
+	}
+	if !(got[0] > got[1] && got[1] > got[2] && got[2] > got[3] && got[3] > got[4] && got[4] > got[5] && got[5] == got[6]) {
+		t.Fatalf("priority order=%v", got)
+	}
 }

 func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
--- a/audit/internal/webui/kmsg_watcher.go
+++ b/audit/internal/webui/kmsg_watcher.go
@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
 // isSATTarget returns true for task targets that run hardware acceptance tests.
 func isSATTarget(target string) bool {
 	switch target {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
 		"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
 		"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
 		"platform-stress":
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -845,6 +845,13 @@ func buildRuntimeToRAMRow(health schema.RuntimeHealth) runtimeHealthRow {
 			Source: "live-boot / /proc/mounts",
 			Issue:  "",
 		}
+	case "partial":
+		return runtimeHealthRow{
+			Title:  "LiveCD in RAM",
+			Status: "WARNING",
+			Source: "live-boot / /proc/mounts / /dev/shm/bee-live",
+			Issue:  "Partial or staged RAM copy detected. System is not fully running from RAM; Copy to RAM can be retried.",
+		}
 	case "failed":
 		return runtimeHealthRow{
 			Title:  "LiveCD in RAM",
@@ -1928,23 +1935,10 @@ func renderSATCard(id, label, runAction, headerActions, body string) string {

 // ── Benchmark ─────────────────────────────────────────────────────────────────

-type benchmarkHistoryColumn struct {
-	key      string
-	label    string
-	name     string
-	index    int
-	parallel bool
-}
-
-type benchmarkHistoryCell struct {
-	score   float64
-	present bool
-}
-
 type benchmarkHistoryRun struct {
 	generatedAt time.Time
 	displayTime string
-	cells       map[string]benchmarkHistoryCell
+	gpuScores   map[int]float64 // GPU index → composite score
 }

 func renderBenchmark(opts HandlerOptions) string {
@@ -1952,7 +1946,7 @@ func renderBenchmark(opts HandlerOptions) string {

 <div class="grid2">
  <div class="card">
-    <div class="card-head">NVIDIA Benchmark</div>
+    <div class="card-head">Benchmark Setup</div>
    <div class="card-body">
      <div class="form-row">
        <label>Profile</label>
@@ -1985,26 +1979,30 @@ func renderBenchmark(opts HandlerOptions) string {
        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
      </label>
      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
-      <button id="benchmark-run-btn" class="btn btn-primary" onclick="runNvidiaBenchmark()" disabled>&#9654; Run Benchmark</button>
+      <div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center">
+        <button id="benchmark-run-performance-btn" class="btn btn-primary" onclick="runNvidiaBenchmark('performance')" disabled>&#9654; Run Performance Benchmark</button>
+        <button id="benchmark-run-power-fit-btn" class="btn btn-secondary" onclick="runNvidiaBenchmark('power-fit')" disabled>&#9654; Run Power / Thermal Fit</button>
+      </div>
+      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
    </div>
  </div>

  <div class="card">
-    <div class="card-head">Method</div>
+    <div class="card-head">Method Split</div>
    <div class="card-body">
-      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.</p>
+      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
      <table>
-        <tr><th>Profile</th><th>Purpose</th></tr>
-        <tr><td>Standard</td><td>Fast, repeatable performance check for server-to-server comparison.</td></tr>
-        <tr><td>Stability</td><td>Longer run for thermal drift, power caps, and clock instability.</td></tr>
-        <tr><td>Overnight</td><td>Extended verification of long-run stability and late throttling.</td></tr>
+        <tr><th>Run Type</th><th>Engine</th><th>Question</th></tr>
+        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td></tr>
+        <tr><td>Power / Thermal Fit</td><td><code>dcgmi targeted_power</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td></tr>
      </table>
+      <p style="font-size:12px;color:var(--muted);margin-top:10px">Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
    </div>
  </div>
 </div>

-` + renderBenchmarkResultsCard(opts.ExportDir) + `
+`+`<div id="benchmark-results-section">`+renderBenchmarkResultsCard(opts.ExportDir)+`</div>`+`

 <div id="benchmark-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
@@ -2042,21 +2040,24 @@ function benchmarkMode() {

 function benchmarkUpdateSelectionNote() {
  const selected = benchmarkSelectedGPUIndices();
-  const btn = document.getElementById('benchmark-run-btn');
+  const perfBtn = document.getElementById('benchmark-run-performance-btn');
+  const fitBtn = document.getElementById('benchmark-run-power-fit-btn');
  const note = document.getElementById('benchmark-selection-note');
  if (!selected.length) {
-    btn.disabled = true;
+    perfBtn.disabled = true;
+    fitBtn.disabled = true;
    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
    return;
  }
-  btn.disabled = false;
+  perfBtn.disabled = false;
+  fitBtn.disabled = false;
  const mode = benchmarkMode();
  if (mode === 'ramp-up') {
-    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). NCCL on final step.';
+    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). Performance uses compute benchmark; Power / Thermal Fit uses targeted_power per step.';
  } else if (mode === 'parallel') {
-    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously.' + (selected.length > 1 ? ' NCCL included.' : '');
+    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously. Only the performance benchmark supports this mode.';
  } else {
-    note.textContent = 'Sequential: each GPU benchmarked separately.' + (selected.length > 1 ? ' NCCL included on each.' : '');
+    note.textContent = 'Sequential: each selected GPU benchmarked separately.';
  }
 }

@@ -2130,7 +2131,7 @@ function benchmarkSelectNone() {
  benchmarkUpdateSelectionNote();
 }

-function runNvidiaBenchmark() {
+function runNvidiaBenchmark(kind) {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  if (!selected.length) {
@@ -2140,21 +2141,26 @@ function runNvidiaBenchmark() {
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  const mode = benchmarkMode();
  const rampUp = mode === 'ramp-up' && selected.length > 1;
-  const parallelGPUs = mode === 'parallel';
+  const parallelGPUs = mode === 'parallel' && kind === 'performance';
+  if (kind === 'power-fit' && mode === 'parallel') {
+    status.textContent = 'Power / Thermal Fit supports sequential or ramp-up only.';
+    return;
+  }
  const body = {
    profile: document.getElementById('benchmark-profile').value || 'standard',
    gpu_indices: selected,
-    run_nccl: selected.length > 1,
+    run_nccl: kind === 'performance' && selected.length > 1,
    parallel_gpus: parallelGPUs,
    ramp_up: rampUp,
-    display_name: 'NVIDIA Benchmark'
+    display_name: kind === 'power-fit' ? 'NVIDIA Power / Thermal Fit' : 'NVIDIA Performance Benchmark'
  };
  document.getElementById('benchmark-output').style.display = 'block';
-  document.getElementById('benchmark-title').textContent = '— ' + body.profile + ' [' + selected.join(', ') + ']';
+  document.getElementById('benchmark-title').textContent = '— ' + body.display_name + ' · ' + body.profile + ' [' + selected.join(', ') + ']';
  const term = document.getElementById('benchmark-terminal');
-  term.textContent = 'Enqueuing benchmark for GPUs ' + selected.join(', ') + '...\n';
+  term.textContent = 'Enqueuing ' + body.display_name + ' for GPUs ' + selected.join(', ') + '...\n';
  status.textContent = 'Queueing...';
-  fetch('/api/benchmark/nvidia/run', {
+  const endpoint = kind === 'power-fit' ? '/api/bee-bench/nvidia/power/run' : '/api/bee-bench/nvidia/perf/run';
+  fetch(endpoint, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
@@ -2182,7 +2188,9 @@ function runNvidiaBenchmark() {
        if (e.data) failures += 1;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
+        const isLast = (idx + 1 >= taskIds.length);
        streamNext(idx + 1, failures);
+        if (isLast) { benchmarkRefreshResults(); }
      });
      benchmarkES.onerror = function() {
        if (benchmarkES) {
@@ -2202,21 +2210,33 @@ function runNvidiaBenchmark() {
 }

 benchmarkLoadGPUs();
+
+function benchmarkRefreshResults() {
+  fetch('/api/benchmark/results')
+    .then(function(r) { return r.text(); })
+    .then(function(html) {
+      const el = document.getElementById('benchmark-results-section');
+      if (el) el.innerHTML = html;
+    })
+    .catch(function() {});
+}
 </script>`
 }

 func renderBenchmarkResultsCard(exportDir string) string {
-	columns, runs := loadBenchmarkHistory(exportDir)
-	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+	maxIdx, runs := loadBenchmarkHistory(exportDir)
+	perf := renderBenchmarkResultsCardFromRuns(
+		"Performance Results",
 		"Composite score by saved benchmark run and GPU.",
-		"No saved benchmark runs yet.",
-		columns,
+		"No saved performance benchmark runs yet.",
+		maxIdx,
 		runs,
 	)
+	power := renderPowerBenchmarkResultsCard(exportDir)
+	return perf + "\n" + power
 }

-func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, columns []benchmarkHistoryColumn, runs []benchmarkHistoryRun) string {
+func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
 	if len(runs) == 0 {
 		return `<div class="card"><div class="card-head">` + html.EscapeString(title) + `</div><div class="card-body"><p style="color:var(--muted);font-size:13px">` + html.EscapeString(emptyMessage) + `</p></div></div>`
 	}
@@ -2226,22 +2246,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
 		b.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">` + html.EscapeString(description) + `</p>`)
 	}
 	b.WriteString(`<div style="overflow-x:auto">`)
-	b.WriteString(`<table><thead><tr><th>Test</th><th>Time</th>`)
-	for _, col := range columns {
-		b.WriteString(`<th>` + html.EscapeString(col.label) + `</th>`)
+	b.WriteString(`<table><thead><tr><th>Run</th><th>Time</th>`)
+	for i := 0; i <= maxGPUIndex; i++ {
+		b.WriteString(`<th>GPU ` + strconv.Itoa(i) + `</th>`)
 	}
 	b.WriteString(`</tr></thead><tbody>`)
 	for i, run := range runs {
 		b.WriteString(`<tr>`)
 		b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
 		b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
-		for _, col := range columns {
-			cell, ok := run.cells[col.key]
-			if !ok || !cell.present {
+		for idx := 0; idx <= maxGPUIndex; idx++ {
+			score, ok := run.gpuScores[idx]
+			if !ok {
 				b.WriteString(`<td style="color:var(--muted)">-</td>`)
 				continue
 			}
-			b.WriteString(`<td>` + fmt.Sprintf("%.2f", cell.score) + `</td>`)
+			b.WriteString(`<td>` + fmt.Sprintf("%.2f", score) + `</td>`)
 		}
 		b.WriteString(`</tr>`)
 	}
@@ -2249,22 +2269,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
 	return b.String()
 }

-func loadBenchmarkHistory(exportDir string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
-	baseDir := app.DefaultBenchmarkBaseDir
+func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
+	baseDir := app.DefaultBeeBenchPerfDir
 	if strings.TrimSpace(exportDir) != "" {
-		baseDir = filepath.Join(exportDir, "bee-benchmark")
+		baseDir = filepath.Join(exportDir, "bee-bench", "perf")
 	}
-	paths, err := filepath.Glob(filepath.Join(baseDir, "gpu-benchmark-*", "result.json"))
+	paths, err := filepath.Glob(filepath.Join(baseDir, "perf-*", "result.json"))
 	if err != nil || len(paths) == 0 {
-		return nil, nil
+		return -1, nil
 	}
 	sort.Strings(paths)
 	return loadBenchmarkHistoryFromPaths(paths)
 }

-func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
-	columnByKey := make(map[string]benchmarkHistoryColumn)
+func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
 	runs := make([]benchmarkHistoryRun, 0, len(paths))
+	maxGPUIndex := -1
 	for _, path := range paths {
 		raw, err := os.ReadFile(path)
 		if err != nil {
@@ -2277,101 +2297,140 @@ func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []
 		run := benchmarkHistoryRun{
 			generatedAt: result.GeneratedAt,
 			displayTime: result.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
-			cells:       make(map[string]benchmarkHistoryCell),
+			gpuScores:   make(map[int]float64),
 		}
-
-		if result.ParallelGPUs {
-			// All GPUs ran simultaneously — one column per server, score = avg composite.
-			gpuModelCount := make(map[string]int)
-			for _, gpu := range result.GPUs {
-				gpuModelCount[strings.TrimSpace(gpu.Name)]++
-			}
-			scoreSum := make(map[string]float64)
-			scoreCnt := make(map[string]int)
-			for _, gpu := range result.GPUs {
-				key := "parallel|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name)
-				scoreSum[key] += gpu.Scores.CompositeScore
-				scoreCnt[key]++
-				count := gpuModelCount[strings.TrimSpace(gpu.Name)]
-				columnByKey[key] = benchmarkHistoryColumn{
-					key:      key,
-					label:    benchmarkHistoryParallelLabel(result.ServerModel, gpu.Name, count),
-					name:     strings.TrimSpace(gpu.Name),
-					index:    -1,
-					parallel: true,
-				}
-			}
-			for key, sum := range scoreSum {
-				run.cells[key] = benchmarkHistoryCell{score: sum / float64(scoreCnt[key]), present: true}
-			}
-		} else {
-			// Each GPU ran independently — one column per GPU index.
-			for _, gpu := range result.GPUs {
-				key := "gpu|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name) + "|" + strconv.Itoa(gpu.Index)
-				columnByKey[key] = benchmarkHistoryColumn{
-					key:      key,
-					label:    benchmarkHistoryPerGPULabel(gpu.Name, gpu.Index),
-					name:     strings.TrimSpace(gpu.Name),
-					index:    gpu.Index,
-					parallel: false,
-				}
-				run.cells[key] = benchmarkHistoryCell{score: gpu.Scores.CompositeScore, present: true}
+		for _, gpu := range result.GPUs {
+			run.gpuScores[gpu.Index] = gpu.Scores.CompositeScore
+			if gpu.Index > maxGPUIndex {
+				maxGPUIndex = gpu.Index
 			}
 		}
 		runs = append(runs, run)
 	}
-
-	columns := make([]benchmarkHistoryColumn, 0, len(columnByKey))
-	for _, col := range columnByKey {
-		columns = append(columns, col)
-	}
-	// Sequential GPU columns first (sorted by GPU index), then parallel server columns.
-	sort.Slice(columns, func(i, j int) bool {
-		if columns[i].parallel != columns[j].parallel {
-			return !columns[i].parallel // sequential first
-		}
-		if columns[i].parallel {
-			li := strings.ToLower(columns[i].label)
-			lj := strings.ToLower(columns[j].label)
-			if li != lj {
-				return li < lj
-			}
-			return columns[i].key < columns[j].key
-		}
-		// Sequential: sort by GPU index, then name.
-		if columns[i].index != columns[j].index {
-			return columns[i].index < columns[j].index
-		}
-		return strings.ToLower(columns[i].name) < strings.ToLower(columns[j].name)
-	})
 	sort.Slice(runs, func(i, j int) bool {
 		return runs[i].generatedAt.After(runs[j].generatedAt)
 	})
-	return columns, runs
+	return maxGPUIndex, runs
 }

-// benchmarkHistoryPerGPULabel formats a label for a single-GPU column: "GPU #N — ModelName".
-func benchmarkHistoryPerGPULabel(gpuName string, index int) string {
-	gpuName = strings.TrimSpace(gpuName)
-	if gpuName == "" {
-		gpuName = "Unknown GPU"
+func renderPowerBenchmarkResultsCard(exportDir string) string {
+	baseDir := app.DefaultBeeBenchPowerDir
+	if strings.TrimSpace(exportDir) != "" {
+		baseDir = filepath.Join(exportDir, "bee-bench", "power")
 	}
-	return fmt.Sprintf("GPU #%d — %s", index, gpuName)
-}
+	paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
+	sort.Strings(paths)
 
-// benchmarkHistoryParallelLabel formats a label for an all-GPU parallel column:
-// "ServerModel — N× ModelName (All GPUs)" or "N× ModelName (All GPUs)" if no server.
-func benchmarkHistoryParallelLabel(serverModel, gpuName string, count int) string {
-	serverModel = strings.TrimSpace(serverModel)
-	gpuName = strings.TrimSpace(gpuName)
-	if gpuName == "" {
-		gpuName = "Unknown GPU"
+	type powerRun struct {
+		generatedAt time.Time
+		displayTime string
+		result      platform.NvidiaPowerBenchResult
 	}
-	gpuPart := fmt.Sprintf("%d× %s (All GPUs)", count, gpuName)
-	if serverModel == "" {
-		return gpuPart
+	var runs []powerRun
+	for _, path := range paths {
+		raw, err := os.ReadFile(path)
+		if err != nil {
+			continue
+		}
+		var r platform.NvidiaPowerBenchResult
+		if err := json.Unmarshal(raw, &r); err != nil {
+			continue
+		}
+		runs = append(runs, powerRun{
+			generatedAt: r.GeneratedAt,
+			displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
+			result:      r,
+		})
 	}
-	return fmt.Sprintf("%s — %s", serverModel, gpuPart)
+	if err != nil || len(runs) == 0 { // glob failed, no result files, or none could be read/parsed — runs[0] below must exist
+		return `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
+	}
+	sort.Slice(runs, func(i, j int) bool {
+		return runs[i].generatedAt.After(runs[j].generatedAt)
+	})
+
+	// Show only the most recent run's GPU slot table, plus a run history summary.
+	var b strings.Builder
+	b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
+
+	latest := runs[0].result
+	b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
+	if latest.Hostname != "" {
+		b.WriteString(` — ` + html.EscapeString(latest.Hostname))
+	}
+	if latest.OverallStatus != "" {
+		statusColor := "var(--ok)"
+		if latest.OverallStatus != "OK" {
+			statusColor = "var(--warn)"
+		}
+		b.WriteString(` — <span style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
+	}
+	b.WriteString(`</p>`)
+
+	if len(latest.GPUs) > 0 {
+		b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
+		b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Achieved W</th><th>P95 Observed W</th><th>Status</th>`)
+		b.WriteString(`</tr></thead><tbody>`)
+		for _, gpu := range latest.GPUs {
+			derated := gpu.Derated || (gpu.DefaultPowerLimitW > 0 && gpu.AppliedPowerLimitW < gpu.DefaultPowerLimitW-1)
+			rowStyle := ""
+			achievedStyle := ""
+			if derated {
+				rowStyle = ` style="background:rgba(255,180,0,0.08)"`
+				achievedStyle = ` style="color:#e6a000;font-weight:600"`
+			}
+			statusLabel := gpu.Status
+			if statusLabel == "" {
+				statusLabel = "OK"
+			}
+			statusColor := "var(--ok)"
+			if statusLabel != "OK" {
+				statusColor = "var(--warn)"
+			}
+			nominalStr := "-"
+			if gpu.DefaultPowerLimitW > 0 {
+				nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
+			}
+			achievedStr := "-"
+			if gpu.AppliedPowerLimitW > 0 {
+				achievedStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
+			}
+			p95Str := "-"
+			if gpu.MaxObservedPowerW > 0 {
+				p95Str = fmt.Sprintf("%.0f", gpu.MaxObservedPowerW)
+			}
+			b.WriteString(`<tr` + rowStyle + `>`)
+			b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
+			b.WriteString(`<td>` + nominalStr + `</td>`)
+			b.WriteString(`<td` + achievedStyle + `>` + achievedStr + `</td>`)
+			b.WriteString(`<td>` + p95Str + `</td>`)
+			b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div>`)
+	}
+
+	if len(runs) > 1 {
+		b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
+		b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
+		for i, run := range runs {
+			statusColor := "var(--ok)"
+			if run.result.OverallStatus != "OK" {
+				statusColor = "var(--warn)"
+			}
+			b.WriteString(`<tr>`)
+			b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
+			b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
+			b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(run.result.OverallStatus) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div></details>`)
+	}
+
+	b.WriteString(`</div></div>`)
+	return b.String()
 }

 // ── Burn ──────────────────────────────────────────────────────────────────────
@@ -3338,12 +3397,19 @@ fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
  else if (kind === 'disk') label = 'disk (' + source + ')';
  else label = source;
  boot.textContent = 'Current boot source: ' + label + '.';
-  if (d.in_ram) {
-    txt.textContent = '✓ Running from RAM — installation media can be safely disconnected.';
+  txt.textContent = d.message || 'Checking...';
+  if (d.status === 'ok' || d.in_ram) {
    txt.style.color = 'var(--ok, green)';
+  } else if (d.status === 'failed') {
+    txt.style.color = 'var(--err, #b91c1c)';
  } else {
-    txt.textContent = 'Live media is mounted from installation device. Copy to RAM to allow media removal.';
+    txt.style.color = 'var(--muted)';
+  }
+  if (d.can_start_task) {
    btn.style.display = '';
+    btn.disabled = false;
+  } else {
+    btn.style.display = 'none';
  }
 });
 function installToRAM() {
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -261,7 +261,9 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
-	mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
+	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
+	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
+	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)

 	// Tasks
 	mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -11,6 +11,7 @@ import (
 	"time"

 	"bee/audit/internal/platform"
+	"bee/audit/internal/schema"
 )

 func TestChartLegendNumber(t *testing.T) {
@@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
 	}
 }

+func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) {
+	row := buildRuntimeToRAMRow(schema.RuntimeHealth{ToRAMStatus: "partial"})
+	if row.Status != "WARNING" {
+		t.Fatalf("status=%q want WARNING", row.Status)
+	}
+	if !strings.Contains(row.Issue, "Partial or staged RAM copy detected") {
+		t.Fatalf("issue=%q", row.Issue)
+	}
+}
+
 func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 	samples := []platform.LiveMetricSample{
 		{
@@ -637,8 +648,11 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`href="/benchmark"`,
 		`id="benchmark-gpu-list"`,
 		`/api/gpu/nvidia`,
-		`/api/benchmark/nvidia/run`,
+		`/api/bee-bench/nvidia/perf/run`,
+		`/api/bee-bench/nvidia/power/run`,
 		`benchmark-run-nccl`,
+		`Run Performance Benchmark`,
+		`Run Power / Thermal Fit`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
@@ -649,7 +663,7 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	dir := t.TempDir()
 	exportDir := filepath.Join(dir, "export")
-	runDir := filepath.Join(exportDir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	runDir := filepath.Join(exportDir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -691,10 +705,10 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	body := rec.Body.String()
 	wantTime := result.GeneratedAt.Local().Format("2006-01-02 15:04:05")
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Performance Results`,
 		`Composite score by saved benchmark run and GPU.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
-		`GPU #1 — NVIDIA H100 PCIe`,
+		`GPU 0`,
+		`GPU 1`,
 		`#1`,
 		wantTime,
 		`1176.25`,
@@ -1113,8 +1127,8 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 		`>Storage<`,
 		`>GPU<`,
 		`>PSU<`,
-		`badge-warn`,   // cpu Warning badge
-		`badge-err`,    // storage Critical badge
+		`badge-warn`, // cpu Warning badge
+		`badge-err`,  // storage Critical badge
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("dashboard missing %q: %s", needle, body)
--- a/audit/internal/webui/task_report.go
+++ b/audit/internal/webui/task_report.go
@@ -233,6 +233,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 	if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
 		b.WriteString(benchmarkCard)
 	}
+	if powerCard := renderTaskPowerResultsCard(report.Target, logText); powerCard != "" {
+		b.WriteString(powerCard)
+	}

 	if len(report.Charts) > 0 {
 		for _, chart := range report.Charts {
@@ -251,7 +254,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 }

 func renderTaskBenchmarkResultsCard(target, logText string) string {
-	if strings.TrimSpace(target) != "nvidia-benchmark" {
+	switch strings.TrimSpace(target) {
+	case "nvidia-bench-perf":
+	default:
 		return ""
 	}
 	resultPath := taskBenchmarkResultPath(logText)
@@ -263,7 +268,7 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 		return ""
 	}
 	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+		"Perf Results",
 		"Composite score for this benchmark task.",
 		"No benchmark results were saved for this task.",
 		columns,
@@ -271,15 +276,42 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 	)
 }

+func renderTaskPowerResultsCard(target, logText string) string {
+	if strings.TrimSpace(target) != "nvidia-bench-power" {
+		return ""
+	}
+	resultPath := taskBenchmarkResultPath(logText)
+	if strings.TrimSpace(resultPath) == "" {
+		return ""
+	}
+	raw, err := os.ReadFile(resultPath)
+	if err != nil {
+		return ""
+	}
+	var result platform.NvidiaPowerBenchResult
+	if err := json.Unmarshal(raw, &result); err != nil {
+		return ""
+	}
+	var b strings.Builder
+	b.WriteString(`<div class="card"><div class="card-head">Power Results</div><div class="card-body">`)
+	if len(result.RecommendedSlotOrder) > 0 {
+		b.WriteString(`<p style="margin-bottom:10px"><strong>Recommended slot order:</strong> ` + html.EscapeString(joinTaskIndices(result.RecommendedSlotOrder)) + `</p>`)
+	}
+	b.WriteString(`<table><tr><th>GPU</th><th>Status</th><th>Max Power</th><th>Applied Limit</th></tr>`)
+	for _, gpu := range result.GPUs {
+		fmt.Fprintf(&b, `<tr><td>GPU %d</td><td>%s</td><td>%.0f W</td><td>%.0f W</td></tr>`,
+			gpu.Index, html.EscapeString(gpu.Status), gpu.MaxObservedPowerW, gpu.AppliedPowerLimitW)
+	}
+	b.WriteString(`</table></div></div>`)
+	return b.String()
+}
+
 func taskBenchmarkResultPath(logText string) string {
 	archivePath := taskArchivePathFromLog(logText)
 	if archivePath == "" {
 		return ""
 	}
 	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
-	if runDir == archivePath {
-		return ""
-	}
 	return filepath.Join(runDir, "result.json")
 }

--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -32,7 +32,8 @@ const (
 var taskNames = map[string]string{
 	"nvidia":                 "NVIDIA SAT",
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
-	"nvidia-benchmark":       "NVIDIA Benchmark",
+	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
+	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -628,7 +629,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = 300
 		}
 		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
-	case "nvidia-benchmark":
+	case "nvidia-bench-perf":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
@@ -644,6 +645,19 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
+	case "nvidia-bench-power":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaPowerBenchCtx(ctx, app.DefaultBeeBenchPowerDir, platform.NvidiaBenchmarkOptions{
+			Profile:           t.params.BenchmarkProfile,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
+		}, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -366,7 +366,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	taskReportMetricsDBPath = metricsPath
 	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })

-	benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	benchmarkDir := filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -398,14 +398,14 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	task := &Task{
 		ID:           "task-bench",
-		Name:         "NVIDIA Benchmark",
-		Target:       "nvidia-benchmark",
+		Name:         "NVIDIA Bee Bench Perf",
+		Target:       "nvidia-bench-perf",
 		Status:       TaskDone,
 		CreatedAt:    time.Now().UTC().Add(-time.Minute),
 		ArtifactsDir: artifactsDir,
 	}
 	ensureTaskReportPaths(task)
-	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
+	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000.tar.gz") + "\n"
 	if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -420,9 +420,9 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	html := string(body)
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score for this benchmark task.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
+		`GPU 0`,
 		`1176.25`,
 	} {
 		if !strings.Contains(html, needle) {
--- a/bible-local/docs/benchmark-clock-calibration.md
+++ b/bible-local/docs/benchmark-clock-calibration.md
@@ -1,5 +1,34 @@
 # Benchmark clock calibration research

+## Benchmark methodology versioning
+
+Every benchmark methodology change must bump the benchmark version constant in
+source code by exactly `+1`.
+
+Methodology change means any change that affects comparability of benchmark
+results, including for example:
+- phase durations or phase order
+- enabled/disabled precisions
+- fallback rules
+- normalization rules
+- score formulas or weights
+- degradation thresholds
+- power calibration logic
+- thermal/power penalty logic
+
+Requirements:
+- benchmark version must be stored in source code as an explicit version
+  constant, not inferred from git tag or build metadata
+- benchmark report must always print the benchmark version
+- `result.json` must always include the benchmark version
+- results from different benchmark versions must be treated as non-comparable by
+  default
+
+Purpose:
+- prevent accidental comparison of runs produced by different methodologies
+- make historical benchmark archives self-describing even when detached from git
+- force deliberate version bumps whenever scoring or execution semantics change
+
 ## Status
 In progress. Baseline data from production servers pending.

--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -6,7 +6,7 @@ NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
 NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
-CUBLAS_VERSION=13.0.2.14-1
+CUBLAS_VERSION=13.1.1.3-1
 CUDA_USERSPACE_VERSION=13.0.96-1
 DCGM_VERSION=4.5.3-1
 JOHN_JUMBO_COMMIT=67fcf9fe5a
@@ -21,3 +21,4 @@ HIPBLASLT_VERSION=0.10.0.60304-76~22.04
 COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
+MEMTEST_VERSION=6.10-4
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -23,10 +23,10 @@ lb config noauto \
    --bootloaders "grub-efi,syslinux" \
    --debian-installer none \
    --archive-areas "main contrib non-free non-free-firmware" \
-    --mirror-bootstrap "https://deb.debian.org/debian" \
-    --mirror-chroot "https://deb.debian.org/debian" \
-    --mirror-binary "https://deb.debian.org/debian" \
-    --security true \
+    --mirror-bootstrap "http://mirror.mephi.ru/debian/" \
+    --mirror-chroot "http://mirror.mephi.ru/debian/" \
+    --mirror-binary "http://mirror.mephi.ru/debian/" \
+    --security false \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
    --memtest memtest86+ \
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -33,7 +33,6 @@ typedef void *CUstream;
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
 #define MAX_STRESS_STREAMS 16
-#define MAX_CUBLAS_PROFILES 5
 #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
 #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)

@@ -643,6 +642,20 @@ static const struct profile_desc k_profiles[] = {
        CUDA_R_16F,
        CUBLAS_COMPUTE_32F_FAST_16F,
    },
+    {
+        "int8_tensor",
+        "int8",
+        75,
+        1,
+        0,
+        0,
+        128,
+        CUDA_R_8I,
+        CUDA_R_8I,
+        CUDA_R_32I,
+        CUDA_R_32I,
+        CUBLAS_COMPUTE_32I,
+    },
    {
        "fp8_e4m3",
        "fp8",
@@ -689,6 +702,8 @@ static const struct profile_desc k_profiles[] = {
 #endif
 };

+#define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
+
 static int load_cublaslt(struct cublaslt_api *api) {
    memset(api, 0, sizeof(*api));
    api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -759,10 +774,12 @@ static int check_cublas(const char *step, cublasStatus_t status) {
 static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    switch (type) {
        case CUDA_R_32F:
+        case CUDA_R_32I:
            return (size_t)(elements * 4u);
        case CUDA_R_16F:
        case CUDA_R_16BF:
            return (size_t)(elements * 2u);
+        case CUDA_R_8I:
        case CUDA_R_8F_E4M3:
        case CUDA_R_8F_E5M2:
            return (size_t)(elements);
@@ -775,6 +792,16 @@ static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    }
 }

+/* Pick the scale type handed to cublasLtMatmulDescCreate for a profile.
+ * Integer compute (CUBLAS_COMPUTE_32I) maps to CUDA_R_32I, double compute
+ * (CUBLAS_COMPUTE_64F) maps to CUDA_R_64F, and every other compute type
+ * falls back to CUDA_R_32F. */
+static cudaDataType_t matmul_scale_type(const struct profile_desc *desc) {
+    if (desc->compute_type == CUBLAS_COMPUTE_32I) {
+        return CUDA_R_32I;
+    }
+    if (desc->compute_type == CUBLAS_COMPUTE_64F) {
+        return CUDA_R_64F;
+    }
+    return CUDA_R_32F;
+}
+
 static size_t fp4_scale_bytes(uint64_t rows, uint64_t cols) {
    uint64_t row_tiles = (rows + 127u) / 128u;
    uint64_t col_tiles = (cols + 63u) / 64u;
@@ -943,8 +970,9 @@ static int prepare_profile(struct cublaslt_api *cublas,
        return 0;
    }

+    cudaDataType_t scale_type = matmul_scale_type(desc);
    if (!check_cublas("cublasLtMatmulDescCreate",
-                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, CUDA_R_32F))) {
+                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, scale_type))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
@@ -1093,17 +1121,30 @@ static int prepare_profile(struct cublaslt_api *cublas,
 static int run_cublas_profile(cublasLtHandle_t handle,
                              struct cublaslt_api *cublas,
                              struct prepared_profile *profile) {
+    int32_t alpha_i32 = 1;
+    int32_t beta_i32 = 0;
+    double alpha_f64 = 1.0;
+    double beta_f64 = 0.0;
    float alpha = 1.0f;
    float beta = 0.0f;
+    const void *alpha_ptr = &alpha;
+    const void *beta_ptr = &beta;
+    if (profile->desc.compute_type == CUBLAS_COMPUTE_32I) {
+        alpha_ptr = &alpha_i32;
+        beta_ptr = &beta_i32;
+    } else if (profile->desc.compute_type == CUBLAS_COMPUTE_64F) {
+        alpha_ptr = &alpha_f64;
+        beta_ptr = &beta_f64;
+    }
    return check_cublas(profile->desc.name,
                        cublas->cublasLtMatmul(handle,
                                               profile->op_desc,
-                                               &alpha,
+                                               alpha_ptr,
                                               (const void *)(uintptr_t)profile->a_dev,
                                               profile->a_layout,
                                               (const void *)(uintptr_t)profile->b_dev,
                                               profile->b_layout,
-                                               &beta,
+                                               beta_ptr,
                                               (const void *)(uintptr_t)profile->c_dev,
                                               profile->c_layout,
                                               (void *)(uintptr_t)profile->d_dev,
@@ -1121,9 +1162,10 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               int cc_minor,
                               int seconds,
                               int size_mb,
+                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
    CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1133,7 +1175,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    int active = 0;
    int mp_count = 0;
    int stream_count = 1;
-    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int profile_count = PROFILE_COUNT;
    int prepared_count = 0;
    size_t requested_budget = 0;
    size_t total_budget = 0;
@@ -1158,8 +1200,10 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Count profiles matching the filter (for deciding what to run). */
    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
-        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc &&
+            (precision_filter == NULL || strcmp(k_profiles[i].block_label, precision_filter) == 0)) {
            planned++;
        }
    }
@@ -1170,18 +1214,31 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Count all profiles active on this GPU regardless of filter.
+     * Used as the budget divisor so matrix sizes stay consistent whether
+     * running all precisions together or a single-precision phase. */
+    int planned_total = 0;
+    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+            planned_total++;
+        }
+    }
+    if (planned_total < planned) {
+        planned_total = planned;
+    }
+
    requested_budget = (size_t)size_mb * 1024u * 1024u;
-    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (requested_budget < (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES;
    }
    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
-    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (total_budget < (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES;
    }
    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
        cuda->cuStreamCreate &&
        cuda->cuStreamDestroy) {
-        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+        stream_count = choose_stream_count(mp_count, planned_total, total_budget, 1);
    }
    if (stream_count > 1) {
        int created = 0;
@@ -1194,7 +1251,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }
    report->stream_count = stream_count;
-    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    per_profile_budget = total_budget / ((size_t)planned_total * (size_t)stream_count);
    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
    }
@@ -1218,6 +1275,13 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                          desc->min_cc);
            continue;
        }
+        if (precision_filter != NULL && strcmp(desc->block_label, precision_filter) != 0) {
+            append_detail(report->details,
+                          sizeof(report->details),
+                          "%s=SKIPPED precision_filter\n",
+                          desc->name);
+            continue;
+        }
        for (int lane = 0; lane < stream_count; lane++) {
            CUstream stream = streams[lane];
            if (prepared_count >= (int)(sizeof(prepared) / sizeof(prepared[0]))) {
@@ -1335,10 +1399,29 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 }
 #endif

+/* Print one stress run as key=value lines on stdout.
+ *
+ * report        filled-in result of the run (device, backend, counters, details)
+ * device_index  value echoed as device_index=
+ * seconds       value echoed as duration_s=; it is the caller-supplied
+ *               duration, not a value read from the report
+ *
+ * The output always ends with status=OK, so this is only meant to be called
+ * for a run that succeeded. */
+static void print_stress_report(const struct stress_report *report, int device_index, int seconds) {
+    printf("device=%s\n", report->device);
+    printf("device_index=%d\n", device_index);
+    printf("compute_capability=%d.%d\n", report->cc_major, report->cc_minor);
+    printf("backend=%s\n", report->backend);
+    printf("duration_s=%d\n", seconds);
+    printf("buffer_mb=%d\n", report->buffer_mb);
+    printf("streams=%d\n", report->stream_count);
+    printf("iterations=%lu\n", report->iterations);
+    printf("checksum=%llu\n", (unsigned long long)report->checksum);
+    /* details is an optional block of preformatted lines; skip it when empty. */
+    if (report->details[0] != '\0') {
+        printf("%s", report->details);
+    }
+    printf("status=OK\n");
+}
+
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
    int device_index = 0;
+    const char *precision_filter = NULL; /* NULL = all; else block_label to match */
+    const char *precision_plan = NULL;
+    const char *precision_plan_seconds = NULL;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
@@ -1346,8 +1429,16 @@ int main(int argc, char **argv) {
            size_mb = atoi(argv[++i]);
        } else if ((strcmp(argv[i], "--device") == 0 || strcmp(argv[i], "-d") == 0) && i + 1 < argc) {
            device_index = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "--precision") == 0 && i + 1 < argc) {
+            precision_filter = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan") == 0 && i + 1 < argc) {
+            precision_plan = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan-seconds") == 0 && i + 1 < argc) {
+            precision_plan_seconds = argv[++i];
        } else {
-            fprintf(stderr, "usage: %s [--seconds N] [--size-mb N] [--device N]\n", argv[0]);
+            fprintf(stderr,
+                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]\n",
+                    argv[0]);
            return 2;
        }
    }
@@ -1407,26 +1498,94 @@ int main(int argc, char **argv) {
    int ok = 0;

 #if HAVE_CUBLASLT_HEADERS
-    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report);
+    if (precision_plan != NULL && precision_plan[0] != '\0') {
+        char *plan_copy = strdup(precision_plan);
+        char *plan_seconds_copy = NULL;
+        int phase_seconds[32] = {0};
+        int phase_seconds_count = 0;
+        int phase_ok = 0;
+        if (plan_copy == NULL) {
+            fprintf(stderr, "failed to allocate precision plan buffer\n");
+            return 1;
+        }
+        if (precision_plan_seconds != NULL && precision_plan_seconds[0] != '\0') {
+            plan_seconds_copy = strdup(precision_plan_seconds);
+            if (plan_seconds_copy == NULL) {
+                free(plan_copy);
+                fprintf(stderr, "failed to allocate precision plan seconds buffer\n");
+                return 1;
+            }
+            for (char *sec_token = strtok(plan_seconds_copy, ",");
+                 sec_token != NULL && phase_seconds_count < (int)(sizeof(phase_seconds) / sizeof(phase_seconds[0]));
+                 sec_token = strtok(NULL, ",")) {
+                while (*sec_token == ' ' || *sec_token == '\t') {
+                    sec_token++;
+                }
+                if (*sec_token == '\0') {
+                    continue;
+                }
+                phase_seconds[phase_seconds_count++] = atoi(sec_token);
+            }
+        }
+        int phase_idx = 0;
+        for (char *token = strtok(plan_copy, ","); token != NULL; token = strtok(NULL, ","), phase_idx++) {
+            while (*token == ' ' || *token == '\t') {
+                token++;
+            }
+            if (*token == '\0') {
+                continue;
+            }
+            const char *phase_name = token;
+            const char *phase_filter = token;
+            if (strcmp(token, "mixed") == 0 || strcmp(token, "all") == 0) {
+                phase_filter = NULL;
+            }
+            int phase_duration = seconds;
+            if (phase_idx < phase_seconds_count && phase_seconds[phase_idx] > 0) {
+                phase_duration = phase_seconds[phase_idx];
+            }
+            printf("phase_begin=%s\n", phase_name);
+            fflush(stdout);
+            memset(&report, 0, sizeof(report));
+            ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, phase_duration, size_mb, phase_filter, &report);
+            if (ok) {
+                print_stress_report(&report, device_index, phase_duration);
+                phase_ok = 1;
+            } else {
+                printf("phase_error=%s\n", phase_name);
+                if (report.details[0] != '\0') {
+                    printf("%s", report.details);
+                    if (report.details[strlen(report.details) - 1] != '\n') {
+                        printf("\n");
+                    }
+                }
+                printf("status=FAILED\n");
+            }
+            printf("phase_end=%s\n", phase_name);
+            fflush(stdout);
+        }
+        free(plan_seconds_copy);
+        free(plan_copy);
+        return phase_ok ? 0 : 1;
+    }
+    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, precision_filter, &report);
 #endif
    if (!ok) {
-        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report)) {
+        if (precision_filter != NULL) {
+            fprintf(stderr,
+                    "requested precision path unavailable: precision=%s device=%s cc=%d.%d\n",
+                    precision_filter,
+                    name,
+                    cc_major,
+                    cc_minor);
+            return 1;
+        }
+        int ptx_mb = size_mb;
+        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, ptx_mb, &report)) {
            return 1;
        }
    }

-    printf("device=%s\n", report.device);
-    printf("device_index=%d\n", device_index);
-    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
-    printf("backend=%s\n", report.backend);
-    printf("duration_s=%d\n", seconds);
-    printf("buffer_mb=%d\n", report.buffer_mb);
-    printf("streams=%d\n", report.stream_count);
-    printf("iterations=%lu\n", report.iterations);
-    printf("checksum=%llu\n", (unsigned long long)report.checksum);
-    if (report.details[0] != '\0') {
-        printf("%s", report.details);
-    }
-    printf("status=OK\n");
+    print_stress_report(&report, device_index, seconds);
    return 0;
 }
--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -161,6 +161,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}" \
@@ -175,6 +176,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}"
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -57,6 +57,7 @@ OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BUILD_VARIANT}"
 export BEE_GPU_VENDOR BEE_NVIDIA_MODULE_FLAVOR BUILD_VARIANT

 . "${BUILDER_DIR}/VERSIONS"
+export MEMTEST_VERSION
 export PATH="$PATH:/usr/local/go/bin"
 : "${BEE_REQUIRE_MEMTEST:=0}"

@@ -775,6 +776,7 @@ run_optional_step_sh() {
        return 0
    fi

+    mkdir -p "${LOG_DIR}" 2>/dev/null || true
    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== optional step: ${step_name} ==="
@@ -798,13 +800,14 @@ start_build_log
 # install them on the fly so NVIDIA modules and ISO kernel always match.
 if [ -z "${DEBIAN_KERNEL_ABI}" ] || [ "${DEBIAN_KERNEL_ABI}" = "auto" ]; then
    echo "=== refreshing apt index to detect current kernel ABI ==="
-    apt-get update -qq
+    apt-get update -qq || echo "WARNING: apt-get update failed, trying cached index"
    DEBIAN_KERNEL_ABI=$(apt-cache depends linux-image-amd64 2>/dev/null \
        | awk '/Depends:.*linux-image-[0-9]/{print $2}' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+-[0-9]+' \
        | head -1)
    if [ -z "${DEBIAN_KERNEL_ABI}" ]; then
        echo "ERROR: could not auto-detect kernel ABI from apt-cache" >&2
+        echo "Hint: set DEBIAN_KERNEL_ABI=x.y.z-N in iso/builder/VERSIONS to skip auto-detection" >&2
        exit 1
    fi
    echo "=== kernel ABI: ${DEBIAN_KERNEL_ABI} ==="
@@ -873,9 +876,37 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then

    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"

+    echo "=== bee-gpu-burn FP4 header probe ==="
+    fp4_type_match="$(grep -Rsnm 1 'CUDA_R_4F_E2M1' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    fp4_scale_match="$(grep -Rsnm 1 'CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    if [ -n "$fp4_type_match" ]; then
+        echo "fp4_header_symbol=present"
+        echo "$fp4_type_match"
+    else
+        echo "fp4_header_symbol=missing"
+    fi
+    if [ -n "$fp4_scale_match" ]; then
+        echo "fp4_scale_mode_symbol=present"
+        echo "$fp4_scale_match"
+    else
+        echo "fp4_scale_mode_symbol=missing"
+    fi
+
    GPU_STRESS_NEED_BUILD=1
-    if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then
+    if [ -f "$GPU_BURN_WORKER_BIN" ]; then
        GPU_STRESS_NEED_BUILD=0
+        for dep in \
+            "${BUILDER_DIR}/bee-gpu-stress.c" \
+            "${BUILDER_DIR}/VERSIONS"; do
+            if [ "$dep" -nt "$GPU_BURN_WORKER_BIN" ]; then
+                GPU_STRESS_NEED_BUILD=1
+                break
+            fi
+        done
+        if [ "$GPU_STRESS_NEED_BUILD" = "0" ] && \
+            find "${CUBLAS_CACHE}/include" "${CUBLAS_CACHE}/lib" -type f -newer "$GPU_BURN_WORKER_BIN" | grep -q .; then
+            GPU_STRESS_NEED_BUILD=1
+        fi
    fi

    if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then
@@ -889,6 +920,12 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    else
        echo "=== bee-gpu-burn worker up to date, skipping build ==="
    fi
+    echo "=== bee-gpu-burn compiled profile probe ==="
+    if grep -aq 'fp4_e2m1' "$GPU_BURN_WORKER_BIN"; then
+        echo "fp4_profile_string=present"
+    else
+        echo "fp4_profile_string=missing"
+    fi
 fi

 echo "=== preparing staged overlay (${BUILD_VARIANT}) ==="
--- a/iso/builder/config/hooks/normal/9100-memtest.hook.binary
+++ b/iso/builder/config/hooks/normal/9100-memtest.hook.binary
@@ -5,6 +5,8 @@ set -e

 : "${BEE_REQUIRE_MEMTEST:=0}"

+# memtest86+ 6.x uses memtest86+.bin (no x64 suffix) for the BIOS binary,
+# while 5.x used memtest86+x64.bin. We normalise both to x64 names in the ISO.
 MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
 BINARY_BOOT_DIR="binary/boot"
 GRUB_CFG="binary/boot/grub/grub.cfg"
@@ -26,13 +28,13 @@ fail_or_warn() {

 copy_memtest_file() {
    src="$1"
-    base="$(basename "$src")"
-    dst="${BINARY_BOOT_DIR}/${base}"
+    dst_name="${2:-$(basename "$src")}"
+    dst="${BINARY_BOOT_DIR}/${dst_name}"

    [ -f "$src" ] || return 1
    mkdir -p "${BINARY_BOOT_DIR}"
    cp "$src" "$dst"
-    log "copied ${base} from ${src}"
+    log "copied ${dst_name} from ${src}"
 }

 extract_memtest_from_deb() {
@@ -41,14 +43,42 @@ extract_memtest_from_deb() {

    log "extracting memtest payload from ${deb}"
    dpkg-deb -x "$deb" "$tmpdir"
-    for f in ${MEMTEST_FILES}; do
-        if [ -f "${tmpdir}/boot/${f}" ]; then
-            copy_memtest_file "${tmpdir}/boot/${f}"
-        fi
-    done
+
+    # EFI binary: both 5.x and 6.x use memtest86+x64.efi
+    if [ -f "${tmpdir}/boot/memtest86+x64.efi" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.efi"
+    fi
+
+    # BIOS binary: 5.x = memtest86+x64.bin, 6.x = memtest86+.bin
+    if [ -f "${tmpdir}/boot/memtest86+x64.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.bin"
+    elif [ -f "${tmpdir}/boot/memtest86+.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+.bin" "memtest86+x64.bin"
+    fi
+
    rm -rf "$tmpdir"
 }

+# Download the memtest86+ package with apt-get into a scratch directory and
+# extract its boot binaries via extract_memtest_from_deb. When MEMTEST_VERSION
+# is set, the download is pinned using apt's "package=version" form.
+# Best-effort: a failed download is only logged, and the caller re-checks
+# which files are still missing afterwards.
+download_and_extract_memtest() {
+    tmpdl="$(mktemp -d)"
+    ver_arg=""
+    if [ -n "${MEMTEST_VERSION:-}" ]; then
+        # Only the "=<version>" suffix goes here; the package name is prepended
+        # at the apt-get call below. Repeating the name would produce the
+        # invalid spec "memtest86+=memtest86+=<version>".
+        ver_arg="=${MEMTEST_VERSION}"
+        log "downloading memtest86+=${MEMTEST_VERSION} from apt"
+    else
+        log "downloading memtest86+ from apt (no version pinned)"
+    fi
+    # Run in a subshell so the cd does not leak; failure is tolerated here and
+    # detected below by the absence of a downloaded .deb.
+    ( cd "$tmpdl" && apt-get download "memtest86+${ver_arg}" ) 2>/dev/null || true
+    deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
+    if [ -n "$deb" ]; then
+        extract_memtest_from_deb "$deb"
+    else
+        log "apt download of memtest86+ failed"
+    fi
+    rm -rf "$tmpdl"
+}
+
 ensure_memtest_binaries() {
    missing=0
    for f in ${MEMTEST_FILES}; do
@@ -56,10 +86,15 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 1. Try files already placed by lb binary_memtest or chroot
    for root in chroot/boot /boot; do
        for f in ${MEMTEST_FILES}; do
            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
        done
+        # 6.x BIOS binary may lack x64 in name — copy with normalised name
+        if [ ! -f "${BINARY_BOOT_DIR}/memtest86+x64.bin" ]; then
+            copy_memtest_file "${root}/memtest86+.bin" "memtest86+x64.bin" || true
+        fi
    done

    missing=0
@@ -68,6 +103,7 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 2. Try apt package cache (may be empty if lb binary_memtest already purged)
    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
        [ -d "$root" ] || continue
        deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
@@ -76,6 +112,15 @@ ensure_memtest_binaries() {
        break
    done

+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
+    done
+    [ "$missing" -eq 1 ] || return 0
+
+    # 3. Fallback: download fresh from apt (lb binary_memtest purges the cache)
+    download_and_extract_memtest
+
    missing=0
    for f in ${MEMTEST_FILES}; do
        if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -6,10 +6,13 @@ STAGGER_SECONDS=0
 SIZE_MB=0
 DEVICES=""
 EXCLUDE=""
+PRECISION=""
+PRECISION_PLAN=""
+PRECISION_PLAN_SECONDS=""
 WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"

 usage() {
-    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]" >&2
    exit 2
 }

@@ -30,6 +33,9 @@ while [ "$#" -gt 0 ]; do
        --size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        --precision) [ "$#" -ge 2 ] || usage; PRECISION="$2"; shift 2 ;;
+        --precision-plan) [ "$#" -ge 2 ] || usage; PRECISION_PLAN="$2"; shift 2 ;;
+        --precision-plan-seconds) [ "$#" -ge 2 ] || usage; PRECISION_PLAN_SECONDS="$2"; shift 2 ;;
        *) usage ;;
    esac
 done
@@ -88,8 +94,14 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
    gpu_seconds=$(( SECONDS + extra_sec ))
    echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
+    precision_arg=""
+    [ -n "${PRECISION}" ] && precision_arg="--precision ${PRECISION}"
+    precision_plan_arg=""
+    [ -n "${PRECISION_PLAN}" ] && precision_plan_arg="--precision-plan ${PRECISION_PLAN}"
+    precision_plan_seconds_arg=""
+    [ -n "${PRECISION_PLAN_SECONDS}" ] && precision_plan_seconds_arg="--precision-plan-seconds ${PRECISION_PLAN_SECONDS}"
    CUDA_VISIBLE_DEVICES="${id}" \
-        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
+        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} ${precision_plan_arg} ${precision_plan_seconds_arg} >"${log}" 2>&1 &
    pid=$!
    WORKERS="${WORKERS} ${pid}:${id}:${log}"
    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
Author	SHA1	Message	Date
Mikhail Chusavitin	2dccbc010c	Use MEPHI mirror, disable security repo, fix memtest in ISO build - Switch all lb mirrors to mirror.mephi.ru/debian/ for faster/reliable downloads - Disable security repo (--security false) — not needed for LiveCD - Pin MEMTEST_VERSION=6.10-4 in VERSIONS, export to hook environment - Set BEE_REQUIRE_MEMTEST=1 in build-in-container.sh — missing memtest is now fatal - Fix 9100-memtest.hook.binary: add apt-get download fallback when lb binary_memtest has already purged the package cache; handle both 5.x (memtest86+x64.bin) and 6.x (memtest86+.bin) BIOS binary naming Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 09:57:29 +03:00
Michael Chus	e84c69d360	Fix optional step log dir missing after memtest recovery mkdir -p LOG_DIR before writing the optional step log so that a race with cleanup_build_log (EXIT trap archiving the log dir) does not cause a "Directory nonexistent" error during lb binary_checksums / lb binary_iso. Also downgrade apt-get update failure to a warning so a transient mirror outage does not block kernel ABI auto-detection when the apt cache is warm. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 07:28:36 +03:00
Michael Chus	c80a39e7ac	Add power results table, fix benchmark results refresh, bound memtester - Benchmark page now shows two result sections: Performance (scores) and Power / Thermal Fit (slot table). After any benchmark task completes the results section auto-refreshes via GET /api/benchmark/results without a full page reload. - Power results table shows each GPU slot with nominal TDP, achieved stable power limit, and P95 observed power. Rows with derated cards are highlighted amber so under-performing slots stand out at a glance. Older runs are collapsed in a <details> summary. - memtester is now wrapped with timeout(1) so a stuck memory controller cannot cause Validate Memory to hang indefinitely. Wall-clock limit is ~2.5 min per 100 MB per pass plus a 2-minute buffer. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 07:16:18 +03:00
Michael Chus	a5e0261ff2	Refactor power ramp to use true single-card baselines Phase 1 now calibrates each GPU individually (sequentially) so that PowerRealizationPct reflects real degradation from neighbour thermals and shared power rails. Previously the baseline came from an all-GPU-together run, making realization always ≈100% at the final ramp step. Ramp step 1 reuses single-card calibration results (no extra run); steps 2..N run targeted_power on the growing GPU subset with derating active. Remove OccupiedSlots/OccupiedSlotsNote fields and occupiedSlots() helper — they were compensation for the old all-GPU calibration approach. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 23:47:57 +03:00
Michael Chus	ee422ede3c	Revert "Add raster Easy Bee branding assets" This reverts commit `d560b2fead`.	2026-04-14 23:00:15 +03:00
Michael Chus	d560b2fead	Add raster Easy Bee branding assets	2026-04-14 22:39:25 +03:00
Michael Chus	3cf2e9c9dc	Run power calibration for all GPUs simultaneously Previously each GPU was calibrated sequentially (one card fully done before the next started), producing the staircase temperature pattern seen on the graph. Now all GPUs run together in a single dcgmi diag -r targeted_power session per attempt. This means: - All cards are under realistic thermal load at the same time. - A single DCGM session handles the run — no resource-busy contention from concurrent dcgmi processes. - Binary search state (lo/hi) is tracked independently per GPU; each card converges to its own highest stable power limit. - Throttle counter polling covers all active GPUs in the shared ticker. - Resource-busy exponential back-off is shared (one DCGM session). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:25:05 +03:00
Michael Chus	19dbabd71d	Simplify power calibration: pure binary search, no telemetry guessing Remove telemetry-guided initial candidate; use strict binary search midpoint at every step. Clean and predictable convergence in O(log N) attempts within the allowed power range [minLimitW, startingLimitW]. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:12:45 +03:00
Michael Chus	a6a07f2626	Replace linear power derate with binary search + telemetry-guided jump Power calibration previously stepped down 25 W at a time (linear), requiring up to 6 attempts to find a stable limit within 150 W range. New strategy: - Binary search between minLimitW (lo, assumed stable floor) and the starting/failed limit (hi, confirmed unstable), converging within a 10 W tolerance in ~4 attempts. - For thermal throttle: the first-quarter telemetry rows estimate the GPU's pre-throttle power draw. nextLimit = round5W(onset - 10 W) is used as the initial candidate instead of the binary midpoint, landing much closer to the true limit on the first step. - On success: lo is updated and a higher level is tried (binary search upward) until hi-lo ≤ tolerance, ensuring the highest stable limit is found rather than the first stable one. - Let targeted_power run to natural completion on throttle (no mid-run SIGKILL) so nv-hostengine releases its diagnostic slot cleanly before the next attempt. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:05:23 +03:00
Michael Chus	f87461ee4a	Detect thermal throttle with fans below 100% as cooling misconfiguration During power calibration: if a thermal throttle (sw_thermal/hw_thermal) causes ≥20% clock drop while server fans are below 98% P95 duty cycle, record a CoolingWarning on the GPU result and emit an actionable finding telling the operator to rerun with fans manually fixed at 100%. During steady-state benchmark: same signal enriches the existing thermal_limited finding with fan duty cycle and clock drift values. Covers both the main benchmark (buildBenchmarkFindings) and the power bench (NvidiaPowerBenchResult.Findings). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 21:44:57 +03:00
Michael Chus	a636146dbd	Fix power calibration failing due to DCGM resource contention When a targeted_power attempt is cancelled (e.g. after sw_thermal throttle), nv-hostengine holds the diagnostic slot asynchronously. The next attempt immediately received DCGM_ST_IN_USE (exit 222) and incorrectly derated the power limit. Now: exit 222 is detected via isDCGMResourceBusy and triggers an exponential back-off retry at the same power limit (1s, 2s, 4s, … up to 256s). Once the back-off delay would exceed 300s the calibration fails, indicating the slot is persistently held. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 20:41:17 +03:00
Mikhail Chusavitin	303de2df04	Add slot-aware ramp sequence to bee-bench power	2026-04-14 17:47:40 +03:00
Mikhail Chusavitin	95124d228f	Split bee-bench into perf and power workflows	2026-04-14 17:33:13 +03:00
Mikhail Chusavitin	54338dbae5	Unify live RAM runtime state	2026-04-14 16:18:33 +03:00
Mikhail Chusavitin	2be7ae6d28	Refine NVIDIA benchmark phase timing	2026-04-14 14:12:06 +03:00
Mikhail Chusavitin	b1a5035edd	Normalize task queue priorities by workflow	2026-04-14 11:13:54 +03:00
Mikhail Chusavitin	8fc986c933	Add benchmark fan duty cycle summary to report	2026-04-14 10:24:02 +03:00
Mikhail Chusavitin	88b5e0edf2	Harden IPMI power probe timeout	2026-04-14 10:18:23 +03:00
Mikhail Chusavitin	82fe1f6d26	Disable precision fallback and pin cuBLAS 13.1	2026-04-14 10:17:44 +03:00
Michael Chus	81e7c921f8	дебаг при сборке	2026-04-14 07:02:37 +03:00
Michael Chus	0fb8f2777f	Fix combined gpu burn profile capacity for fp4	2026-04-14 00:00:40 +03:00
Michael Chus	bf182daa89	Fix benchmark report methodology and rebuild gpu burn worker on toolchain changes	2026-04-13 23:43:12 +03:00
Michael Chus	457ea1cf04	Unify benchmark exports and drop ASCII charts	2026-04-13 21:38:28 +03:00
Michael Chus	bf6ecab4f0	Add per-precision benchmark phases, weighted TOPS scoring, and ECC tracking - Split steady window into 6 equal slots: fp8/fp16/fp32/fp64/fp4 + combined - Each precision phase runs bee-gpu-burn with --precision filter so PowerCVPct reflects single-kernel stability (not round-robin artifact) - Add fp4 support in bee-gpu-stress.c for Blackwell (cc>=100) via existing CUDA_R_4F_E2M1 guard - Weighted TOPS: fp64×2.0, fp32×1.0, fp16×0.5, fp8×0.25, fp4×0.125 - SyntheticScore = sum of weighted TOPS from per-precision phases - MixedScore = sum from combined phase; MixedEfficiency = Mixed/Synthetic - ComputeScore = SyntheticScore × (1 + MixedEfficiency × 0.3) - ECC volatile counters sampled before/after each phase and overall - DegradationReasons: ecc_uncorrected_errors, ecc_corrected_errors - Report: per-precision stability table with ECC columns, methodology section - Ramp-up history table redesign: GPU indices as columns, runs as rows Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-13 10:49:49 +03:00