Use MEPHI mirror, disable security repo, fix memtest in ISO build

- Switch all lb mirrors to mirror.mephi.ru/debian/ for faster, more reliable downloads
- Disable security repo (--security false) — not needed for LiveCD
- Pin MEMTEST_VERSION=6.10-4 in VERSIONS, export to hook environment
- Set BEE_REQUIRE_MEMTEST=1 in build-in-container.sh — missing memtest is now fatal
- Fix 9100-memtest.hook.binary: add apt-get download fallback when lb binary_memtest has already purged the package cache; handle both 5.x (memtest86+x64.bin) and 6.x (memtest86+.bin) BIOS binary naming

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Fix optional step log dir missing after memtest recovery
2026-04-15 09:57:29 +03:00 · 2026-04-15 07:28:36 +03:00 · 2026-04-15 07:16:18 +03:00 · 2026-04-14 23:47:57 +03:00 · 2026-04-14 23:00:15 +03:00 · 2026-04-14 22:39:25 +03:00
33 changed files with 2630 additions and 503 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .DS_Store
 dist/
 iso/out/
+build-cache/
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -30,7 +30,9 @@ var (
 	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
 	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
 	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
-	DefaultBenchmarkBaseDir = DefaultExportDir + "/bee-benchmark"
+	DefaultBeeBenchBaseDir  = DefaultExportDir + "/bee-bench"
+	DefaultBeeBenchPerfDir  = DefaultBeeBenchBaseDir + "/perf"
+	DefaultBeeBenchPowerDir = DefaultBeeBenchBaseDir + "/power"
 )

 type App struct {
@@ -84,6 +86,7 @@ type installer interface {
 	InstallToDisk(ctx context.Context, device string, logFile string) error
 	IsLiveMediaInRAM() bool
 	LiveBootSource() platform.LiveBootSource
+	LiveMediaRAMState() platform.LiveMediaRAMState
 	RunInstallToRAM(ctx context.Context, logFunc func(string)) error
 }

@@ -108,6 +111,10 @@ func (a *App) LiveBootSource() platform.LiveBootSource {
 	return a.installer.LiveBootSource()
 }

+func (a *App) LiveMediaRAMState() platform.LiveMediaRAMState {
+	return a.installer.LiveMediaRAMState()
+}
+
 func (a *App) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
 	return a.installer.RunInstallToRAM(ctx, logFunc)
 }
@@ -117,6 +124,7 @@ type satRunner interface {
 	RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
+	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
@@ -562,11 +570,18 @@ func (a *App) RunNvidiaBenchmark(baseDir string, opts platform.NvidiaBenchmarkOp

 func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
-		baseDir = DefaultBenchmarkBaseDir
+		baseDir = DefaultBeeBenchPerfDir
 	}
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }

+func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
+	if strings.TrimSpace(baseDir) == "" {
+		baseDir = DefaultBeeBenchPowerDir
+	}
+	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
+}
+
 func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -122,6 +122,7 @@ func (f fakeTools) CheckTools(names []string) []platform.ToolStatus {
 type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
+	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
@@ -154,6 +155,13 @@ func (f fakeSAT) RunNvidiaBenchmark(_ context.Context, baseDir string, opts plat
 	return f.runNvidiaFn(baseDir)
 }

+func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, _ func(string)) (string, error) {
+	if f.runNvidiaPowerBenchFn != nil {
+		return f.runNvidiaPowerBenchFn(baseDir, opts)
+	}
+	return f.runNvidiaFn(baseDir)
+}
+
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -48,7 +48,7 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		fmt.Fprintf(&b, "**GPU(s):** %s  \n", strings.Join(parts, ", "))
 	}
 	fmt.Fprintf(&b, "**Profile:** %s  \n", result.BenchmarkProfile)
-	fmt.Fprintf(&b, "**App version:** %s  \n", result.BenchmarkVersion)
+	fmt.Fprintf(&b, "**Benchmark version:** %s  \n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "**Generated:** %s  \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC"))
 	if result.RampStep > 0 && result.RampTotal > 0 {
 		fmt.Fprintf(&b, "**Ramp-up step:** %d of %d  \n", result.RampStep, result.RampTotal)
@@ -83,15 +83,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {

 	// ── Methodology ───────────────────────────────────────────────────────────
 	b.WriteString("## Methodology\n\n")
-	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline -> warmup -> steady-state -> interconnect -> cooldown phases.\n", result.BenchmarkProfile)
+	fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline -> warmup -> steady-state -> interconnect phases.\n", result.BenchmarkProfile)
 	b.WriteString("- Single-GPU compute score comes from `bee-gpu-burn` on the cuBLASLt path when available.\n")
 	b.WriteString("- Thermal and power limits are inferred from NVIDIA clock-event counters plus sustained telemetry.\n")
 	b.WriteString("- `result.json` is the canonical machine-readable source for the run.\n\n")
 	b.WriteString("**Compute score** is derived from two phases:\n\n")
-	b.WriteString("- **Synthetic** — each precision type (fp8, fp16, fp32, fp64, fp4) runs alone for a dedicated window. ")
+	b.WriteString("- **Synthetic** — each precision type (int8, fp8, fp16, fp32, fp64, fp4) runs alone for a dedicated window. ")
 	b.WriteString("Measures peak throughput with the full GPU dedicated to one kernel type. ")
 	b.WriteString("Each result is normalised to fp32-equivalent TOPS using precision weights: ")
-	b.WriteString("fp64 ×2.0 · fp32 ×1.0 · fp16 ×0.5 · fp8 ×0.25 · fp4 ×0.125.\n")
+	b.WriteString("fp64 ×2.0 · fp32 ×1.0 · fp16 ×0.5 · int8 ×0.25 · fp8 ×0.25 · fp4 ×0.125.\n")
 	b.WriteString("- **Mixed** — all precision types run simultaneously (combined phase). ")
 	b.WriteString("Reflects real inference workloads where fp8 matrix ops, fp16 attention and fp32 accumulation compete for bandwidth and SM scheduler slots.\n\n")
 	b.WriteString("**Formula:** `Compute = Synthetic × (1 + MixedEfficiency × 0.3)`\n\n")
@@ -170,6 +170,16 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		if gpu.PowerLimitW > 0 {
 			fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW)
 		}
+		if gpu.PowerLimitDerated {
+			fmt.Fprintf(&b, "- **Power limit derating:** active after %d targeted_power attempt(s)\n", gpu.PowerCalibrationTries)
+		}
+		if gpu.CalibratedPeakPowerW > 0 {
+			if gpu.CalibratedPeakTempC > 0 {
+				fmt.Fprintf(&b, "- **Power calibration (`dcgmi targeted_power`):** %.0f W p95 at %.1f °C p95\n", gpu.CalibratedPeakPowerW, gpu.CalibratedPeakTempC)
+			} else {
+				fmt.Fprintf(&b, "- **Power calibration (`dcgmi targeted_power`):** %.0f W p95\n", gpu.CalibratedPeakPowerW)
+			}
+		}
 		if gpu.LockedGraphicsClockMHz > 0 {
 			fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz)
 		}
@@ -188,7 +198,7 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		// Per-precision stability phases.
 		if len(gpu.PrecisionSteady) > 0 {
 			b.WriteString("**Per-precision stability:**\n\n")
-			b.WriteString("| Precision | Clock CV | Power CV | Clock Drift | ECC corr | ECC uncorr |\n|-----------|----------|----------|-------------|----------|------------|\n")
+			b.WriteString("| Precision | Status | Clock CV | Power CV | Clock Drift | ECC corr | ECC uncorr |\n|-----------|--------|----------|----------|-------------|----------|------------|\n")
 			for _, p := range gpu.PrecisionSteady {
 				eccCorr := "—"
 				eccUncorr := "—"
@@ -196,8 +206,12 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 					eccCorr = fmt.Sprintf("%d", p.ECC.Corrected)
 					eccUncorr = fmt.Sprintf("%d", p.ECC.Uncorrected)
 				}
-				fmt.Fprintf(&b, "| %s | %.1f%% | %.1f%% | %.1f%% | %s | %s |\n",
-					p.Precision, p.Steady.ClockCVPct, p.Steady.PowerCVPct, p.Steady.ClockDriftPct,
+				status := p.Status
+				if strings.TrimSpace(status) == "" {
+					status = "OK"
+				}
+				fmt.Fprintf(&b, "| %s | %s | %.1f%% | %.1f%% | %.1f%% | %s | %s |\n",
+					p.Precision, status, p.Steady.ClockCVPct, p.Steady.PowerCVPct, p.Steady.ClockDriftPct,
 					eccCorr, eccUncorr)
 			}
 			b.WriteString("\n")
@@ -290,6 +304,31 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		}
 	}

+	// ── Cooling ───────────────────────────────────────────────────────────────
+	if cooling := result.Cooling; cooling != nil {
+		b.WriteString("## Cooling\n\n")
+		if cooling.Available {
+			b.WriteString("| Metric | Value |\n|--------|-------|\n")
+			fmt.Fprintf(&b, "| Average fan speed | %.0f RPM |\n", cooling.AvgFanRPM)
+			if cooling.FanDutyCycleAvailable {
+				fmt.Fprintf(&b, "| Average fan duty cycle | %.1f%% |\n", cooling.AvgFanDutyCyclePct)
+				fmt.Fprintf(&b, "| P95 fan duty cycle | %.1f%% |\n", cooling.P95FanDutyCyclePct)
+			} else {
+				b.WriteString("| Average fan duty cycle | N/A |\n")
+				b.WriteString("| P95 fan duty cycle | N/A |\n")
+			}
+			b.WriteString("\n")
+		} else {
+			b.WriteString("Cooling telemetry unavailable.\n\n")
+		}
+		for _, note := range cooling.Notes {
+			fmt.Fprintf(&b, "- %s\n", note)
+		}
+		if len(cooling.Notes) > 0 {
+			b.WriteString("\n")
+		}
+	}
+
 	// ── Raw files ─────────────────────────────────────────────────────────────
 	b.WriteString("## Raw Files\n\n")
 	b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n")
@@ -339,6 +378,7 @@ func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64)
 func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
 	var b strings.Builder
 	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339))
+	fmt.Fprintf(&b, "benchmark_version=%s\n", result.BenchmarkVersion)
 	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
 	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
 	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -16,17 +16,17 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 		{
 			name:    "default",
 			profile: "",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 120, SteadySec: 480, NCCLSec: 180, CooldownSec: 120},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, BaselineSec: 15, WarmupSec: 45, SteadySec: 480, NCCLSec: 180, CooldownSec: 0},
 		},
 		{
 			name:    "stability",
 			profile: "stability",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 300, SteadySec: 3600, NCCLSec: 300, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, BaselineSec: 30, WarmupSec: 120, SteadySec: 3600, NCCLSec: 300, CooldownSec: 0},
 		},
 		{
 			name:    "overnight",
 			profile: "overnight",
-			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 600, SteadySec: 27000, NCCLSec: 600, CooldownSec: 300},
+			want:    benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, BaselineSec: 60, WarmupSec: 180, SteadySec: 27000, NCCLSec: 600, CooldownSec: 0},
 		},
 	}

@@ -41,6 +41,129 @@ func TestResolveBenchmarkProfile(t *testing.T) {
 	}
 }

+func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
+	t.Parallel()
+
+	labels, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStandard, SteadySec: 480},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if len(labels) != 7 || len(phases) != 7 {
+		t.Fatalf("labels=%d phases=%d want 7", len(labels), len(phases))
+	}
+	if basePhaseSec != 60 {
+		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
+	}
+	if mixedPhaseSec != 300 {
+		t.Fatalf("mixedPhaseSec=%d want 300", mixedPhaseSec)
+	}
+	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 {
+		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
+	}
+	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,60,60,300" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanStability(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileStability, SteadySec: 3600},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 300 {
+		t.Fatalf("basePhaseSec=%d want 300", basePhaseSec)
+	}
+	if mixedPhaseSec != 3600 {
+		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,300,300,3600" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) {
+	t.Parallel()
+
+	_, phases, basePhaseSec, mixedPhaseSec := buildBenchmarkSteadyPlan(
+		benchmarkProfileSpec{Name: NvidiaBenchmarkProfileOvernight, SteadySec: 27000},
+		benchmarkPrecisionPhases,
+		func(label string) string { return label },
+	)
+	if basePhaseSec != 3600 {
+		t.Fatalf("basePhaseSec=%d want 3600", basePhaseSec)
+	}
+	if mixedPhaseSec != 14400 {
+		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
+	}
+	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,3600,3600,14400" {
+		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
+	}
+}
+
+func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) {
+	t.Parallel()
+
+	phases := []benchmarkPlannedPhase{
+		{PlanLabel: "fp8", MetricStage: "fp8", DurationSec: 10},
+		{PlanLabel: "fp16", MetricStage: "fp16", DurationSec: 10},
+		{PlanLabel: "mixed", MetricStage: "mixed", DurationSec: 50},
+	}
+	rows := []GPUMetricRow{
+		{ElapsedSec: 5},
+		{ElapsedSec: 15},
+		{ElapsedSec: 25},
+		{ElapsedSec: 65},
+	}
+	got := splitBenchmarkRowsByPlannedPhase(rows, phases)
+	if len(got["fp8"]) != 1 {
+		t.Fatalf("fp8 rows=%d want 1", len(got["fp8"]))
+	}
+	if len(got["fp16"]) != 1 {
+		t.Fatalf("fp16 rows=%d want 1", len(got["fp16"]))
+	}
+	if len(got["mixed"]) != 2 {
+		t.Fatalf("mixed rows=%d want 2", len(got["mixed"]))
+	}
+}
+
+func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) {
+	t.Parallel()
+
+	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64" {
+		t.Fatalf("supported=%v", got)
+	}
+	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64,fp4" {
+		t.Fatalf("supported=%v", got)
+	}
+}
+
+func TestBenchmarkPlannedPhaseStatus(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name       string
+		raw        string
+		wantStatus string
+	}{
+		{name: "ok", raw: "status=OK\n", wantStatus: "OK"},
+		{name: "failed", raw: "phase_error=fp16\n", wantStatus: "FAILED"},
+		{name: "unsupported", raw: "cublasLt_profiles=unsupported\nphase_error=fp4\n", wantStatus: "UNSUPPORTED"},
+	}
+	for _, tc := range cases {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			got, _ := benchmarkPlannedPhaseStatus([]byte(tc.raw))
+			if got != tc.wantStatus {
+				t.Fatalf("status=%q want %q", got, tc.wantStatus)
+			}
+		})
+	}
+}
+
 func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
 	t.Parallel()

@@ -65,8 +188,10 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 		"[gpu 0] compute_capability=9.0",
 		"[gpu 0] backend=cublasLt",
 		"[gpu 0] duration_s=10",
+		"[gpu 0] int8_tensor[0]=READY dim=16384x16384x8192 block=128 stream=0",
 		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
 		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
+		"[gpu 0] int8_tensor_iterations=80",
 		"[gpu 0] fp16_tensor_iterations=200",
 		"[gpu 0] fp8_e4m3_iterations=50",
 		"[gpu 0] status=OK",
@@ -79,15 +204,24 @@ func TestParseBenchmarkBurnLog(t *testing.T) {
 	if got.ComputeCapability != "9.0" {
 		t.Fatalf("compute capability=%q want 9.0", got.ComputeCapability)
 	}
-	if len(got.Profiles) != 2 {
-		t.Fatalf("profiles=%d want 2", len(got.Profiles))
+	if len(got.Profiles) != 3 {
+		t.Fatalf("profiles=%d want 3", len(got.Profiles))
 	}
 	if got.Profiles[0].TeraOpsPerSec <= 0 {
 		t.Fatalf("profile[0] teraops=%f want >0", got.Profiles[0].TeraOpsPerSec)
 	}
+	if got.Profiles[0].Category != "fp16_bf16" {
+		t.Fatalf("profile[0] category=%q want fp16_bf16", got.Profiles[0].Category)
+	}
 	if got.Profiles[1].Category != "fp8" {
 		t.Fatalf("profile[1] category=%q want fp8", got.Profiles[1].Category)
 	}
+	if got.Profiles[2].Category != "int8" {
+		t.Fatalf("profile[2] category=%q want int8", got.Profiles[2].Category)
+	}
+	if got.Profiles[2].Weight != 0.25 {
+		t.Fatalf("profile[2] weight=%f want 0.25", got.Profiles[2].Weight)
+	}
 }

 func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
@@ -131,6 +265,13 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 				DegradationReasons: []string{"power_capped"},
 			},
 		},
+		Cooling: &BenchmarkCoolingSummary{
+			Available:             true,
+			AvgFanRPM:             9200,
+			FanDutyCycleAvailable: true,
+			AvgFanDutyCyclePct:    47.5,
+			P95FanDutyCyclePct:    62.0,
+		},
 	}

 	report := renderBenchmarkReport(result)
@@ -140,6 +281,9 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
 		"1176.00",
 		"fp16_tensor",
 		"700.00",
+		"Cooling",
+		"Average fan duty cycle",
+		"47.5%",
 	} {
 		if !strings.Contains(report, needle) {
 			t.Fatalf("report missing %q\n%s", needle, report)
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -25,6 +25,17 @@ type BenchmarkCPULoad struct {
 	Note   string `json:"note,omitempty"`
 }

+// BenchmarkCoolingSummary captures fan telemetry averaged across the full
+// benchmark run.
+type BenchmarkCoolingSummary struct {
+	Available             bool     `json:"available"`
+	AvgFanRPM             float64  `json:"avg_fan_rpm,omitempty"`
+	FanDutyCycleAvailable bool     `json:"fan_duty_cycle_available,omitempty"`
+	AvgFanDutyCyclePct    float64  `json:"avg_fan_duty_cycle_pct,omitempty"`
+	P95FanDutyCyclePct    float64  `json:"p95_fan_duty_cycle_pct,omitempty"`
+	Notes                 []string `json:"notes,omitempty"`
+}
+
 const (
 	NvidiaBenchmarkProfileStandard  = "standard"
 	NvidiaBenchmarkProfileStability = "stability"
@@ -61,6 +72,7 @@ type NvidiaBenchmarkResult struct {
 	Normalization      BenchmarkNormalization       `json:"normalization"`
 	HostConfig         *BenchmarkHostConfig         `json:"host_config,omitempty"`
 	CPULoad            *BenchmarkCPULoad            `json:"cpu_load,omitempty"`
+	Cooling            *BenchmarkCoolingSummary     `json:"cooling,omitempty"`
 	GPUs               []BenchmarkGPUResult         `json:"gpus"`
 	Interconnect       *BenchmarkInterconnectResult `json:"interconnect,omitempty"`
 	ServerPower        *BenchmarkServerPower        `json:"server_power,omitempty"`
@@ -92,6 +104,7 @@ type BenchmarkGPUResult struct {
 	Backend             string  `json:"backend,omitempty"`
 	Status              string  `json:"status"`
 	PowerLimitW         float64 `json:"power_limit_w,omitempty"`
+	PowerLimitDerated   bool    `json:"power_limit_derated,omitempty"`
 	MultiprocessorCount int     `json:"multiprocessor_count,omitempty"`
 	DefaultPowerLimitW  float64 `json:"default_power_limit_w,omitempty"`
 	// CalibratedPeakPowerW is the p95 power measured during a short
@@ -99,6 +112,8 @@ type BenchmarkGPUResult struct {
 	// Used as the reference denominator for PowerSustainScore instead of
 	// the hardware default limit, which bee-gpu-burn cannot reach.
 	CalibratedPeakPowerW   float64                         `json:"calibrated_peak_power_w,omitempty"`
+	CalibratedPeakTempC    float64                         `json:"calibrated_peak_temp_c,omitempty"`
+	PowerCalibrationTries  int                             `json:"power_calibration_tries,omitempty"`
 	MaxGraphicsClockMHz    float64                         `json:"max_graphics_clock_mhz,omitempty"`
 	BaseGraphicsClockMHz   float64                         `json:"base_graphics_clock_mhz,omitempty"`
 	MaxMemoryClockMHz      float64                         `json:"max_memory_clock_mhz,omitempty"`
@@ -107,6 +122,7 @@ type BenchmarkGPUResult struct {
 	Baseline               BenchmarkTelemetrySummary       `json:"baseline"`
 	Steady                 BenchmarkTelemetrySummary       `json:"steady"`
 	PrecisionSteady        []BenchmarkPrecisionSteadyPhase `json:"precision_steady,omitempty"`
+	PrecisionFailures      []string                        `json:"precision_failures,omitempty"`
 	Cooldown               BenchmarkTelemetrySummary       `json:"cooldown"`
 	Throttle               BenchmarkThrottleCounters       `json:"throttle_counters"`
 	// ECC error delta accumulated over the full benchmark (all phases combined).
@@ -115,6 +131,9 @@ type BenchmarkGPUResult struct {
 	Scores             BenchmarkScorecard         `json:"scores"`
 	DegradationReasons []string                   `json:"degradation_reasons,omitempty"`
 	Notes              []string                   `json:"notes,omitempty"`
+	// CoolingWarning is non-empty when a thermal throttle event occurred with
+	// a clock drop ≥20% while server fans were not at 100% duty cycle.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
 }

 type BenchmarkTelemetrySummary struct {
@@ -167,7 +186,7 @@ type BenchmarkPrecisionResult struct {
 	Iterations    uint64  `json:"iterations,omitempty"`
 	TeraOpsPerSec float64 `json:"teraops_per_sec,omitempty"`
 	// Weight is the fp32-equivalence factor for this precision category.
-	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, fp8 = 0.25, fp4 = 0.125.
+	// fp32 = 1.0 (baseline), fp64 = 2.0, fp16 = 0.5, int8/fp8 = 0.25, fp4 = 0.125.
 	// WeightedTOPS = TeraOpsPerSec * Weight gives fp32-equivalent throughput.
 	Weight                float64 `json:"weight,omitempty"`
 	WeightedTeraOpsPerSec float64 `json:"weighted_teraops_per_sec,omitempty"`
@@ -213,13 +232,15 @@ type BenchmarkServerPower struct {
 // type runs at a time the PowerCVPct here is a genuine stability signal.
 type BenchmarkPrecisionSteadyPhase struct {
 	Precision             string                    `json:"precision"` // e.g. "fp8", "fp16", "fp32"
+	Status                string                    `json:"status,omitempty"`
 	Steady                BenchmarkTelemetrySummary `json:"steady"`
 	TeraOpsPerSec         float64                   `json:"teraops_per_sec,omitempty"`
 	WeightedTeraOpsPerSec float64                   `json:"weighted_teraops_per_sec,omitempty"`
 	// ECC errors accumulated during this precision phase only.
 	// Non-zero corrected = stress-induced DRAM errors for this kernel type.
 	// Any uncorrected = serious fault triggered by this precision workload.
-	ECC BenchmarkECCCounters `json:"ecc,omitempty"`
+	ECC   BenchmarkECCCounters `json:"ecc,omitempty"`
+	Notes string               `json:"notes,omitempty"`
 }

 type BenchmarkInterconnectResult struct {
@@ -233,3 +254,45 @@ type BenchmarkInterconnectResult struct {
 	MaxBusBWGBps       float64  `json:"max_busbw_gbps,omitempty"`
 	Notes              []string `json:"notes,omitempty"`
 }
+
+type NvidiaPowerBenchResult struct {
+	BenchmarkVersion     string                 `json:"benchmark_version"`
+	GeneratedAt          time.Time              `json:"generated_at"`
+	Hostname             string                 `json:"hostname,omitempty"`
+	ServerModel          string                 `json:"server_model,omitempty"`
+	BenchmarkProfile     string                 `json:"benchmark_profile"`
+	SelectedGPUIndices   []int                  `json:"selected_gpu_indices"`
+	RecommendedSlotOrder []int                  `json:"recommended_slot_order,omitempty"`
+	RampSteps            []NvidiaPowerBenchStep `json:"ramp_steps,omitempty"`
+	OverallStatus        string                 `json:"overall_status"`
+	Findings             []string               `json:"findings,omitempty"`
+	GPUs                 []NvidiaPowerBenchGPU  `json:"gpus"`
+}
+
+type NvidiaPowerBenchGPU struct {
+	Index               int      `json:"index"`
+	Name                string   `json:"name,omitempty"`
+	BusID               string   `json:"bus_id,omitempty"`
+	DefaultPowerLimitW  float64  `json:"default_power_limit_w,omitempty"`
+	AppliedPowerLimitW  float64  `json:"applied_power_limit_w,omitempty"`
+	MaxObservedPowerW   float64  `json:"max_observed_power_w,omitempty"`
+	MaxObservedTempC    float64  `json:"max_observed_temp_c,omitempty"`
+	CalibrationAttempts int      `json:"calibration_attempts,omitempty"`
+	Derated             bool     `json:"derated,omitempty"`
+	Status              string   `json:"status"`
+	Notes               []string `json:"notes,omitempty"`
+	// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
+	CoolingWarning string `json:"cooling_warning,omitempty"`
+}
+
+type NvidiaPowerBenchStep struct {
+	StepIndex              int      `json:"step_index"`
+	GPUIndices             []int    `json:"gpu_indices"`
+	TotalObservedPowerW    float64  `json:"total_observed_power_w,omitempty"`
+	AvgObservedPowerW      float64  `json:"avg_observed_power_w,omitempty"`
+	MinPowerRealizationPct float64  `json:"min_power_realization_pct,omitempty"`
+	AvgPowerRealizationPct float64  `json:"avg_power_realization_pct,omitempty"`
+	DeratedGPUCount        int      `json:"derated_gpu_count,omitempty"`
+	Status                 string   `json:"status"`
+	Notes                  []string `json:"notes,omitempty"`
+}
--- a/audit/internal/platform/gpu_metrics.go
+++ b/audit/internal/platform/gpu_metrics.go
@@ -13,15 +13,20 @@ import (

 // GPUMetricRow is one telemetry sample from nvidia-smi during a stress test.
 type GPUMetricRow struct {
-	Stage       string  `json:"stage,omitempty"`
-	ElapsedSec  float64 `json:"elapsed_sec"`
-	GPUIndex    int     `json:"index"`
-	TempC       float64 `json:"temp_c"`
-	UsagePct    float64 `json:"usage_pct"`
-	MemUsagePct float64 `json:"mem_usage_pct"`
-	PowerW      float64 `json:"power_w"`
-	ClockMHz    float64 `json:"clock_mhz"`
-	MemClockMHz float64 `json:"mem_clock_mhz"`
+	Stage                 string  `json:"stage,omitempty"`
+	StageStartSec         float64 `json:"stage_start_sec,omitempty"`
+	StageEndSec           float64 `json:"stage_end_sec,omitempty"`
+	ElapsedSec            float64 `json:"elapsed_sec"`
+	GPUIndex              int     `json:"index"`
+	TempC                 float64 `json:"temp_c"`
+	UsagePct              float64 `json:"usage_pct"`
+	MemUsagePct           float64 `json:"mem_usage_pct"`
+	PowerW                float64 `json:"power_w"`
+	ClockMHz              float64 `json:"clock_mhz"`
+	MemClockMHz           float64 `json:"mem_clock_mhz"`
+	FanAvgRPM             float64 `json:"fan_avg_rpm,omitempty"`
+	FanDutyCyclePct       float64 `json:"fan_duty_cycle_pct,omitempty"`
+	FanDutyCycleAvailable bool    `json:"fan_duty_cycle_available,omitempty"`
 }

 // sampleGPUMetrics runs nvidia-smi once and returns current metrics for each GPU.
@@ -142,10 +147,14 @@ func sampleAMDGPUMetrics() ([]GPUMetricRow, error) {
 // WriteGPUMetricsCSV writes collected rows as a CSV file.
 func WriteGPUMetricsCSV(path string, rows []GPUMetricRow) error {
 	var b bytes.Buffer
-	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz\n")
+	b.WriteString("stage,elapsed_sec,gpu_index,temperature_c,usage_pct,mem_usage_pct,power_w,clock_mhz,mem_clock_mhz,fan_avg_rpm,fan_duty_cycle_pct,fan_duty_cycle_available\n")
 	for _, r := range rows {
-		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f\n",
-			strconv.Quote(strings.TrimSpace(r.Stage)), r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz)
+		dutyAvail := 0
+		if r.FanDutyCycleAvailable {
+			dutyAvail = 1
+		}
+		fmt.Fprintf(&b, "%s,%.1f,%d,%.1f,%.1f,%.1f,%.1f,%.0f,%.0f,%.0f,%.1f,%d\n",
+			strconv.Quote(strings.TrimSpace(r.Stage)), r.ElapsedSec, r.GPUIndex, r.TempC, r.UsagePct, r.MemUsagePct, r.PowerW, r.ClockMHz, r.MemClockMHz, r.FanAvgRPM, r.FanDutyCyclePct, dutyAvail)
 	}
 	return os.WriteFile(path, b.Bytes(), 0644)
 }
@@ -502,11 +511,22 @@ func buildGPUMetricStageSpans(rows []GPUMetricRow) []gpuMetricStageSpan {
 		if name == "" {
 			name = "run"
 		}
+		start := row.StageStartSec
+		end := row.StageEndSec
+		if end <= start {
+			start = row.ElapsedSec
+			end = row.ElapsedSec
+		}
 		if len(spans) == 0 || spans[len(spans)-1].Name != name {
-			spans = append(spans, gpuMetricStageSpan{Name: name, Start: row.ElapsedSec, End: row.ElapsedSec})
+			spans = append(spans, gpuMetricStageSpan{Name: name, Start: start, End: end})
 			continue
 		}
-		spans[len(spans)-1].End = row.ElapsedSec
+		if start < spans[len(spans)-1].Start {
+			spans[len(spans)-1].Start = start
+		}
+		if end > spans[len(spans)-1].End {
+			spans[len(spans)-1].End = end
+		}
 	}
 	for i := range spans {
 		if spans[i].End <= spans[i].Start {
--- a/audit/internal/platform/install_to_ram.go
+++ b/audit/internal/platform/install_to_ram.go
@@ -11,20 +11,10 @@ import (
 	"strings"
 )

+const installToRAMDir = "/dev/shm/bee-live"
+
 func (s *System) IsLiveMediaInRAM() bool {
-	fsType := mountFSType("/run/live/medium")
-	if fsType == "" {
-		// No medium mount at all — fall back to toram kernel parameter.
-		return toramActive()
-	}
-	if strings.EqualFold(fsType, "tmpfs") {
-		return true
-	}
-	// When RunInstallToRAM copies squashfs to /dev/shm/bee-live but the bind
-	// mount of /run/live/medium fails (common for CD-ROM boots), the medium
-	// fstype still shows the CD-ROM type. Check whether the RAM copy exists.
-	files, _ := filepath.Glob("/dev/shm/bee-live/*.squashfs")
-	return len(files) > 0
+	return s.LiveMediaRAMState().InRAM
 }

 func (s *System) LiveBootSource() LiveBootSource {
@@ -56,14 +46,95 @@ func (s *System) LiveBootSource() LiveBootSource {
 	return status
 }

-func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) error {
+// LiveMediaRAMState probes the boot source, toram flag and squashfs listings, then classifies them.
+func (s *System) LiveMediaRAMState() LiveMediaRAMState {
+	bootSource := s.LiveBootSource()
+	toram := toramActive()
+	onMedium := globPaths("/run/live/medium/live/*.squashfs")
+	inRAMDir := globPaths(filepath.Join(installToRAMDir, "*.squashfs"))
+	return evaluateLiveMediaRAMState(bootSource, toram, onMedium, inRAMDir)
+}
+
+// evaluateLiveMediaRAMState classifies the live-media RAM situation from
+// already-collected inputs. Squashfs paths are compared by base name: the copy
+// counts as complete only when every source base name is also among the copied
+// files (and at least one source file was found).
+func evaluateLiveMediaRAMState(status LiveBootSource, toram bool, sourceSquashfs, copiedSquashfs []string) LiveMediaRAMState {
+	result := LiveMediaRAMState{LiveBootSource: status, ToramActive: toram}
+	result.CopyPresent = len(copiedSquashfs) != 0
+
+	// Already running from RAM: report success and do not offer a copy.
+	if status.InRAM {
+		result.State = "in_ram"
+		result.Status = "ok"
+		result.CopyComplete = true
+		result.Message = "Running from RAM — installation media can be safely disconnected."
+		return result
+	}
+
+	wanted := pathBaseSet(sourceSquashfs)
+	result.CopyComplete = len(wanted) > 0 && setContainsAll(pathBaseSet(copiedSquashfs), wanted)
+
+	// Every state below still allows starting (or restarting) the copy.
+	result.CanStartCopy = true
+	switch {
+	case result.CopyComplete:
+		result.State = "partial"
+		result.Status = "partial"
+		result.Message = "Live media files were copied to RAM, but the system is still mounted from the original boot source."
+	case result.CopyPresent:
+		result.State = "partial"
+		result.Status = "partial"
+		result.Message = "Partial RAM copy detected. A previous Copy to RAM run was interrupted or cancelled."
+	case toram:
+		result.State = "toram_failed"
+		result.Status = "failed"
+		result.Message = "toram boot parameter is set but the live medium is not mounted from RAM."
+	default:
+		result.State = "not_in_ram"
+		result.Status = "warning"
+		result.Message = "ISO not copied to RAM. Use Copy to RAM to free the boot drive and improve performance."
+	}
+	return result
+}
+
+func globPaths(pattern string) []string {
+	matches, _ := filepath.Glob(pattern) // error is discarded; only the matches are used
+	return matches
+}
+
+// pathBaseSet returns the set of trimmed base names of paths (blank names are skipped).
+func pathBaseSet(paths []string) map[string]struct{} {
+	names := make(map[string]struct{}, len(paths))
+	for _, p := range paths {
+		if name := strings.TrimSpace(filepath.Base(p)); name != "" {
+			names[name] = struct{}{}
+		}
+	}
+	return names
+}
+
+// setContainsAll reports whether every key of want is also a key of have.
+// An empty want yields false: with nothing expected there is nothing to
+// confirm, so the result is not treated as vacuously true.
+func setContainsAll(have, want map[string]struct{}) bool {
+	for name := range want {
+		if _, found := have[name]; !found {
+			return false
+		}
+	}
+	return len(want) > 0
+}
+
+func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) (retErr error) {
 	log := func(msg string) {
 		if logFunc != nil {
 			logFunc(msg)
 		}
 	}

-	if s.IsLiveMediaInRAM() {
+	state := s.LiveMediaRAMState()
+	if state.InRAM {
 		log("Already running from RAM — installation media can be safely disconnected.")
 		return nil
 	}
@@ -88,10 +159,21 @@ func (s *System) RunInstallToRAM(ctx context.Context, logFunc func(string)) erro
 			humanBytes(needed+headroom), humanBytes(free))
 	}

-	dstDir := "/dev/shm/bee-live"
+	dstDir := installToRAMDir
+	if state.CopyPresent {
+		log("Removing stale partial RAM copy before retry...")
+	}
+	_ = os.RemoveAll(dstDir)
 	if err := os.MkdirAll(dstDir, 0755); err != nil {
 		return fmt.Errorf("create tmpfs dir: %v", err)
 	}
+	defer func() {
+		if retErr == nil {
+			return
+		}
+		_ = os.RemoveAll(dstDir)
+		log("Removed incomplete RAM copy.")
+	}()

 	for _, sf := range squashfsFiles {
 		if err := ctx.Err(); err != nil {
--- a/audit/internal/platform/install_to_ram_test.go
+++ b/audit/internal/platform/install_to_ram_test.go
@@ -58,3 +58,46 @@ func TestDescribeLiveBootSource(t *testing.T) {
 		t.Fatalf("got %q want /run/live/medium", got)
 	}
 }
+
+// TestEvaluateLiveMediaRAMState checks the classification of the in-RAM,
+// interrupted-copy and failed-toram scenarios.
+func TestEvaluateLiveMediaRAMState(t *testing.T) {
+	t.Parallel()
+
+	usb := LiveBootSource{InRAM: false, Kind: "usb", Device: "/dev/sdb1"}
+
+	// expect fails the calling subtest unless all three fields match.
+	expect := func(t *testing.T, got LiveMediaRAMState, wantState, wantStatus string, wantCanStart bool) {
+		t.Helper()
+		if got.State != wantState || got.Status != wantStatus || got.CanStartCopy != wantCanStart {
+			t.Fatalf("state=%+v", got)
+		}
+	}
+
+	t.Run("in_ram", func(t *testing.T) {
+		// Boot source already reports RAM: no copy may be offered.
+		ram := LiveBootSource{InRAM: true, Kind: "ram", Source: "tmpfs"}
+		state := evaluateLiveMediaRAMState(ram, false, nil, nil)
+		expect(t, state, "in_ram", "ok", false)
+	})
+
+	t.Run("partial_copy_after_cancel", func(t *testing.T) {
+		// Two squashfs files on the medium, only one of them copied.
+		onMedium := []string{
+			"/run/live/medium/live/filesystem.squashfs",
+			"/run/live/medium/live/firmware.squashfs",
+		}
+		inRAM := []string{"/dev/shm/bee-live/filesystem.squashfs"}
+		state := evaluateLiveMediaRAMState(usb, false, onMedium, inRAM)
+		expect(t, state, "partial", "partial", true)
+		if state.CopyComplete {
+			t.Fatalf("CopyComplete=%v want false", state.CopyComplete)
+		}
+	})
+
+	t.Run("toram_failed", func(t *testing.T) {
+		// toram requested, but nothing was copied and the medium is not in RAM.
+		state := evaluateLiveMediaRAMState(usb, true, nil, nil)
+		expect(t, state, "toram_failed", "failed", true)
+	})
+}
--- a/audit/internal/platform/runtime.go
+++ b/audit/internal/platform/runtime.go
@@ -171,25 +171,28 @@ func resolvedToolStatus(display string, candidates ...string) ToolStatus {
 	return ToolStatus{Name: display}
 }

-// collectToRAMHealth checks whether the LiveCD ISO has been copied to RAM.
-// Status values: "ok" = in RAM, "warning" = toram not active (no copy attempted),
-// "failed" = toram was requested but medium is not in RAM (copy failed or in progress).
+// collectToRAMHealth evaluates whether the live system is fully running from RAM.
+// Status values: "ok" = fully in RAM, "warning" = not copied, "partial" = stale or
+// incomplete RAM copy exists but runtime still depends on the boot medium,
+// "failed" = toram was requested but medium is not in RAM.
 func (s *System) collectToRAMHealth(health *schema.RuntimeHealth) {
-	inRAM := s.IsLiveMediaInRAM()
-	active := toramActive()
-	switch {
-	case inRAM:
-		health.ToRAMStatus = "ok"
-	case active:
-		// toram was requested but medium is not yet/no longer in RAM
-		health.ToRAMStatus = "failed"
+	state := s.LiveMediaRAMState()
+	health.ToRAMStatus = state.Status
+	switch state.Status {
+	case "ok":
+		return
+	case "failed":
 		health.Issues = append(health.Issues, schema.RuntimeIssue{
 			Code:        "toram_copy_failed",
 			Severity:    "warning",
-			Description: "toram boot parameter is set but the live medium is not mounted from RAM.",
+			Description: state.Message,
+		})
+	case "partial":
+		health.Issues = append(health.Issues, schema.RuntimeIssue{
+			Code:        "toram_copy_partial",
+			Severity:    "warning",
+			Description: state.Message,
 		})
-	default:
-		health.ToRAMStatus = "warning"
 	}
 }

@@ -211,13 +214,13 @@ func findUSBExportMount() string {

 	// fs types that are expected on USB export drives
 	exportFSTypes := map[string]bool{
-		"vfat":  true,
-		"exfat": true,
-		"ext2":  true,
-		"ext3":  true,
-		"ext4":  true,
-		"ntfs":  true,
-		"ntfs3": true,
+		"vfat":    true,
+		"exfat":   true,
+		"ext2":    true,
+		"ext3":    true,
+		"ext4":    true,
+		"ntfs":    true,
+		"ntfs3":   true,
 		"fuseblk": true,
 	}

--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -552,9 +552,13 @@ func (s *System) RunMemoryAcceptancePack(ctx context.Context, baseDir string, si
 	if passes <= 0 {
 		passes = 1
 	}
+	// Bound memtester with a hard wall-clock timeout: ~2.5 min per 100 MB per
+	// pass, plus a fixed 2-minute buffer. Without this, a stuck memory
+	// controller can cause memtester to spin forever on a single subtest.
+	timeoutSec := sizeMB*passes*150/100 + 120
 	return runAcceptancePackCtx(ctx, baseDir, "memory", []satJob{
 		{name: "01-free-before.log", cmd: []string{"free", "-h"}},
-		{name: "02-memtester.log", cmd: []string{"memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
+		{name: "02-memtester.log", cmd: []string{"timeout", fmt.Sprintf("%d", timeoutSec), "memtester", fmt.Sprintf("%dM", sizeMB), fmt.Sprintf("%d", passes)}},
 		{name: "03-free-after.log", cmd: []string{"free", "-h"}},
 	}, logFunc)
 }
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -426,6 +426,101 @@ func sampleFanSpeedsViaSensorsJSON() ([]FanReading, error) {
 	return fans, nil
 }

+// sampleFanDutyCyclePct runs "sensors -j" and returns the mean PWM duty cycle in percent.
+// The bool is false when the command fails, prints nothing, or yields no usable PWM value.
+func sampleFanDutyCyclePct() (float64, bool) {
+	raw, runErr := exec.Command("sensors", "-j").Output()
+	if runErr == nil && len(raw) > 0 {
+		return parseFanDutyCyclePctSensorsJSON(raw)
+	}
+	return 0, false
+}
+
+// parseFanDutyCyclePctSensorsJSON extracts at most one duty-cycle value per
+// feature from "sensors -j" style JSON (chip -> feature -> subfeature) and
+// returns their mean. The bool is false on invalid JSON or when none was found.
+func parseFanDutyCyclePctSensorsJSON(raw []byte) (float64, bool) {
+	var chips map[string]map[string]any
+	if json.Unmarshal(raw, &chips) != nil {
+		return 0, false
+	}
+	var duties []float64
+	for _, chip := range chips {
+		for featureName, rawFeature := range chip {
+			subfeatures, isObject := rawFeature.(map[string]any)
+			if !isObject || strings.EqualFold(featureName, "Adapter") {
+				continue
+			}
+			if duty, found := firstFanDutyValue(featureName, subfeatures); found {
+				duties = append(duties, duty)
+			}
+		}
+	}
+	if len(duties) > 0 {
+		return benchmarkMean(duties), true
+	}
+	return 0, false
+}
+
+// firstFanDutyValue returns the duty cycle (percent) exposed by one sensors feature, if any.
+// Control attributes (enable/mode/alarm/freq) are never read as duty: a pwmN_freq value is a
+// frequency in Hz, and a small one would otherwise be accepted as a percentage.
+func firstFanDutyValue(featureName string, feature map[string]any) (float64, bool) {
+	isControlAttr := func(name string) bool {
+		return strings.Contains(name, "enable") || strings.Contains(name, "mode") ||
+			strings.Contains(name, "alarm") || strings.Contains(name, "freq")
+	}
+	featureName = strings.ToLower(strings.TrimSpace(featureName))
+	if isControlAttr(featureName) {
+		return 0, false
+	}
+	if strings.Contains(featureName, "pwm") {
+		for _, key := range []string{"input", "value", "current"} {
+			if duty, parsed := parseFanDutyValue(feature[key]); parsed {
+				return duty, true
+			}
+		}
+	}
+	var keys []string
+	for key := range feature {
+		if lower := strings.ToLower(key); strings.Contains(lower, "pwm") && !isControlAttr(lower) {
+			keys = append(keys, key)
+		}
+	}
+	sort.Strings(keys)
+	for _, key := range keys {
+		if duty, parsed := parseFanDutyValue(feature[key]); parsed {
+			return duty, true
+		}
+	}
+	return 0, false
+}
+
+// parseFanDutyValue maps a JSON number or numeric string to a duty percentage.
+func parseFanDutyValue(value any) (float64, bool) {
+	text, isText := value.(string)
+	if parsed, err := strconv.ParseFloat(strings.TrimSpace(text), 64); isText && err == nil {
+		value = parsed
+	}
+	if num, isNum := value.(float64); isNum {
+		return normalizePWMAsDutyPct(num)
+	}
+	return 0, false
+}
+
+// normalizePWMAsDutyPct maps a raw PWM reading to percent: 0..100 is taken
+// as a percentage already, (100, 255] as an 8-bit value scaled to percent.
+func normalizePWMAsDutyPct(raw float64) (float64, bool) {
+	switch {
+	case raw >= 0 && raw <= 100:
+		return raw, true
+	case raw > 100 && raw <= 255:
+		return raw / 255.0 * 100.0, true
+	default:
+		return 0, false
+	}
+}
+
 func firstFanInputValue(feature map[string]any) (float64, bool) {
 	keys := make([]string, 0, len(feature))
 	for key := range feature {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -29,6 +29,27 @@ func TestFirstFanInputValue(t *testing.T) {
 	}
 }

+// TestParseFanDutyCyclePctSensorsJSON expects pwm1 (128) and pwm2 (64) to average to ~57.1.
+func TestParseFanDutyCyclePctSensorsJSON(t *testing.T) {
+	raw := []byte(`{
+		"chip0": {
+			"fan1": {"input": 9000},
+			"pwm1": {"input": 128},
+			"pwm1_enable": {"input": 1}
+		},
+		"chip1": {
+			"pwm2": {"input": 64}
+		}
+	}`)
+	got, ok := parseFanDutyCyclePctSensorsJSON(raw)
+	switch {
+	case !ok:
+		t.Fatalf("expected duty cycle telemetry to be parsed")
+	case got < 57 || got > 58:
+		t.Fatalf("got=%v want ~57.1", got)
+	}
+}
+
 func TestParseDCMIPowerReading(t *testing.T) {
 	raw := `
 Instantaneous power reading:                   512 Watts
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -9,6 +9,17 @@ type LiveBootSource struct {
 	Device string `json:"device,omitempty"`
 }

+type LiveMediaRAMState struct {
+	LiveBootSource
+	State        string `json:"state"`                    // "in_ram", "partial", "toram_failed" or "not_in_ram"
+	Status       string `json:"status"`                   // "ok", "partial", "failed" or "warning"
+	ToramActive  bool   `json:"toram_active,omitempty"`   // toram flag as passed to evaluateLiveMediaRAMState
+	CopyPresent  bool   `json:"copy_present,omitempty"`   // at least one copied squashfs path was found
+	CopyComplete bool   `json:"copy_complete,omitempty"`  // every source squashfs base name is in the copy (forced true when in RAM)
+	CanStartCopy bool   `json:"can_start_copy,omitempty"` // true for every state except "in_ram"
+	Message      string `json:"message,omitempty"`        // human-readable explanation of State
+}
+
 type InterfaceInfo struct {
 	Name  string
 	State string
--- a/audit/internal/schema/hardware.go
+++ b/audit/internal/schema/hardware.go
@@ -15,17 +15,17 @@ type HardwareIngestRequest struct {
 }

 type RuntimeHealth struct {
-	Status        string                 `json:"status"`
-	CheckedAt     string                 `json:"checked_at"`
-	ExportDir     string                 `json:"export_dir,omitempty"`
-	DriverReady   bool                   `json:"driver_ready,omitempty"`
-	CUDAReady     bool                   `json:"cuda_ready,omitempty"`
-	NvidiaGSPMode string                 `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
-	NetworkStatus string                 `json:"network_status,omitempty"`
-	// ToRAMStatus: "ok" (ISO in RAM), "warning" (toram not active), "failed" (toram active but copy failed)
-	ToRAMStatus   string `json:"toram_status,omitempty"`
+	Status        string `json:"status"`
+	CheckedAt     string `json:"checked_at"`
+	ExportDir     string `json:"export_dir,omitempty"`
+	DriverReady   bool   `json:"driver_ready,omitempty"`
+	CUDAReady     bool   `json:"cuda_ready,omitempty"`
+	NvidiaGSPMode string `json:"nvidia_gsp_mode,omitempty"` // "gsp-on", "gsp-off", "gsp-stuck"
+	NetworkStatus string `json:"network_status,omitempty"`
+	// ToRAMStatus: "ok" (fully in RAM), "warning" (not copied), "partial" (stale/incomplete copy exists), "failed" (toram active but copy failed)
+	ToRAMStatus string `json:"toram_status,omitempty"`
 	// USBExportPath: mount point of the first writable USB drive found, empty if none.
-	USBExportPath string `json:"usb_export_path,omitempty"`
+	USBExportPath string                 `json:"usb_export_path,omitempty"`
 	Issues        []RuntimeIssue         `json:"issues,omitempty"`
 	Tools         []RuntimeToolStatus    `json:"tools,omitempty"`
 	Services      []RuntimeServiceStatus `json:"services,omitempty"`
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -36,6 +36,16 @@ var apiListNvidiaGPUStatuses = func(a *app.App) ([]platform.NvidiaGPUStatus, err
 	return a.ListNvidiaGPUStatuses()
 }

+const ( // default queue priorities; defaultTaskPriority picks one per task target
+	taskPriorityBenchmark      = 10
+	taskPriorityBurn           = 20
+	taskPriorityValidateStress = 30
+	taskPriorityValidate       = 40
+	taskPriorityAudit          = 50
+	taskPriorityInstallToRAM   = 60
+	taskPriorityInstall        = 70
+)
+
 // ── Job ID counter ────────────────────────────────────────────────────────────

 var jobCounter atomic.Uint64
@@ -100,7 +110,7 @@ func writeTaskRunResponse(w http.ResponseWriter, tasks []*Task) {

 func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	switch strings.TrimSpace(target) {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute",
 		"nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
 		"nvidia-bandwidth", "nvidia-stress":
 		return true
@@ -109,6 +119,30 @@ func shouldSplitHomogeneousNvidiaTarget(target string) bool {
 	}
 }

+// defaultTaskPriority maps a task target to its default queue priority constant.
+// Validate targets use the stress tier when params.StressMode is set; unknown targets get 0.
+func defaultTaskPriority(target string, params taskParams) int {
+	switch strings.TrimSpace(target) {
+	case "nvidia-bench-perf", "nvidia-bench-power":
+		return taskPriorityBenchmark
+	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
+		return taskPriorityBurn
+	case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse", "nvidia-interconnect",
+		"nvidia-bandwidth", "memory", "storage", "cpu", "amd", "amd-mem", "amd-bandwidth":
+		if !params.StressMode {
+			return taskPriorityValidate
+		}
+		return taskPriorityValidateStress
+	case "audit":
+		return taskPriorityAudit
+	case "install-to-ram":
+		return taskPriorityInstallToRAM
+	case "install":
+		return taskPriorityInstall
+	}
+	return 0
+}
+
 func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) {
 	if len(gpus) == 0 {
 		return nil, fmt.Errorf("no NVIDIA GPUs detected")
@@ -458,6 +492,7 @@ func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) {
 		ID:        newJobID("audit"),
 		Name:      "Audit",
 		Target:    "audit",
+		Priority:  defaultTaskPriority("audit", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -491,14 +526,14 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 			return
 		}

-			var body struct {
-				Duration           int      `json:"duration"`
-				StressMode         bool     `json:"stress_mode"`
-				GPUIndices         []int    `json:"gpu_indices"`
-				ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
-				StaggerGPUStart    bool     `json:"stagger_gpu_start"`
-				ParallelGPUs       bool     `json:"parallel_gpus"`
-				Loader             string   `json:"loader"`
+		var body struct {
+			Duration           int      `json:"duration"`
+			StressMode         bool     `json:"stress_mode"`
+			GPUIndices         []int    `json:"gpu_indices"`
+			ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
+			StaggerGPUStart    bool     `json:"stagger_gpu_start"`
+			ParallelGPUs       bool     `json:"parallel_gpus"`
+			Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
 			PlatformComponents []string `json:"platform_components"`
@@ -514,19 +549,153 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 		if strings.TrimSpace(body.DisplayName) != "" {
 			name = body.DisplayName
 		}
-			params := taskParams{
-				Duration:           body.Duration,
-				StressMode:         body.StressMode,
-				GPUIndices:         body.GPUIndices,
-				ExcludeGPUIndices:  body.ExcludeGPUIndices,
-				StaggerGPUStart:    body.StaggerGPUStart,
-				ParallelGPUs:       body.ParallelGPUs,
-				Loader:             body.Loader,
+		params := taskParams{
+			Duration:           body.Duration,
+			StressMode:         body.StressMode,
+			GPUIndices:         body.GPUIndices,
+			ExcludeGPUIndices:  body.ExcludeGPUIndices,
+			StaggerGPUStart:    body.StaggerGPUStart,
+			ParallelGPUs:       body.ParallelGPUs,
+			Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
 			PlatformComponents: body.PlatformComponents,
 		}
-		tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target)
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "sat-"+target)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, err.Error())
+			return
+		}
+		for _, t := range tasks {
+			globalQueue.enqueue(t)
+		}
+		writeTaskRunResponse(w, tasks)
+	}
+}
+
+func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if h.opts.App == nil {
+			writeError(w, http.StatusServiceUnavailable, "app not configured")
+			return
+		}
+
+		var body struct {
+			Profile           string `json:"profile"`
+			SizeMB            int    `json:"size_mb"`
+			GPUIndices        []int  `json:"gpu_indices"`
+			ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
+			RunNCCL           *bool  `json:"run_nccl"`
+			ParallelGPUs      *bool  `json:"parallel_gpus"`
+			RampUp            *bool  `json:"ramp_up"`
+			DisplayName       string `json:"display_name"`
+		}
+		if r.Body != nil {
+			if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
+				writeError(w, http.StatusBadRequest, "invalid request body")
+				return
+			}
+		}
+
+		runNCCL := true
+		if body.RunNCCL != nil {
+			runNCCL = *body.RunNCCL
+		}
+		parallelGPUs := false
+		if body.ParallelGPUs != nil {
+			parallelGPUs = *body.ParallelGPUs
+		}
+		rampUp := false
+		if body.RampUp != nil {
+			rampUp = *body.RampUp
+		}
+		// Build a descriptive base name that includes profile and mode so the task
+		// list is self-explanatory without opening individual task detail pages.
+		profile := strings.TrimSpace(body.Profile)
+		if profile == "" {
+			profile = "standard"
+		}
+		name := taskDisplayName(target, "", "")
+		if strings.TrimSpace(body.DisplayName) != "" {
+			name = body.DisplayName
+		}
+		// Append profile tag.
+		name = fmt.Sprintf("%s · %s", name, profile)
+
+		if target == "nvidia-bench-power" && parallelGPUs {
+			writeError(w, http.StatusBadRequest, "power / thermal fit benchmark uses sequential or ramp-up modes only")
+			return
+		}
+
+		if rampUp && len(body.GPUIndices) > 1 {
+			// Ramp-up mode: resolve GPU list, then create one task per prefix
+			// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1], each running in parallel.
+			gpus, err := apiListNvidiaGPUs(h.opts.App)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
+			if err != nil {
+				writeError(w, http.StatusBadRequest, err.Error())
+				return
+			}
+			if len(resolved) < 2 {
+				// Fall through to normal single-task path.
+				rampUp = false
+			} else {
+				now := time.Now()
+				rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
+				var allTasks []*Task
+				for step := 1; step <= len(resolved); step++ {
+					subset := resolved[:step]
+					stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
+					t := &Task{
+						ID:        newJobID("bee-bench-nvidia"),
+						Name:      stepName,
+						Target:    target,
+						Priority:  defaultTaskPriority(target, taskParams{}),
+						Status:    TaskPending,
+						CreatedAt: now,
+						params: taskParams{
+							GPUIndices:       append([]int(nil), subset...),
+							SizeMB:           body.SizeMB,
+							BenchmarkProfile: body.Profile,
+							RunNCCL:          runNCCL && step == len(resolved),
+							ParallelGPUs:     true,
+							RampStep:         step,
+							RampTotal:        len(resolved),
+							RampRunID:        rampRunID,
+							DisplayName:      stepName,
+						},
+					}
+					allTasks = append(allTasks, t)
+				}
+				for _, t := range allTasks {
+					globalQueue.enqueue(t)
+				}
+				writeTaskRunResponse(w, allTasks)
+				return
+			}
+		}
+
+		// For non-ramp tasks append mode tag.
+		if parallelGPUs {
+			name = fmt.Sprintf("%s · parallel", name)
+		} else {
+			name = fmt.Sprintf("%s · sequential", name)
+		}
+
+		params := taskParams{
+			GPUIndices:        body.GPUIndices,
+			ExcludeGPUIndices: body.ExcludeGPUIndices,
+			SizeMB:            body.SizeMB,
+			BenchmarkProfile:  body.Profile,
+			RunNCCL:           runNCCL,
+			ParallelGPUs:      parallelGPUs,
+			DisplayName:       body.DisplayName,
+		}
+		tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "bee-bench-nvidia")
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
 			return
@@ -539,129 +708,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 }

 func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
-	if h.opts.App == nil {
-		writeError(w, http.StatusServiceUnavailable, "app not configured")
-		return
-	}
-
-	var body struct {
-		Profile           string `json:"profile"`
-		SizeMB            int    `json:"size_mb"`
-		GPUIndices        []int  `json:"gpu_indices"`
-		ExcludeGPUIndices []int  `json:"exclude_gpu_indices"`
-		RunNCCL           *bool  `json:"run_nccl"`
-		ParallelGPUs      *bool  `json:"parallel_gpus"`
-		RampUp            *bool  `json:"ramp_up"`
-		DisplayName       string `json:"display_name"`
-	}
-	if r.Body != nil {
-		if err := json.NewDecoder(r.Body).Decode(&body); err != nil && !errors.Is(err, io.EOF) {
-			writeError(w, http.StatusBadRequest, "invalid request body")
-			return
-		}
-	}
-
-	runNCCL := true
-	if body.RunNCCL != nil {
-		runNCCL = *body.RunNCCL
-	}
-	parallelGPUs := false
-	if body.ParallelGPUs != nil {
-		parallelGPUs = *body.ParallelGPUs
-	}
-	rampUp := false
-	if body.RampUp != nil {
-		rampUp = *body.RampUp
-	}
-	// Build a descriptive base name that includes profile and mode so the task
-	// list is self-explanatory without opening individual task detail pages.
-	profile := strings.TrimSpace(body.Profile)
-	if profile == "" {
-		profile = "standard"
-	}
-	name := taskDisplayName("nvidia-benchmark", "", "")
-	if strings.TrimSpace(body.DisplayName) != "" {
-		name = body.DisplayName
-	}
-	// Append profile tag.
-	name = fmt.Sprintf("%s · %s", name, profile)
-
-	if rampUp && len(body.GPUIndices) > 1 {
-		// Ramp-up mode: resolve GPU list, then create one task per prefix
-		// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1], each running in parallel.
-		gpus, err := apiListNvidiaGPUs(h.opts.App)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, err.Error())
-			return
-		}
-		resolved, err := expandSelectedGPUIndices(gpus, body.GPUIndices, body.ExcludeGPUIndices)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, err.Error())
-			return
-		}
-		if len(resolved) < 2 {
-			// Fall through to normal single-task path.
-			rampUp = false
-		} else {
-			now := time.Now()
-			rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
-			var allTasks []*Task
-			for step := 1; step <= len(resolved); step++ {
-				subset := resolved[:step]
-				stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
-				t := &Task{
-					ID:        newJobID("benchmark-nvidia"),
-					Name:      stepName,
-					Target:    "nvidia-benchmark",
-					Priority:  15,
-					Status:    TaskPending,
-					CreatedAt: now,
-					params: taskParams{
-						GPUIndices:       append([]int(nil), subset...),
-						SizeMB:           body.SizeMB,
-						BenchmarkProfile: body.Profile,
-						RunNCCL:          runNCCL && step == len(resolved),
-						ParallelGPUs:     true,
-						RampStep:         step,
-						RampTotal:        len(resolved),
-						RampRunID:        rampRunID,
-						DisplayName:      stepName,
-					},
-				}
-				allTasks = append(allTasks, t)
-			}
-			for _, t := range allTasks {
-				globalQueue.enqueue(t)
-			}
-			writeTaskRunResponse(w, allTasks)
-			return
-		}
-	}
-
-	// For non-ramp tasks append mode tag.
-	if parallelGPUs {
-		name = fmt.Sprintf("%s · parallel", name)
-	} else {
-		name = fmt.Sprintf("%s · sequential", name)
-	}
-
-	tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{
-		GPUIndices:        body.GPUIndices,
-		ExcludeGPUIndices: body.ExcludeGPUIndices,
-		SizeMB:            body.SizeMB,
-		BenchmarkProfile:  body.Profile,
-		RunNCCL:           runNCCL,
-		ParallelGPUs:      parallelGPUs,
-		DisplayName:       body.DisplayName,
-	}, name, h.opts.App, "benchmark-nvidia")
-	if err != nil {
-		writeError(w, http.StatusBadRequest, err.Error())
-		return
-	}
-	for _, t := range tasks {
-		globalQueue.enqueue(t)
-	}
-	writeTaskRunResponse(w, tasks)
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf").ServeHTTP(w, r)
 }

 func (h *handler) handleAPISATStream(w http.ResponseWriter, r *http.Request) {
@@ -1036,25 +1083,62 @@ func (h *handler) handleAPIRAMStatus(w http.ResponseWriter, r *http.Request) {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	status := h.opts.App.LiveBootSource()
+	status := h.currentRAMStatus()
 	w.Header().Set("Content-Type", "application/json")
 	_ = json.NewEncoder(w).Encode(status)
 }

+type ramStatusResponse struct {
+	platform.LiveMediaRAMState
+	InstallTaskActive bool   `json:"install_task_active,omitempty"` // an "install" task is active in the queue
+	CopyTaskActive    bool   `json:"copy_task_active,omitempty"`    // an "install-to-ram" task is active in the queue
+	CanStartTask      bool   `json:"can_start_task,omitempty"`      // a new install-to-ram task may be queued now
+	BlockedReason     string `json:"blocked_reason,omitempty"`      // why CanStartTask is false
+}
+
+// currentRAMStatus combines the platform RAM state with task-queue activity.
+// CanStartTask is set only when no install/copy task is active, the system is
+// not already in RAM and the platform allows a copy; else BlockedReason says why.
+func (h *handler) currentRAMStatus() ramStatusResponse {
+	ramState := h.opts.App.LiveMediaRAMState()
+	out := ramStatusResponse{LiveMediaRAMState: ramState}
+	switch {
+	case globalQueue.hasActiveTarget("install"):
+		out.InstallTaskActive = true
+		out.BlockedReason = "install to disk is already running"
+	case globalQueue.hasActiveTarget("install-to-ram"):
+		out.CopyTaskActive = true
+		out.BlockedReason = "install to RAM task is already pending or running"
+	case ramState.InRAM:
+		out.BlockedReason = "system is already running from RAM"
+	case ramState.CanStartCopy:
+		// Queue idle and not in RAM: the platform verdict decides.
+		out.CanStartTask = true
+	default:
+		out.BlockedReason = ramState.Message
+	}
+	return out
+}
+
 func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
-	if globalQueue.hasActiveTarget("install") {
-		writeError(w, http.StatusConflict, "install to disk is already running")
+	status := h.currentRAMStatus()
+	if !status.CanStartTask {
+		msg := strings.TrimSpace(status.BlockedReason)
+		if msg == "" {
+			msg = "install to RAM is not available"
+		}
+		writeError(w, http.StatusConflict, msg)
 		return
 	}
 	t := &Task{
 		ID:        newJobID("install-to-ram"),
 		Name:      "Install to RAM",
 		Target:    "install-to-ram",
-		Priority:  10,
+		Priority:  defaultTaskPriority("install-to-ram", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 	}
@@ -1169,7 +1253,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) {
 		ID:        newJobID("install"),
 		Name:      "Install to Disk",
 		Target:    "install",
-		Priority:  20,
+		Priority:  defaultTaskPriority("install", taskParams{}),
 		Status:    TaskPending,
 		CreatedAt: time.Now(),
 		params: taskParams{
@@ -1445,6 +1529,11 @@ func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Reques
 	writeJSON(w, map[string]string{"status": "rolled back"})
 }

+func (h *handler) handleAPIBenchmarkResults(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	fmt.Fprint(w, renderBenchmarkResultsCard(h.opts.ExportDir)) // body is the rendered card for the export dir; write errors are ignored
+}
+
 func (h *handler) rollbackPendingNetworkChange() error {
 	h.pendingNetMu.Lock()
 	pnc := h.pendingNet
@@ -1461,4 +1550,3 @@ func (h *handler) rollbackPendingNetworkChange() error {
 	}
 	return nil
 }
-
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -39,6 +39,9 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) {
 	if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" {
 		t.Fatalf("burn profile=%q want smoke", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("priority=%d want %d", got, taskPriorityValidate)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
@@ -61,7 +64,7 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[1,3],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -75,8 +78,8 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 		t.Fatalf("tasks=%d want 1", len(globalQueue.tasks))
 	}
 	task := globalQueue.tasks[0]
-	if task.Target != "nvidia-benchmark" {
-		t.Fatalf("target=%q want nvidia-benchmark", task.Target)
+	if task.Target != "nvidia-bench-perf" {
+		t.Fatalf("target=%q want nvidia-bench-perf", task.Target)
 	}
 	if got := task.params.GPUIndices; len(got) != 2 || got[0] != 1 || got[1] != 3 {
 		t.Fatalf("gpu indices=%v want [1 3]", got)
@@ -84,6 +87,9 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) {
 	if task.params.RunNCCL {
 		t.Fatal("RunNCCL should reflect explicit false from request")
 	}
+	if task.Priority != taskPriorityBenchmark {
+		t.Fatalf("priority=%d want %d", task.Priority, taskPriorityBenchmark)
+	}
 }

 func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
@@ -107,7 +113,7 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	t.Cleanup(func() { apiListNvidiaGPUs = prevList })

 	h := &handler{opts: HandlerOptions{App: &app.App{}}}
-	req := httptest.NewRequest("POST", "/api/benchmark/nvidia/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/perf/run", strings.NewReader(`{"profile":"standard","gpu_indices":[0,1,2],"run_nccl":false}`))
 	rec := httptest.NewRecorder()

 	h.handleAPIBenchmarkNvidiaRun(rec, req)
@@ -133,6 +139,56 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityBenchmark {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityBenchmark)
+	}
+}
+
+// TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks posts a
+// ramp-up power run for three GPUs of the same model and expects three queued
+// "nvidia-bench-power" tasks, each at benchmark priority.
+func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T) {
+	// Swap out the shared queue contents for the duration of the test.
+	globalQueue.mu.Lock()
+	savedTasks := globalQueue.tasks
+	globalQueue.tasks = nil
+	globalQueue.mu.Unlock()
+	t.Cleanup(func() {
+		globalQueue.mu.Lock()
+		defer globalQueue.mu.Unlock()
+		globalQueue.tasks = savedTasks
+	})
+
+	// Stub GPU discovery with three identical cards.
+	savedList := apiListNvidiaGPUs
+	apiListNvidiaGPUs = func(_ *app.App) ([]platform.NvidiaGPU, error) {
+		gpus := make([]platform.NvidiaGPU, 0, 3)
+		for idx := 0; idx < 3; idx++ {
+			gpus = append(gpus, platform.NvidiaGPU{Index: idx, Name: "NVIDIA H100 PCIe"})
+		}
+		return gpus, nil
+	}
+	t.Cleanup(func() { apiListNvidiaGPUs = savedList })
+
+	const payload = `{"profile":"standard","gpu_indices":[0,1,2],"ramp_up":true}`
+	h := &handler{opts: HandlerOptions{App: &app.App{}}}
+	rec := httptest.NewRecorder()
+	req := httptest.NewRequest("POST", "/api/bee-bench/nvidia/power/run", strings.NewReader(payload))
+
+	h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power").ServeHTTP(rec, req)
+
+	if rec.Code != 200 {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+
+	globalQueue.mu.Lock()
+	defer globalQueue.mu.Unlock()
+	if n := len(globalQueue.tasks); n != 3 {
+		t.Fatalf("tasks=%d want 3", n)
+	}
+	for i, queued := range globalQueue.tasks {
+		if queued.Target != "nvidia-bench-power" {
+			t.Fatalf("task[%d] target=%q", i, queued.Target)
+		}
+		if queued.Priority != taskPriorityBenchmark {
+			t.Fatalf("task[%d] priority=%d want %d", i, queued.Priority, taskPriorityBenchmark)
+		}
+	}
 }

 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
@@ -175,6 +231,41 @@ func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 {
 		t.Fatalf("task[1] gpu indices=%v want [2]", got)
 	}
+	if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[0] priority=%d want %d", got, taskPriorityValidate)
+	}
+	if got := globalQueue.tasks[1].Priority; got != taskPriorityValidate {
+		t.Fatalf("task[1] priority=%d want %d", got, taskPriorityValidate)
+	}
+}
+
+// TestDefaultTaskPriorityOrder pins the default priority of each listed task
+// target and checks that the priorities strictly decrease from install-to-ram
+// down to the perf benchmark, with both benchmark kinds sharing one priority.
+func TestDefaultTaskPriorityOrder(t *testing.T) {
+	cases := []struct {
+		target string
+		params taskParams
+		want   int
+	}{
+		{"install-to-ram", taskParams{}, taskPriorityInstallToRAM},
+		{"audit", taskParams{}, taskPriorityAudit},
+		{"cpu", taskParams{}, taskPriorityValidate},
+		{"cpu", taskParams{StressMode: true}, taskPriorityValidateStress},
+		{"nvidia-stress", taskParams{}, taskPriorityBurn},
+		{"nvidia-bench-perf", taskParams{}, taskPriorityBenchmark},
+		{"nvidia-bench-power", taskParams{}, taskPriorityBenchmark},
+	}
+
+	got := make([]int, len(cases))
+	for i, tc := range cases {
+		got[i] = defaultTaskPriority(tc.target, tc.params)
+	}
+	for i, tc := range cases {
+		if got[i] != tc.want {
+			t.Fatalf("priority[%d]=%d want %d", i, got[i], tc.want)
+		}
+	}
+
+	// The last two entries must tie; every earlier entry must outrank its successor.
+	ordered := got[5] == got[6]
+	for i := 0; ordered && i < 5; i++ {
+		ordered = got[i] > got[i+1]
+	}
+	if !ordered {
+		t.Fatalf("priority order=%v", got)
+	}
 }

 func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {
--- a/audit/internal/webui/kmsg_watcher.go
+++ b/audit/internal/webui/kmsg_watcher.go
@@ -232,7 +232,7 @@ func truncate(s string, max int) string {
 // isSATTarget returns true for task targets that run hardware acceptance tests.
 func isSATTarget(target string) bool {
 	switch target {
-	case "nvidia", "nvidia-targeted-stress", "nvidia-benchmark", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
+	case "nvidia", "nvidia-targeted-stress", "nvidia-bench-perf", "nvidia-bench-power", "nvidia-compute", "nvidia-targeted-power", "nvidia-pulse",
 		"nvidia-interconnect", "nvidia-bandwidth", "nvidia-stress", "memory", "memory-stress", "storage",
 		"cpu", "sat-stress", "amd", "amd-mem", "amd-bandwidth", "amd-stress",
 		"platform-stress":
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -845,6 +845,13 @@ func buildRuntimeToRAMRow(health schema.RuntimeHealth) runtimeHealthRow {
 			Source: "live-boot / /proc/mounts",
 			Issue:  "",
 		}
+	case "partial":
+		return runtimeHealthRow{
+			Title:  "LiveCD in RAM",
+			Status: "WARNING",
+			Source: "live-boot / /proc/mounts / /dev/shm/bee-live",
+			Issue:  "Partial or staged RAM copy detected. System is not fully running from RAM; Copy to RAM can be retried.",
+		}
 	case "failed":
 		return runtimeHealthRow{
 			Title:  "LiveCD in RAM",
@@ -1939,7 +1946,7 @@ func renderBenchmark(opts HandlerOptions) string {

 <div class="grid2">
  <div class="card">
-    <div class="card-head">NVIDIA Benchmark</div>
+    <div class="card-head">Benchmark Setup</div>
    <div class="card-body">
      <div class="form-row">
        <label>Profile</label>
@@ -1972,26 +1979,30 @@ func renderBenchmark(opts HandlerOptions) string {
        <span>Ramp-up — 1 GPU → 2 → … → all selected (separate tasks)</span>
      </label>
      <p id="benchmark-selection-note" style="font-size:12px;color:var(--muted);margin:10px 0 14px">Select one GPU for single-card benchmarking or several GPUs for a constrained multi-GPU run.</p>
-      <button id="benchmark-run-btn" class="btn btn-primary" onclick="runNvidiaBenchmark()" disabled>&#9654; Run Benchmark</button>
+      <div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center">
+        <button id="benchmark-run-performance-btn" class="btn btn-primary" onclick="runNvidiaBenchmark('performance')" disabled>&#9654; Run Performance Benchmark</button>
+        <button id="benchmark-run-power-fit-btn" class="btn btn-secondary" onclick="runNvidiaBenchmark('power-fit')" disabled>&#9654; Run Power / Thermal Fit</button>
+      </div>
+      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
    </div>
  </div>

  <div class="card">
-    <div class="card-head">Method</div>
+    <div class="card-head">Method Split</div>
    <div class="card-body">
-      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">Each benchmark run performs warmup, sustained compute, telemetry capture, cooldown, and optional NCCL interconnect checks.</p>
+      <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
      <table>
-        <tr><th>Profile</th><th>Purpose</th></tr>
-        <tr><td>Standard</td><td>Fast, repeatable performance check for server-to-server comparison.</td></tr>
-        <tr><td>Stability</td><td>Longer run for thermal drift, power caps, and clock instability.</td></tr>
-        <tr><td>Overnight</td><td>Extended verification of long-run stability and late throttling.</td></tr>
+        <tr><th>Run Type</th><th>Engine</th><th>Question</th></tr>
+        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td></tr>
+        <tr><td>Power / Thermal Fit</td><td><code>dcgmi targeted_power</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td></tr>
      </table>
+      <p style="font-size:12px;color:var(--muted);margin-top:10px">Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
    </div>
  </div>
 </div>

-` + renderBenchmarkResultsCard(opts.ExportDir) + `
+`+`<div id="benchmark-results-section">`+renderBenchmarkResultsCard(opts.ExportDir)+`</div>`+`

 <div id="benchmark-output" style="display:none;margin-top:16px" class="card">
  <div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
@@ -2029,21 +2040,24 @@ function benchmarkMode() {

 function benchmarkUpdateSelectionNote() {
  const selected = benchmarkSelectedGPUIndices();
-  const btn = document.getElementById('benchmark-run-btn');
+  const perfBtn = document.getElementById('benchmark-run-performance-btn');
+  const fitBtn = document.getElementById('benchmark-run-power-fit-btn');
  const note = document.getElementById('benchmark-selection-note');
  if (!selected.length) {
-    btn.disabled = true;
+    perfBtn.disabled = true;
+    fitBtn.disabled = true;
    note.textContent = 'Select at least one NVIDIA GPU to run the benchmark.';
    return;
  }
-  btn.disabled = false;
+  perfBtn.disabled = false;
+  fitBtn.disabled = false;
  const mode = benchmarkMode();
  if (mode === 'ramp-up') {
-    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). NCCL on final step.';
+    note.textContent = 'Ramp-up: ' + selected.length + ' tasks (1 GPU → ' + selected.length + ' GPUs). Performance uses compute benchmark; Power / Thermal Fit uses targeted_power per step.';
  } else if (mode === 'parallel') {
-    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously.' + (selected.length > 1 ? ' NCCL included.' : '');
+    note.textContent = 'Parallel: all ' + selected.length + ' GPU(s) simultaneously. Only the performance benchmark supports this mode.';
  } else {
-    note.textContent = 'Sequential: each GPU benchmarked separately.' + (selected.length > 1 ? ' NCCL included on each.' : '');
+    note.textContent = 'Sequential: each selected GPU benchmarked separately.';
  }
 }

@@ -2117,7 +2131,7 @@ function benchmarkSelectNone() {
  benchmarkUpdateSelectionNote();
 }

-function runNvidiaBenchmark() {
+function runNvidiaBenchmark(kind) {
  const selected = benchmarkSelectedGPUIndices();
  const status = document.getElementById('benchmark-run-status');
  if (!selected.length) {
@@ -2127,21 +2141,26 @@ function runNvidiaBenchmark() {
  if (benchmarkES) { benchmarkES.close(); benchmarkES = null; }
  const mode = benchmarkMode();
  const rampUp = mode === 'ramp-up' && selected.length > 1;
-  const parallelGPUs = mode === 'parallel';
+  const parallelGPUs = mode === 'parallel' && kind === 'performance';
+  if (kind === 'power-fit' && mode === 'parallel') {
+    status.textContent = 'Power / Thermal Fit supports sequential or ramp-up only.';
+    return;
+  }
  const body = {
    profile: document.getElementById('benchmark-profile').value || 'standard',
    gpu_indices: selected,
-    run_nccl: selected.length > 1,
+    run_nccl: kind === 'performance' && selected.length > 1,
    parallel_gpus: parallelGPUs,
    ramp_up: rampUp,
-    display_name: 'NVIDIA Benchmark'
+    display_name: kind === 'power-fit' ? 'NVIDIA Power / Thermal Fit' : 'NVIDIA Performance Benchmark'
  };
  document.getElementById('benchmark-output').style.display = 'block';
-  document.getElementById('benchmark-title').textContent = '— ' + body.profile + ' [' + selected.join(', ') + ']';
+  document.getElementById('benchmark-title').textContent = '— ' + body.display_name + ' · ' + body.profile + ' [' + selected.join(', ') + ']';
  const term = document.getElementById('benchmark-terminal');
-  term.textContent = 'Enqueuing benchmark for GPUs ' + selected.join(', ') + '...\n';
+  term.textContent = 'Enqueuing ' + body.display_name + ' for GPUs ' + selected.join(', ') + '...\n';
  status.textContent = 'Queueing...';
-  fetch('/api/benchmark/nvidia/run', {
+  const endpoint = kind === 'power-fit' ? '/api/bee-bench/nvidia/power/run' : '/api/bee-bench/nvidia/perf/run';
+  fetch(endpoint, {
    method: 'POST',
    headers: {'Content-Type':'application/json'},
    body: JSON.stringify(body)
@@ -2169,7 +2188,9 @@ function runNvidiaBenchmark() {
        if (e.data) failures += 1;
        term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
        term.scrollTop = term.scrollHeight;
+        const isLast = (idx + 1 >= taskIds.length);
        streamNext(idx + 1, failures);
+        if (isLast) { benchmarkRefreshResults(); }
      });
      benchmarkES.onerror = function() {
        if (benchmarkES) {
@@ -2189,18 +2210,30 @@ function runNvidiaBenchmark() {
 }

 benchmarkLoadGPUs();
+
+// Re-fetch the saved results cards and swap them into the results section.
+function benchmarkRefreshResults() {
+  fetch('/api/benchmark/results')
+    .then(function(r) {
+      // Keep the cards already on the page when the server answers with an error body.
+      if (!r.ok) throw new Error('HTTP ' + r.status);
+      return r.text();
+    })
+    .then(function(html) {
+      const el = document.getElementById('benchmark-results-section');
+      if (el) el.innerHTML = html;
+    })
+    .catch(function() {});
+}
 </script>`
 }

+// renderBenchmarkResultsCard renders the saved-results section of the
+// benchmark page: the performance results card built from the benchmark
+// history, a newline, then the power / thermal fit results card.
 func renderBenchmarkResultsCard(exportDir string) string {
 	maxIdx, runs := loadBenchmarkHistory(exportDir)
-	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+	perf := renderBenchmarkResultsCardFromRuns(
+		"Performance Results",
 		"Composite score by saved benchmark run and GPU.",
-		"No saved benchmark runs yet.",
+		"No saved performance benchmark runs yet.",
 		maxIdx,
 		runs,
 	)
+	power := renderPowerBenchmarkResultsCard(exportDir)
+	return perf + "\n" + power
 }

 func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
@@ -2237,11 +2270,11 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
 }

 func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
-	baseDir := app.DefaultBenchmarkBaseDir
+	baseDir := app.DefaultBeeBenchPerfDir
 	if strings.TrimSpace(exportDir) != "" {
-		baseDir = filepath.Join(exportDir, "bee-benchmark")
+		baseDir = filepath.Join(exportDir, "bee-bench", "perf")
 	}
-	paths, err := filepath.Glob(filepath.Join(baseDir, "gpu-benchmark-*", "result.json"))
+	paths, err := filepath.Glob(filepath.Join(baseDir, "perf-*", "result.json"))
 	if err != nil || len(paths) == 0 {
 		return -1, nil
 	}
@@ -2280,6 +2313,125 @@ func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun)
 	return maxGPUIndex, runs
 }

+// renderPowerBenchmarkResultsCard renders the "Power / Thermal Fit Results"
+// card from the result.json files saved under <exportDir>/bee-bench/power
+// (or app.DefaultBeeBenchPowerDir when exportDir is blank).
+//
+// The card shows the per-GPU table of the most recent run and, when more than
+// one run was loaded, a collapsible history of all runs. Files that cannot be
+// read or decoded are skipped; when no usable run remains the empty-state card
+// is returned.
+func renderPowerBenchmarkResultsCard(exportDir string) string {
+	const emptyCard = `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
+
+	baseDir := app.DefaultBeeBenchPowerDir
+	if strings.TrimSpace(exportDir) != "" {
+		baseDir = filepath.Join(exportDir, "bee-bench", "power")
+	}
+	paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
+	if err != nil || len(paths) == 0 {
+		return emptyCard
+	}
+	sort.Strings(paths)
+
+	type powerRun struct {
+		generatedAt time.Time
+		displayTime string
+		result      platform.NvidiaPowerBenchResult
+	}
+	var runs []powerRun
+	for _, path := range paths {
+		raw, err := os.ReadFile(path)
+		if err != nil {
+			continue
+		}
+		var r platform.NvidiaPowerBenchResult
+		if err := json.Unmarshal(raw, &r); err != nil {
+			continue
+		}
+		runs = append(runs, powerRun{
+			generatedAt: r.GeneratedAt,
+			displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
+			result:      r,
+		})
+	}
+	// Every matched result.json may have been unreadable or malformed; without
+	// this guard runs[0] below would panic with an index out of range.
+	if len(runs) == 0 {
+		return emptyCard
+	}
+	sort.Slice(runs, func(i, j int) bool {
+		return runs[i].generatedAt.After(runs[j].generatedAt)
+	})
+
+	// Show only the most recent run's GPU slot table, plus a run history summary.
+	var b strings.Builder
+	b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
+
+	latest := runs[0].result
+	b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
+	if latest.Hostname != "" {
+		b.WriteString(` — ` + html.EscapeString(latest.Hostname))
+	}
+	if latest.OverallStatus != "" {
+		statusColor := "var(--ok)"
+		if latest.OverallStatus != "OK" {
+			statusColor = "var(--warn)"
+		}
+		b.WriteString(` — <span style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
+	}
+	b.WriteString(`</p>`)
+
+	if len(latest.GPUs) > 0 {
+		b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
+		b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Achieved W</th><th>P95 Observed W</th><th>Status</th>`)
+		b.WriteString(`</tr></thead><tbody>`)
+		for _, gpu := range latest.GPUs {
+			// A GPU is highlighted as derated when flagged so, or when its applied
+			// limit sits more than 1 W below its default limit.
+			derated := gpu.Derated || (gpu.DefaultPowerLimitW > 0 && gpu.AppliedPowerLimitW < gpu.DefaultPowerLimitW-1)
+			rowStyle := ""
+			achievedStyle := ""
+			if derated {
+				rowStyle = ` style="background:rgba(255,180,0,0.08)"`
+				achievedStyle = ` style="color:#e6a000;font-weight:600"`
+			}
+			statusLabel := gpu.Status
+			if statusLabel == "" {
+				statusLabel = "OK"
+			}
+			statusColor := "var(--ok)"
+			if statusLabel != "OK" {
+				statusColor = "var(--warn)"
+			}
+			nominalStr := "-"
+			if gpu.DefaultPowerLimitW > 0 {
+				nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
+			}
+			achievedStr := "-"
+			if gpu.AppliedPowerLimitW > 0 {
+				achievedStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
+			}
+			// NOTE(review): the "P95 Observed W" column is filled from
+			// MaxObservedPowerW — confirm the header matches the field.
+			p95Str := "-"
+			if gpu.MaxObservedPowerW > 0 {
+				p95Str = fmt.Sprintf("%.0f", gpu.MaxObservedPowerW)
+			}
+			b.WriteString(`<tr` + rowStyle + `>`)
+			b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
+			b.WriteString(`<td>` + nominalStr + `</td>`)
+			b.WriteString(`<td` + achievedStyle + `>` + achievedStr + `</td>`)
+			b.WriteString(`<td>` + p95Str + `</td>`)
+			b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div>`)
+	}
+
+	if len(runs) > 1 {
+		b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
+		b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
+		for i, run := range runs {
+			statusColor := "var(--ok)"
+			if run.result.OverallStatus != "OK" {
+				statusColor = "var(--warn)"
+			}
+			b.WriteString(`<tr>`)
+			b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
+			b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
+			b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
+			b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(run.result.OverallStatus) + `</td>`)
+			b.WriteString(`</tr>`)
+		}
+		b.WriteString(`</tbody></table></div></details>`)
+	}
+
+	b.WriteString(`</div></div>`)
+	return b.String()
+}

 // ── Burn ──────────────────────────────────────────────────────────────────────

@@ -3245,12 +3397,19 @@ fetch('/api/system/ram-status').then(r=>r.json()).then(d=>{
  else if (kind === 'disk') label = 'disk (' + source + ')';
  else label = source;
  boot.textContent = 'Current boot source: ' + label + '.';
-  if (d.in_ram) {
-    txt.textContent = '✓ Running from RAM — installation media can be safely disconnected.';
+  txt.textContent = d.message || 'Checking...';
+  if (d.status === 'ok' || d.in_ram) {
    txt.style.color = 'var(--ok, green)';
+  } else if (d.status === 'failed') {
+    txt.style.color = 'var(--err, #b91c1c)';
  } else {
-    txt.textContent = 'Live media is mounted from installation device. Copy to RAM to allow media removal.';
+    txt.style.color = 'var(--muted)';
+  }
+  if (d.can_start_task) {
    btn.style.display = '';
+    btn.disabled = false;
+  } else {
+    btn.style.display = 'none';
  }
 });
 function installToRAM() {
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -261,7 +261,9 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/platform-stress/run", h.handleAPISATRun("platform-stress"))
 	mux.HandleFunc("GET /api/sat/stream", h.handleAPISATStream)
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
-	mux.HandleFunc("POST /api/benchmark/nvidia/run", h.handleAPIBenchmarkNvidiaRun)
+	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
+	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
+	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)

 	// Tasks
 	mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -11,6 +11,7 @@ import (
 	"time"

 	"bee/audit/internal/platform"
+	"bee/audit/internal/schema"
 )

 func TestChartLegendNumber(t *testing.T) {
@@ -78,6 +79,16 @@ func TestRecoverMiddlewarePreservesStreamingInterfaces(t *testing.T) {
 	}
 }

+// TestBuildRuntimeToRAMRowShowsPartialCopyWarning checks that a "partial"
+// ToRAMStatus yields a WARNING row whose issue text mentions the partial copy.
+func TestBuildRuntimeToRAMRowShowsPartialCopyWarning(t *testing.T) {
+	health := schema.RuntimeHealth{ToRAMStatus: "partial"}
+	row := buildRuntimeToRAMRow(health)
+
+	if got := row.Status; got != "WARNING" {
+		t.Fatalf("status=%q want WARNING", got)
+	}
+	const wantIssue = "Partial or staged RAM copy detected"
+	if !strings.Contains(row.Issue, wantIssue) {
+		t.Fatalf("issue=%q", row.Issue)
+	}
+}
+
 func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 	samples := []platform.LiveMetricSample{
 		{
@@ -637,8 +648,11 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`href="/benchmark"`,
 		`id="benchmark-gpu-list"`,
 		`/api/gpu/nvidia`,
-		`/api/benchmark/nvidia/run`,
+		`/api/bee-bench/nvidia/perf/run`,
+		`/api/bee-bench/nvidia/power/run`,
 		`benchmark-run-nccl`,
+		`Run Performance Benchmark`,
+		`Run Power / Thermal Fit`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
@@ -649,7 +663,7 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	dir := t.TempDir()
 	exportDir := filepath.Join(dir, "export")
-	runDir := filepath.Join(exportDir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	runDir := filepath.Join(exportDir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(runDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -691,7 +705,7 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	body := rec.Body.String()
 	wantTime := result.GeneratedAt.Local().Format("2006-01-02 15:04:05")
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Performance Results`, // benchmark page card title; task reports use "Perf Results"
 		`Composite score by saved benchmark run and GPU.`,
 		`GPU 0`,
 		`GPU 1`,
@@ -1113,8 +1127,8 @@ func TestDashboardRendersRuntimeHealthTable(t *testing.T) {
 		`>Storage<`,
 		`>GPU<`,
 		`>PSU<`,
-		`badge-warn`,   // cpu Warning badge
-		`badge-err`,    // storage Critical badge
+		`badge-warn`, // cpu Warning badge
+		`badge-err`,  // storage Critical badge
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("dashboard missing %q: %s", needle, body)
--- a/audit/internal/webui/task_report.go
+++ b/audit/internal/webui/task_report.go
@@ -233,6 +233,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 	if benchmarkCard := renderTaskBenchmarkResultsCard(report.Target, logText); benchmarkCard != "" {
 		b.WriteString(benchmarkCard)
 	}
+	if powerCard := renderTaskPowerResultsCard(report.Target, logText); powerCard != "" {
+		b.WriteString(powerCard)
+	}

 	if len(report.Charts) > 0 {
 		for _, chart := range report.Charts {
@@ -251,7 +254,9 @@ func renderTaskReportFragment(report taskReport, charts map[string]string, logTe
 }

 func renderTaskBenchmarkResultsCard(target, logText string) string {
-	if strings.TrimSpace(target) != "nvidia-benchmark" {
+	switch strings.TrimSpace(target) {
+	case "nvidia-bench-perf":
+	default:
 		return ""
 	}
 	resultPath := taskBenchmarkResultPath(logText)
@@ -263,7 +268,7 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 		return ""
 	}
 	return renderBenchmarkResultsCardFromRuns(
-		"Benchmark Results",
+		"Perf Results",
 		"Composite score for this benchmark task.",
 		"No benchmark results were saved for this task.",
 		columns,
@@ -271,15 +276,42 @@ func renderTaskBenchmarkResultsCard(target, logText string) string {
 	)
 }

+// renderTaskPowerResultsCard builds the "Power Results" card for a task report.
+// It returns "" unless target is "nvidia-bench-power" and the result.json
+// located via taskBenchmarkResultPath(logText) can be read and decoded.
+func renderTaskPowerResultsCard(target, logText string) string {
+	if strings.TrimSpace(target) != "nvidia-bench-power" {
+		return ""
+	}
+	path := taskBenchmarkResultPath(logText)
+	if strings.TrimSpace(path) == "" {
+		return ""
+	}
+	data, readErr := os.ReadFile(path)
+	if readErr != nil {
+		return ""
+	}
+	var res platform.NvidiaPowerBenchResult
+	if json.Unmarshal(data, &res) != nil {
+		return ""
+	}
+
+	const rowFormat = `<tr><td>GPU %d</td><td>%s</td><td>%.0f W</td><td>%.0f W</td></tr>`
+	var card strings.Builder
+	card.WriteString(`<div class="card"><div class="card-head">Power Results</div><div class="card-body">`)
+	if order := res.RecommendedSlotOrder; len(order) > 0 {
+		card.WriteString(`<p style="margin-bottom:10px"><strong>Recommended slot order:</strong> `)
+		card.WriteString(html.EscapeString(joinTaskIndices(order)))
+		card.WriteString(`</p>`)
+	}
+	card.WriteString(`<table><tr><th>GPU</th><th>Status</th><th>Max Power</th><th>Applied Limit</th></tr>`)
+	for _, g := range res.GPUs {
+		fmt.Fprintf(&card, rowFormat, g.Index, html.EscapeString(g.Status), g.MaxObservedPowerW, g.AppliedPowerLimitW)
+	}
+	card.WriteString(`</table></div></div>`)
+	return card.String()
+}
+
+// taskBenchmarkResultPath derives a task's result.json path from the archive
+// path returned by taskArchivePathFromLog(logText): a trailing ".tar.gz" is
+// trimmed and "result.json" is joined onto what remains. It returns "" when no
+// archive path is found. With the suffix guard removed, an archive path that
+// does not end in ".tar.gz" is used as the run directory unchanged.
 func taskBenchmarkResultPath(logText string) string {
 	archivePath := taskArchivePathFromLog(logText)
 	if archivePath == "" {
 		return ""
 	}
 	runDir := strings.TrimSuffix(archivePath, ".tar.gz")
-	if runDir == archivePath {
-		return ""
-	}
 	return filepath.Join(runDir, "result.json")
 }

--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -32,7 +32,8 @@ const (
 var taskNames = map[string]string{
 	"nvidia":                 "NVIDIA SAT",
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
-	"nvidia-benchmark":       "NVIDIA Benchmark",
+	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
+	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -628,7 +629,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			dur = 300
 		}
 		archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
-	case "nvidia-benchmark":
+	case "nvidia-bench-perf":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
@@ -644,6 +645,19 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
+	case "nvidia-bench-power":
+		if a == nil {
+			err = fmt.Errorf("app not configured")
+			break
+		}
+		archive, err = a.RunNvidiaPowerBenchCtx(ctx, app.DefaultBeeBenchPowerDir, platform.NvidiaBenchmarkOptions{
+			Profile:           t.params.BenchmarkProfile,
+			GPUIndices:        t.params.GPUIndices,
+			ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+			RampStep:          t.params.RampStep,
+			RampTotal:         t.params.RampTotal,
+			RampRunID:         t.params.RampRunID,
+		}, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -366,7 +366,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	taskReportMetricsDBPath = metricsPath
 	t.Cleanup(func() { taskReportMetricsDBPath = prevMetricsPath })

-	benchmarkDir := filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000")
+	benchmarkDir := filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000")
 	if err := os.MkdirAll(benchmarkDir, 0755); err != nil {
 		t.Fatal(err)
 	}
@@ -398,14 +398,14 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	task := &Task{
 		ID:           "task-bench",
-		Name:         "NVIDIA Benchmark",
-		Target:       "nvidia-benchmark",
+		Name:         "NVIDIA Bee Bench Perf",
+		Target:       "nvidia-bench-perf",
 		Status:       TaskDone,
 		CreatedAt:    time.Now().UTC().Add(-time.Minute),
 		ArtifactsDir: artifactsDir,
 	}
 	ensureTaskReportPaths(task)
-	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-benchmark", "gpu-benchmark-20260406-120000.tar.gz") + "\n"
+	logText := "line-1\nArchive: " + filepath.Join(dir, "bee-bench", "perf", "perf-20260406-120000.tar.gz") + "\n"
 	if err := os.WriteFile(task.LogPath, []byte(logText), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -420,7 +420,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	}
 	html := string(body)
 	for _, needle := range []string{
-		`Benchmark Results`,
+		`Perf Results`,
 		`Composite score for this benchmark task.`,
 		`GPU 0`,
 		`1176.25`,
--- a/bible-local/docs/benchmark-clock-calibration.md
+++ b/bible-local/docs/benchmark-clock-calibration.md
@@ -1,5 +1,34 @@
 # Benchmark clock calibration research

+## Benchmark methodology versioning
+
+Every benchmark methodology change must bump the benchmark version constant in
+source code by exactly `+1`.
+
+Methodology change means any change that affects comparability of benchmark
+results, including for example:
+- phase durations or phase order
+- enabled/disabled precisions
+- fallback rules
+- normalization rules
+- score formulas or weights
+- degradation thresholds
+- power calibration logic
+- thermal/power penalty logic
+
+Requirements:
+- benchmark version must be stored in source code as an explicit version
+  constant, not inferred from git tag or build metadata
+- benchmark report must always print the benchmark version
+- `result.json` must always include the benchmark version
+- results from different benchmark versions must be treated as non-comparable by
+  default
+
+Purpose:
+- prevent accidental comparison of runs produced by different methodologies
+- make historical benchmark archives self-describing even when detached from git
+- force deliberate version bumps whenever scoring or execution semantics change
+
 ## Status
 In progress. Baseline data from production servers pending.

--- a/iso/builder/VERSIONS
+++ b/iso/builder/VERSIONS
@@ -6,7 +6,7 @@ NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
 NCCL_TESTS_VERSION=2.13.10
 NVCC_VERSION=12.8
-CUBLAS_VERSION=13.0.2.14-1
+CUBLAS_VERSION=13.1.1.3-1
 CUDA_USERSPACE_VERSION=13.0.96-1
 DCGM_VERSION=4.5.3-1
 JOHN_JUMBO_COMMIT=67fcf9fe5a
@@ -21,3 +21,4 @@ HIPBLASLT_VERSION=0.10.0.60304-76~22.04
 COMGR_VERSION=2.8.0.60304-76~22.04
 GO_VERSION=1.24.0
 AUDIT_VERSION=1.0.0
+MEMTEST_VERSION=6.10-4
--- a/iso/builder/auto/config
+++ b/iso/builder/auto/config
@@ -23,10 +23,10 @@ lb config noauto \
    --bootloaders "grub-efi,syslinux" \
    --debian-installer none \
    --archive-areas "main contrib non-free non-free-firmware" \
-    --mirror-bootstrap "https://deb.debian.org/debian" \
-    --mirror-chroot "https://deb.debian.org/debian" \
-    --mirror-binary "https://deb.debian.org/debian" \
-    --security true \
+    --mirror-bootstrap "http://mirror.mephi.ru/debian/" \
+    --mirror-chroot "http://mirror.mephi.ru/debian/" \
+    --mirror-binary "http://mirror.mephi.ru/debian/" \
+    --security false \
    --linux-flavours "amd64" \
    --linux-packages "${LB_LINUX_PACKAGES}" \
    --memtest memtest86+ \
--- a/iso/builder/bee-gpu-stress.c
+++ b/iso/builder/bee-gpu-stress.c
@@ -33,7 +33,6 @@ typedef void *CUstream;
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75
 #define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76
 #define MAX_STRESS_STREAMS 16
-#define MAX_CUBLAS_PROFILES 5
 #define MIN_PROFILE_BUDGET_BYTES ((size_t)4u * 1024u * 1024u)
 #define MIN_STREAM_BUDGET_BYTES ((size_t)64u * 1024u * 1024u)

@@ -643,6 +642,20 @@ static const struct profile_desc k_profiles[] = {
        CUDA_R_16F,
        CUBLAS_COMPUTE_32F_FAST_16F,
    },
+    {
+        "int8_tensor",
+        "int8",
+        75,
+        1,
+        0,
+        0,
+        128,
+        CUDA_R_8I,
+        CUDA_R_8I,
+        CUDA_R_32I,
+        CUDA_R_32I,
+        CUBLAS_COMPUTE_32I,
+    },
    {
        "fp8_e4m3",
        "fp8",
@@ -689,6 +702,8 @@ static const struct profile_desc k_profiles[] = {
 #endif
 };

+#define PROFILE_COUNT ((int)(sizeof(k_profiles) / sizeof(k_profiles[0])))
+
 static int load_cublaslt(struct cublaslt_api *api) {
    memset(api, 0, sizeof(*api));
    api->lib = dlopen("libcublasLt.so.13", RTLD_NOW | RTLD_LOCAL);
@@ -759,10 +774,12 @@ static int check_cublas(const char *step, cublasStatus_t status) {
 static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    switch (type) {
        case CUDA_R_32F:
+        case CUDA_R_32I:
            return (size_t)(elements * 4u);
        case CUDA_R_16F:
        case CUDA_R_16BF:
            return (size_t)(elements * 2u);
+        case CUDA_R_8I:
        case CUDA_R_8F_E4M3:
        case CUDA_R_8F_E5M2:
            return (size_t)(elements);
@@ -775,6 +792,16 @@ static size_t bytes_for_elements(cudaDataType_t type, uint64_t elements) {
    }
 }

+static cudaDataType_t matmul_scale_type(const struct profile_desc *desc) { /* picks the scale type passed to cublasLtMatmulDescCreate for this profile */
+    if (desc->compute_type == CUBLAS_COMPUTE_32I) {
+        return CUDA_R_32I; /* integer compute: int32 scale type */
+    }
+    if (desc->compute_type == CUBLAS_COMPUTE_64F) {
+        return CUDA_R_64F; /* double compute: fp64 scale type */
+    }
+    return CUDA_R_32F; /* every other compute type keeps the float scale type */
+}
+
 static size_t fp4_scale_bytes(uint64_t rows, uint64_t cols) {
    uint64_t row_tiles = (rows + 127u) / 128u;
    uint64_t col_tiles = (cols + 63u) / 64u;
@@ -943,8 +970,9 @@ static int prepare_profile(struct cublaslt_api *cublas,
        return 0;
    }

+    cudaDataType_t scale_type = matmul_scale_type(desc);
    if (!check_cublas("cublasLtMatmulDescCreate",
-                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, CUDA_R_32F))) {
+                      cublas->cublasLtMatmulDescCreate(&out->op_desc, desc->compute_type, scale_type))) {
        destroy_profile(cublas, cuda, out);
        return 0;
    }
@@ -1093,17 +1121,30 @@ static int prepare_profile(struct cublaslt_api *cublas,
 static int run_cublas_profile(cublasLtHandle_t handle,
                              struct cublaslt_api *cublas,
                              struct prepared_profile *profile) {
+    int32_t alpha_i32 = 1;
+    int32_t beta_i32 = 0;
+    double alpha_f64 = 1.0;
+    double beta_f64 = 0.0;
    float alpha = 1.0f;
    float beta = 0.0f;
+    const void *alpha_ptr = &alpha;
+    const void *beta_ptr = &beta;
+    if (profile->desc.compute_type == CUBLAS_COMPUTE_32I) {
+        alpha_ptr = &alpha_i32;
+        beta_ptr = &beta_i32;
+    } else if (profile->desc.compute_type == CUBLAS_COMPUTE_64F) {
+        alpha_ptr = &alpha_f64;
+        beta_ptr = &beta_f64;
+    }
    return check_cublas(profile->desc.name,
                        cublas->cublasLtMatmul(handle,
                                               profile->op_desc,
-                                               &alpha,
+                                               alpha_ptr,
                                               (const void *)(uintptr_t)profile->a_dev,
                                               profile->a_layout,
                                               (const void *)(uintptr_t)profile->b_dev,
                                               profile->b_layout,
-                                               &beta,
+                                               beta_ptr,
                                               (const void *)(uintptr_t)profile->c_dev,
                                               profile->c_layout,
                                               (void *)(uintptr_t)profile->d_dev,
@@ -1124,7 +1165,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
                               const char *precision_filter,
                               struct stress_report *report) {
    struct cublaslt_api cublas;
-    struct prepared_profile prepared[MAX_STRESS_STREAMS * MAX_CUBLAS_PROFILES];
+    struct prepared_profile prepared[MAX_STRESS_STREAMS * PROFILE_COUNT];
    cublasLtHandle_t handle = NULL;
    CUcontext ctx = NULL;
    CUstream streams[MAX_STRESS_STREAMS] = {0};
@@ -1134,7 +1175,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
    int active = 0;
    int mp_count = 0;
    int stream_count = 1;
-    int profile_count = (int)(sizeof(k_profiles) / sizeof(k_profiles[0]));
+    int profile_count = PROFILE_COUNT;
    int prepared_count = 0;
    size_t requested_budget = 0;
    size_t total_budget = 0;
@@ -1159,6 +1200,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Count profiles matching the filter (for deciding what to run). */
    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc &&
            (precision_filter == NULL || strcmp(k_profiles[i].block_label, precision_filter) == 0)) {
@@ -1172,18 +1214,31 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        return 0;
    }

+    /* Count all profiles active on this GPU regardless of filter.
+     * Used as the budget divisor so matrix sizes stay consistent whether
+     * running all precisions together or a single-precision phase. */
+    int planned_total = 0;
+    for (size_t i = 0; i < sizeof(k_profiles) / sizeof(k_profiles[0]); i++) {
+        if (k_profiles[i].enabled && cc >= k_profiles[i].min_cc) {
+            planned_total++;
+        }
+    }
+    if (planned_total < planned) {
+        planned_total = planned;
+    }
+
    requested_budget = (size_t)size_mb * 1024u * 1024u;
-    if (requested_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        requested_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (requested_budget < (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES) {
+        requested_budget = (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES;
    }
    total_budget = clamp_budget_to_free_memory(cuda, requested_budget);
-    if (total_budget < (size_t)planned * MIN_PROFILE_BUDGET_BYTES) {
-        total_budget = (size_t)planned * MIN_PROFILE_BUDGET_BYTES;
+    if (total_budget < (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES) {
+        total_budget = (size_t)planned_total * MIN_PROFILE_BUDGET_BYTES;
    }
    if (query_multiprocessor_count(cuda, dev, &mp_count) &&
        cuda->cuStreamCreate &&
        cuda->cuStreamDestroy) {
-        stream_count = choose_stream_count(mp_count, planned, total_budget, 1);
+        stream_count = choose_stream_count(mp_count, planned_total, total_budget, 1);
    }
    if (stream_count > 1) {
        int created = 0;
@@ -1196,7 +1251,7 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
        }
    }
    report->stream_count = stream_count;
-    per_profile_budget = total_budget / ((size_t)planned * (size_t)stream_count);
+    per_profile_budget = total_budget / ((size_t)planned_total * (size_t)stream_count);
    if (per_profile_budget < MIN_PROFILE_BUDGET_BYTES) {
        per_profile_budget = MIN_PROFILE_BUDGET_BYTES;
    }
@@ -1344,11 +1399,29 @@ static int run_cublaslt_stress(struct cuda_api *cuda,
 }
 #endif

+static void print_stress_report(const struct stress_report *report, int device_index, int seconds) { /* prints one key=value result block for a finished run to stdout */
+    printf("device=%s\n", report->device);
+    printf("device_index=%d\n", device_index);
+    printf("compute_capability=%d.%d\n", report->cc_major, report->cc_minor);
+    printf("backend=%s\n", report->backend);
+    printf("duration_s=%d\n", seconds); /* caller-supplied duration, not a value measured here */
+    printf("buffer_mb=%d\n", report->buffer_mb);
+    printf("streams=%d\n", report->stream_count);
+    printf("iterations=%lu\n", report->iterations);
+    printf("checksum=%llu\n", (unsigned long long)report->checksum);
+    if (report->details[0] != '\0') { /* details is emitted verbatim; no newline is appended */
+        printf("%s", report->details);
+    }
+    printf("status=OK\n"); /* unconditional: the visible call sites only get here after a successful run */
+}
+
 int main(int argc, char **argv) {
    int seconds = 5;
    int size_mb = 64;
    int device_index = 0;
    const char *precision_filter = NULL; /* NULL = all; else block_label to match */
+    const char *precision_plan = NULL;
+    const char *precision_plan_seconds = NULL;
    for (int i = 1; i < argc; i++) {
        if ((strcmp(argv[i], "--seconds") == 0 || strcmp(argv[i], "-t") == 0) && i + 1 < argc) {
            seconds = atoi(argv[++i]);
@@ -1358,9 +1431,13 @@ int main(int argc, char **argv) {
            device_index = atoi(argv[++i]);
        } else if (strcmp(argv[i], "--precision") == 0 && i + 1 < argc) {
            precision_filter = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan") == 0 && i + 1 < argc) {
+            precision_plan = argv[++i];
+        } else if (strcmp(argv[i], "--precision-plan-seconds") == 0 && i + 1 < argc) {
+            precision_plan_seconds = argv[++i];
        } else {
            fprintf(stderr,
-                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision fp8|fp16|fp32|fp64|fp4]\n",
+                    "usage: %s [--seconds N] [--size-mb N] [--device N] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]\n",
                    argv[0]);
            return 2;
        }
@@ -1421,26 +1498,94 @@ int main(int argc, char **argv) {
    int ok = 0;

 #if HAVE_CUBLASLT_HEADERS
+    if (precision_plan != NULL && precision_plan[0] != '\0') {
+        char *plan_copy = strdup(precision_plan);
+        char *plan_seconds_copy = NULL;
+        int phase_seconds[32] = {0};
+        int phase_seconds_count = 0;
+        int phase_ok = 0;
+        if (plan_copy == NULL) {
+            fprintf(stderr, "failed to allocate precision plan buffer\n");
+            return 1;
+        }
+        if (precision_plan_seconds != NULL && precision_plan_seconds[0] != '\0') {
+            plan_seconds_copy = strdup(precision_plan_seconds);
+            if (plan_seconds_copy == NULL) {
+                free(plan_copy);
+                fprintf(stderr, "failed to allocate precision plan seconds buffer\n");
+                return 1;
+            }
+            for (char *sec_token = strtok(plan_seconds_copy, ",");
+                 sec_token != NULL && phase_seconds_count < (int)(sizeof(phase_seconds) / sizeof(phase_seconds[0]));
+                 sec_token = strtok(NULL, ",")) {
+                while (*sec_token == ' ' || *sec_token == '\t') {
+                    sec_token++;
+                }
+                if (*sec_token == '\0') {
+                    continue;
+                }
+                phase_seconds[phase_seconds_count++] = atoi(sec_token);
+            }
+        }
+        int phase_idx = 0;
+        for (char *token = strtok(plan_copy, ","); token != NULL; token = strtok(NULL, ","), phase_idx++) {
+            while (*token == ' ' || *token == '\t') {
+                token++;
+            }
+            if (*token == '\0') {
+                continue;
+            }
+            const char *phase_name = token;
+            const char *phase_filter = token;
+            if (strcmp(token, "mixed") == 0 || strcmp(token, "all") == 0) {
+                phase_filter = NULL;
+            }
+            int phase_duration = seconds;
+            if (phase_idx < phase_seconds_count && phase_seconds[phase_idx] > 0) {
+                phase_duration = phase_seconds[phase_idx];
+            }
+            printf("phase_begin=%s\n", phase_name);
+            fflush(stdout);
+            memset(&report, 0, sizeof(report));
+            ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, phase_duration, size_mb, phase_filter, &report);
+            if (ok) {
+                print_stress_report(&report, device_index, phase_duration);
+                phase_ok = 1;
+            } else {
+                printf("phase_error=%s\n", phase_name);
+                if (report.details[0] != '\0') {
+                    printf("%s", report.details);
+                    if (report.details[strlen(report.details) - 1] != '\n') {
+                        printf("\n");
+                    }
+                }
+                printf("status=FAILED\n");
+            }
+            printf("phase_end=%s\n", phase_name);
+            fflush(stdout);
+        }
+        free(plan_seconds_copy);
+        free(plan_copy);
+        return phase_ok ? 0 : 1;
+    }
    ok = run_cublaslt_stress(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, precision_filter, &report);
 #endif
    if (!ok) {
-        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, size_mb, &report)) {
+        if (precision_filter != NULL) {
+            fprintf(stderr,
+                    "requested precision path unavailable: precision=%s device=%s cc=%d.%d\n",
+                    precision_filter,
+                    name,
+                    cc_major,
+                    cc_minor);
+            return 1;
+        }
+        int ptx_mb = size_mb;
+        if (!run_ptx_fallback(&cuda, dev, name, cc_major, cc_minor, seconds, ptx_mb, &report)) {
            return 1;
        }
    }

-    printf("device=%s\n", report.device);
-    printf("device_index=%d\n", device_index);
-    printf("compute_capability=%d.%d\n", report.cc_major, report.cc_minor);
-    printf("backend=%s\n", report.backend);
-    printf("duration_s=%d\n", seconds);
-    printf("buffer_mb=%d\n", report.buffer_mb);
-    printf("streams=%d\n", report.stream_count);
-    printf("iterations=%lu\n", report.iterations);
-    printf("checksum=%llu\n", (unsigned long long)report.checksum);
-    if (report.details[0] != '\0') {
-        printf("%s", report.details);
-    }
-    printf("status=OK\n");
+    print_stress_report(&report, device_index, seconds);
    return 0;
 }
--- a/iso/builder/build-in-container.sh
+++ b/iso/builder/build-in-container.sh
@@ -161,6 +161,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}" \
@@ -175,6 +176,7 @@ run_variant() {
            -e GOMODCACHE=/cache/go-mod \
            -e TMPDIR=/cache/tmp \
            -e BEE_CACHE_DIR=/cache/bee \
+            -e BEE_REQUIRE_MEMTEST=1 \
            -w /work \
            "${IMAGE_REF}" \
            sh /work/iso/builder/build.sh --variant "${_v}"
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -57,6 +57,7 @@ OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BUILD_VARIANT}"
 export BEE_GPU_VENDOR BEE_NVIDIA_MODULE_FLAVOR BUILD_VARIANT

 . "${BUILDER_DIR}/VERSIONS"
+export MEMTEST_VERSION
 export PATH="$PATH:/usr/local/go/bin"
 : "${BEE_REQUIRE_MEMTEST:=0}"

@@ -775,6 +776,7 @@ run_optional_step_sh() {
        return 0
    fi

+    mkdir -p "${LOG_DIR}" 2>/dev/null || true
    step_log="${LOG_DIR}/${step_slug}.log"
    echo ""
    echo "=== optional step: ${step_name} ==="
@@ -798,13 +800,14 @@ start_build_log
 # install them on the fly so NVIDIA modules and ISO kernel always match.
 if [ -z "${DEBIAN_KERNEL_ABI}" ] || [ "${DEBIAN_KERNEL_ABI}" = "auto" ]; then
    echo "=== refreshing apt index to detect current kernel ABI ==="
-    apt-get update -qq
+    apt-get update -qq || echo "WARNING: apt-get update failed, trying cached index"
    DEBIAN_KERNEL_ABI=$(apt-cache depends linux-image-amd64 2>/dev/null \
        | awk '/Depends:.*linux-image-[0-9]/{print $2}' \
        | grep -oE '[0-9]+\.[0-9]+\.[0-9]+-[0-9]+' \
        | head -1)
    if [ -z "${DEBIAN_KERNEL_ABI}" ]; then
        echo "ERROR: could not auto-detect kernel ABI from apt-cache" >&2
+        echo "Hint: set DEBIAN_KERNEL_ABI=x.y.z-N in iso/builder/VERSIONS to skip auto-detection" >&2
        exit 1
    fi
    echo "=== kernel ABI: ${DEBIAN_KERNEL_ABI} ==="
@@ -873,6 +876,22 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then

    CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}"

+    echo "=== bee-gpu-burn FP4 header probe ==="
+    fp4_type_match="$(grep -Rsnm 1 'CUDA_R_4F_E2M1' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    fp4_scale_match="$(grep -Rsnm 1 'CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3' "${CUBLAS_CACHE}/include" 2>/dev/null || true)"
+    if [ -n "$fp4_type_match" ]; then
+        echo "fp4_header_symbol=present"
+        echo "$fp4_type_match"
+    else
+        echo "fp4_header_symbol=missing"
+    fi
+    if [ -n "$fp4_scale_match" ]; then
+        echo "fp4_scale_mode_symbol=present"
+        echo "$fp4_scale_match"
+    else
+        echo "fp4_scale_mode_symbol=missing"
+    fi
+
    GPU_STRESS_NEED_BUILD=1
    if [ -f "$GPU_BURN_WORKER_BIN" ]; then
        GPU_STRESS_NEED_BUILD=0
@@ -901,6 +920,12 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
    else
        echo "=== bee-gpu-burn worker up to date, skipping build ==="
    fi
+    echo "=== bee-gpu-burn compiled profile probe ==="
+    if grep -aq 'fp4_e2m1' "$GPU_BURN_WORKER_BIN"; then
+        echo "fp4_profile_string=present"
+    else
+        echo "fp4_profile_string=missing"
+    fi
 fi

 echo "=== preparing staged overlay (${BUILD_VARIANT}) ==="
--- a/iso/builder/config/hooks/normal/9100-memtest.hook.binary
+++ b/iso/builder/config/hooks/normal/9100-memtest.hook.binary
@@ -5,6 +5,8 @@ set -e

 : "${BEE_REQUIRE_MEMTEST:=0}"

+# memtest86+ 6.x uses memtest86+.bin (no x64 suffix) for the BIOS binary,
+# while 5.x used memtest86+x64.bin. We normalise both to x64 names in the ISO.
 MEMTEST_FILES="memtest86+x64.bin memtest86+x64.efi"
 BINARY_BOOT_DIR="binary/boot"
 GRUB_CFG="binary/boot/grub/grub.cfg"
@@ -26,13 +28,13 @@ fail_or_warn() {

 copy_memtest_file() {
    src="$1"
-    base="$(basename "$src")"
-    dst="${BINARY_BOOT_DIR}/${base}"
+    dst_name="${2:-$(basename "$src")}"
+    dst="${BINARY_BOOT_DIR}/${dst_name}"

    [ -f "$src" ] || return 1
    mkdir -p "${BINARY_BOOT_DIR}"
    cp "$src" "$dst"
-    log "copied ${base} from ${src}"
+    log "copied ${dst_name} from ${src}"
 }

 extract_memtest_from_deb() {
@@ -41,14 +43,42 @@ extract_memtest_from_deb() {

    log "extracting memtest payload from ${deb}"
    dpkg-deb -x "$deb" "$tmpdir"
-    for f in ${MEMTEST_FILES}; do
-        if [ -f "${tmpdir}/boot/${f}" ]; then
-            copy_memtest_file "${tmpdir}/boot/${f}"
-        fi
-    done
+
+    # EFI binary: both 5.x and 6.x use memtest86+x64.efi
+    if [ -f "${tmpdir}/boot/memtest86+x64.efi" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.efi"
+    fi
+
+    # BIOS binary: 5.x = memtest86+x64.bin, 6.x = memtest86+.bin
+    if [ -f "${tmpdir}/boot/memtest86+x64.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+x64.bin"
+    elif [ -f "${tmpdir}/boot/memtest86+.bin" ]; then
+        copy_memtest_file "${tmpdir}/boot/memtest86+.bin" "memtest86+x64.bin"
+    fi
+
    rm -rf "$tmpdir"
 }

+download_and_extract_memtest() {  # last-resort source: fetch the memtest86+ .deb via apt and unpack it
+    tmpdl="$(mktemp -d)"
+    ver_arg=""
+    if [ -n "${MEMTEST_VERSION:-}" ]; then
+        ver_arg="=${MEMTEST_VERSION}"  # apt wants pkg=version; the package name is prepended below
+        log "downloading memtest86+=${MEMTEST_VERSION} from apt"
+    else
+        log "downloading memtest86+ from apt (no version pinned)"
+    fi
+    # Best-effort: a failed download is reported below when no .deb is found.
+    ( cd "$tmpdl" && apt-get download "memtest86+${ver_arg}" ) 2>/dev/null || true
+    deb="$(find "$tmpdl" -maxdepth 1 -type f -name 'memtest86+*.deb' 2>/dev/null | head -1)"
+    if [ -n "$deb" ]; then
+        extract_memtest_from_deb "$deb"
+    else
+        log "apt download of memtest86+ failed"
+    fi
+    rm -rf "$tmpdl"
+}
+
 ensure_memtest_binaries() {
    missing=0
    for f in ${MEMTEST_FILES}; do
@@ -56,10 +86,15 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 1. Try files already placed by lb binary_memtest or chroot
    for root in chroot/boot /boot; do
        for f in ${MEMTEST_FILES}; do
            [ -f "${BINARY_BOOT_DIR}/${f}" ] || copy_memtest_file "${root}/${f}" || true
        done
+        # 6.x BIOS binary may lack x64 in name — copy with normalised name
+        if [ ! -f "${BINARY_BOOT_DIR}/memtest86+x64.bin" ]; then
+            copy_memtest_file "${root}/memtest86+.bin" "memtest86+x64.bin" || true
+        fi
    done

    missing=0
@@ -68,6 +103,7 @@ ensure_memtest_binaries() {
    done
    [ "$missing" -eq 1 ] || return 0

+    # 2. Try apt package cache (may be empty if lb binary_memtest already purged)
    for root in cache chroot/var/cache/apt/archives /var/cache/apt/archives; do
        [ -d "$root" ] || continue
        deb="$(find "$root" -type f \( -name 'memtest86+_*.deb' -o -name 'memtest86+*.deb' \) 2>/dev/null | head -1)"
@@ -76,6 +112,15 @@ ensure_memtest_binaries() {
        break
    done

+    missing=0
+    for f in ${MEMTEST_FILES}; do
+        [ -f "${BINARY_BOOT_DIR}/${f}" ] || missing=1
+    done
+    [ "$missing" -eq 1 ] || return 0
+
+    # 3. Fallback: download fresh from apt (lb binary_memtest purges the cache)
+    download_and_extract_memtest
+
    missing=0
    for f in ${MEMTEST_FILES}; do
        if [ ! -f "${BINARY_BOOT_DIR}/${f}" ]; then
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -7,10 +7,12 @@ SIZE_MB=0
 DEVICES=""
 EXCLUDE=""
 PRECISION=""
+PRECISION_PLAN=""
+PRECISION_PLAN_SECONDS=""
 WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"

 usage() {
-    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision fp8|fp16|fp32|fp64|fp4]" >&2
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3] [--precision int8|fp8|fp16|fp32|fp64|fp4] [--precision-plan p1,p2,...,mixed] [--precision-plan-seconds s1,s2,...]" >&2
    exit 2
 }

@@ -32,6 +34,8 @@ while [ "$#" -gt 0 ]; do
        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
        --precision) [ "$#" -ge 2 ] || usage; PRECISION="$2"; shift 2 ;;
+        --precision-plan) [ "$#" -ge 2 ] || usage; PRECISION_PLAN="$2"; shift 2 ;;
+        --precision-plan-seconds) [ "$#" -ge 2 ] || usage; PRECISION_PLAN_SECONDS="$2"; shift 2 ;;
        *) usage ;;
    esac
 done
@@ -92,8 +96,12 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
    echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
    precision_arg=""
    [ -n "${PRECISION}" ] && precision_arg="--precision ${PRECISION}"
+    precision_plan_arg=""
+    [ -n "${PRECISION_PLAN}" ] && precision_plan_arg="--precision-plan ${PRECISION_PLAN}"
+    precision_plan_seconds_arg=""
+    [ -n "${PRECISION_PLAN_SECONDS}" ] && precision_plan_seconds_arg="--precision-plan-seconds ${PRECISION_PLAN_SECONDS}"
    CUDA_VISIBLE_DEVICES="${id}" \
-        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} >"${log}" 2>&1 &
+        "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" ${precision_arg} ${precision_plan_arg} ${precision_plan_seconds_arg} >"${log}" 2>&1 &
    pid=$!
    WORKERS="${WORKERS} ${pid}:${id}:${log}"
    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
Author	SHA1	Message	Date
Mikhail Chusavitin	2dccbc010c	Use MEPHI mirror, disable security repo, fix memtest in ISO build - Switch all lb mirrors to mirror.mephi.ru/debian/ for faster/reliable downloads - Disable security repo (--security false) — not needed for LiveCD - Pin MEMTEST_VERSION=6.10-4 in VERSIONS, export to hook environment - Set BEE_REQUIRE_MEMTEST=1 in build-in-container.sh — missing memtest is now fatal - Fix 9100-memtest.hook.binary: add apt-get download fallback when lb binary_memtest has already purged the package cache; handle both 5.x (memtest86+x64.bin) and 6.x (memtest86+.bin) BIOS binary naming Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 09:57:29 +03:00
Michael Chus	e84c69d360	Fix optional step log dir missing after memtest recovery — mkdir -p LOG_DIR before writing the optional step log so that a race with cleanup_build_log (EXIT trap archiving the log dir) does not cause a "Directory nonexistent" error during lb binary_checksums / lb binary_iso. Also downgrade apt-get update failure to a warning so a transient mirror outage does not block kernel ABI auto-detection when the apt cache is warm. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 07:28:36 +03:00
Michael Chus	c80a39e7ac	Add power results table, fix benchmark results refresh, bound memtester - Benchmark page now shows two result sections: Performance (scores) and Power / Thermal Fit (slot table). After any benchmark task completes the results section auto-refreshes via GET /api/benchmark/results without a full page reload. - Power results table shows each GPU slot with nominal TDP, achieved stable power limit, and P95 observed power. Rows with derated cards are highlighted amber so under-performing slots stand out at a glance. Older runs are collapsed in a <details> summary. - memtester is now wrapped with timeout(1) so a stuck memory controller cannot cause Validate Memory to hang indefinitely. Wall-clock limit is ~2.5 min per 100 MB per pass plus a 2-minute buffer. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-15 07:16:18 +03:00
Michael Chus	a5e0261ff2	Refactor power ramp to use true single-card baselines Phase 1 now calibrates each GPU individually (sequentially) so that PowerRealizationPct reflects real degradation from neighbour thermals and shared power rails. Previously the baseline came from an all-GPU-together run, making realization always ≈100% at the final ramp step. Ramp step 1 reuses single-card calibration results (no extra run); steps 2..N run targeted_power on the growing GPU subset with derating active. Remove OccupiedSlots/OccupiedSlotsNote fields and occupiedSlots() helper — they were compensation for the old all-GPU calibration approach. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 23:47:57 +03:00
Michael Chus	ee422ede3c	Revert "Add raster Easy Bee branding assets" — This reverts commit `d560b2fead`.	2026-04-14 23:00:15 +03:00
Michael Chus	d560b2fead	Add raster Easy Bee branding assets	2026-04-14 22:39:25 +03:00
Michael Chus	3cf2e9c9dc	Run power calibration for all GPUs simultaneously Previously each GPU was calibrated sequentially (one card fully done before the next started), producing the staircase temperature pattern seen on the graph. Now all GPUs run together in a single dcgmi diag -r targeted_power session per attempt. This means: - All cards are under realistic thermal load at the same time. - A single DCGM session handles the run — no resource-busy contention from concurrent dcgmi processes. - Binary search state (lo/hi) is tracked independently per GPU; each card converges to its own highest stable power limit. - Throttle counter polling covers all active GPUs in the shared ticker. - Resource-busy exponential back-off is shared (one DCGM session). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:25:05 +03:00
Michael Chus	19dbabd71d	Simplify power calibration: pure binary search, no telemetry guessing — Remove telemetry-guided initial candidate; use strict binary search midpoint at every step. Clean and predictable convergence in O(log N) attempts within the allowed power range [minLimitW, startingLimitW]. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:12:45 +03:00
Michael Chus	a6a07f2626	Replace linear power derate with binary search + telemetry-guided jump Power calibration previously stepped down 25 W at a time (linear), requiring up to 6 attempts to find a stable limit within 150 W range. New strategy: - Binary search between minLimitW (lo, assumed stable floor) and the starting/failed limit (hi, confirmed unstable), converging within a 10 W tolerance in ~4 attempts. - For thermal throttle: the first-quarter telemetry rows estimate the GPU's pre-throttle power draw. nextLimit = round5W(onset - 10 W) is used as the initial candidate instead of the binary midpoint, landing much closer to the true limit on the first step. - On success: lo is updated and a higher level is tried (binary search upward) until hi-lo ≤ tolerance, ensuring the highest stable limit is found rather than the first stable one. - Let targeted_power run to natural completion on throttle (no mid-run SIGKILL) so nv-hostengine releases its diagnostic slot cleanly before the next attempt. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 22:05:23 +03:00
Michael Chus	f87461ee4a	Detect thermal throttle with fans below 100% as cooling misconfiguration During power calibration: if a thermal throttle (sw_thermal/hw_thermal) causes ≥20% clock drop while server fans are below 98% P95 duty cycle, record a CoolingWarning on the GPU result and emit an actionable finding telling the operator to rerun with fans manually fixed at 100%. During steady-state benchmark: same signal enriches the existing thermal_limited finding with fan duty cycle and clock drift values. Covers both the main benchmark (buildBenchmarkFindings) and the power bench (NvidiaPowerBenchResult.Findings). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 21:44:57 +03:00
Michael Chus	a636146dbd	Fix power calibration failing due to DCGM resource contention When a targeted_power attempt is cancelled (e.g. after sw_thermal throttle), nv-hostengine holds the diagnostic slot asynchronously. The next attempt immediately received DCGM_ST_IN_USE (exit 222) and incorrectly derated the power limit. Now: exit 222 is detected via isDCGMResourceBusy and triggers an exponential back-off retry at the same power limit (1s, 2s, 4s, … up to 256s). Once the back-off delay would exceed 300s the calibration fails, indicating the slot is persistently held. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-14 20:41:17 +03:00
Mikhail Chusavitin	303de2df04	Add slot-aware ramp sequence to bee-bench power	2026-04-14 17:47:40 +03:00
Mikhail Chusavitin	95124d228f	Split bee-bench into perf and power workflows	2026-04-14 17:33:13 +03:00
Mikhail Chusavitin	54338dbae5	Unify live RAM runtime state	2026-04-14 16:18:33 +03:00
Mikhail Chusavitin	2be7ae6d28	Refine NVIDIA benchmark phase timing	2026-04-14 14:12:06 +03:00
Mikhail Chusavitin	b1a5035edd	Normalize task queue priorities by workflow	2026-04-14 11:13:54 +03:00
Mikhail Chusavitin	8fc986c933	Add benchmark fan duty cycle summary to report	2026-04-14 10:24:02 +03:00
Mikhail Chusavitin	88b5e0edf2	Harden IPMI power probe timeout	2026-04-14 10:18:23 +03:00
Mikhail Chusavitin	82fe1f6d26	Disable precision fallback and pin cuBLAS 13.1	2026-04-14 10:17:44 +03:00
Michael Chus	81e7c921f8	дебаг при сборке	2026-04-14 07:02:37 +03:00
Michael Chus	0fb8f2777f	Fix combined gpu burn profile capacity for fp4	2026-04-14 00:00:40 +03:00