From 51b721aeb3e3e0fb9f515a2a7ea55e7512eb7306 Mon Sep 17 00:00:00 2001
From: Michael Chus <mike@mchus.pro>
Date: Sat, 18 Apr 2026 10:54:50 +0300
Subject: [PATCH] Add real-data duration estimates to benchmark and burn pages

- Add BenchmarkEstimated* constants to benchmark_types.go from _v8 logs
  (Standard Perf ~16 min, Standard Power Fit ~43 min, Stability Perf ~92 min,
  Stability Power Fit ~33 min)
- Update benchmark profile dropdown to show Perf / Power Fit timing per profile
- Add timing columns to Method Split table (Standard vs Stability per run type)
- Update burn preset labels to show "N min/GPU (sequential) or N min (parallel)"
- Clarify burn "one by one" description with sequential vs parallel scaling

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 audit/internal/platform/benchmark_types.go | 25 ++++++++++++++++++++++
 audit/internal/webui/pages.go              | 22 +++++++++----------
 2 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/audit/internal/platform/benchmark_types.go b/audit/internal/platform/benchmark_types.go
index 902eeb7..6896589 100644
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -43,6 +43,31 @@ const (
 	NvidiaBenchmarkProfileOvernight = "overnight"
 )
 
+// Estimated wall-clock durations for benchmark runs, derived from real _v8 logs.
+// Rule: when changing profile phase durations in resolveBenchmarkProfile(),
+// re-measure from actual task logs and update the constants here.
+//
+// Sources:
+//   - BenchmarkEstimatedPerfStandardSec:   MLT v8.22 ramp 1-4: 927 s; xFusion v8.22 parallel 8GPU: 1080 s
+//   - BenchmarkEstimatedPerfStabilitySec:  xFusion v8.22 ramp 1-8: 5532 s
+//   - BenchmarkEstimatedPerfOvernightSec:  derived from profile phases (SteadySec=27000)
+//   - BenchmarkEstimatedPowerStandardSec:  MLT v8.22 ramp 1-4: 2663 s; MSI v8.22 ramp 1-8: 2375 s
+//   - BenchmarkEstimatedPowerStabilitySec: xFusion v8.17/v8.22 ramp 1-8: 1977-2002 s
+const (
+	// Performance Benchmark (bee-gpu-burn).
+	// Duration is per full ramp-up run (ramp 1→N) or per single parallel run.
+	// Sequential per-GPU mode scales approximately linearly with the number of GPUs.
+	BenchmarkEstimatedPerfStandardSec  = 960  // ~16 min; ramp-up 1-4: 927 s, parallel 8GPU: 1080 s
+	BenchmarkEstimatedPerfStabilitySec = 5532 // ~92 min; ramp-up 1-8 measured
+	BenchmarkEstimatedPerfOvernightSec = 8 * 3600
+
+	// Power / Thermal Fit (dcgmi targeted_power binary-search calibration).
+	// Duration is for the full ramp-up run; individual steps vary with convergence speed.
+	BenchmarkEstimatedPowerStandardSec  = 2600 // ~43 min; ramp 1-4: 2663 s, ramp 1-8: 2375 s
+	BenchmarkEstimatedPowerStabilitySec = 2000 // ~33 min; ramp 1-8: 1977-2002 s; stability profile converges faster (longer steady → faster convergence)
+	BenchmarkEstimatedPowerOvernightSec = 3 * 3600
+)
+
 type NvidiaBenchmarkOptions struct {
 	Profile           string
 	SizeMB            int
diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go
index 07da464..4aab57b 100644
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -2110,9 +2110,9 @@ func renderBenchmark(opts HandlerOptions) string {
       <div class="form-row">
         <label>Profile</label>
         <select id="benchmark-profile">
-          <option value="standard" selected>Standard — about 15 minutes</option>
-          <option value="stability">Stability — 1 to 2 hours</option>
-          <option value="overnight">Overnight — 8 hours</option>
+          <option value="standard" selected>Standard — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</option>
+          <option value="stability">Stability — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</option>
+          <option value="overnight">Overnight — Perf ` + validateFmtDur(platform.BenchmarkEstimatedPerfOvernightSec) + ` / Power Fit ` + validateFmtDur(platform.BenchmarkEstimatedPowerOvernightSec) + `</option>
         </select>
       </div>
       <div class="form-row">
@@ -2152,11 +2152,11 @@ func renderBenchmark(opts HandlerOptions) string {
     <div class="card-body">
       <p style="font-size:13px;color:var(--muted);margin-bottom:10px">The benchmark page now exposes two fundamentally different test families so compute score and server power-fit are not mixed into one number.</p>
       <table>
-        <tr><th>Run Type</th><th>Engine</th><th>Question</th></tr>
-        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td></tr>
-        <tr><td>Power / Thermal Fit</td><td><code>dcgmi targeted_power</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td></tr>
+        <tr><th>Run Type</th><th>Engine</th><th>Question</th><th>Standard</th><th>Stability</th></tr>
+        <tr><td>Performance Benchmark</td><td><code>bee-gpu-burn</code></td><td>How much isolated compute performance does the GPU realize in this server?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + `</td></tr>
+        <tr><td>Power / Thermal Fit</td><td><code>dcgmi targeted_power</code></td><td>How much power per GPU can this server sustain as GPU count ramps up?</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `</td><td>` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `</td></tr>
       </table>
-      <p style="font-size:12px;color:var(--muted);margin-top:10px">Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
+      <p style="font-size:12px;color:var(--muted);margin-top:10px">Timings are per full ramp-up run (1 GPU → all selected), measured on 4–8 GPU servers. Use ramp-up mode for capacity work: it creates 1 GPU → 2 GPU → … → all selected steps so analysis software can derive server total score and watts-per-GPU curves.</p>
     </div>
   </div>
 </div>
@@ -2645,13 +2645,13 @@ func renderBurn() string {
   <div class="card-body burn-profile-body">
     <div class="burn-profile-col">
       <div class="form-row" style="margin:0 0 8px"><label>Preset</label></div>
-      <label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — quick check (~5 min)</span></label>
-      <label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 hour</span></label>
-      <label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 hours</span></label>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="smoke" checked><span>Smoke — 5 min/GPU (sequential) or 5 min (parallel)</span></label>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="acceptance"><span>Acceptance — 1 h/GPU (sequential) or 1 h (parallel)</span></label>
+      <label class="cb-row"><input type="radio" name="burn-profile" value="overnight"><span>Overnight — 8 h/GPU (sequential) or 8 h (parallel)</span></label>
     </div>
     <div class="burn-profile-col burn-profile-action">
       <button type="button" class="btn btn-primary" onclick="runAllBurnTasks()">Burn one by one</button>
-      <p>Run checked tests one by one. Tests run without cooldown. Each test duration is determined by the Burn Profile. Total test duration is the sum of all selected tests multiplied by the Burn Profile duration.</p>
+      <p>Runs checked tests as separate sequential tasks. In sequential GPU mode, total time = profile duration × N GPU. In parallel mode, all selected GPUs burn simultaneously for one profile duration.</p>
     </div>
     <div class="burn-profile-col burn-profile-action">
       <button type="button" class="btn btn-secondary" onclick="runPlatformStress()">Thermal Cycling</button>

Run Type	Engine	Question
Performance Benchmark	`bee-gpu-burn`	How much isolated compute performance does the GPU realize in this server?
Power / Thermal Fit	`dcgmi targeted_power`	How much power per GPU can this server sustain as GPU count ramps up?
Run Type	Engine	Question	Standard	Stability
Performance Benchmark	`bee-gpu-burn`	How much isolated compute performance does the GPU realize in this server?	` + validateFmtDur(platform.BenchmarkEstimatedPerfStandardSec) + `	` + validateFmtDur(platform.BenchmarkEstimatedPerfStabilitySec) + `
Power / Thermal Fit	`dcgmi targeted_power`	How much power per GPU can this server sustain as GPU count ramps up?	` + validateFmtDur(platform.BenchmarkEstimatedPowerStandardSec) + `	` + validateFmtDur(platform.BenchmarkEstimatedPowerStabilitySec) + `