Add per-precision benchmark phases, weighted TOPS scoring, and ECC tracking

- Split steady window into 6 equal slots: fp8/fp16/fp32/fp64/fp4 + combined
- Each precision phase runs bee-gpu-burn with --precision filter so PowerCVPct reflects single-kernel stability (not round-robin artifact)
- Add fp4 support in bee-gpu-stress.c for Blackwell (cc>=100) via existing CUDA_R_4F_E2M1 guard
- Weighted TOPS: fp64×2.0, fp32×1.0, fp16×0.5, fp8×0.25, fp4×0.125
- SyntheticScore = sum of weighted TOPS from per-precision phases
- MixedScore = sum from combined phase; MixedEfficiency = Mixed/Synthetic
- ComputeScore = SyntheticScore × (1 + MixedEfficiency × 0.3)
- ECC volatile counters sampled before/after each phase and overall
- DegradationReasons: ecc_uncorrected_errors, ecc_corrected_errors
- Report: per-precision stability table with ECC columns, methodology section
- Ramp-up history table redesign: GPU indices as columns, runs as rows

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 10:49:49 +03:00
parent 02e44b1172
commit bf6ecab4f0
9 changed files with 390 additions and 144 deletions
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -497,6 +497,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 				GPUIndices         []int    `json:"gpu_indices"`
 				ExcludeGPUIndices  []int    `json:"exclude_gpu_indices"`
 				StaggerGPUStart    bool     `json:"stagger_gpu_start"`
+				ParallelGPUs       bool     `json:"parallel_gpus"`
 				Loader             string   `json:"loader"`
 			Profile            string   `json:"profile"`
 			DisplayName        string   `json:"display_name"`
@@ -519,6 +520,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
 				GPUIndices:         body.GPUIndices,
 				ExcludeGPUIndices:  body.ExcludeGPUIndices,
 				StaggerGPUStart:    body.StaggerGPUStart,
+				ParallelGPUs:       body.ParallelGPUs,
 				Loader:             body.Loader,
 			BurnProfile:        body.Profile,
 			DisplayName:        body.DisplayName,
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -1928,23 +1928,10 @@ func renderSATCard(id, label, runAction, headerActions, body string) string {

 // ── Benchmark ─────────────────────────────────────────────────────────────────

-type benchmarkHistoryColumn struct {
-	key      string
-	label    string
-	name     string
-	index    int
-	parallel bool
-}
-
-type benchmarkHistoryCell struct {
-	score   float64
-	present bool
-}
-
 type benchmarkHistoryRun struct {
 	generatedAt time.Time
 	displayTime string
-	cells       map[string]benchmarkHistoryCell
+	gpuScores   map[int]float64 // GPU index → composite score
 }

 func renderBenchmark(opts HandlerOptions) string {
@@ -2206,17 +2193,17 @@ benchmarkLoadGPUs();
 }

 func renderBenchmarkResultsCard(exportDir string) string {
-	columns, runs := loadBenchmarkHistory(exportDir)
+	maxIdx, runs := loadBenchmarkHistory(exportDir)
 	return renderBenchmarkResultsCardFromRuns(
 		"Benchmark Results",
 		"Composite score by saved benchmark run and GPU.",
 		"No saved benchmark runs yet.",
-		columns,
+		maxIdx,
 		runs,
 	)
 }

-func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, columns []benchmarkHistoryColumn, runs []benchmarkHistoryRun) string {
+func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
 	if len(runs) == 0 {
 		return `<div class="card"><div class="card-head">` + html.EscapeString(title) + `</div><div class="card-body"><p style="color:var(--muted);font-size:13px">` + html.EscapeString(emptyMessage) + `</p></div></div>`
 	}
@@ -2226,22 +2213,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
 		b.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">` + html.EscapeString(description) + `</p>`)
 	}
 	b.WriteString(`<div style="overflow-x:auto">`)
-	b.WriteString(`<table><thead><tr><th>Test</th><th>Time</th>`)
-	for _, col := range columns {
-		b.WriteString(`<th>` + html.EscapeString(col.label) + `</th>`)
+	b.WriteString(`<table><thead><tr><th>Run</th><th>Time</th>`)
+	for i := 0; i <= maxGPUIndex; i++ {
+		b.WriteString(`<th>GPU ` + strconv.Itoa(i) + `</th>`)
 	}
 	b.WriteString(`</tr></thead><tbody>`)
 	for i, run := range runs {
 		b.WriteString(`<tr>`)
 		b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
 		b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
-		for _, col := range columns {
-			cell, ok := run.cells[col.key]
-			if !ok || !cell.present {
+		for idx := 0; idx <= maxGPUIndex; idx++ {
+			score, ok := run.gpuScores[idx]
+			if !ok {
 				b.WriteString(`<td style="color:var(--muted)">-</td>`)
 				continue
 			}
-			b.WriteString(`<td>` + fmt.Sprintf("%.2f", cell.score) + `</td>`)
+			b.WriteString(`<td>` + fmt.Sprintf("%.2f", score) + `</td>`)
 		}
 		b.WriteString(`</tr>`)
 	}
@@ -2249,22 +2236,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
 	return b.String()
 }

-func loadBenchmarkHistory(exportDir string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
+func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
 	baseDir := app.DefaultBenchmarkBaseDir
 	if strings.TrimSpace(exportDir) != "" {
 		baseDir = filepath.Join(exportDir, "bee-benchmark")
 	}
 	paths, err := filepath.Glob(filepath.Join(baseDir, "gpu-benchmark-*", "result.json"))
 	if err != nil || len(paths) == 0 {
-		return nil, nil
+		return -1, nil
 	}
 	sort.Strings(paths)
 	return loadBenchmarkHistoryFromPaths(paths)
 }

-func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
-	columnByKey := make(map[string]benchmarkHistoryColumn)
+func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
 	runs := make([]benchmarkHistoryRun, 0, len(paths))
+	maxGPUIndex := -1
 	for _, path := range paths {
 		raw, err := os.ReadFile(path)
 		if err != nil {
@@ -2277,102 +2264,22 @@ func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []
 		run := benchmarkHistoryRun{
 			generatedAt: result.GeneratedAt,
 			displayTime: result.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
-			cells:       make(map[string]benchmarkHistoryCell),
+			gpuScores:   make(map[int]float64),
 		}
-
-		if result.ParallelGPUs {
-			// All GPUs ran simultaneously — one column per server, score = avg composite.
-			gpuModelCount := make(map[string]int)
-			for _, gpu := range result.GPUs {
-				gpuModelCount[strings.TrimSpace(gpu.Name)]++
-			}
-			scoreSum := make(map[string]float64)
-			scoreCnt := make(map[string]int)
-			for _, gpu := range result.GPUs {
-				key := "parallel|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name)
-				scoreSum[key] += gpu.Scores.CompositeScore
-				scoreCnt[key]++
-				count := gpuModelCount[strings.TrimSpace(gpu.Name)]
-				columnByKey[key] = benchmarkHistoryColumn{
-					key:      key,
-					label:    benchmarkHistoryParallelLabel(result.ServerModel, gpu.Name, count),
-					name:     strings.TrimSpace(gpu.Name),
-					index:    -1,
-					parallel: true,
-				}
-			}
-			for key, sum := range scoreSum {
-				run.cells[key] = benchmarkHistoryCell{score: sum / float64(scoreCnt[key]), present: true}
-			}
-		} else {
-			// Each GPU ran independently — one column per GPU index.
-			for _, gpu := range result.GPUs {
-				key := "gpu|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name) + "|" + strconv.Itoa(gpu.Index)
-				columnByKey[key] = benchmarkHistoryColumn{
-					key:      key,
-					label:    benchmarkHistoryPerGPULabel(gpu.Name, gpu.Index),
-					name:     strings.TrimSpace(gpu.Name),
-					index:    gpu.Index,
-					parallel: false,
-				}
-				run.cells[key] = benchmarkHistoryCell{score: gpu.Scores.CompositeScore, present: true}
+		for _, gpu := range result.GPUs {
+			run.gpuScores[gpu.Index] = gpu.Scores.CompositeScore
+			if gpu.Index > maxGPUIndex {
+				maxGPUIndex = gpu.Index
 			}
 		}
 		runs = append(runs, run)
 	}
-
-	columns := make([]benchmarkHistoryColumn, 0, len(columnByKey))
-	for _, col := range columnByKey {
-		columns = append(columns, col)
-	}
-	// Sequential GPU columns first (sorted by GPU index), then parallel server columns.
-	sort.Slice(columns, func(i, j int) bool {
-		if columns[i].parallel != columns[j].parallel {
-			return !columns[i].parallel // sequential first
-		}
-		if columns[i].parallel {
-			li := strings.ToLower(columns[i].label)
-			lj := strings.ToLower(columns[j].label)
-			if li != lj {
-				return li < lj
-			}
-			return columns[i].key < columns[j].key
-		}
-		// Sequential: sort by GPU index, then name.
-		if columns[i].index != columns[j].index {
-			return columns[i].index < columns[j].index
-		}
-		return strings.ToLower(columns[i].name) < strings.ToLower(columns[j].name)
-	})
 	sort.Slice(runs, func(i, j int) bool {
 		return runs[i].generatedAt.After(runs[j].generatedAt)
 	})
-	return columns, runs
+	return maxGPUIndex, runs
 }

-// benchmarkHistoryPerGPULabel formats a label for a single-GPU column: "GPU #N — ModelName".
-func benchmarkHistoryPerGPULabel(gpuName string, index int) string {
-	gpuName = strings.TrimSpace(gpuName)
-	if gpuName == "" {
-		gpuName = "Unknown GPU"
-	}
-	return fmt.Sprintf("GPU #%d — %s", index, gpuName)
-}
-
-// benchmarkHistoryParallelLabel formats a label for an all-GPU parallel column:
-// "ServerModel — N× ModelName (All GPUs)" or "N× ModelName (All GPUs)" if no server.
-func benchmarkHistoryParallelLabel(serverModel, gpuName string, count int) string {
-	serverModel = strings.TrimSpace(serverModel)
-	gpuName = strings.TrimSpace(gpuName)
-	if gpuName == "" {
-		gpuName = "Unknown GPU"
-	}
-	gpuPart := fmt.Sprintf("%d× %s (All GPUs)", count, gpuName)
-	if serverModel == "" {
-		return gpuPart
-	}
-	return fmt.Sprintf("%s — %s", serverModel, gpuPart)
-}

 // ── Burn ──────────────────────────────────────────────────────────────────────

--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -693,8 +693,8 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
 	for _, needle := range []string{
 		`Benchmark Results`,
 		`Composite score by saved benchmark run and GPU.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
-		`GPU #1 — NVIDIA H100 PCIe`,
+		`GPU 0`,
+		`GPU 1`,
 		`#1`,
 		wantTime,
 		`1176.25`,
--- a/audit/internal/webui/tasks_test.go
+++ b/audit/internal/webui/tasks_test.go
@@ -422,7 +422,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
 	for _, needle := range []string{
 		`Benchmark Results`,
 		`Composite score for this benchmark task.`,
-		`GPU #0 — NVIDIA H100 PCIe`,
+		`GPU 0`,
 		`1176.25`,
 	} {
 		if !strings.Contains(html, needle) {