Add per-precision benchmark phases, weighted TOPS scoring, and ECC tracking

- Split steady window into 6 equal slots: fp8/fp16/fp32/fp64/fp4 + combined
- Each precision phase runs bee-gpu-burn with --precision filter so PowerCVPct reflects single-kernel stability (not round-robin artifact)
- Add fp4 support in bee-gpu-stress.c for Blackwell (cc>=100) via existing CUDA_R_4F_E2M1 guard
- Weighted TOPS: fp64×2.0, fp32×1.0, fp16×0.5, fp8×0.25, fp4×0.125
- SyntheticScore = sum of weighted TOPS from per-precision phases
- MixedScore = sum from combined phase; MixedEfficiency = Mixed/Synthetic
- ComputeScore = SyntheticScore × (1 + MixedEfficiency × 0.3)
- ECC volatile counters sampled before/after each phase and overall
- DegradationReasons: ecc_uncorrected_errors, ecc_corrected_errors
- Report: per-precision stability table with ECC columns, methodology section
- Ramp-up history table redesign: GPU indices as columns, runs as rows

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 10:49:49 +03:00
parent 02e44b1172
commit bf6ecab4f0
9 changed files with 390 additions and 144 deletions

View File

@@ -497,6 +497,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
GPUIndices []int `json:"gpu_indices"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
StaggerGPUStart bool `json:"stagger_gpu_start"`
ParallelGPUs bool `json:"parallel_gpus"`
Loader string `json:"loader"`
Profile string `json:"profile"`
DisplayName string `json:"display_name"`
@@ -519,6 +520,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
GPUIndices: body.GPUIndices,
ExcludeGPUIndices: body.ExcludeGPUIndices,
StaggerGPUStart: body.StaggerGPUStart,
ParallelGPUs: body.ParallelGPUs,
Loader: body.Loader,
BurnProfile: body.Profile,
DisplayName: body.DisplayName,

View File

@@ -1928,23 +1928,10 @@ func renderSATCard(id, label, runAction, headerActions, body string) string {
// ── Benchmark ─────────────────────────────────────────────────────────────────
type benchmarkHistoryColumn struct {
key string
label string
name string
index int
parallel bool
}
type benchmarkHistoryCell struct {
score float64
present bool
}
type benchmarkHistoryRun struct {
generatedAt time.Time
displayTime string
cells map[string]benchmarkHistoryCell
gpuScores map[int]float64 // GPU index → composite score
}
func renderBenchmark(opts HandlerOptions) string {
@@ -2206,17 +2193,17 @@ benchmarkLoadGPUs();
}
func renderBenchmarkResultsCard(exportDir string) string {
columns, runs := loadBenchmarkHistory(exportDir)
maxIdx, runs := loadBenchmarkHistory(exportDir)
return renderBenchmarkResultsCardFromRuns(
"Benchmark Results",
"Composite score by saved benchmark run and GPU.",
"No saved benchmark runs yet.",
columns,
maxIdx,
runs,
)
}
func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, columns []benchmarkHistoryColumn, runs []benchmarkHistoryRun) string {
func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
if len(runs) == 0 {
return `<div class="card"><div class="card-head">` + html.EscapeString(title) + `</div><div class="card-body"><p style="color:var(--muted);font-size:13px">` + html.EscapeString(emptyMessage) + `</p></div></div>`
}
@@ -2226,22 +2213,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
b.WriteString(`<p style="color:var(--muted);font-size:13px;margin-bottom:12px">` + html.EscapeString(description) + `</p>`)
}
b.WriteString(`<div style="overflow-x:auto">`)
b.WriteString(`<table><thead><tr><th>Test</th><th>Time</th>`)
for _, col := range columns {
b.WriteString(`<th>` + html.EscapeString(col.label) + `</th>`)
b.WriteString(`<table><thead><tr><th>Run</th><th>Time</th>`)
for i := 0; i <= maxGPUIndex; i++ {
b.WriteString(`<th>GPU ` + strconv.Itoa(i) + `</th>`)
}
b.WriteString(`</tr></thead><tbody>`)
for i, run := range runs {
b.WriteString(`<tr>`)
b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
for _, col := range columns {
cell, ok := run.cells[col.key]
if !ok || !cell.present {
for idx := 0; idx <= maxGPUIndex; idx++ {
score, ok := run.gpuScores[idx]
if !ok {
b.WriteString(`<td style="color:var(--muted)">-</td>`)
continue
}
b.WriteString(`<td>` + fmt.Sprintf("%.2f", cell.score) + `</td>`)
b.WriteString(`<td>` + fmt.Sprintf("%.2f", score) + `</td>`)
}
b.WriteString(`</tr>`)
}
@@ -2249,22 +2236,22 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
return b.String()
}
func loadBenchmarkHistory(exportDir string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
func loadBenchmarkHistory(exportDir string) (int, []benchmarkHistoryRun) {
baseDir := app.DefaultBenchmarkBaseDir
if strings.TrimSpace(exportDir) != "" {
baseDir = filepath.Join(exportDir, "bee-benchmark")
}
paths, err := filepath.Glob(filepath.Join(baseDir, "gpu-benchmark-*", "result.json"))
if err != nil || len(paths) == 0 {
return nil, nil
return -1, nil
}
sort.Strings(paths)
return loadBenchmarkHistoryFromPaths(paths)
}
func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []benchmarkHistoryRun) {
columnByKey := make(map[string]benchmarkHistoryColumn)
func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun) {
runs := make([]benchmarkHistoryRun, 0, len(paths))
maxGPUIndex := -1
for _, path := range paths {
raw, err := os.ReadFile(path)
if err != nil {
@@ -2277,102 +2264,22 @@ func loadBenchmarkHistoryFromPaths(paths []string) ([]benchmarkHistoryColumn, []
run := benchmarkHistoryRun{
generatedAt: result.GeneratedAt,
displayTime: result.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
cells: make(map[string]benchmarkHistoryCell),
gpuScores: make(map[int]float64),
}
if result.ParallelGPUs {
// All GPUs ran simultaneously — one column per server, score = avg composite.
gpuModelCount := make(map[string]int)
for _, gpu := range result.GPUs {
gpuModelCount[strings.TrimSpace(gpu.Name)]++
}
scoreSum := make(map[string]float64)
scoreCnt := make(map[string]int)
for _, gpu := range result.GPUs {
key := "parallel|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name)
scoreSum[key] += gpu.Scores.CompositeScore
scoreCnt[key]++
count := gpuModelCount[strings.TrimSpace(gpu.Name)]
columnByKey[key] = benchmarkHistoryColumn{
key: key,
label: benchmarkHistoryParallelLabel(result.ServerModel, gpu.Name, count),
name: strings.TrimSpace(gpu.Name),
index: -1,
parallel: true,
}
}
for key, sum := range scoreSum {
run.cells[key] = benchmarkHistoryCell{score: sum / float64(scoreCnt[key]), present: true}
}
} else {
// Each GPU ran independently — one column per GPU index.
for _, gpu := range result.GPUs {
key := "gpu|" + strings.TrimSpace(result.ServerModel) + "|" + strings.TrimSpace(gpu.Name) + "|" + strconv.Itoa(gpu.Index)
columnByKey[key] = benchmarkHistoryColumn{
key: key,
label: benchmarkHistoryPerGPULabel(gpu.Name, gpu.Index),
name: strings.TrimSpace(gpu.Name),
index: gpu.Index,
parallel: false,
}
run.cells[key] = benchmarkHistoryCell{score: gpu.Scores.CompositeScore, present: true}
for _, gpu := range result.GPUs {
run.gpuScores[gpu.Index] = gpu.Scores.CompositeScore
if gpu.Index > maxGPUIndex {
maxGPUIndex = gpu.Index
}
}
runs = append(runs, run)
}
columns := make([]benchmarkHistoryColumn, 0, len(columnByKey))
for _, col := range columnByKey {
columns = append(columns, col)
}
// Sequential GPU columns first (sorted by GPU index), then parallel server columns.
sort.Slice(columns, func(i, j int) bool {
if columns[i].parallel != columns[j].parallel {
return !columns[i].parallel // sequential first
}
if columns[i].parallel {
li := strings.ToLower(columns[i].label)
lj := strings.ToLower(columns[j].label)
if li != lj {
return li < lj
}
return columns[i].key < columns[j].key
}
// Sequential: sort by GPU index, then name.
if columns[i].index != columns[j].index {
return columns[i].index < columns[j].index
}
return strings.ToLower(columns[i].name) < strings.ToLower(columns[j].name)
})
sort.Slice(runs, func(i, j int) bool {
return runs[i].generatedAt.After(runs[j].generatedAt)
})
return columns, runs
return maxGPUIndex, runs
}
// benchmarkHistoryPerGPULabel formats a label for a single-GPU column: "GPU #N — ModelName".
func benchmarkHistoryPerGPULabel(gpuName string, index int) string {
gpuName = strings.TrimSpace(gpuName)
if gpuName == "" {
gpuName = "Unknown GPU"
}
return fmt.Sprintf("GPU #%d — %s", index, gpuName)
}
// benchmarkHistoryParallelLabel formats a label for an all-GPU parallel column:
// "ServerModel — N× ModelName (All GPUs)" or "N× ModelName (All GPUs)" if no server.
func benchmarkHistoryParallelLabel(serverModel, gpuName string, count int) string {
serverModel = strings.TrimSpace(serverModel)
gpuName = strings.TrimSpace(gpuName)
if gpuName == "" {
gpuName = "Unknown GPU"
}
gpuPart := fmt.Sprintf("%d× %s (All GPUs)", count, gpuName)
if serverModel == "" {
return gpuPart
}
return fmt.Sprintf("%s — %s", serverModel, gpuPart)
}
// ── Burn ──────────────────────────────────────────────────────────────────────

View File

@@ -693,8 +693,8 @@ func TestBenchmarkPageRendersSavedResultsTable(t *testing.T) {
for _, needle := range []string{
`Benchmark Results`,
`Composite score by saved benchmark run and GPU.`,
`GPU #0 — NVIDIA H100 PCIe`,
`GPU #1 — NVIDIA H100 PCIe`,
`GPU 0`,
`GPU 1`,
`#1`,
wantTime,
`1176.25`,

View File

@@ -422,7 +422,7 @@ func TestWriteTaskReportArtifactsIncludesBenchmarkResultsForTask(t *testing.T) {
for _, needle := range []string{
`Benchmark Results`,
`Composite score for this benchmark task.`,
`GPU #0 — NVIDIA H100 PCIe`,
`GPU 0`,
`1176.25`,
} {
if !strings.Contains(html, needle) {