From bf182daa8997ae4f3c2b9441a77f7b04a08f8c02 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Mon, 13 Apr 2026 23:43:12 +0300 Subject: [PATCH] Fix benchmark report methodology and rebuild gpu burn worker on toolchain changes --- audit/internal/platform/benchmark_report.go | 15 ++++++--------- iso/builder/build.sh | 14 +++++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/audit/internal/platform/benchmark_report.go b/audit/internal/platform/benchmark_report.go index 614846b..e07a8d6 100644 --- a/audit/internal/platform/benchmark_report.go +++ b/audit/internal/platform/benchmark_report.go @@ -81,8 +81,12 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string { b.WriteString("\n") } - // ── Scoring methodology ─────────────────────────────────────────────────── - b.WriteString("## Scoring Methodology\n\n") + // ── Methodology ─────────────────────────────────────────────────────────── + b.WriteString("## Methodology\n\n") + fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline -> warmup -> steady-state -> interconnect -> cooldown phases.\n", result.BenchmarkProfile) + b.WriteString("- Single-GPU compute score comes from `bee-gpu-burn` on the cuBLASLt path when available.\n") + b.WriteString("- Thermal and power limits are inferred from NVIDIA clock-event counters plus sustained telemetry.\n") + b.WriteString("- `result.json` is the canonical machine-readable source for the run.\n\n") b.WriteString("**Compute score** is derived from two phases:\n\n") b.WriteString("- **Synthetic** — each precision type (fp8, fp16, fp32, fp64, fp4) runs alone for a dedicated window. ") b.WriteString("Measures peak throughput with the full GPU dedicated to one kernel type. ") @@ -286,13 +290,6 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string { } } - // ── Methodology ─────────────────────────────────────────────────────────── - b.WriteString("## Methodology\n\n") - fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile) - b.WriteString("- Single-GPU compute score from bee-gpu-burn cuBLASLt when available.\n") - b.WriteString("- Thermal and power limitations inferred from NVIDIA clock event reason counters and sustained telemetry.\n") - b.WriteString("- `result.json` is the canonical machine-readable source for this benchmark run.\n\n") - // ── Raw files ───────────────────────────────────────────────────────────── b.WriteString("## Raw Files\n\n") b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n") diff --git a/iso/builder/build.sh b/iso/builder/build.sh index a2160e3..ac89fcf 100755 --- a/iso/builder/build.sh +++ b/iso/builder/build.sh @@ -874,8 +874,20 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then CUBLAS_CACHE="${DIST_DIR}/cublas-${CUBLAS_VERSION}+cuda${NCCL_CUDA_VERSION}" GPU_STRESS_NEED_BUILD=1 - if [ -f "$GPU_BURN_WORKER_BIN" ] && [ "${BUILDER_DIR}/bee-gpu-stress.c" -ot "$GPU_BURN_WORKER_BIN" ]; then + if [ -f "$GPU_BURN_WORKER_BIN" ]; then GPU_STRESS_NEED_BUILD=0 + for dep in \ + "${BUILDER_DIR}/bee-gpu-stress.c" \ + "${BUILDER_DIR}/VERSIONS"; do + if [ "$dep" -nt "$GPU_BURN_WORKER_BIN" ]; then + GPU_STRESS_NEED_BUILD=1 + break + fi + done + if [ "$GPU_STRESS_NEED_BUILD" = "0" ] && \ + find "${CUBLAS_CACHE}/include" "${CUBLAS_CACHE}/lib" -type f -newer "$GPU_BURN_WORKER_BIN" | grep -q .; then + GPU_STRESS_NEED_BUILD=1 + fi fi if [ "$GPU_STRESS_NEED_BUILD" = "1" ]; then