From c5b2081ac98ba9f8776f97966371ca938fe8c209 Mon Sep 17 00:00:00 2001
From: Mikhail Chusavitin <mchusavitin@mchusmbp.local>
Date: Thu, 16 Apr 2026 09:58:02 +0300
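Subject: [PATCH] Disable unstable fp4/fp64 benchmark phases

Also fall back to per-precision steady telemetry when the mixed steady
phase yields none, and treat previously found power limits as seeds that
are revalidated at every ramp step instead of as fixed values.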

---
 audit/internal/platform/benchmark.go        | 195 +++++++++++++++-----
 audit/internal/platform/benchmark_report.go |  20 +-
 audit/internal/platform/benchmark_test.go   |  38 +++-
 3 files changed, 187 insertions(+), 66 deletions(-)

diff --git a/audit/internal/platform/benchmark.go b/audit/internal/platform/benchmark.go
index 5cc1426..0714d77 100644
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -94,9 +94,13 @@ var (
 )
 
 // benchmarkPrecisionPhases lists the precision categories run as individual
-// steady-state windows before the combined steady pass.  Order is from lowest
+// steady-state windows before the combined steady pass. Order is from lowest
 // to highest power draw so thermal ramp-up is gradual.
-var benchmarkPrecisionPhases = []string{"int8", "fp8", "fp16", "fp32", "fp64", "fp4"}
+//
+// fp64 and fp4 are intentionally disabled for now: both are currently unstable
+// on the target fleet and can abort the mixed steady stage after the earlier
+// phases already collected useful telemetry.
+var benchmarkPrecisionPhases = []string{"int8", "fp8", "fp16", "fp32"}
 
 func computeCapabilityCode(raw string) int {
 	raw = strings.TrimSpace(raw)
@@ -124,6 +128,15 @@ func benchmarkSupportedPrecisions(computeCapability string) []string {
 	return out
 }
 
+// benchmarkPrecisionEnabled reports whether category is on the scoring allow-list (fp64/fp4 are not).
+func benchmarkPrecisionEnabled(category string) bool {
+	switch category {
+	case "int8", "fp8", "fp16", "fp16_bf16", "fp32", "fp32_tf32":
+		return true
+	}
+	return false
+}
+
 func buildBenchmarkSteadyPlan(spec benchmarkProfileSpec, precisions []string, metricStage func(string) string) (planLabels []string, planPhases []benchmarkPlannedPhase, basePhaseSec int, mixedPhaseSec int) {
 	if len(precisions) == 0 {
 		precisions = append([]string(nil), benchmarkPrecisionPhases...)
@@ -514,6 +527,7 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 				appendBenchmarkMetrics(&metricRows, cooldownRows, fmt.Sprintf("gpu-%d-cooldown", idx), &metricTimelineSec, float64(spec.CooldownSec))
 			}
 
+			applyBenchmarkSteadyFallback(&gpuResult)
 			gpuResult.Scores = scoreBenchmarkGPUResult(gpuResult)
 			gpuResult.DegradationReasons = detectBenchmarkDegradationReasons(gpuResult, result.Normalization.Status)
 			if anomaly := detectPowerAnomaly(metricRows, idx); anomaly != "" {
@@ -1398,19 +1412,58 @@ func summarizeBenchmarkCooling(rows []GPUMetricRow) *BenchmarkCoolingSummary {
 	return summary
 }
 
+func benchmarkTelemetryAvailable(summary BenchmarkTelemetrySummary) bool {
+	return summary.Samples > 0 || summary.DurationSec > 0 // any sample or a non-zero duration counts as telemetry
+}
+
+func benchmarkPrecisionSteadyFallback(phases []BenchmarkPrecisionSteadyPhase) (BenchmarkTelemetrySummary, string, bool) {
+	// Pick the phase with the longest steady window; ties go to the higher
+	// P95 power. Phases without any telemetry are ignored.
+	var best BenchmarkTelemetrySummary
+	bestLabel, found := "", false
+	for _, phase := range phases {
+		cur := phase.Steady
+		if !benchmarkTelemetryAvailable(cur) {
+			continue
+		}
+		longer := cur.DurationSec > best.DurationSec
+		tieHigherPower := cur.DurationSec == best.DurationSec && cur.P95PowerW > best.P95PowerW
+		if !found || longer || tieHigherPower {
+			best = cur
+			bestLabel = phase.Precision
+			found = true
+		}
+	}
+	return best, bestLabel, found
+}
+
+func applyBenchmarkSteadyFallback(gpu *BenchmarkGPUResult) {
+	// Nothing to do for a nil result or when Steady already carries telemetry.
+	if gpu == nil || benchmarkTelemetryAvailable(gpu.Steady) {
+		return
+	}
+	if summary, source, ok := benchmarkPrecisionSteadyFallback(gpu.PrecisionSteady); ok {
+		note := fmt.Sprintf("mixed steady telemetry unavailable; reporting steady-state fallback from %s precision phase", source)
+		gpu.Steady, gpu.Notes = summary, append(gpu.Notes, note)
+	}
+}
+
 func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
 	score := BenchmarkScorecard{}
 
 	// SyntheticScore: sum of fp32-equivalent TOPS from per-precision phases.
 	// Each precision ran alone with full GPU dedicated — peak capability.
 	for _, p := range gpu.PrecisionSteady {
+		if !benchmarkPrecisionEnabled(p.Precision) {
+			continue
+		}
 		score.SyntheticScore += p.WeightedTeraOpsPerSec
 	}
 
 	// MixedScore: sum of fp32-equivalent TOPS from the combined phase.
 	// All precisions compete simultaneously — closer to real inference workloads.
 	for _, p := range gpu.PrecisionResults {
-		if p.Supported {
+		if p.Supported && benchmarkPrecisionEnabled(p.Category) {
 			score.MixedScore += p.WeightedTeraOpsPerSec
 		}
 	}
@@ -1441,10 +1494,17 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
 	// so CV reflects genuine power regulation, not workload switching).
 	if len(gpu.PrecisionSteady) > 0 {
 		var sum float64
+		var count int
 		for _, p := range gpu.PrecisionSteady {
+			if !benchmarkPrecisionEnabled(p.Precision) {
+				continue
+			}
 			sum += clampScore(100 - p.Steady.PowerCVPct*3)
+			count++
+		}
+		if count > 0 {
+			score.PowerSustainScore = sum / float64(count)
 		}
-		score.PowerSustainScore = sum / float64(len(gpu.PrecisionSteady))
 	} else if gpu.Steady.PowerCVPct > 0 {
 		score.PowerSustainScore = clampScore(100 - gpu.Steady.PowerCVPct*3)
 	}
@@ -2512,6 +2572,7 @@ func runNvidiaBenchmarkParallel(
 	// Score and finalize each GPU.
 	for _, idx := range selected {
 		r := gpuResults[idx]
+		applyBenchmarkSteadyFallback(r)
 		r.Scores = scoreBenchmarkGPUResult(*r)
 		r.DegradationReasons = detectBenchmarkDegradationReasons(*r, result.Normalization.Status)
 		pr := parseResults[idx]
@@ -2694,18 +2755,21 @@ func summarizeCPULoad(samples []float64) *BenchmarkCPULoad {
 	return cl
 }
 
-// runBenchmarkPowerCalibration runs targeted_power per GPU and actively watches
-// throttle counters. If a GPU starts throttling, the current targeted_power run
-// is canceled immediately, the power limit is reduced, and a fresh full cycle
-// is started again from the beginning. The selected reduced power limit stays
-// active for the main benchmark and is restored by the caller afterwards.
+// runBenchmarkPowerCalibration runs targeted_power for the supplied GPU set and
+// actively watches throttle counters. seedLimits, when provided, are treated as
+// the starting point for this calibration pass rather than as immutable fixed
+// limits. This matters during cumulative ramp-up: once an additional GPU is
+// introduced, every already-active GPU must be revalidated under the new
+// thermal state instead of assuming its previous single-step limit is still
+// valid. The selected reduced power limits stay active for the main benchmark
+// and are restored by the caller afterwards.
 func runBenchmarkPowerCalibration(
 	ctx context.Context,
 	verboseLog, runDir string,
 	gpuIndices []int,
 	infoByIndex map[int]benchmarkGPUInfo,
 	logFunc func(string),
-	fixedLimits map[int]int,
+	seedLimits map[int]int,
 ) (map[int]benchmarkPowerCalibrationResult, []benchmarkRestoreAction) {
 	const calibDurationSec = 120
 	const maxDerateW = 150
@@ -2739,7 +2803,6 @@ func runBenchmarkPowerCalibration(
 		err  error
 	}
 
-
 	// gpuCalibState holds per-GPU binary search state during parallel calibration.
 	type gpuCalibState struct {
 		idx            int
@@ -2796,19 +2859,20 @@ func runBenchmarkPowerCalibration(
 			hi:             appliedLimitW + 1, // not yet tested, not yet confirmed unstable
 			calib:          benchmarkPowerCalibrationResult{AppliedPowerLimitW: float64(appliedLimitW)},
 		}
-		if fixedLimits != nil {
-			if fixedW, ok := fixedLimits[idx]; ok {
-				// This GPU's limit was established in a prior ramp step and must
-				// remain unchanged. Apply it immediately and skip the binary search.
-				if canDerate && fixedW > 0 {
-					_ = setBenchmarkPowerLimit(ctx, verboseLog, idx, fixedW)
+		if seedLimits != nil {
+			if seedW, ok := seedLimits[idx]; ok && seedW > 0 {
+				// A previously validated limit is only a starting point. Re-run
+				// targeted_power under the current multi-GPU thermal load and derate
+				// again if this step shows new throttling.
+				if canDerate {
+					_ = setBenchmarkPowerLimit(ctx, verboseLog, idx, seedW)
 				}
-				s.appliedLimitW = fixedW
-				s.calib.AppliedPowerLimitW = float64(fixedW)
-				s.calib.Completed = true
-				s.converged = true
+				s.appliedLimitW = seedW
+				s.hi = seedW + 1
+				s.calib.AppliedPowerLimitW = float64(seedW)
+				s.calib.Derated = seedW < s.originalLimitW
 				s.calib.Notes = append(s.calib.Notes,
-					fmt.Sprintf("fixed limit: %d W (held from prior ramp step)", fixedW))
+					fmt.Sprintf("seed limit: %d W (revalidating under current thermal load)", seedW))
 			}
 		}
 		states = append(states, s)
@@ -3091,7 +3155,6 @@ func powerBenchDurationSec(profile string) int {
 	}
 }
 
-
 func cloneBenchmarkGPUInfoMap(src map[int]benchmarkGPUInfo) map[int]benchmarkGPUInfo {
 	out := make(map[int]benchmarkGPUInfo, len(src))
 	for k, v := range src {
@@ -3392,14 +3455,14 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		firstCalib := calibByIndex[firstIdx]
 		stableLimits[firstIdx] = int(math.Round(firstCalib.AppliedPowerLimitW))
 		ramp := NvidiaPowerBenchStep{
-			StepIndex:         1,
-			GPUIndices:        []int{firstIdx},
-			NewGPUIndex:       firstIdx,
-			NewGPUStableLimitW: firstCalib.AppliedPowerLimitW,
+			StepIndex:           1,
+			GPUIndices:          []int{firstIdx},
+			NewGPUIndex:         firstIdx,
+			NewGPUStableLimitW:  firstCalib.AppliedPowerLimitW,
 			TotalObservedPowerW: firstCalib.Summary.P95PowerW,
 			AvgObservedPowerW:   firstCalib.Summary.P95PowerW,
-			Derated:           firstCalib.Derated,
-			Status:            "OK",
+			Derated:             firstCalib.Derated,
+			Status:              "OK",
 		}
 		if !firstCalib.Completed {
 			ramp.Status = "FAILED"
@@ -3417,8 +3480,9 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			len(result.RecommendedSlotOrder), firstIdx, firstCalib.AppliedPowerLimitW))
 	}
 
-	// Steps 2..N: each step fixes previously calibrated GPUs and searches only
-	// the new GPU's stable limit in the combined thermal environment.
+	// Steps 2..N: each step revalidates every already-active GPU under the new
+	// cumulative thermal environment and also calibrates the newly introduced
+	// GPU. Previously found limits are used only as seeds for the search.
 	for stepNum := 1; stepNum < len(result.RecommendedSlotOrder); stepNum++ {
 		step := stepNum + 1
 		subset := append([]int(nil), result.RecommendedSlotOrder[:step]...)
@@ -3426,17 +3490,18 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		stepDir := filepath.Join(runDir, fmt.Sprintf("step-%02d", step))
 		_ = os.MkdirAll(stepDir, 0755)
 
-		// All previously calibrated GPUs are fixed at their stable limits.
-		fixedForStep := make(map[int]int, len(stableLimits))
+		// Reuse the latest stable limits as starting points, but re-check every
+		// active GPU in this hotter configuration.
+		seedForStep := make(map[int]int, len(stableLimits))
 		for k, v := range stableLimits {
-			fixedForStep[k] = v
+			seedForStep[k] = v
 		}
 
-		logFunc(fmt.Sprintf("power ramp: step %d/%d — calibrating GPU %d with %d fixed GPU(s)",
-			step, len(result.RecommendedSlotOrder), newGPUIdx, len(fixedForStep)))
+		logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",
+			step, len(result.RecommendedSlotOrder), len(subset), newGPUIdx))
 
 		stepInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
-		stepCalib, stepRestore := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, fixedForStep)
+		stepCalib, stepRestore := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, seedForStep)
 		// Accumulate restore actions; they all run in the outer defer.
 		allRestoreActions = append(allRestoreActions, stepRestore...)
 
@@ -3457,26 +3522,46 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			ramp.AvgObservedPowerW = ramp.TotalObservedPowerW / float64(len(subset))
 		}
 
-		// Determine stable limit for the new GPU.
-		if c, ok := stepCalib[newGPUIdx]; ok && c.Completed {
-			stableLimits[newGPUIdx] = int(math.Round(c.AppliedPowerLimitW))
-			ramp.NewGPUStableLimitW = c.AppliedPowerLimitW
-			ramp.Derated = c.Derated
+		for _, idx := range subset {
+			c, ok := stepCalib[idx]
+			if !ok || !c.Completed {
+				fallback := 0
+				if lim, ok := stableLimits[idx]; ok && lim > 0 {
+					fallback = lim
+				} else if fb, ok := calibByIndex[idx]; ok {
+					fallback = int(math.Round(fb.AppliedPowerLimitW))
+				}
+				if fallback > 0 {
+					stableLimits[idx] = fallback
+				}
+				ramp.Status = "FAILED"
+				ramp.Notes = append(ramp.Notes,
+					fmt.Sprintf("GPU %d did not complete targeted_power in ramp step %d; keeping previous stable limit %d W", idx, step, fallback))
+				result.OverallStatus = "PARTIAL"
+				continue
+			}
+
+			prevLimit, hadPrev := stableLimits[idx]
+			newLimit := int(math.Round(c.AppliedPowerLimitW))
+			stableLimits[idx] = newLimit
+			if idx == newGPUIdx {
+				ramp.NewGPUStableLimitW = c.AppliedPowerLimitW
+				ramp.Derated = c.Derated
+			}
 			if c.Derated {
 				ramp.Status = "PARTIAL"
 				if result.OverallStatus == "OK" {
 					result.OverallStatus = "PARTIAL"
 				}
-				result.Findings = append(result.Findings, fmt.Sprintf("Ramp step %d (GPU %d) required derating to %.0f W under combined thermal load.", step, newGPUIdx, c.AppliedPowerLimitW))
 			}
-		} else {
-			// Calibration failed — fall back to single-card limit.
-			fb := calibByIndex[newGPUIdx]
-			stableLimits[newGPUIdx] = int(math.Round(fb.AppliedPowerLimitW))
-			ramp.NewGPUStableLimitW = fb.AppliedPowerLimitW
-			ramp.Status = "FAILED"
-			ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete targeted_power in ramp step %d; using single-card limit %.0f W", newGPUIdx, step, fb.AppliedPowerLimitW))
-			result.OverallStatus = "PARTIAL"
+			if hadPrev && newLimit < prevLimit {
+				ramp.Notes = append(ramp.Notes,
+					fmt.Sprintf("GPU %d was re-derated from %d W to %d W under combined thermal load.", idx, prevLimit, newLimit))
+			}
+		}
+
+		if c, ok := stepCalib[newGPUIdx]; ok && c.Completed && c.Derated {
+			result.Findings = append(result.Findings, fmt.Sprintf("Ramp step %d (GPU %d) required derating to %.0f W under combined thermal load.", step, newGPUIdx, c.AppliedPowerLimitW))
 		}
 
 		result.RampSteps = append(result.RampSteps, ramp)
@@ -3495,6 +3580,14 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		if lim, ok := stableLimits[result.GPUs[i].Index]; ok {
 			result.GPUs[i].StablePowerLimitW = float64(lim)
 		}
+		if result.GPUs[i].StablePowerLimitW > 0 && result.GPUs[i].AppliedPowerLimitW > 0 &&
+			result.GPUs[i].StablePowerLimitW < result.GPUs[i].AppliedPowerLimitW {
+			result.GPUs[i].Derated = true
+			result.Findings = append(result.Findings, fmt.Sprintf(
+				"GPU %d required additional derating from %.0f W (single-card) to %.0f W under full-system thermal load.",
+				result.GPUs[i].Index, result.GPUs[i].AppliedPowerLimitW, result.GPUs[i].StablePowerLimitW,
+			))
+		}
 	}
 
 	// PlatformMaxTDPW = sum of all stable limits — the actual sustained power
diff --git a/audit/internal/platform/benchmark_report.go b/audit/internal/platform/benchmark_report.go
index 22ca59e..c285234 100644
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -261,14 +261,18 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		b.WriteString("\n")
 
 		// Steady-state telemetry
-		fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
-		b.WriteString("| | Avg | P95 |\n|---|---|---|\n")
-		fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW)
-		fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC)
-		fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz)
-		fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz)
-		fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
-		b.WriteString("\n")
+		if benchmarkTelemetryAvailable(gpu.Steady) {
+			fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec))
+			b.WriteString("| | Avg | P95 |\n|---|---|---|\n")
+			fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW)
+			fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC)
+			fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz)
+			fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz)
+			fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct)
+			b.WriteString("\n")
+		} else {
+			b.WriteString("**Steady-state telemetry:** unavailable\n\n")
+		}
 
 		// Per-precision stability phases.
 		if len(gpu.PrecisionSteady) > 0 {
diff --git a/audit/internal/platform/benchmark_test.go b/audit/internal/platform/benchmark_test.go
index 7c0d540..92b26a4 100644
--- a/audit/internal/platform/benchmark_test.go
+++ b/audit/internal/platform/benchmark_test.go
@@ -49,8 +49,8 @@ func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
 		benchmarkPrecisionPhases,
 		func(label string) string { return label },
 	)
-	if len(labels) != 7 || len(phases) != 7 {
-		t.Fatalf("labels=%d phases=%d want 7", len(labels), len(phases))
+	if len(labels) != 5 || len(phases) != 5 {
+		t.Fatalf("labels=%d phases=%d want 5", len(labels), len(phases))
 	}
 	if basePhaseSec != 60 {
 		t.Fatalf("basePhaseSec=%d want 60", basePhaseSec)
@@ -61,7 +61,7 @@ func TestBuildBenchmarkSteadyPlanStandard(t *testing.T) {
 	if phases[len(phases)-1].PlanLabel != "mixed" || phases[len(phases)-1].DurationSec != 300 {
 		t.Fatalf("mixed phase=%+v want duration 300", phases[len(phases)-1])
 	}
-	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,60,60,300" {
+	if benchmarkPlanDurationsCSV(phases) != "60,60,60,60,300" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
@@ -80,7 +80,7 @@ func TestBuildBenchmarkSteadyPlanStability(t *testing.T) {
 	if mixedPhaseSec != 3600 {
 		t.Fatalf("mixedPhaseSec=%d want 3600", mixedPhaseSec)
 	}
-	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,300,300,3600" {
+	if benchmarkPlanDurationsCSV(phases) != "300,300,300,300,3600" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
@@ -99,7 +99,7 @@ func TestBuildBenchmarkSteadyPlanOvernight(t *testing.T) {
 	if mixedPhaseSec != 14400 {
 		t.Fatalf("mixedPhaseSec=%d want 14400", mixedPhaseSec)
 	}
-	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,3600,3600,14400" {
+	if benchmarkPlanDurationsCSV(phases) != "3600,3600,3600,3600,14400" {
 		t.Fatalf("durations=%q", benchmarkPlanDurationsCSV(phases))
 	}
 }
@@ -133,10 +133,10 @@ func TestSplitBenchmarkRowsByPlannedPhaseUsesPhaseDurations(t *testing.T) {
 func TestBenchmarkSupportedPrecisionsSkipsFP4BeforeBlackwell(t *testing.T) {
 	t.Parallel()
 
-	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64" {
+	if got := benchmarkSupportedPrecisions("9.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" {
 		t.Fatalf("supported=%v", got)
 	}
-	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32,fp64,fp4" {
+	if got := benchmarkSupportedPrecisions("10.0"); strings.Join(got, ",") != "int8,fp8,fp16,fp32" {
 		t.Fatalf("supported=%v", got)
 	}
 }
@@ -314,6 +314,30 @@ func TestRenderBenchmarkReportListsUnifiedArtifacts(t *testing.T) {
 	}
 }
 
+func TestScoreBenchmarkGPUIgnoresDisabledPrecisions(t *testing.T) {
+	t.Parallel()
+	// fp64/fp4 entries carry a sentinel 999 so any leak into a score is obvious.
+	gpu := BenchmarkGPUResult{
+		PrecisionSteady: []BenchmarkPrecisionSteadyPhase{
+			{Precision: "fp16", WeightedTeraOpsPerSec: 100},
+			{Precision: "fp64", WeightedTeraOpsPerSec: 999},
+			{Precision: "fp4", WeightedTeraOpsPerSec: 999},
+		},
+		PrecisionResults: []BenchmarkPrecisionResult{
+			{Category: "fp32_tf32", Supported: true, WeightedTeraOpsPerSec: 50},
+			{Category: "fp64", Supported: true, WeightedTeraOpsPerSec: 999},
+			{Category: "fp4", Supported: true, WeightedTeraOpsPerSec: 999},
+		},
+	}
+	score := scoreBenchmarkGPUResult(gpu)
+	if score.SyntheticScore != 100 {
+		t.Fatalf("SyntheticScore=%f want 100", score.SyntheticScore)
+	}
+	if score.MixedScore != 50 {
+		t.Fatalf("MixedScore=%f want 50", score.MixedScore)
+	}
+}
+
 func TestEnrichGPUInfoWithMaxClocks(t *testing.T) {
 	t.Parallel()