Separate compute score from server quality score

CompositeScore = raw ComputeScore (TOPS). Throttling GPUs score lower automatically, so no quality multiplier distorts the compute signal.

Add ServerQualityScore (0–100): server infrastructure quality, independent of GPU model. Formula: 0.40×Stability + 0.30×PowerSustain + 0.30×ThermalSustain. Use it to compare servers with the same GPU or to flag bad server conditions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 00:45:55 +03:00
parent d8ca0dca2c
commit 7a0b0934df
3 changed files with 36 additions and 24 deletions
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -591,7 +591,6 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 		if result.Interconnect != nil && result.Interconnect.Supported {
 			for i := range result.GPUs {
 				result.GPUs[i].Scores.InterconnectScore = result.Interconnect.MaxBusBWGBps
-				result.GPUs[i].Scores.CompositeScore = compositeBenchmarkScore(result.GPUs[i].Scores)
 			}
 		}
 	}
@@ -1433,28 +1432,32 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
 	runtimeUS := math.Max(1, gpu.Steady.DurationSec*1e6)
 	throttleUS := float64(gpu.Throttle.HWThermalSlowdownUS+gpu.Throttle.SWThermalSlowdownUS) + float64(gpu.Throttle.SWPowerCapUS)
 	score.StabilityScore = clampScore(100 - throttleUS/runtimeUS*100)
-	score.CompositeScore = compositeBenchmarkScore(score)
+	score.ServerQualityScore = serverQualityScore(score)
+	score.CompositeScore = score.ComputeScore
 	if gpu.MultiprocessorCount > 0 && gpu.Steady.AvgGraphicsClockMHz > 0 && score.ComputeScore > 0 {
 		score.TOPSPerSMPerGHz = score.ComputeScore / float64(gpu.MultiprocessorCount) / (gpu.Steady.AvgGraphicsClockMHz / 1000.0)
 	}
 	return score
 }

+// compositeBenchmarkScore is kept for compatibility with legacy callers.
+// CompositeScore = ComputeScore (no quality multiplier; throttling already
+// reduces TOPS directly, so no additional penalty is needed).
 func compositeBenchmarkScore(score BenchmarkScorecard) float64 {
-	// quality_factor weights:
-	//   base          0.35 — floor so a GPU that fails all sustain checks still scores
-	//   StabilityScore 0.35 — throttle time: heaviest, direct signal of GPU not keeping up
-	//   PowerSustainScore 0.15 — power variance: unstable draw hints at regulation issues
-	//   ThermalSustainScore 0.15 — temp variance: unstable cooling hints at airflow issues
-	//   cap           1.00
-	quality := 0.35 +
-		0.35*(score.StabilityScore/100.0) +
-		0.15*(score.PowerSustainScore/100.0) +
-		0.15*(score.ThermalSustainScore/100.0)
-	if quality > 1.00 {
-		quality = 1.00
-	}
-	return score.ComputeScore * quality
+	return score.ComputeScore
+}
+
+// serverQualityScore returns a 0–100 score reflecting server infrastructure
+// quality, independent of GPU model or compute speed.
+//
+//	StabilityScore (throttle time)   0.40 — heaviest: direct evidence GPU can't sustain load
+//	PowerSustainScore (power CV)     0.30 — unstable draw hints at PSU/VRM issues
+//	ThermalSustainScore (temp CV)    0.30 — unstable temp hints at airflow/cooling issues
+func serverQualityScore(score BenchmarkScorecard) float64 {
+	q := 0.40*(score.StabilityScore/100.0) +
+		0.30*(score.PowerSustainScore/100.0) +
+		0.30*(score.ThermalSustainScore/100.0)
+	return clampScore(q * 100)
 }

 func detectBenchmarkDegradationReasons(gpu BenchmarkGPUResult, normalizationStatus string) []string {
@@ -1646,7 +1649,7 @@ func finalizeBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkGPUResult {
 		gpu.Status = "OK"
 	}
 	if gpu.Scores.CompositeScore == 0 {
-		gpu.Scores.CompositeScore = compositeBenchmarkScore(gpu.Scores)
+		gpu.Scores.CompositeScore = gpu.Scores.ComputeScore
 	}
 	return gpu
 }
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -110,13 +110,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 	b.WriteString("High variance means inconsistent cooling (fan bursts, flow instability). Score = max(0, 100 − TempCV × 3).\n\n")
 	b.WriteString("**StabilityScore** — fraction of benchmark time the GPU spent throttling (thermal + power-cap). ")
 	b.WriteString("1% throttle → score 99; 10% throttle → score 90. This is the heaviest quality signal.\n\n")
-	b.WriteString("**Composite score** = `Compute × quality_factor`  \n")
-	b.WriteString("`quality = 0.35 + 0.35×Stability + 0.15×PowerSustain + 0.15×ThermalSustain`, capped at 1.00.\n\n")
+	b.WriteString("**CompositeScore** = raw compute TOPS (fp32-equivalent). A throttling GPU scores lower automatically.\n\n")
+	b.WriteString("**ServerQualityScore** (0–100) — server infrastructure quality, independent of GPU model:  \n")
+	b.WriteString("`0.40×Stability + 0.30×PowerSustain + 0.30×ThermalSustain`  \n")
+	b.WriteString("Use this to compare servers with the same GPU type, or to flag a bad server.\n\n")

 	// ── Scorecard table ───────────────────────────────────────────────────────
 	b.WriteString("## Scorecard\n\n")
-	b.WriteString("| GPU | Status | Composite | Compute | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
-	b.WriteString("|-----|--------|-----------|---------|-----------|-------|------------|-------------|---------------|-----------------|-----------|-------------|\n")
+	b.WriteString("| GPU | Status | Compute TOPS | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Server Quality | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
+	b.WriteString("|-----|--------|--------------|-----------|-------|------------|-------------|----------------|---------------|-----------------|-----------|-------------|\n")
 	for _, gpu := range result.GPUs {
 		name := strings.TrimSpace(gpu.Name)
 		if name == "" {
@@ -142,15 +144,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		if gpu.Scores.MixedEfficiency > 0 {
 			mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
 		}
-		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %s | %s | %s | %.1f | %.1f | %.1f | %s |\n",
+		fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %s | %s | %s | %s | %.1f | %.1f | %.1f | %.1f | %s |\n",
 			gpu.Index, name,
 			gpu.Status,
 			gpu.Scores.CompositeScore,
-			gpu.Scores.ComputeScore,
 			synthetic,
 			mixed,
 			mixedEff,
 			topsPerSM,
+			gpu.Scores.ServerQualityScore,
 			gpu.Scores.PowerSustainScore,
 			gpu.Scores.ThermalSustainScore,
 			gpu.Scores.StabilityScore,
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -213,7 +213,14 @@ type BenchmarkScorecard struct {
 	ThermalSustainScore float64 `json:"thermal_sustain_score"`
 	StabilityScore      float64 `json:"stability_score"`
 	InterconnectScore   float64 `json:"interconnect_score"`
-	CompositeScore      float64 `json:"composite_score"`
+	// ServerQualityScore (0–100) reflects server infrastructure quality independent
+	// of GPU model. Combines throttle time, power variance, and temp variance.
+	// Use this to compare servers with the same GPU, or to flag a bad server
+	// that throttles an otherwise fast GPU.
+	ServerQualityScore float64 `json:"server_quality_score"`
+	// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
+	// A throttling GPU will score lower here automatically — no quality multiplier.
+	CompositeScore float64 `json:"composite_score"`
 	// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
 	TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
 }