Separate compute score from server quality score

CompositeScore = raw ComputeScore (TOPS). Throttling GPUs score lower
automatically — no quality multiplier distorting the compute signal.

Add ServerQualityScore (0-100): server infrastructure quality independent
of GPU model. Formula: 0.40×Stability + 0.30×PowerSustain + 0.30×Thermal.
Use to compare servers with the same GPU or flag bad server conditions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 00:45:55 +03:00
parent d8ca0dca2c
commit 7a0b0934df
3 changed files with 36 additions and 24 deletions

View File

@@ -591,7 +591,6 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
if result.Interconnect != nil && result.Interconnect.Supported {
for i := range result.GPUs {
result.GPUs[i].Scores.InterconnectScore = result.Interconnect.MaxBusBWGBps
result.GPUs[i].Scores.CompositeScore = compositeBenchmarkScore(result.GPUs[i].Scores)
}
}
}
@@ -1433,28 +1432,32 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
runtimeUS := math.Max(1, gpu.Steady.DurationSec*1e6)
throttleUS := float64(gpu.Throttle.HWThermalSlowdownUS+gpu.Throttle.SWThermalSlowdownUS) + float64(gpu.Throttle.SWPowerCapUS)
score.StabilityScore = clampScore(100 - throttleUS/runtimeUS*100)
score.CompositeScore = compositeBenchmarkScore(score)
score.ServerQualityScore = serverQualityScore(score)
score.CompositeScore = score.ComputeScore
if gpu.MultiprocessorCount > 0 && gpu.Steady.AvgGraphicsClockMHz > 0 && score.ComputeScore > 0 {
score.TOPSPerSMPerGHz = score.ComputeScore / float64(gpu.MultiprocessorCount) / (gpu.Steady.AvgGraphicsClockMHz / 1000.0)
}
return score
}
// compositeBenchmarkScore is kept for compatibility with legacy callers.
//
// It returns score.ComputeScore unchanged. The earlier quality multiplier
// (0.35 base + weighted Stability/PowerSustain/ThermalSustain, capped at 1.00)
// is intentionally gone: throttling already reduces the measured TOPS
// directly, so applying an additional penalty would count it twice.
// Sustain and stability signals are reported separately through
// ServerQualityScore.
func compositeBenchmarkScore(score BenchmarkScorecard) float64 {
	return score.ComputeScore
}
// serverQualityScore folds the three sustain/stability sub-scores of a
// scorecard into a single server-infrastructure quality figure on a 0-100
// scale. It does not read ComputeScore, so the result is independent of how
// fast the GPU is.
//
// Weights applied to each sub-score (each is first divided by 100):
//
//	StabilityScore      0.40 — throttle time; heaviest, direct evidence the GPU can't sustain load
//	PowerSustainScore   0.30 — power CV; unstable draw hints at PSU/VRM issues
//	ThermalSustainScore 0.30 — temp CV; unstable temp hints at airflow/cooling issues
//
// The weighted sum is scaled back to percent and passed through clampScore
// (defined elsewhere in this package; presumably bounds the value to 0-100).
func serverQualityScore(score BenchmarkScorecard) float64 {
	const (
		stabilityWeight = 0.40
		powerWeight     = 0.30
		thermalWeight   = 0.30
	)

	stabilityPart := stabilityWeight * (score.StabilityScore / 100.0)
	powerPart := powerWeight * (score.PowerSustainScore / 100.0)
	thermalPart := thermalWeight * (score.ThermalSustainScore / 100.0)

	// Sum left-to-right in the same order as the documented formula.
	weighted := stabilityPart + powerPart + thermalPart
	return clampScore(weighted * 100)
}
func detectBenchmarkDegradationReasons(gpu BenchmarkGPUResult, normalizationStatus string) []string {
@@ -1646,7 +1649,7 @@ func finalizeBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkGPUResult {
gpu.Status = "OK"
}
if gpu.Scores.CompositeScore == 0 {
gpu.Scores.CompositeScore = compositeBenchmarkScore(gpu.Scores)
gpu.Scores.CompositeScore = gpu.Scores.ComputeScore
}
return gpu
}

View File

@@ -110,13 +110,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
b.WriteString("High variance means inconsistent cooling (fan bursts, flow instability). Score = max(0, 100 TempCV × 3).\n\n")
b.WriteString("**StabilityScore** — fraction of benchmark time the GPU spent throttling (thermal + power-cap). ")
b.WriteString("1% throttle → score 99; 10% throttle → score 90. This is the heaviest quality signal.\n\n")
b.WriteString("**Composite score** = `Compute × quality_factor` \n")
b.WriteString("`quality = 0.35 + 0.35×Stability + 0.15×PowerSustain + 0.15×ThermalSustain`, capped at 1.00.\n\n")
b.WriteString("**CompositeScore** = raw compute TOPS (fp32-equivalent). A throttling GPU scores lower automatically.\n\n")
b.WriteString("**ServerQualityScore** (0100) — server infrastructure quality, independent of GPU model: \n")
b.WriteString("`0.40×Stability + 0.30×PowerSustain + 0.30×ThermalSustain` \n")
b.WriteString("Use this to compare servers with the same GPU type, or to flag a bad server.\n\n")
// ── Scorecard table ───────────────────────────────────────────────────────
b.WriteString("## Scorecard\n\n")
b.WriteString("| GPU | Status | Composite | Compute | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
b.WriteString("|-----|--------|-----------|---------|-----------|-------|------------|-------------|---------------|-----------------|-----------|-------------|\n")
b.WriteString("| GPU | Status | Compute TOPS | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Server Quality | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
b.WriteString("|-----|--------|--------------|-----------|-------|------------|-------------|----------------|---------------|-----------------|-----------|-------------|\n")
for _, gpu := range result.GPUs {
name := strings.TrimSpace(gpu.Name)
if name == "" {
@@ -142,15 +144,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
if gpu.Scores.MixedEfficiency > 0 {
mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
}
fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %s | %s | %s | %.1f | %.1f | %.1f | %s |\n",
fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %s | %s | %s | %s | %.1f | %.1f | %.1f | %.1f | %s |\n",
gpu.Index, name,
gpu.Status,
gpu.Scores.CompositeScore,
gpu.Scores.ComputeScore,
synthetic,
mixed,
mixedEff,
topsPerSM,
gpu.Scores.ServerQualityScore,
gpu.Scores.PowerSustainScore,
gpu.Scores.ThermalSustainScore,
gpu.Scores.StabilityScore,

View File

@@ -213,7 +213,14 @@ type BenchmarkScorecard struct {
ThermalSustainScore float64 `json:"thermal_sustain_score"`
StabilityScore float64 `json:"stability_score"`
InterconnectScore float64 `json:"interconnect_score"`
CompositeScore float64 `json:"composite_score"`
// ServerQualityScore (0-100) reflects server infrastructure quality independent
// of GPU model. Combines throttle time, power variance, and temp variance.
// Use this to compare servers with the same GPU, or to flag a bad server
// that throttles an otherwise fast GPU.
ServerQualityScore float64 `json:"server_quality_score"`
// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
// A throttling GPU will score lower here automatically — no quality multiplier.
CompositeScore float64 `json:"composite_score"`
// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
}