Separate compute score from server quality score
CompositeScore = raw ComputeScore (TOPS). Throttling GPUs score lower automatically — no quality multiplier distorts the compute signal. Add ServerQualityScore (0-100): server infrastructure quality, independent of GPU model. Formula: 0.40×Stability + 0.30×PowerSustain + 0.30×Thermal. Use it to compare servers that have the same GPU, or to flag bad server conditions. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -591,7 +591,6 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
|
||||
if result.Interconnect != nil && result.Interconnect.Supported {
|
||||
for i := range result.GPUs {
|
||||
result.GPUs[i].Scores.InterconnectScore = result.Interconnect.MaxBusBWGBps
|
||||
result.GPUs[i].Scores.CompositeScore = compositeBenchmarkScore(result.GPUs[i].Scores)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1433,28 +1432,32 @@ func scoreBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkScorecard {
|
||||
runtimeUS := math.Max(1, gpu.Steady.DurationSec*1e6)
|
||||
throttleUS := float64(gpu.Throttle.HWThermalSlowdownUS+gpu.Throttle.SWThermalSlowdownUS) + float64(gpu.Throttle.SWPowerCapUS)
|
||||
score.StabilityScore = clampScore(100 - throttleUS/runtimeUS*100)
|
||||
score.CompositeScore = compositeBenchmarkScore(score)
|
||||
score.ServerQualityScore = serverQualityScore(score)
|
||||
score.CompositeScore = score.ComputeScore
|
||||
if gpu.MultiprocessorCount > 0 && gpu.Steady.AvgGraphicsClockMHz > 0 && score.ComputeScore > 0 {
|
||||
score.TOPSPerSMPerGHz = score.ComputeScore / float64(gpu.MultiprocessorCount) / (gpu.Steady.AvgGraphicsClockMHz / 1000.0)
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
// compositeBenchmarkScore is kept for compatibility with legacy callers.
|
||||
// CompositeScore = ComputeScore (no quality multiplier; throttling already
|
||||
// reduces TOPS directly, so no additional penalty is needed).
|
||||
func compositeBenchmarkScore(score BenchmarkScorecard) float64 {
|
||||
// quality_factor weights:
|
||||
// base 0.35 — floor so a GPU that fails all sustain checks still scores
|
||||
// StabilityScore 0.35 — throttle time: heaviest, direct signal of GPU not keeping up
|
||||
// PowerSustainScore 0.15 — power variance: unstable draw hints at regulation issues
|
||||
// ThermalSustainScore 0.15 — temp variance: unstable cooling hints at airflow issues
|
||||
// cap 1.00
|
||||
quality := 0.35 +
|
||||
0.35*(score.StabilityScore/100.0) +
|
||||
0.15*(score.PowerSustainScore/100.0) +
|
||||
0.15*(score.ThermalSustainScore/100.0)
|
||||
if quality > 1.00 {
|
||||
quality = 1.00
|
||||
}
|
||||
return score.ComputeScore * quality
|
||||
return score.ComputeScore
|
||||
}
|
||||
|
||||
// serverQualityScore returns a 0–100 score reflecting server infrastructure
|
||||
// quality, independent of GPU model or compute speed.
|
||||
//
|
||||
// StabilityScore (throttle time) 0.40 — heaviest: direct evidence GPU can't sustain load
|
||||
// PowerSustainScore (power CV) 0.30 — unstable draw hints at PSU/VRM issues
|
||||
// ThermalSustainScore (temp CV) 0.30 — unstable temp hints at airflow/cooling issues
|
||||
func serverQualityScore(score BenchmarkScorecard) float64 {
|
||||
q := 0.40*(score.StabilityScore/100.0) +
|
||||
0.30*(score.PowerSustainScore/100.0) +
|
||||
0.30*(score.ThermalSustainScore/100.0)
|
||||
return clampScore(q * 100)
|
||||
}
|
||||
|
||||
func detectBenchmarkDegradationReasons(gpu BenchmarkGPUResult, normalizationStatus string) []string {
|
||||
@@ -1646,7 +1649,7 @@ func finalizeBenchmarkGPUResult(gpu BenchmarkGPUResult) BenchmarkGPUResult {
|
||||
gpu.Status = "OK"
|
||||
}
|
||||
if gpu.Scores.CompositeScore == 0 {
|
||||
gpu.Scores.CompositeScore = compositeBenchmarkScore(gpu.Scores)
|
||||
gpu.Scores.CompositeScore = gpu.Scores.ComputeScore
|
||||
}
|
||||
return gpu
|
||||
}
|
||||
|
||||
@@ -110,13 +110,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
|
||||
b.WriteString("High variance means inconsistent cooling (fan bursts, flow instability). Score = max(0, 100 − TempCV × 3).\n\n")
|
||||
b.WriteString("**StabilityScore** — fraction of benchmark time the GPU spent throttling (thermal + power-cap). ")
|
||||
b.WriteString("1% throttle → score 99; 10% throttle → score 90. This is the heaviest quality signal.\n\n")
|
||||
b.WriteString("**Composite score** = `Compute × quality_factor` \n")
|
||||
b.WriteString("`quality = 0.35 + 0.35×Stability + 0.15×PowerSustain + 0.15×ThermalSustain`, capped at 1.00.\n\n")
|
||||
b.WriteString("**CompositeScore** = raw compute TOPS (fp32-equivalent). A throttling GPU scores lower automatically.\n\n")
|
||||
b.WriteString("**ServerQualityScore** (0–100) — server infrastructure quality, independent of GPU model: \n")
|
||||
b.WriteString("`0.40×Stability + 0.30×PowerSustain + 0.30×ThermalSustain` \n")
|
||||
b.WriteString("Use this to compare servers with the same GPU type, or to flag a bad server.\n\n")
|
||||
|
||||
// ── Scorecard table ───────────────────────────────────────────────────────
|
||||
b.WriteString("## Scorecard\n\n")
|
||||
b.WriteString("| GPU | Status | Composite | Compute | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
|
||||
b.WriteString("|-----|--------|-----------|---------|-----------|-------|------------|-------------|---------------|-----------------|-----------|-------------|\n")
|
||||
b.WriteString("| GPU | Status | Compute TOPS | Synthetic | Mixed | Mixed Eff. | TOPS/SM/GHz | Server Quality | Power Sustain | Thermal Sustain | Stability | Interconnect |\n")
|
||||
b.WriteString("|-----|--------|--------------|-----------|-------|------------|-------------|----------------|---------------|-----------------|-----------|-------------|\n")
|
||||
for _, gpu := range result.GPUs {
|
||||
name := strings.TrimSpace(gpu.Name)
|
||||
if name == "" {
|
||||
@@ -142,15 +144,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
|
||||
if gpu.Scores.MixedEfficiency > 0 {
|
||||
mixedEff = fmt.Sprintf("%.1f%%", gpu.Scores.MixedEfficiency*100)
|
||||
}
|
||||
fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %s | %s | %s | %.1f | %.1f | %.1f | %s |\n",
|
||||
fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %s | %s | %s | %s | %.1f | %.1f | %.1f | %.1f | %s |\n",
|
||||
gpu.Index, name,
|
||||
gpu.Status,
|
||||
gpu.Scores.CompositeScore,
|
||||
gpu.Scores.ComputeScore,
|
||||
synthetic,
|
||||
mixed,
|
||||
mixedEff,
|
||||
topsPerSM,
|
||||
gpu.Scores.ServerQualityScore,
|
||||
gpu.Scores.PowerSustainScore,
|
||||
gpu.Scores.ThermalSustainScore,
|
||||
gpu.Scores.StabilityScore,
|
||||
|
||||
@@ -213,7 +213,14 @@ type BenchmarkScorecard struct {
|
||||
ThermalSustainScore float64 `json:"thermal_sustain_score"`
|
||||
StabilityScore float64 `json:"stability_score"`
|
||||
InterconnectScore float64 `json:"interconnect_score"`
|
||||
CompositeScore float64 `json:"composite_score"`
|
||||
// ServerQualityScore (0–100) reflects server infrastructure quality independent
|
||||
// of GPU model. Combines throttle time, power variance, and temp variance.
|
||||
// Use this to compare servers with the same GPU, or to flag a bad server
|
||||
// that throttles an otherwise fast GPU.
|
||||
ServerQualityScore float64 `json:"server_quality_score"`
|
||||
// CompositeScore is the raw compute score (TOPS, fp32-equivalent).
|
||||
// A throttling GPU will score lower here automatically — no quality multiplier.
|
||||
CompositeScore float64 `json:"composite_score"`
|
||||
// TOPSPerSMPerGHz is compute efficiency independent of clock speed and SM count.
|
||||
TOPSPerSMPerGHz float64 `json:"tops_per_sm_per_ghz,omitempty"`
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user