package platform

import (
	"fmt"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"time"
)

// renderBenchmarkReport renders the plain-text benchmark report for result
// without any terminal chart section.
func renderBenchmarkReport(result NvidiaBenchmarkResult) string {
	return renderBenchmarkReportWithCharts(result, nil)
}

// benchmarkReportChart is a pre-rendered terminal chart that can be embedded
// into the text report under its own titled sub-section.
type benchmarkReportChart struct {
	Title   string
	Content string
}

// ansiEscapePattern matches escape sequences of the form ESC[<digits;>m
// (terminal color/style codes) so they can be removed from chart text.
var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`)

// renderBenchmarkReportWithCharts renders the plain-text benchmark report for
// result and, when charts contains at least one chart with non-blank content,
// appends a "Terminal Charts" section.
//
// Sections are emitted in a fixed order: header, executive summary (only when
// there are findings), warnings (only when present), per-GPU scorecard,
// interconnect (only when result.Interconnect is set), terminal charts,
// server power (only when result.ServerPower is set), methodology, raw files.
func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string {
	var b strings.Builder

	// Header.
	b.WriteString("Bee NVIDIA Benchmark Report\n")
	b.WriteString("===========================\n\n")
	// The layout ends in a literal "UTC", so the timestamp must be converted
	// to UTC first; otherwise a local-zone time would be mislabeled.
	fmt.Fprintf(&b, "Generated: %s\n", result.GeneratedAt.UTC().Format("2006-01-02 15:04:05 UTC"))
	fmt.Fprintf(&b, "Host: %s\n", result.Hostname)
	fmt.Fprintf(&b, "Profile: %s\n", result.BenchmarkProfile)
	fmt.Fprintf(&b, "Overall status: %s\n", result.OverallStatus)
	fmt.Fprintf(&b, "Selected GPUs: %s\n", joinIndexList(result.SelectedGPUIndices))
	fmt.Fprintf(&b, "Normalization: %s\n\n", result.Normalization.Status)

	// Executive summary.
	if len(result.Findings) > 0 {
		b.WriteString("Executive Summary\n")
		b.WriteString("-----------------\n")
		for _, finding := range result.Findings {
			fmt.Fprintf(&b, "- %s\n", finding)
		}
		b.WriteString("\n")
	}

	// Warnings.
	if len(result.Warnings) > 0 {
		b.WriteString("Warnings\n")
		b.WriteString("--------\n")
		for _, warning := range result.Warnings {
			fmt.Fprintf(&b, "- %s\n", warning)
		}
		b.WriteString("\n")
	}

	// Per-GPU scorecard. The header is always printed, even with no GPUs.
	b.WriteString("Per GPU Scorecard\n")
	b.WriteString("-----------------\n")
	for _, gpu := range result.GPUs {
		fmt.Fprintf(&b, "GPU %d %s\n", gpu.Index, gpu.Name)
		fmt.Fprintf(&b, " Status: %s\n", gpu.Status)
		fmt.Fprintf(&b, " Composite score: %.2f\n", gpu.Scores.CompositeScore)
		fmt.Fprintf(&b, " Compute score: %.2f\n", gpu.Scores.ComputeScore)
		// Optional scores are only shown when they carry a positive value.
		if gpu.Scores.TOPSPerSMPerGHz > 0 {
			fmt.Fprintf(&b, " Compute efficiency: %.3f TOPS/SM/GHz\n", gpu.Scores.TOPSPerSMPerGHz)
		}
		fmt.Fprintf(&b, " Power sustain: %.1f\n", gpu.Scores.PowerSustainScore)
		fmt.Fprintf(&b, " Thermal sustain: %.1f\n", gpu.Scores.ThermalSustainScore)
		fmt.Fprintf(&b, " Stability: %.1f\n", gpu.Scores.StabilityScore)
		if gpu.Scores.InterconnectScore > 0 {
			fmt.Fprintf(&b, " Interconnect: %.1f\n", gpu.Scores.InterconnectScore)
		}
		if len(gpu.DegradationReasons) > 0 {
			fmt.Fprintf(&b, " Degradation reasons: %s\n", strings.Join(gpu.DegradationReasons, ", "))
		}
		fmt.Fprintf(&b, " Avg power/temp/clock: %.1f W / %.1f C / %.0f MHz\n",
			gpu.Steady.AvgPowerW, gpu.Steady.AvgTempC, gpu.Steady.AvgGraphicsClockMHz)
		fmt.Fprintf(&b, " P95 power/temp/clock: %.1f W / %.1f C / %.0f MHz\n",
			gpu.Steady.P95PowerW, gpu.Steady.P95TempC, gpu.Steady.P95GraphicsClockMHz)
		if len(gpu.PrecisionResults) > 0 {
			b.WriteString(" Precision results:\n")
			for _, precision := range gpu.PrecisionResults {
				if precision.Supported {
					fmt.Fprintf(&b, " - %s: %.2f TOPS lanes=%d iterations=%d\n",
						precision.Name, precision.TeraOpsPerSec, precision.Lanes, precision.Iterations)
				} else {
					fmt.Fprintf(&b, " - %s: unsupported (%s)\n", precision.Name, precision.Notes)
				}
			}
		}
		fmt.Fprintf(&b, " Throttle: %s\n", formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec))
		if len(gpu.Notes) > 0 {
			b.WriteString(" Notes:\n")
			for _, note := range gpu.Notes {
				fmt.Fprintf(&b, " - %s\n", note)
			}
		}
		b.WriteString("\n")
	}

	// Interconnect.
	if ic := result.Interconnect; ic != nil {
		b.WriteString("Interconnect\n")
		b.WriteString("------------\n")
		fmt.Fprintf(&b, "Status: %s\n", ic.Status)
		if ic.Supported {
			fmt.Fprintf(&b, "Avg algbw / busbw: %.1f / %.1f GB/s\n", ic.AvgAlgBWGBps, ic.AvgBusBWGBps)
			fmt.Fprintf(&b, "Max algbw / busbw: %.1f / %.1f GB/s\n", ic.MaxAlgBWGBps, ic.MaxBusBWGBps)
		}
		for _, note := range ic.Notes {
			fmt.Fprintf(&b, "- %s\n", note)
		}
		b.WriteString("\n")
	}

	// Terminal charts. Charts are cleaned up first so the section header is
	// only printed when at least one chart actually has something to show.
	type renderedChart struct {
		title   string
		content string
	}
	var rendered []renderedChart
	for _, chart := range charts {
		content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content))
		if content == "" {
			continue
		}
		rendered = append(rendered, renderedChart{title: chart.Title, content: content})
	}
	if len(rendered) > 0 {
		b.WriteString("Terminal Charts\n")
		b.WriteString("---------------\n")
		for _, chart := range rendered {
			fmt.Fprintf(&b, "%s\n", chart.title)
			// Underline by rune count so non-ASCII titles are not over-underlined.
			fmt.Fprintf(&b, "%s\n", strings.Repeat("~", len([]rune(chart.title))))
			fmt.Fprintf(&b, "%s\n\n", chart.content)
		}
	}

	// Server power.
	if sp := result.ServerPower; sp != nil {
		b.WriteString("Server Power (IPMI)\n")
		b.WriteString("-------------------\n")
		if !sp.Available {
			b.WriteString("Unavailable\n")
		} else {
			fmt.Fprintf(&b, " Server idle: %.0f W\n", sp.IdleW)
			fmt.Fprintf(&b, " Server under load: %.0f W\n", sp.LoadedW)
			fmt.Fprintf(&b, " Server delta: %.0f W\n", sp.DeltaW)
			fmt.Fprintf(&b, " GPU reported (sum): %.0f W\n", sp.GPUReportedSumW)
			if sp.ReportingRatio > 0 {
				fmt.Fprintf(&b, " Reporting ratio: %.2f (1.0 = accurate, <0.75 = GPU over-reports)\n", sp.ReportingRatio)
			}
		}
		// Notes are printed whether or not the measurement was available.
		for _, note := range sp.Notes {
			fmt.Fprintf(&b, " Note: %s\n", note)
		}
		b.WriteString("\n")
	}

	// Methodology.
	b.WriteString("Methodology\n")
	b.WriteString("-----------\n")
	fmt.Fprintf(&b, "- Profile %s uses standardized baseline, warmup, steady-state, interconnect, and cooldown phases.\n", result.BenchmarkProfile)
	b.WriteString("- Single-GPU compute score comes from bee-gpu-burn cuBLASLt output when available.\n")
	b.WriteString("- Thermal and power limitations are inferred from NVIDIA clock event reason counters and sustained telemetry.\n")
	b.WriteString("- result.json is the canonical machine-readable source for this benchmark run.\n\n")

	// Raw files.
	b.WriteString("Raw Files\n")
	b.WriteString("---------\n")
	b.WriteString("- result.json\n")
	b.WriteString("- report.txt\n")
	b.WriteString("- summary.txt\n")
	b.WriteString("- verbose.log\n")
	b.WriteString("- gpu-*-baseline-metrics.csv/html/term.txt\n")
	b.WriteString("- gpu-*-warmup.log\n")
	b.WriteString("- gpu-*-steady.log\n")
	b.WriteString("- gpu-*-steady-metrics.csv/html/term.txt\n")
	b.WriteString("- gpu-*-cooldown-metrics.csv/html/term.txt\n")
	if result.Interconnect != nil {
		b.WriteString("- nccl-all-reduce.log\n")
	}

	return b.String()
}

// loadBenchmarkReportCharts reads the per-GPU terminal chart files
// "gpu-<idx>-<phase>-metrics-term.txt" (phases: baseline, steady, cooldown)
// from runDir and returns one chart per file that could be read and is not
// empty. Charts are ordered by GPU index as given, then by phase.
//
// Loading is best-effort: unreadable or empty files are skipped silently.
func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart {
	phases := []struct {
		name  string
		label string
	}{
		{name: "baseline", label: "Baseline"},
		{name: "steady", label: "Steady State"},
		{name: "cooldown", label: "Cooldown"},
	}

	var charts []benchmarkReportChart
	for _, idx := range gpuIndices {
		for _, phase := range phases {
			path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-%s-metrics-term.txt", idx, phase.name))
			raw, err := os.ReadFile(path)
			if err != nil || len(raw) == 0 {
				// A missing or empty chart must not prevent the report
				// from being rendered, so it is simply left out.
				continue
			}
			charts = append(charts, benchmarkReportChart{
				Title:   fmt.Sprintf("GPU %d %s", idx, phase.label),
				Content: string(raw),
			})
		}
	}
	return charts
}

// stripANSIEscapeSequences returns raw with every match of ansiEscapePattern
// removed.
func stripANSIEscapeSequences(raw string) string {
	return ansiEscapePattern.ReplaceAllString(raw, "")
}

// formatThrottleLine renders throttle counters as human-readable percentages of
// the steady-state window. Only non-zero counters are shown. When the steady
// duration is unknown (0), the raw duration is shown instead: milliseconds for
// values under one second, seconds otherwise. Returns "none" when every
// counter is zero.
func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64) string {
	type counter struct {
		label string
		us    uint64
	}
	counters := []counter{
		{"sw_power", t.SWPowerCapUS},
		{"sw_thermal", t.SWThermalSlowdownUS},
		{"sync_boost", t.SyncBoostUS},
		{"hw_thermal", t.HWThermalSlowdownUS},
		{"hw_power_brake", t.HWPowerBrakeSlowdownUS},
	}

	var parts []string
	for _, c := range counters {
		if c.us == 0 {
			continue
		}
		// Counter values are divided by 1e6 to get seconds.
		sec := float64(c.us) / 1e6
		switch {
		case steadyDurationSec > 0:
			pct := sec / steadyDurationSec * 100
			parts = append(parts, fmt.Sprintf("%s=%.1f%% (%.0fs)", c.label, pct, sec))
		case sec < 1:
			parts = append(parts, fmt.Sprintf("%s=%.0fms", c.label, sec*1000))
		default:
			parts = append(parts, fmt.Sprintf("%s=%.1fs", c.label, sec))
		}
	}
	if len(parts) == 0 {
		return "none"
	}
	return strings.Join(parts, " ")
}

// renderBenchmarkSummary renders a flat key=value summary of result, one pair
// per line: run timestamp, profile, overall status, GPU count, normalization
// status, per-GPU status and composite score, the best composite score, and
// (when result.Interconnect is set) interconnect status and max bus bandwidth.
func renderBenchmarkSummary(result NvidiaBenchmarkResult) string {
	var b strings.Builder

	// The key promises UTC, so convert before formatting.
	fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.UTC().Format(time.RFC3339))
	fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile)
	fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus)
	fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs))
	fmt.Fprintf(&b, "normalization_status=%s\n", result.Normalization.Status)

	// best is seeded from the first GPU so that all-negative scores are still
	// reported correctly; with no GPUs it stays 0.
	var best float64
	for i, gpu := range result.GPUs {
		fmt.Fprintf(&b, "gpu_%d_status=%s\n", gpu.Index, gpu.Status)
		fmt.Fprintf(&b, "gpu_%d_composite_score=%.2f\n", gpu.Index, gpu.Scores.CompositeScore)
		if i == 0 || gpu.Scores.CompositeScore > best {
			best = gpu.Scores.CompositeScore
		}
	}
	fmt.Fprintf(&b, "best_composite_score=%.2f\n", best)

	if result.Interconnect != nil {
		fmt.Fprintf(&b, "interconnect_status=%s\n", result.Interconnect.Status)
		fmt.Fprintf(&b, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps)
	}
	return b.String()
}