package platform import ( "fmt" "os" "path/filepath" "regexp" "strings" "time" ) func renderBenchmarkReport(result NvidiaBenchmarkResult) string { return renderBenchmarkReportWithCharts(result, nil) } type benchmarkReportChart struct { Title string Content string } var ansiEscapePattern = regexp.MustCompile(`\x1b\[[0-9;]*m`) func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult, charts []benchmarkReportChart) string { var b strings.Builder // ── Header ──────────────────────────────────────────────────────────────── b.WriteString("# Bee NVIDIA Benchmark Report\n\n") // System identity block if result.ServerModel != "" { fmt.Fprintf(&b, "**Server:** %s \n", result.ServerModel) } if result.Hostname != "" { fmt.Fprintf(&b, "**Host:** %s \n", result.Hostname) } // GPU models summary if len(result.GPUs) > 0 { modelCount := make(map[string]int) var modelOrder []string for _, g := range result.GPUs { m := strings.TrimSpace(g.Name) if m == "" { m = "Unknown GPU" } if modelCount[m] == 0 { modelOrder = append(modelOrder, m) } modelCount[m]++ } var parts []string for _, m := range modelOrder { if modelCount[m] == 1 { parts = append(parts, m) } else { parts = append(parts, fmt.Sprintf("%d× %s", modelCount[m], m)) } } fmt.Fprintf(&b, "**GPU(s):** %s \n", strings.Join(parts, ", ")) } fmt.Fprintf(&b, "**Profile:** %s \n", result.BenchmarkProfile) fmt.Fprintf(&b, "**App version:** %s \n", result.BenchmarkVersion) fmt.Fprintf(&b, "**Generated:** %s \n", result.GeneratedAt.Format("2006-01-02 15:04:05 UTC")) if result.ParallelGPUs { fmt.Fprintf(&b, "**Mode:** parallel (all GPUs simultaneously) \n") } fmt.Fprintf(&b, "**Overall status:** %s \n", result.OverallStatus) b.WriteString("\n") // ── Executive Summary ───────────────────────────────────────────────────── if len(result.Findings) > 0 { b.WriteString("## Executive Summary\n\n") for _, finding := range result.Findings { fmt.Fprintf(&b, "- %s\n", finding) } b.WriteString("\n") } if len(result.Warnings) > 0 { b.WriteString("## Warnings\n\n") for _, warning := range result.Warnings { fmt.Fprintf(&b, "- %s\n", warning) } b.WriteString("\n") } // ── Scorecard table ─────────────────────────────────────────────────────── b.WriteString("## Scorecard\n\n") b.WriteString("| GPU | Status | Composite | Compute | TOPS/SM/GHz | Power Sustain | Thermal Sustain | Stability | Interconnect |\n") b.WriteString("|-----|--------|-----------|---------|-------------|---------------|-----------------|-----------|-------------|\n") for _, gpu := range result.GPUs { name := strings.TrimSpace(gpu.Name) if name == "" { name = "Unknown" } interconnect := "-" if gpu.Scores.InterconnectScore > 0 { interconnect = fmt.Sprintf("%.1f", gpu.Scores.InterconnectScore) } topsPerSM := "-" if gpu.Scores.TOPSPerSMPerGHz > 0 { topsPerSM = fmt.Sprintf("%.3f", gpu.Scores.TOPSPerSMPerGHz) } fmt.Fprintf(&b, "| GPU %d %s | %s | **%.2f** | %.2f | %s | %.1f | %.1f | %.1f | %s |\n", gpu.Index, name, gpu.Status, gpu.Scores.CompositeScore, gpu.Scores.ComputeScore, topsPerSM, gpu.Scores.PowerSustainScore, gpu.Scores.ThermalSustainScore, gpu.Scores.StabilityScore, interconnect, ) } b.WriteString("\n") // ── Per GPU detail ──────────────────────────────────────────────────────── b.WriteString("## Per-GPU Details\n\n") for _, gpu := range result.GPUs { name := strings.TrimSpace(gpu.Name) if name == "" { name = "Unknown GPU" } fmt.Fprintf(&b, "### GPU %d — %s\n\n", gpu.Index, name) // Identity if gpu.BusID != "" { fmt.Fprintf(&b, "- **Bus ID:** %s\n", gpu.BusID) } if gpu.VBIOS != "" { fmt.Fprintf(&b, "- **vBIOS:** %s\n", gpu.VBIOS) } if gpu.ComputeCapability != "" { fmt.Fprintf(&b, "- **Compute capability:** %s\n", gpu.ComputeCapability) } if gpu.MultiprocessorCount > 0 { fmt.Fprintf(&b, "- **SMs:** %d\n", gpu.MultiprocessorCount) } if gpu.PowerLimitW > 0 { fmt.Fprintf(&b, "- **Power limit:** %.0f W (default %.0f W)\n", gpu.PowerLimitW, gpu.DefaultPowerLimitW) } if gpu.LockedGraphicsClockMHz > 0 { fmt.Fprintf(&b, "- **Locked clocks:** GPU %.0f MHz / Mem %.0f MHz\n", gpu.LockedGraphicsClockMHz, gpu.LockedMemoryClockMHz) } b.WriteString("\n") // Steady-state telemetry fmt.Fprintf(&b, "**Steady-state telemetry** (%ds):\n\n", int(gpu.Steady.DurationSec)) b.WriteString("| | Avg | P95 |\n|---|---|---|\n") fmt.Fprintf(&b, "| Power | %.1f W | %.1f W |\n", gpu.Steady.AvgPowerW, gpu.Steady.P95PowerW) fmt.Fprintf(&b, "| Temperature | %.1f °C | %.1f °C |\n", gpu.Steady.AvgTempC, gpu.Steady.P95TempC) fmt.Fprintf(&b, "| GPU clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgGraphicsClockMHz, gpu.Steady.P95GraphicsClockMHz) fmt.Fprintf(&b, "| Memory clock | %.0f MHz | %.0f MHz |\n", gpu.Steady.AvgMemoryClockMHz, gpu.Steady.P95MemoryClockMHz) fmt.Fprintf(&b, "| GPU utilisation | %.1f %% | — |\n", gpu.Steady.AvgUsagePct) b.WriteString("\n") // Throttle throttle := formatThrottleLine(gpu.Throttle, gpu.Steady.DurationSec) if throttle != "none" { fmt.Fprintf(&b, "**Throttle:** %s\n\n", throttle) } // Precision results if len(gpu.PrecisionResults) > 0 { b.WriteString("**Precision results:**\n\n") b.WriteString("| Precision | TOPS | Lanes | Iterations |\n|-----------|------|-------|------------|\n") for _, p := range gpu.PrecisionResults { if p.Supported { fmt.Fprintf(&b, "| %s | %.2f | %d | %d |\n", p.Name, p.TeraOpsPerSec, p.Lanes, p.Iterations) } else { fmt.Fprintf(&b, "| %s | — (unsupported) | — | — |\n", p.Name) } } b.WriteString("\n") } // Degradation / Notes if len(gpu.DegradationReasons) > 0 { fmt.Fprintf(&b, "**Degradation reasons:** %s\n\n", strings.Join(gpu.DegradationReasons, ", ")) } if len(gpu.Notes) > 0 { b.WriteString("**Notes:**\n\n") for _, note := range gpu.Notes { fmt.Fprintf(&b, "- %s\n", note) } b.WriteString("\n") } } // ── Interconnect ────────────────────────────────────────────────────────── if result.Interconnect != nil { b.WriteString("## Interconnect (NCCL)\n\n") fmt.Fprintf(&b, "**Status:** %s\n\n", result.Interconnect.Status) if result.Interconnect.Supported { b.WriteString("| Metric | Avg | Max |\n|--------|-----|-----|\n") fmt.Fprintf(&b, "| Alg BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgAlgBWGBps, result.Interconnect.MaxAlgBWGBps) fmt.Fprintf(&b, "| Bus BW | %.1f GB/s | %.1f GB/s |\n", result.Interconnect.AvgBusBWGBps, result.Interconnect.MaxBusBWGBps) b.WriteString("\n") } for _, note := range result.Interconnect.Notes { fmt.Fprintf(&b, "- %s\n", note) } if len(result.Interconnect.Notes) > 0 { b.WriteString("\n") } } // ── Server Power (IPMI) ─────────────────────────────────────────────────── if sp := result.ServerPower; sp != nil { b.WriteString("## Server Power (IPMI)\n\n") if !sp.Available { b.WriteString("IPMI power measurement unavailable.\n\n") } else { b.WriteString("| | Value |\n|---|---|\n") fmt.Fprintf(&b, "| Server idle | %.0f W |\n", sp.IdleW) fmt.Fprintf(&b, "| Server under load | %.0f W |\n", sp.LoadedW) fmt.Fprintf(&b, "| Server delta (load − idle) | %.0f W |\n", sp.DeltaW) fmt.Fprintf(&b, "| GPU-reported sum | %.0f W |\n", sp.GPUReportedSumW) if sp.ReportingRatio > 0 { fmt.Fprintf(&b, "| Reporting ratio | %.2f (1.0 = accurate, <0.75 = GPU over-reports) |\n", sp.ReportingRatio) } b.WriteString("\n") } for _, note := range sp.Notes { fmt.Fprintf(&b, "- %s\n", note) } if len(sp.Notes) > 0 { b.WriteString("\n") } } // ── Terminal charts (steady-state only) ─────────────────────────────────── if len(charts) > 0 { b.WriteString("## Steady-State Charts\n\n") for _, chart := range charts { content := strings.TrimSpace(stripANSIEscapeSequences(chart.Content)) if content == "" { continue } fmt.Fprintf(&b, "### %s\n\n```\n%s\n```\n\n", chart.Title, content) } } // ── Methodology ─────────────────────────────────────────────────────────── b.WriteString("## Methodology\n\n") fmt.Fprintf(&b, "- Profile `%s` uses standardized baseline → warmup → steady-state → interconnect → cooldown phases.\n", result.BenchmarkProfile) b.WriteString("- Single-GPU compute score from bee-gpu-burn cuBLASLt when available.\n") b.WriteString("- Thermal and power limitations inferred from NVIDIA clock event reason counters and sustained telemetry.\n") b.WriteString("- `result.json` is the canonical machine-readable source for this benchmark run.\n\n") // ── Raw files ───────────────────────────────────────────────────────────── b.WriteString("## Raw Files\n\n") b.WriteString("- `result.json`\n- `report.md`\n- `summary.txt`\n- `verbose.log`\n") b.WriteString("- `gpu-*-baseline-metrics.csv/html/term.txt`\n") b.WriteString("- `gpu-*-warmup.log`\n") b.WriteString("- `gpu-*-steady.log`\n") b.WriteString("- `gpu-*-steady-metrics.csv/html/term.txt`\n") b.WriteString("- `gpu-*-cooldown-metrics.csv/html/term.txt`\n") if result.Interconnect != nil { b.WriteString("- `nccl-all-reduce.log`\n") } return b.String() } // loadBenchmarkReportCharts loads only steady-state terminal charts (baseline and // cooldown charts are not useful for human review). func loadBenchmarkReportCharts(runDir string, gpuIndices []int) []benchmarkReportChart { var charts []benchmarkReportChart for _, idx := range gpuIndices { path := filepath.Join(runDir, fmt.Sprintf("gpu-%d-steady-metrics-term.txt", idx)) raw, err := os.ReadFile(path) if err != nil || len(raw) == 0 { continue } charts = append(charts, benchmarkReportChart{ Title: fmt.Sprintf("GPU %d — Steady State", idx), Content: string(raw), }) } return charts } func stripANSIEscapeSequences(raw string) string { return ansiEscapePattern.ReplaceAllString(raw, "") } // formatThrottleLine renders throttle counters as human-readable percentages of // the steady-state window. Only non-zero counters are shown. When the steady // duration is unknown (0), raw seconds are shown instead. func formatThrottleLine(t BenchmarkThrottleCounters, steadyDurationSec float64) string { type counter struct { label string us uint64 } counters := []counter{ {"sw_power", t.SWPowerCapUS}, {"sw_thermal", t.SWThermalSlowdownUS}, {"sync_boost", t.SyncBoostUS}, {"hw_thermal", t.HWThermalSlowdownUS}, {"hw_power_brake", t.HWPowerBrakeSlowdownUS}, } var parts []string for _, c := range counters { if c.us == 0 { continue } sec := float64(c.us) / 1e6 if steadyDurationSec > 0 { pct := sec / steadyDurationSec * 100 parts = append(parts, fmt.Sprintf("%s=%.1f%% (%.0fs)", c.label, pct, sec)) } else if sec < 1 { parts = append(parts, fmt.Sprintf("%s=%.0fms", c.label, sec*1000)) } else { parts = append(parts, fmt.Sprintf("%s=%.1fs", c.label, sec)) } } if len(parts) == 0 { return "none" } return strings.Join(parts, " ") } func renderBenchmarkSummary(result NvidiaBenchmarkResult) string { var b strings.Builder fmt.Fprintf(&b, "run_at_utc=%s\n", result.GeneratedAt.Format(time.RFC3339)) fmt.Fprintf(&b, "benchmark_profile=%s\n", result.BenchmarkProfile) fmt.Fprintf(&b, "overall_status=%s\n", result.OverallStatus) fmt.Fprintf(&b, "gpu_count=%d\n", len(result.GPUs)) fmt.Fprintf(&b, "normalization_status=%s\n", result.Normalization.Status) var best float64 for i, gpu := range result.GPUs { fmt.Fprintf(&b, "gpu_%d_status=%s\n", gpu.Index, gpu.Status) fmt.Fprintf(&b, "gpu_%d_composite_score=%.2f\n", gpu.Index, gpu.Scores.CompositeScore) if i == 0 || gpu.Scores.CompositeScore > best { best = gpu.Scores.CompositeScore } } fmt.Fprintf(&b, "best_composite_score=%.2f\n", best) if result.Interconnect != nil { fmt.Fprintf(&b, "interconnect_status=%s\n", result.Interconnect.Status) fmt.Fprintf(&b, "interconnect_max_busbw_gbps=%.1f\n", result.Interconnect.MaxBusBWGBps) } return b.String() }