// bee/audit/internal/platform/benchmark_test.go

package platform

import (
	"strings"
	"testing"
)

// TestResolveBenchmarkProfile verifies that resolveBenchmarkProfile returns
// the expected benchmarkProfileSpec for each profile string. The empty string
// is expected to resolve to the standard profile.
func TestResolveBenchmarkProfile(t *testing.T) {
	t.Parallel()

	type profileCase struct {
		name    string
		profile string
		want    benchmarkProfileSpec
	}

	tests := []profileCase{
		{
			name:    "default",
			profile: "",
			want: benchmarkProfileSpec{
				Name:        NvidiaBenchmarkProfileStandard,
				BaselineSec: 15,
				WarmupSec:   120,
				SteadySec:   480,
				NCCLSec:     180,
				CooldownSec: 120,
			},
		},
		{
			name:    "stability",
			profile: "stability",
			want: benchmarkProfileSpec{
				Name:        NvidiaBenchmarkProfileStability,
				BaselineSec: 30,
				WarmupSec:   300,
				SteadySec:   3600,
				NCCLSec:     300,
				CooldownSec: 300,
			},
		},
		{
			name:    "overnight",
			profile: "overnight",
			want: benchmarkProfileSpec{
				Name:        NvidiaBenchmarkProfileOvernight,
				BaselineSec: 60,
				WarmupSec:   600,
				SteadySec:   27000,
				NCCLSec:     600,
				CooldownSec: 300,
			},
		},
	}

	for i := range tests {
		// Copy the case so the subtest closure owns its own value.
		tt := tests[i]
		t.Run(tt.name, func(t *testing.T) {
			if got := resolveBenchmarkProfile(tt.profile); got != tt.want {
				t.Fatalf("profile=%q got %+v want %+v", tt.profile, got, tt.want)
			}
		})
	}
}

func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
	t.Parallel()

	opts := normalizeNvidiaBenchmarkOptionsForBenchmark(NvidiaBenchmarkOptions{
		Profile: "stability",
		RunNCCL: false,
	})
	if opts.Profile != NvidiaBenchmarkProfileStability {
		t.Fatalf("profile=%q want %q", opts.Profile, NvidiaBenchmarkProfileStability)
	}
	if opts.RunNCCL {
		t.Fatalf("RunNCCL should stay false when explicitly disabled")
	}
}

// TestParseBenchmarkBurnLog feeds parseBenchmarkBurnLog a synthetic burn log
// for a single GPU that declares two precision profiles (fp16_tensor and
// fp8_e4m3) and checks the parsed backend, compute capability, profile count,
// and per-profile fields.
func TestParseBenchmarkBurnLog(t *testing.T) {
	t.Parallel()

	logLines := []string{
		"loader=bee-gpu-burn",
		"[gpu 0] device=NVIDIA H100",
		"[gpu 0] compute_capability=9.0",
		"[gpu 0] backend=cublasLt",
		"[gpu 0] duration_s=10",
		"[gpu 0] fp16_tensor[0]=READY dim=4096x4096x4096 block=128 stream=0",
		"[gpu 0] fp8_e4m3[0]=READY dim=8192x8192x4096 block=128 stream=0",
		"[gpu 0] fp16_tensor_iterations=200",
		"[gpu 0] fp8_e4m3_iterations=50",
		"[gpu 0] status=OK",
	}
	parsed := parseBenchmarkBurnLog(strings.Join(logLines, "\n"))

	// Cases are evaluated top to bottom and t.Fatalf ends the test, so only
	// the first failing check is reported. The length check precedes the
	// indexed accesses so they are never evaluated on a short slice.
	switch {
	case parsed.Backend != "cublasLt":
		t.Fatalf("backend=%q want cublasLt", parsed.Backend)
	case parsed.ComputeCapability != "9.0":
		t.Fatalf("compute capability=%q want 9.0", parsed.ComputeCapability)
	case len(parsed.Profiles) != 2:
		t.Fatalf("profiles=%d want 2", len(parsed.Profiles))
	case parsed.Profiles[0].TeraOpsPerSec <= 0:
		t.Fatalf("profile[0] teraops=%f want >0", parsed.Profiles[0].TeraOpsPerSec)
	case parsed.Profiles[1].Category != "fp8":
		t.Fatalf("profile[1] category=%q want fp8", parsed.Profiles[1].Category)
	}
}

func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
	t.Parallel()

	result := NvidiaBenchmarkResult{
		BenchmarkVersion:   benchmarkVersion,
		BenchmarkProfile:   NvidiaBenchmarkProfileStandard,
		OverallStatus:      "PARTIAL",
		SelectedGPUIndices: []int{0},
		Normalization: BenchmarkNormalization{
			Status: "partial",
		},
		Findings: []string{"GPU 0 spent measurable time under SW power cap."},
		GPUs: []BenchmarkGPUResult{
			{
				Index:  0,
				Name:   "NVIDIA H100",
				Status: "OK",
				Steady: BenchmarkTelemetrySummary{
					AvgPowerW:           680,
					AvgTempC:            79,
					AvgGraphicsClockMHz: 1725,
					P95PowerW:           700,
					P95TempC:            82,
					P95GraphicsClockMHz: 1800,
				},
				Scores: BenchmarkScorecard{
					ComputeScore:        1200,
					PowerSustainScore:   96,
					ThermalSustainScore: 88,
					StabilityScore:      92,
					CompositeScore:      1176,
				},
				PrecisionResults: []BenchmarkPrecisionResult{
					{Name: "fp16_tensor", Supported: true, TeraOpsPerSec: 700},
				},
				Throttle: BenchmarkThrottleCounters{
					SWPowerCapUS: 1000000,
				},
				DegradationReasons: []string{"power_capped"},
			},
		},
	}

	report := renderBenchmarkReport(result)
	for _, needle := range []string{
		"Executive Summary",
		"GPU 0 spent measurable time under SW power cap.",
		"Composite score: 1176.00",
		"fp16_tensor: 700.00 TOPS",
	} {
		if !strings.Contains(report, needle) {
			t.Fatalf("report missing %q\n%s", needle, report)
		}
	}
}