Globalize autotuned system power source

This commit is contained in:
2026-04-20 07:02:12 +03:00
parent 17118298bd
commit b3cf8e3893
14 changed files with 327 additions and 108 deletions

View File

@@ -34,6 +34,7 @@ var taskNames = map[string]string{
"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
"nvidia-bench-perf": "NVIDIA Bee Bench Perf",
"nvidia-bench-power": "NVIDIA Bee Bench Power",
"nvidia-bench-autotune": "NVIDIA Bee Bench Power Source Autotune",
"nvidia-compute": "NVIDIA Max Compute Load (dcgmproftester)",
"nvidia-targeted-power": "NVIDIA Targeted Power (dcgmi diag targeted_power)",
"nvidia-pulse": "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -125,6 +126,7 @@ type taskParams struct {
Loader string `json:"loader,omitempty"`
BurnProfile string `json:"burn_profile,omitempty"`
BenchmarkProfile string `json:"benchmark_profile,omitempty"`
BenchmarkKind string `json:"benchmark_kind,omitempty"`
RunNCCL bool `json:"run_nccl,omitempty"`
ParallelGPUs bool `json:"parallel_gpus,omitempty"`
RampStep int `json:"ramp_step,omitempty"`
@@ -686,6 +688,15 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
RampTotal: t.params.RampTotal,
RampRunID: t.params.RampRunID,
}, j.append)
case "nvidia-bench-autotune":
if a == nil {
err = fmt.Errorf("app not configured")
break
}
archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
Profile: t.params.BenchmarkProfile,
SizeMB: t.params.SizeMB,
}, t.params.BenchmarkKind, j.append)
case "nvidia-compute":
if a == nil {
err = fmt.Errorf("app not configured")