diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go
index e756a46..75bf3ca 100644
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -628,8 +628,10 @@ func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFun
 	}
 
 	if rampUp && len(body.GPUIndices) > 1 {
-		// Ramp-up mode: resolve GPU list, then create one task per prefix
-		// [gpu0], [gpu0,gpu1], ..., [gpu0,...,gpuN-1], each running in parallel.
+		// Ramp-up mode: RunNvidiaPowerBench internally ramps from 1 to N GPUs
+		// in Phase 2 (one additional GPU per step). A single task with all
+		// selected GPUs is sufficient — spawning N tasks with growing subsets
+		// would repeat all earlier steps redundantly.
 		gpus, err := apiListNvidiaGPUs(h.opts.App)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, err.Error())
@@ -646,35 +648,27 @@ func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFun
 		} else {
 			now := time.Now()
 			rampRunID := fmt.Sprintf("ramp-%s", now.UTC().Format("20060102-150405"))
-			var allTasks []*Task
-			for step := 1; step <= len(resolved); step++ {
-				subset := resolved[:step]
-				stepName := fmt.Sprintf("%s · ramp %d/%d · GPU %s", name, step, len(resolved), formatGPUIndexList(subset))
-				t := &Task{
-					ID:        newJobID("bee-bench-nvidia"),
-					Name:      stepName,
-					Target:    target,
-					Priority:  defaultTaskPriority(target, taskParams{}),
-					Status:    TaskPending,
-					CreatedAt: now,
-					params: taskParams{
-						GPUIndices:       append([]int(nil), subset...),
-						SizeMB:           body.SizeMB,
-						BenchmarkProfile: body.Profile,
-						RunNCCL:          runNCCL && step == len(resolved),
-						ParallelGPUs:     true,
-						RampStep:         step,
-						RampTotal:        len(resolved),
-						RampRunID:        rampRunID,
-						DisplayName:      stepName,
-					},
-				}
-				allTasks = append(allTasks, t)
+			taskName := fmt.Sprintf("%s · ramp 1–%d · GPU %s", name, len(resolved), formatGPUIndexList(resolved))
+			t := &Task{
+				ID:        newJobID("bee-bench-nvidia"),
+				Name:      taskName,
+				Target:    target,
+				Priority:  defaultTaskPriority(target, taskParams{}),
+				Status:    TaskPending,
+				CreatedAt: now,
+				params: taskParams{
+					GPUIndices:       append([]int(nil), resolved...),
+					SizeMB:           body.SizeMB,
+					BenchmarkProfile: body.Profile,
+					RunNCCL:          runNCCL,
+					ParallelGPUs:     true,
+					RampTotal:        len(resolved),
+					RampRunID:        rampRunID,
+					DisplayName:      taskName,
+				},
 			}
-			for _, t := range allTasks {
-				globalQueue.enqueue(t)
-			}
-			writeTaskRunResponse(w, allTasks)
+			globalQueue.enqueue(t)
+			writeTaskRunResponse(w, []*Task{t})
 			return
 		}
 	}