diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go index 8497294..129cdca 100644 --- a/audit/internal/webui/api.go +++ b/audit/internal/webui/api.go @@ -36,6 +36,16 @@ var apiListNvidiaGPUStatuses = func(a *app.App) ([]platform.NvidiaGPUStatus, err return a.ListNvidiaGPUStatuses() } +const ( + taskPriorityBenchmark = 10 + taskPriorityBurn = 20 + taskPriorityValidateStress = 30 + taskPriorityValidate = 40 + taskPriorityAudit = 50 + taskPriorityInstallToRAM = 60 + taskPriorityInstall = 70 +) + // ── Job ID counter ──────────────────────────────────────────────────────────── var jobCounter atomic.Uint64 @@ -109,6 +119,30 @@ func shouldSplitHomogeneousNvidiaTarget(target string) bool { } } +func defaultTaskPriority(target string, params taskParams) int { + switch strings.TrimSpace(target) { + case "install": + return taskPriorityInstall + case "install-to-ram": + return taskPriorityInstallToRAM + case "audit": + return taskPriorityAudit + case "nvidia-benchmark": + return taskPriorityBenchmark + case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute": + return taskPriorityBurn + case "nvidia", "nvidia-targeted-stress", "nvidia-targeted-power", "nvidia-pulse", + "nvidia-interconnect", "nvidia-bandwidth", "memory", "storage", "cpu", + "amd", "amd-mem", "amd-bandwidth": + if params.StressMode { + return taskPriorityValidateStress + } + return taskPriorityValidate + default: + return 0 + } +} + func expandHomogeneousNvidiaSelections(gpus []platform.NvidiaGPU, include, exclude []int) ([]nvidiaTaskSelection, error) { if len(gpus) == 0 { return nil, fmt.Errorf("no NVIDIA GPUs detected") @@ -458,6 +492,7 @@ func (h *handler) handleAPIAuditRun(w http.ResponseWriter, _ *http.Request) { ID: newJobID("audit"), Name: "Audit", Target: "audit", + Priority: defaultTaskPriority("audit", taskParams{}), Status: TaskPending, CreatedAt: time.Now(), } @@ -526,7 +561,7 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc { DisplayName: body.DisplayName, PlatformComponents: body.PlatformComponents, } - tasks, err := buildNvidiaTaskSet(target, 0, time.Now(), params, name, h.opts.App, "sat-"+target) + tasks, err := buildNvidiaTaskSet(target, defaultTaskPriority(target, params), time.Now(), params, name, h.opts.App, "sat-"+target) if err != nil { writeError(w, http.StatusBadRequest, err.Error()) return @@ -613,7 +648,7 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req ID: newJobID("benchmark-nvidia"), Name: stepName, Target: "nvidia-benchmark", - Priority: 15, + Priority: defaultTaskPriority("nvidia-benchmark", taskParams{}), Status: TaskPending, CreatedAt: now, params: taskParams{ @@ -645,7 +680,7 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req name = fmt.Sprintf("%s · sequential", name) } - tasks, err := buildNvidiaTaskSet("nvidia-benchmark", 15, time.Now(), taskParams{ + params := taskParams{ GPUIndices: body.GPUIndices, ExcludeGPUIndices: body.ExcludeGPUIndices, SizeMB: body.SizeMB, @@ -653,7 +688,8 @@ func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Req RunNCCL: runNCCL, ParallelGPUs: parallelGPUs, DisplayName: body.DisplayName, - }, name, h.opts.App, "benchmark-nvidia") + } + tasks, err := buildNvidiaTaskSet("nvidia-benchmark", defaultTaskPriority("nvidia-benchmark", params), time.Now(), params, name, h.opts.App, "benchmark-nvidia") if err != nil { writeError(w, http.StatusBadRequest, err.Error()) return @@ -1054,7 +1090,7 @@ func (h *handler) handleAPIInstallToRAM(w http.ResponseWriter, r *http.Request) ID: newJobID("install-to-ram"), Name: "Install to RAM", Target: "install-to-ram", - Priority: 10, + Priority: defaultTaskPriority("install-to-ram", taskParams{}), Status: TaskPending, CreatedAt: time.Now(), } @@ -1169,7 +1205,7 @@ func (h *handler) handleAPIInstallRun(w http.ResponseWriter, r *http.Request) { ID: newJobID("install"), Name: "Install to Disk", Target: "install", - Priority: 20, + Priority: defaultTaskPriority("install", taskParams{}), Status: TaskPending, CreatedAt: time.Now(), params: taskParams{ @@ -1461,4 +1497,3 @@ func (h *handler) rollbackPendingNetworkChange() error { } return nil } - diff --git a/audit/internal/webui/api_test.go b/audit/internal/webui/api_test.go index 124090f..5a78970 100644 --- a/audit/internal/webui/api_test.go +++ b/audit/internal/webui/api_test.go @@ -39,6 +39,9 @@ func TestHandleAPISATRunDecodesBodyWithoutContentLength(t *testing.T) { if got := globalQueue.tasks[0].params.BurnProfile; got != "smoke" { t.Fatalf("burn profile=%q want smoke", got) } + if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate { + t.Fatalf("priority=%d want %d", got, taskPriorityValidate) + } } func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) { @@ -84,6 +87,9 @@ func TestHandleAPIBenchmarkNvidiaRunQueuesSelectedGPUs(t *testing.T) { if task.params.RunNCCL { t.Fatal("RunNCCL should reflect explicit false from request") } + if task.Priority != taskPriorityBenchmark { + t.Fatalf("priority=%d want %d", task.Priority, taskPriorityBenchmark) + } } func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) { @@ -133,6 +139,12 @@ func TestHandleAPIBenchmarkNvidiaRunSplitsMixedGPUModels(t *testing.T) { if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 { t.Fatalf("task[1] gpu indices=%v want [2]", got) } + if got := globalQueue.tasks[0].Priority; got != taskPriorityBenchmark { + t.Fatalf("task[0] priority=%d want %d", got, taskPriorityBenchmark) + } + if got := globalQueue.tasks[1].Priority; got != taskPriorityBenchmark { + t.Fatalf("task[1] priority=%d want %d", got, taskPriorityBenchmark) + } } func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) { @@ -175,6 +187,39 @@ func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) { if got := globalQueue.tasks[1].params.GPUIndices; len(got) != 1 || got[0] != 2 { t.Fatalf("task[1] gpu indices=%v want [2]", got) } + if got := globalQueue.tasks[0].Priority; got != taskPriorityValidate { + t.Fatalf("task[0] priority=%d want %d", got, taskPriorityValidate) + } + if got := globalQueue.tasks[1].Priority; got != taskPriorityValidate { + t.Fatalf("task[1] priority=%d want %d", got, taskPriorityValidate) + } +} + +func TestDefaultTaskPriorityOrder(t *testing.T) { + got := []int{ + defaultTaskPriority("install-to-ram", taskParams{}), + defaultTaskPriority("audit", taskParams{}), + defaultTaskPriority("cpu", taskParams{}), + defaultTaskPriority("cpu", taskParams{StressMode: true}), + defaultTaskPriority("nvidia-stress", taskParams{}), + defaultTaskPriority("nvidia-benchmark", taskParams{}), + } + want := []int{ + taskPriorityInstallToRAM, + taskPriorityAudit, + taskPriorityValidate, + taskPriorityValidateStress, + taskPriorityBurn, + taskPriorityBenchmark, + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("priority[%d]=%d want %d", i, got[i], want[i]) + } + } + if !(got[0] > got[1] && got[1] > got[2] && got[2] > got[3] && got[3] > got[4] && got[4] > got[5]) { + t.Fatalf("priority order=%v", got) + } } func TestPushFanRingsTracksByNameAndCarriesForwardMissingSamples(t *testing.T) {