Fix dcgmproftester parallel mode: use staggered script for all multi-GPU runs
A single dcgmproftester process without -i only loads GPU 0 regardless of CUDA_VISIBLE_DEVICES. Multi-GPU runs are now always routed through bee-dcgmproftester-staggered (with --stagger-seconds 0 for parallel mode), which spawns one dcgmproftester process per GPU so that all selected GPUs are loaded simultaneously.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -443,11 +443,19 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
|
|||||||
profCmd []string
|
profCmd []string
|
||||||
profEnv []string
|
profEnv []string
|
||||||
)
|
)
|
||||||
if staggerSec > 0 && len(selected) > 1 {
|
if len(selected) > 1 {
|
||||||
|
// For multiple GPUs, always spawn one dcgmproftester process per GPU via
|
||||||
|
// bee-dcgmproftester-staggered (stagger=0 means all start simultaneously).
|
||||||
|
// A single dcgmproftester process without -i only loads GPU 0 regardless
|
||||||
|
// of CUDA_VISIBLE_DEVICES.
|
||||||
|
stagger := staggerSec
|
||||||
|
if stagger < 0 {
|
||||||
|
stagger = 0
|
||||||
|
}
|
||||||
profCmd = []string{
|
profCmd = []string{
|
||||||
"bee-dcgmproftester-staggered",
|
"bee-dcgmproftester-staggered",
|
||||||
"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
|
"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
|
||||||
"--stagger-seconds", strconv.Itoa(staggerSec),
|
"--stagger-seconds", strconv.Itoa(stagger),
|
||||||
"--devices", joinIndexList(selected),
|
"--devices", joinIndexList(selected),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user