From df1385d3d65c0ea2e94360e9e73f0259af899a32 Mon Sep 17 00:00:00 2001
From: Michael Chus <mike@mchus.pro>
Date: Sun, 19 Apr 2026 18:31:34 +0300
Subject: [PATCH] Fix dcgmproftester parallel mode: use staggered script for
 all multi-GPU runs

A single dcgmproftester process without -i only loads GPU 0 regardless of
CUDA_VISIBLE_DEVICES. Multi-GPU runs are now always routed through
bee-dcgmproftester-staggered (with --stagger-seconds 0 for parallel mode),
which spawns one process per GPU so all GPUs are loaded simultaneously.
A negative stagger value is clamped to 0 before it is passed to the script.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 audit/internal/platform/sat.go | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/audit/internal/platform/sat.go b/audit/internal/platform/sat.go
index 349d5aa..fb15b8c 100644
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -443,11 +443,19 @@ func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir strin
 		profCmd []string
 		profEnv []string
 	)
-	if staggerSec > 0 && len(selected) > 1 {
+	if len(selected) > 1 {
+		// For multiple GPUs, always spawn one dcgmproftester process per GPU via
+		// bee-dcgmproftester-staggered (stagger=0 means all start simultaneously).
+		// A single dcgmproftester process without -i only loads GPU 0 regardless
+		// of CUDA_VISIBLE_DEVICES.
+		stagger := staggerSec
+		if stagger < 0 {
+			stagger = 0
+		}
 		profCmd = []string{
 			"bee-dcgmproftester-staggered",
 			"--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
-			"--stagger-seconds", strconv.Itoa(staggerSec),
+			"--stagger-seconds", strconv.Itoa(stagger),
 			"--devices", joinIndexList(selected),
 		}
 	} else {