diff --git a/audit/internal/app/app.go b/audit/internal/app/app.go
index e93e337..28a85f9 100644
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -117,7 +117,7 @@ type satRunner interface {
RunNvidiaAcceptancePackWithOptions(ctx context.Context, baseDir string, diagLevel int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
- RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
+ RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
RunNvidiaBandwidthPack(ctx context.Context, baseDir string, gpuIndices []int, logFunc func(string)) (string, error)
@@ -566,11 +566,11 @@ func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts pl
return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
}
-func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
if strings.TrimSpace(baseDir) == "" {
baseDir = DefaultSATBaseDir
}
- return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, logFunc)
+ return a.sat.RunNvidiaOfficialComputePack(ctx, baseDir, durationSec, gpuIndices, staggerSec, logFunc)
}
func (a *App) RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
diff --git a/audit/internal/app/app_test.go b/audit/internal/app/app_test.go
index b809ce9..5440016 100644
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -161,7 +161,7 @@ func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir
return f.runNvidiaFn(baseDir)
}
-func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
+func (f fakeSAT) RunNvidiaOfficialComputePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ int, _ func(string)) (string, error) {
if f.runNvidiaComputeFn != nil {
return f.runNvidiaComputeFn(baseDir, durationSec, gpuIndices)
}
diff --git a/audit/internal/platform/nvidia_stress.go b/audit/internal/platform/nvidia_stress.go
index 8089a8d..cda024d 100644
--- a/audit/internal/platform/nvidia_stress.go
+++ b/audit/internal/platform/nvidia_stress.go
@@ -49,6 +49,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
"--seconds", strconv.Itoa(opts.DurationSec),
"--size-mb", strconv.Itoa(opts.SizeMB),
}
+ if opts.StaggerSeconds > 0 && len(selected) > 1 {
+ cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+ }
if len(selected) > 0 {
cmd = append(cmd, "--devices", joinIndexList(selected))
}
@@ -63,6 +66,9 @@ func buildNvidiaStressJob(opts NvidiaStressOptions) (satJob, error) {
"bee-john-gpu-stress",
"--seconds", strconv.Itoa(opts.DurationSec),
}
+ if opts.StaggerSeconds > 0 && len(selected) > 1 {
+ cmd = append(cmd, "--stagger-seconds", strconv.Itoa(opts.StaggerSeconds))
+ }
if len(selected) > 0 {
cmd = append(cmd, "--devices", joinIndexList(selected))
}
diff --git a/audit/internal/platform/sat.go b/audit/internal/platform/sat.go
index ca06bc5..49b8ed2 100644
--- a/audit/internal/platform/sat.go
+++ b/audit/internal/platform/sat.go
@@ -384,25 +384,39 @@ func (s *System) RunNCCLTests(ctx context.Context, baseDir string, logFunc func(
), logFunc)
}
-func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error) {
+func (s *System) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
selected, err := resolveDCGMGPUIndices(gpuIndices)
if err != nil {
return "", err
}
- profCmd, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
- if err != nil {
- return "", err
+ var (
+ profCmd []string
+ profEnv []string
+ )
+ if staggerSec > 0 && len(selected) > 1 {
+ profCmd = []string{
+ "bee-dcgmproftester-staggered",
+ "--seconds", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)),
+ "--stagger-seconds", strconv.Itoa(staggerSec),
+ "--devices", joinIndexList(selected),
+ }
+ } else {
+ profCmd, err = resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(normalizeNvidiaBurnDuration(durationSec)))
+ if err != nil {
+ return "", err
+ }
+ profEnv = nvidiaVisibleDevicesEnv(selected)
}
return runAcceptancePackCtx(ctx, baseDir, "gpu-nvidia-compute", withNvidiaPersistenceMode(
satJob{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
satJob{name: "02-dcgmi-version.log", cmd: []string{"dcgmi", "-v"}},
- satJob{
- name: "03-dcgmproftester.log",
- cmd: profCmd,
- env: nvidiaVisibleDevicesEnv(selected),
- collectGPU: true,
- gpuIndices: selected,
- },
+ satJob{
+ name: "03-dcgmproftester.log",
+ cmd: profCmd,
+ env: profEnv,
+ collectGPU: true,
+ gpuIndices: selected,
+ },
satJob{name: "04-nvidia-smi-after.log", cmd: []string{"nvidia-smi", "--query-gpu=index,name,temperature.gpu,power.draw,utilization.gpu,memory.used,memory.total", "--format=csv,noheader,nounits"}},
), logFunc)
}
diff --git a/audit/internal/platform/types.go b/audit/internal/platform/types.go
index 6acaa7c..1c95465 100644
--- a/audit/internal/platform/types.go
+++ b/audit/internal/platform/types.go
@@ -70,6 +70,7 @@ type NvidiaStressOptions struct {
Loader string
GPUIndices []int
ExcludeGPUIndices []int
+ StaggerSeconds int
}
func New() *System {
diff --git a/audit/internal/webui/api.go b/audit/internal/webui/api.go
index 419c7df..3264b0e 100644
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -482,12 +482,13 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
return
}
- var body struct {
- Duration int `json:"duration"`
- StressMode bool `json:"stress_mode"`
- GPUIndices []int `json:"gpu_indices"`
- ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
- Loader string `json:"loader"`
+ var body struct {
+ Duration int `json:"duration"`
+ StressMode bool `json:"stress_mode"`
+ GPUIndices []int `json:"gpu_indices"`
+ ExcludeGPUIndices []int `json:"exclude_gpu_indices"`
+ StaggerGPUStart bool `json:"stagger_gpu_start"`
+ Loader string `json:"loader"`
Profile string `json:"profile"`
DisplayName string `json:"display_name"`
PlatformComponents []string `json:"platform_components"`
@@ -503,12 +504,13 @@ func (h *handler) handleAPISATRun(target string) http.HandlerFunc {
if strings.TrimSpace(body.DisplayName) != "" {
name = body.DisplayName
}
- params := taskParams{
- Duration: body.Duration,
- StressMode: body.StressMode,
- GPUIndices: body.GPUIndices,
- ExcludeGPUIndices: body.ExcludeGPUIndices,
- Loader: body.Loader,
+ params := taskParams{
+ Duration: body.Duration,
+ StressMode: body.StressMode,
+ GPUIndices: body.GPUIndices,
+ ExcludeGPUIndices: body.ExcludeGPUIndices,
+ StaggerGPUStart: body.StaggerGPUStart,
+ Loader: body.Loader,
BurnProfile: body.Profile,
DisplayName: body.DisplayName,
PlatformComponents: body.PlatformComponents,
diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go
index 3cb26d3..f0858b6 100644
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -2117,12 +2117,16 @@ func renderBurn() string {
-
-
Loading NVIDIA GPUs...
-
- Select at least one NVIDIA GPU to enable NVIDIA burn recipes.
-
-
+
+
Loading NVIDIA GPUs...
+
+ Select at least one NVIDIA GPU to enable NVIDIA burn recipes.
+
+
+
Core Burn Paths
@@ -2196,6 +2200,11 @@ function burnSelectedGPUIndices() {
.sort(function(a, b) { return a - b; });
}
+function burnUseNvidiaRampUp() {
+  const toggle = document.getElementById('burn-stagger-nvidia');
+  return toggle ? Boolean(toggle.checked) : false; // no checkbox in the page means off
+}
+
function burnUpdateSelectionNote() {
const note = document.getElementById('burn-selection-note');
const selected = burnSelectedGPUIndices();
@@ -2255,6 +2264,9 @@ function enqueueBurnTask(target, label, extra, useSelectedNvidia) {
return Promise.reject(new Error('Select at least one NVIDIA GPU.'));
}
body.gpu_indices = selected;
+ if (burnUseNvidiaRampUp() && selected.length > 1) {
+ body.stagger_gpu_start = true;
+ }
}
return fetch('/api/sat/' + target + '/run', {
method: 'POST',
diff --git a/audit/internal/webui/tasks.go b/audit/internal/webui/tasks.go
index 68c430d..283803a 100644
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -118,6 +118,7 @@ type taskParams struct {
StressMode bool `json:"stress_mode,omitempty"`
GPUIndices []int `json:"gpu_indices,omitempty"`
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
+ StaggerGPUStart bool `json:"stagger_gpu_start,omitempty"`
SizeMB int `json:"size_mb,omitempty"`
Passes int `json:"passes,omitempty"`
Loader string `json:"loader,omitempty"`
@@ -162,6 +163,13 @@ func resolveBurnPreset(profile string) burnPreset {
}
}
+func boolToNvidiaStaggerSeconds(enabled bool, selected []int) int {
+	if !enabled || len(selected) <= 1 { // needs the toggle on and at least two listed GPUs
+		return 0
+	}
+	return 180 // stagger delay in seconds applied per GPU
+}
+
func resolvePlatformStressPreset(profile string) platform.PlatformStressOptions {
acceptanceCycles := []platform.PlatformStressCycle{
{LoadSec: 85, IdleSec: 5},
@@ -592,7 +600,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
RunNCCL: t.params.RunNCCL,
ParallelGPUs: t.params.ParallelGPUs,
}, j.append)
- case "nvidia-compute":
+ case "nvidia-compute":
if a == nil {
err = fmt.Errorf("app not configured")
break
@@ -601,7 +609,11 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
- archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, j.append)
+ staggerSec := boolToNvidiaStaggerSeconds(t.params.StaggerGPUStart, t.params.GPUIndices)
+ if staggerSec > 0 {
+ j.append(fmt.Sprintf("NVIDIA staggered ramp-up enabled: %ds per GPU", staggerSec))
+ }
+ archive, err = a.RunNvidiaOfficialComputePack(ctx, "", dur, t.params.GPUIndices, staggerSec, j.append)
case "nvidia-targeted-power":
if a == nil {
err = fmt.Errorf("app not configured")
@@ -651,12 +663,13 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
if t.params.BurnProfile != "" && dur <= 0 {
dur = resolveBurnPreset(t.params.BurnProfile).DurationSec
}
- archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
- DurationSec: dur,
- Loader: t.params.Loader,
- GPUIndices: t.params.GPUIndices,
- ExcludeGPUIndices: t.params.ExcludeGPUIndices,
- }, j.append)
+ archive, err = runNvidiaStressPackCtx(a, ctx, "", platform.NvidiaStressOptions{
+ DurationSec: dur,
+ Loader: t.params.Loader,
+ GPUIndices: t.params.GPUIndices,
+ ExcludeGPUIndices: t.params.ExcludeGPUIndices,
+ StaggerSeconds: boolToNvidiaStaggerSeconds(t.params.StaggerGPUStart, t.params.GPUIndices),
+ }, j.append)
case "memory":
if a == nil {
err = fmt.Errorf("app not configured")
diff --git a/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered b/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
new file mode 100755
index 0000000..414230a
--- /dev/null
+++ b/iso/overlay/usr/local/bin/bee-dcgmproftester-staggered
@@ -0,0 +1,161 @@
+#!/bin/sh
+# bee-dcgmproftester-staggered: run dcgmproftester (-t 1004) on several NVIDIA
+# GPUs, starting one GPU at a time with a fixed delay between starts.
+#
+# Every GPU gets its own dcgmproftester process pinned via CUDA_VISIBLE_DEVICES.
+# Earlier GPUs are given a longer run (duration + stagger * GPUs still to start)
+# so that all workers are due to finish at roughly the same time.
+#
+# Exit status: 0 when every worker succeeds, 1 when a worker fails or a
+# prerequisite is missing, 2 on usage errors.
+set -eu
+
+# Deliberately not named SECONDS: bash, ksh and zsh treat SECONDS as a
+# self-incrementing timer, so under such a /bin/sh every stagger sleep would
+# silently inflate the duration handed to the GPUs that start later.
+DURATION_SEC=300
+STAGGER_SECONDS=180
+DEVICES=""
+EXCLUDE=""
+TMP_DIR=""
+WORKER_PIDS=""
+
+usage() {
+    echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3]" >&2
+    exit 2
+}
+
+# require_uint NAME VALUE: reject anything that is not a plain decimal integer,
+# because the values are used in $(( )) arithmetic and [ -gt ] tests below.
+require_uint() {
+    case "$2" in
+        ''|*[!0-9]*) echo "$1 must be a non-negative integer, got '$2'" >&2; usage ;;
+    esac
+}
+
+# normalize_list CSV: strip whitespace, drop empty items, sort numerically and
+# de-duplicate; prints the result as a comma-separated list.
+normalize_list() {
+    echo "${1:-}" | tr ',' '\n' | sed 's/[[:space:]]//g' | awk 'NF' | sort -n | uniq | paste -sd, -
+}
+
+# contains_csv NEEDLE CSV: succeed when NEEDLE is one of the items in CSV.
+contains_csv() {
+    needle="$1"
+    haystack="${2:-}"
+    echo ",${haystack}," | grep -q ",${needle},"
+}
+
+# resolve_dcgmproftester: print the path of the first dcgmproftester binary
+# found in PATH (unversioned name first, then 13, 12, 11); fail if none exists.
+resolve_dcgmproftester() {
+    for candidate in dcgmproftester dcgmproftester13 dcgmproftester12 dcgmproftester11; do
+        if command -v "${candidate}" >/dev/null 2>&1; then
+            command -v "${candidate}"
+            return 0
+        fi
+    done
+    return 1
+}
+
+# cleanup: stop workers that are still running and remove the scratch logs.
+# Runs on every exit path, including the INT/TERM handlers installed below.
+cleanup() {
+    for worker_pid in ${WORKER_PIDS}; do
+        kill "${worker_pid}" 2>/dev/null || true
+    done
+    [ -z "${TMP_DIR}" ] || rm -rf "${TMP_DIR}"
+}
+
+while [ "$#" -gt 0 ]; do
+    case "$1" in
+        --seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+        --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
+        --devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
+        --exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
+        *) usage ;;
+    esac
+done
+
+require_uint "--seconds" "${DURATION_SEC}"
+require_uint "--stagger-seconds" "${STAGGER_SECONDS}"
+
+PROF=$(resolve_dcgmproftester) || { echo "dcgmproftester not found in PATH" >&2; exit 1; }
+ALL_DEVICES=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits 2>/dev/null | sed 's/[[:space:]]//g' | awk 'NF' | paste -sd, -)
+[ -n "${ALL_DEVICES}" ] || { echo "nvidia-smi found no NVIDIA GPUs" >&2; exit 1; }
+
+DEVICES=$(normalize_list "${DEVICES}")
+EXCLUDE=$(normalize_list "${EXCLUDE}")
+SELECTED="${DEVICES}"
+if [ -z "${SELECTED}" ]; then
+    SELECTED="${ALL_DEVICES}"
+fi
+
+FINAL=""
+for id in $(echo "${SELECTED}" | tr ',' ' '); do
+    [ -n "${id}" ] || continue
+    if contains_csv "${id}" "${EXCLUDE}"; then
+        continue
+    fi
+    if [ -z "${FINAL}" ]; then
+        FINAL="${id}"
+    else
+        FINAL="${FINAL},${id}"
+    fi
+done
+
+[ -n "${FINAL}" ] || { echo "no NVIDIA GPUs selected after filters" >&2; exit 1; }
+
+echo "loader=dcgmproftester-staggered"
+echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"
+
+# Enumerate CUDA devices in PCI bus order so the index placed in
+# CUDA_VISIBLE_DEVICES refers to the same GPU as the nvidia-smi index.
+export CUDA_DEVICE_ORDER="PCI_BUS_ID"
+
+trap cleanup EXIT
+trap 'exit 130' INT
+trap 'exit 143' TERM
+TMP_DIR=$(mktemp -d)
+
+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
+WORKERS=""
+for id in $(echo "${FINAL}" | tr ',' ' '); do
+    gpu_pos=$((gpu_pos + 1))
+    log="${TMP_DIR}/gpu-${id}.log"
+    # GPUs started earlier run longer to cover the stagger delay of the rest.
+    extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+    gpu_seconds=$(( DURATION_SEC + extra_sec ))
+    echo "starting gpu ${id} seconds=${gpu_seconds}"
+    CUDA_VISIBLE_DEVICES="${id}" "${PROF}" --no-dcgm-validation -t 1004 -d "${gpu_seconds}" >"${log}" 2>&1 &
+    pid=$!
+    WORKERS="${WORKERS} ${pid}:${id}:${log}"
+    WORKER_PIDS="${WORKER_PIDS} ${pid}"
+    # No delay after the last GPU: there is nothing left to ramp up.
+    if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+        sleep "${STAGGER_SECONDS}"
+    fi
+done
+
+status=0
+for spec in ${WORKERS}; do
+    # Each spec is "pid:gpu-index:log-path".
+    pid=${spec%%:*}
+    rest=${spec#*:}
+    id=${rest%%:*}
+    log=${rest#*:}
+    if wait "${pid}"; then
+        echo "gpu ${id} finished: OK"
+    else
+        rc=$?
+        echo "gpu ${id} finished: FAILED rc=${rc}"
+        status=1
+    fi
+    sed "s/^/[gpu ${id}] /" "${log}" || true
+done
+
+# Every worker has been reaped; make sure cleanup does not signal recycled PIDs.
+WORKER_PIDS=""
+exit "${status}"
diff --git a/iso/overlay/usr/local/bin/bee-gpu-burn b/iso/overlay/usr/local/bin/bee-gpu-burn
old mode 100644
new mode 100755
index 899e4af..a41be50
--- a/iso/overlay/usr/local/bin/bee-gpu-burn
+++ b/iso/overlay/usr/local/bin/bee-gpu-burn
@@ -2,13 +2,14 @@
set -eu
SECONDS=5
+STAGGER_SECONDS=0
SIZE_MB=0
DEVICES=""
EXCLUDE=""
WORKER="/usr/local/lib/bee/bee-gpu-burn-worker"
usage() {
- echo "usage: $0 [--seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
+ echo "usage: $0 [--seconds N] [--stagger-seconds N] [--size-mb N] [--devices 0,1] [--exclude 2,3]" >&2
exit 2
}
@@ -25,6 +26,7 @@ contains_csv() {
while [ "$#" -gt 0 ]; do
case "$1" in
--seconds|-t) [ "$#" -ge 2 ] || usage; SECONDS="$2"; shift 2 ;;
+ --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
--size-mb|-m) [ "$#" -ge 2 ] || usage; SIZE_MB="$2"; shift 2 ;;
--devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
--exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
@@ -61,14 +63,18 @@ done
echo "loader=bee-gpu-burn"
echo "selected_gpus=${FINAL}"
+echo "stagger_seconds=${STAGGER_SECONDS}"
export CUDA_DEVICE_ORDER="PCI_BUS_ID"
TMP_DIR=$(mktemp -d)
trap 'rm -rf "${TMP_DIR}"' EXIT INT TERM
+GPU_COUNT=$(echo "${FINAL}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
+gpu_pos=0
WORKERS=""
for id in $(echo "${FINAL}" | tr ',' ' '); do
+ gpu_pos=$((gpu_pos + 1))
log="${TMP_DIR}/gpu-${id}.log"
gpu_size_mb="${SIZE_MB}"
if [ "${gpu_size_mb}" -le 0 ] 2>/dev/null; then
@@ -79,11 +85,16 @@ for id in $(echo "${FINAL}" | tr ',' ' '); do
gpu_size_mb=512
fi
fi
- echo "starting gpu ${id} size=${gpu_size_mb}MB"
+ extra_sec=$(( STAGGER_SECONDS * (GPU_COUNT - gpu_pos) ))
+ gpu_seconds=$(( SECONDS + extra_sec ))
+ echo "starting gpu ${id} size=${gpu_size_mb}MB seconds=${gpu_seconds}"
CUDA_VISIBLE_DEVICES="${id}" \
- "${WORKER}" --device 0 --seconds "${SECONDS}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
+ "${WORKER}" --device 0 --seconds "${gpu_seconds}" --size-mb "${gpu_size_mb}" >"${log}" 2>&1 &
pid=$!
WORKERS="${WORKERS} ${pid}:${id}:${log}"
+ if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${gpu_pos}" -lt "${GPU_COUNT}" ]; then
+ sleep "${STAGGER_SECONDS}"
+ fi
done
status=0
diff --git a/iso/overlay/usr/local/bin/bee-john-gpu-stress b/iso/overlay/usr/local/bin/bee-john-gpu-stress
old mode 100644
new mode 100755
index 24ac6a5..f1040a5
--- a/iso/overlay/usr/local/bin/bee-john-gpu-stress
+++ b/iso/overlay/usr/local/bin/bee-john-gpu-stress
@@ -2,6 +2,7 @@
set -eu
DURATION_SEC=300
+STAGGER_SECONDS=0
DEVICES=""
EXCLUDE=""
FORMAT=""
@@ -12,7 +13,7 @@ export OCL_ICD_VENDORS="/etc/OpenCL/vendors"
export LD_LIBRARY_PATH="/usr/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
usage() {
- echo "usage: $0 [--seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
+ echo "usage: $0 [--seconds N] [--stagger-seconds N] [--devices 0,1] [--exclude 2,3] [--format name]" >&2
exit 2
}
@@ -118,6 +119,7 @@ ensure_opencl_ready() {
while [ "$#" -gt 0 ]; do
case "$1" in
--seconds|-t) [ "$#" -ge 2 ] || usage; DURATION_SEC="$2"; shift 2 ;;
+ --stagger-seconds) [ "$#" -ge 2 ] || usage; STAGGER_SECONDS="$2"; shift 2 ;;
--devices) [ "$#" -ge 2 ] || usage; DEVICES="$2"; shift 2 ;;
--exclude) [ "$#" -ge 2 ] || usage; EXCLUDE="$2"; shift 2 ;;
--format) [ "$#" -ge 2 ] || usage; FORMAT="$2"; shift 2 ;;
@@ -170,6 +172,7 @@ done
echo "loader=john"
echo "selected_gpus=${FINAL}"
echo "john_devices=${JOHN_DEVICES}"
+echo "stagger_seconds=${STAGGER_SECONDS}"
cd "${JOHN_DIR}"
@@ -232,14 +235,21 @@ trap cleanup EXIT INT TERM
echo "format=${CHOSEN_FORMAT}"
echo "target_seconds=${DURATION_SEC}"
echo "slice_seconds=${TEST_SLICE_SECONDS}"
-DEADLINE=$(( $(date +%s) + DURATION_SEC ))
+TOTAL_DEVICES=$(echo "${JOHN_DEVICES}" | tr ',' '\n' | awk 'NF' | wc -l | tr -d '[:space:]')
_first=1
+pos=0
for opencl_id in $(echo "${JOHN_DEVICES}" | tr ',' ' '); do
+ pos=$((pos + 1))
[ "${_first}" = "1" ] || sleep 3
_first=0
- run_john_loop "${opencl_id}" "${DEADLINE}" &
+ extra_sec=$(( STAGGER_SECONDS * (TOTAL_DEVICES - pos) ))
+ deadline=$(( $(date +%s) + DURATION_SEC + extra_sec ))
+ run_john_loop "${opencl_id}" "${deadline}" &
pid=$!
PIDS="${PIDS} ${pid}"
+ if [ "${STAGGER_SECONDS}" -gt 0 ] && [ "${pos}" -lt "${TOTAL_DEVICES}" ]; then
+ sleep "${STAGGER_SECONDS}"
+ fi
done
FAIL=0
for pid in ${PIDS}; do