From 6937a4c6ec1e8302e6808b4040f0c9121a5f899d Mon Sep 17 00:00:00 2001
From: Michael Chus
Date: Wed, 8 Apr 2026 00:19:11 +0300
Subject: [PATCH] Fix pulse_test: run all GPUs simultaneously, not per-GPU

pulse_test is a PSU/power-delivery test, not a per-GPU compute test.
Its purpose is to synchronously pulse all GPUs between idle and full
load to create worst-case transient spikes on the power supply.
Running it one GPU at a time would produce a fraction of the PSU load
and miss any PSU-level failures.

- Move nvidia-pulse from nvidiaPerGPUTargets to nvidiaAllGPUTargets
  (same dispatch path as NCCL and NVBandwidth)
- Change card onclick to runNvidiaFabricValidate (all selected GPUs at
  once)
- Update card title to "NVIDIA PSU Pulse Test" and description to
  explain why synchronous multi-GPU execution is required

Co-Authored-By: Claude Sonnet 4.6
---
 audit/internal/webui/pages.go | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go
index 9b75d97..51b5602 100644
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -1112,11 +1112,11 @@ func renderValidate(opts HandlerOptions) string {
 )) + `` + `
` + - renderSATCard("nvidia-pulse", "NVIDIA Pulse Test", "runNvidiaValidateSet('nvidia-pulse')", "", renderValidateCardBody( + renderSATCard("nvidia-pulse", "NVIDIA PSU Pulse Test", "runNvidiaFabricValidate('nvidia-pulse')", "", renderValidateCardBody( inv.NVIDIA, - `Verifies GPU transient power response using DCGM pulse load. Pass/fail determined by DCGM.`, + `Tests power supply transient response by pulsing all GPUs simultaneously between idle and full load. Synchronous pulses across all GPUs create worst-case PSU load spikes — running per-GPU would miss PSU-level failures.`, `dcgmi diag pulse_test`, - `Skipped in Validate mode. Runs in Stress mode only. Runs one GPU at a time.

Only runs in Stress mode. Switch mode above to enable in Run All.

`, + `Skipped in Validate mode. Runs in Stress mode only. Runs all selected GPUs simultaneously — synchronous pulsing is required to stress the PSU.

Only runs in Stress mode. Switch mode above to enable in Run All.

`, )) + `
` + `
` + @@ -1321,8 +1321,9 @@ function runSATWithOverrides(target, overrides) { return enqueueSATTarget(target, overrides) .then(d => streamSATTask(d.task_id, title, false)); } -const nvidiaPerGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power', 'nvidia-pulse']; -const nvidiaAllGPUTargets = ['nvidia-interconnect', 'nvidia-bandwidth']; +const nvidiaPerGPUTargets = ['nvidia', 'nvidia-targeted-stress', 'nvidia-targeted-power']; +// pulse_test and fabric tests run on all selected GPUs simultaneously +const nvidiaAllGPUTargets = ['nvidia-pulse', 'nvidia-interconnect', 'nvidia-bandwidth']; function expandSATTarget(target) { if (nvidiaAllGPUTargets.indexOf(target) >= 0) { const selected = satSelectedGPUIndices();