feat: v3.10 GPU stress and NCCL burn updates

This commit is contained in:
Mikhail Chusavitin
2026-03-31 11:22:27 +03:00
parent 6dee8f3509
commit c850b39b01
12 changed files with 423 additions and 88 deletions

View File

@@ -246,6 +246,7 @@ rm -f \
"${OVERLAY_STAGE_DIR}/root/.ssh/authorized_keys" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" \
"${OVERLAY_STAGE_DIR}/usr/local/bin/john" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker" \
"${OVERLAY_STAGE_DIR}/usr/local/lib/bee/john" \
@@ -302,6 +303,7 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ] && [ -f "$GPU_BURN_WORKER_BIN" ]; then
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/lib/bee/bee-gpu-burn-worker"
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-burn" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-john-gpu-stress" 2>/dev/null || true
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-nccl-gpu-stress" 2>/dev/null || true
ln -sfn bee-gpu-burn "${OVERLAY_STAGE_DIR}/usr/local/bin/bee-gpu-stress"
fi
@@ -380,6 +382,7 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
NCCL_TESTS_CACHE="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
cp "${NCCL_TESTS_CACHE}/bin/all_reduce_perf" "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
chmod +x "${OVERLAY_STAGE_DIR}/usr/local/bin/all_reduce_perf"
cp "${NCCL_TESTS_CACHE}/lib/"* "${OVERLAY_STAGE_DIR}/usr/lib/" 2>/dev/null || true
echo "=== all_reduce_perf injected ==="
echo ""