fix(nccl-tests): upgrade to cuda-nvcc-12-8, add sm_100 (Blackwell B100/B200)

This commit is contained in:
2026-03-26 23:51:26 +03:00
parent 896bdb6ee8
commit 83bbc8a1bc
3 changed files with 7 additions and 6 deletions

View File

@@ -36,7 +36,7 @@ RUN wget -qO /tmp/cuda-keyring.gpg \
 https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /" \
 > /etc/apt/sources.list.d/cuda.list \
 && apt-get update -qq \
-&& apt-get install -y cuda-nvcc-12-6 \
+&& apt-get install -y cuda-nvcc-12-8 \
 && rm -rf /var/lib/apt/lists/*
 RUN arch="$(dpkg --print-architecture)" \

View File

@@ -5,7 +5,7 @@ NCCL_VERSION=2.28.9-1
 NCCL_CUDA_VERSION=13.0
 NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
 NCCL_TESTS_VERSION=2.13.10
-NVCC_VERSION=12.6
+NVCC_VERSION=12.8
 CUBLAS_VERSION=13.0.2.14-1
 CUDA_USERSPACE_VERSION=13.0.96-1
 GO_VERSION=1.24.0

View File

@@ -34,9 +34,9 @@ if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ]; then
 exit 0
 fi
-# Resolve nvcc path (cuda-nvcc-12-6 installs to /usr/local/cuda-12.6/bin/nvcc)
+# Resolve nvcc path (cuda-nvcc-12-8 installs to /usr/local/cuda-12.8/bin/nvcc)
 NVCC=""
-for candidate in nvcc /usr/local/cuda-12.6/bin/nvcc /usr/local/cuda-12/bin/nvcc /usr/local/cuda/bin/nvcc; do
+for candidate in nvcc /usr/local/cuda-12.8/bin/nvcc /usr/local/cuda-12/bin/nvcc /usr/local/cuda/bin/nvcc; do
 if command -v "$candidate" >/dev/null 2>&1 || [ -x "$candidate" ]; then
 NVCC="$candidate"
 break
@@ -96,11 +96,12 @@ SRC_DIR=$(ls -d nccl-tests-* 2>/dev/null | head -1)
 cd "$SRC_DIR"
 echo "=== building all_reduce_perf ==="
-# CUDA 12.6 supports Volta through Hopper (sm_70..sm_90).
+# CUDA 12.8 supports Volta through Blackwell (sm_70..sm_100).
 GENCODE="-gencode=arch=compute_70,code=sm_70 \
 -gencode=arch=compute_80,code=sm_80 \
 -gencode=arch=compute_86,code=sm_86 \
--gencode=arch=compute_90,code=sm_90"
+-gencode=arch=compute_90,code=sm_90 \
+-gencode=arch=compute_100,code=sm_100"
 make MPI=0 \
 NVCC="$NVCC" \
 CUDA_HOME="$CUDA_HOME" \