diff --git a/iso/builder/Dockerfile b/iso/builder/Dockerfile
index 36a3de6..a7746bd 100644
--- a/iso/builder/Dockerfile
+++ b/iso/builder/Dockerfile
@@ -37,7 +37,8 @@ https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /" \
         > /etc/apt/sources.list.d/cuda.list \
     && apt-get update -qq \
     && apt-get install -y cuda-nvcc-12-8 \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && ln -sfn /usr/local/cuda-12.8 /usr/local/cuda
 
 RUN arch="$(dpkg --print-architecture)" \
     && case "$arch" in \
diff --git a/iso/builder/build-nccl-tests.sh b/iso/builder/build-nccl-tests.sh
index a4352a3..5abc6f6 100755
--- a/iso/builder/build-nccl-tests.sh
+++ b/iso/builder/build-nccl-tests.sh
@@ -101,12 +101,27 @@ SRC_DIR=$(ls -d nccl-tests-* 2>/dev/null | head -1)
 cd "$SRC_DIR"
 
 echo "=== building all_reduce_perf ==="
-# CUDA 12.8 supports Volta through Blackwell (sm_70..sm_100).
-GENCODE="-gencode=arch=compute_70,code=sm_70 \
-         -gencode=arch=compute_80,code=sm_80 \
-         -gencode=arch=compute_86,code=sm_86 \
-         -gencode=arch=compute_90,code=sm_90 \
-         -gencode=arch=compute_100,code=sm_100"
+# Pick gencode based on the actual nvcc version:
+#   CUDA 12.x — Volta..Blackwell (sm_70..sm_100)
+#   CUDA 13.x — Ampere..Blackwell (sm_80..sm_100); CUDA 13 removed offline
+#               compilation for Maxwell/Pascal/Volta only, so just sm_70 goes.
+# An unparsable nvcc version falls back to the CUDA 12 list.
+NVCC_MAJOR=$("$NVCC" --version 2>/dev/null | grep -oE 'release [0-9]+' | awk '{print $2}' | head -1)
+echo "nvcc major version: ${NVCC_MAJOR:-unknown}"
+if [ "${NVCC_MAJOR:-0}" -ge 13 ] 2>/dev/null; then
+    GENCODE="-gencode=arch=compute_80,code=sm_80 \
+             -gencode=arch=compute_86,code=sm_86 \
+             -gencode=arch=compute_90,code=sm_90 \
+             -gencode=arch=compute_100,code=sm_100"
+    echo "gencode: sm_80..sm_100 (CUDA 13+)"
+else
+    GENCODE="-gencode=arch=compute_70,code=sm_70 \
+             -gencode=arch=compute_80,code=sm_80 \
+             -gencode=arch=compute_86,code=sm_86 \
+             -gencode=arch=compute_90,code=sm_90 \
+             -gencode=arch=compute_100,code=sm_100"
+    echo "gencode: sm_70..sm_100 (CUDA 12)"
+fi
 make MPI=0 \
     NVCC="$NVCC" \
     CUDA_HOME="$CUDA_HOME" \