#!/bin/sh
# build-nccl-tests.sh — build nccl-tests all_reduce_perf for the LiveCD.
#
# Usage:
#   build-nccl-tests.sh <nccl-tests-version> <nccl-version> <nccl-cuda-version> <dist-dir>
#
# Downloads nccl-tests source from GitHub, downloads libnccl-dev .deb for
# nccl.h, and compiles all_reduce_perf with nvcc.
#
# Output is cached in DIST_DIR/nccl-tests-<nccl-tests-version>/ so subsequent
# builds are instant unless NCCL_TESTS_VERSION changes.
#
# Environment:
#   BEE_CACHE_DIR       optional; root for downloaded archives
#                       (default: DIST_DIR/cache)
#   NCCL_TESTS_GENCODE  optional; overrides the default -gencode flag list
#
# Output layout:
#   $CACHE_DIR/bin/all_reduce_perf

set -eu

# Print an error to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the usage line to stderr and abort.
usage() {
  printf 'usage: %s <nccl-tests-version> <nccl-version> <nccl-cuda-version> <dist-dir>\n' "$0" >&2
  exit 1
}

# Echo $1 as an absolute path. The script changes directory several times, so
# every path derived from user input must not depend on the current directory.
abspath() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$(pwd)/$1" ;;
  esac
}

# Temp paths are initialised before the trap is installed so cleanup never
# expands an unset variable.
NCCL_INCLUDE_TMP=""
BUILD_TMP=""
FETCH_TMP=""

# Remove temporary directories and any half-finished download.
cleanup() {
  # Step out of the temp dirs before deleting them.
  cd / 2>/dev/null || true
  [ -z "$FETCH_TMP" ] || rm -f "$FETCH_TMP"
  [ -z "$NCCL_INCLUDE_TMP" ] || rm -rf "$NCCL_INCLUDE_TMP"
  [ -z "$BUILD_TMP" ] || rm -rf "$BUILD_TMP"
}
trap cleanup EXIT
# On a signal, exit explicitly (which runs the EXIT trap); otherwise the
# script would resume after the handler with its temp dirs already gone.
trap 'exit 130' INT
trap 'exit 143' TERM

# Download URL $1 to file $2. The data is written to a sibling ".part" file
# and renamed only on success, so an aborted download never leaves a truncated
# file that later runs would mistake for a valid cached archive.
fetch() {
  FETCH_TMP="$2.part.$$"
  if wget --show-progress -O "$FETCH_TMP" "$1"; then
    mv -f "$FETCH_TMP" "$2"
    FETCH_TMP=""
  else
    rm -f "$FETCH_TMP"
    FETCH_TMP=""
    die "ERROR: download failed: $1"
  fi
}

NCCL_TESTS_VERSION="${1:-}"
NCCL_VERSION="${2:-}"
NCCL_CUDA_VERSION="${3:-}"
DIST_DIR="${4:-}"

[ -n "$NCCL_TESTS_VERSION" ] || usage
[ -n "$NCCL_VERSION" ] || usage
[ -n "$NCCL_CUDA_VERSION" ] || usage
[ -n "$DIST_DIR" ] || usage

echo "=== nccl-tests ${NCCL_TESTS_VERSION} ==="

DIST_DIR="$(abspath "$DIST_DIR")"
CACHE_DIR="${DIST_DIR}/nccl-tests-${NCCL_TESTS_VERSION}"
CACHE_ROOT="$(abspath "${BEE_CACHE_DIR:-${DIST_DIR}/cache}")"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-tests-downloads"

if [ -f "${CACHE_DIR}/bin/all_reduce_perf" ]; then
  echo "=== nccl-tests cached, skipping build ==="
  echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
  exit 0
fi

# Resolve nvcc to an absolute path. A bare "nvcc" found on PATH is expanded
# with `command -v`, because CUDA_HOME is derived from the path below and a
# bare name would yield ".". The 13.x locations are tried last so the
# resolution order for existing 12.x installs is unchanged.
NVCC=""
for candidate in nvcc \
  /usr/local/cuda-12.8/bin/nvcc \
  /usr/local/cuda-12/bin/nvcc \
  /usr/local/cuda/bin/nvcc \
  /usr/local/cuda-13.0/bin/nvcc \
  /usr/local/cuda-13/bin/nvcc; do
  case "$candidate" in
    */*)
      if [ -x "$candidate" ]; then
        NVCC="$candidate"
        break
      fi
      ;;
    *)
      if resolved="$(command -v "$candidate" 2>/dev/null)"; then
        case "$resolved" in
          /*)
            NVCC="$resolved"
            break
            ;;
        esac
      fi
      ;;
  esac
done
[ -n "$NVCC" ] || die "ERROR: nvcc not found — install cuda-nvcc-13-0"
echo "nvcc: $NVCC"

# Determine CUDA_HOME from nvcc location (<CUDA_HOME>/bin/nvcc).
CUDA_HOME="$(dirname "$(dirname "$NVCC")")"
echo "CUDA_HOME: $CUDA_HOME"

# Download libnccl-dev for nccl.h
REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64"
DEV_PKG="libnccl-dev_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
DEV_URL="${REPO_BASE}/${DEV_PKG}"

mkdir -p "$DOWNLOAD_CACHE_DIR"
DEV_DEB="${DOWNLOAD_CACHE_DIR}/${DEV_PKG}"

if [ ! -f "$DEV_DEB" ]; then
  echo "=== downloading libnccl-dev ==="
  fetch "$DEV_URL" "$DEV_DEB"
fi

# Extract nccl.h from libnccl-dev
NCCL_INCLUDE_TMP="$(mktemp -d)"
cd "$NCCL_INCLUDE_TMP"
ar x "$DEV_DEB"

# Pick the first data.tar* member of the .deb; its compression suffix varies.
DATA_TAR=""
for member in data.tar*; do
  if [ -f "$member" ]; then
    DATA_TAR="$member"
    break
  fi
done
[ -n "$DATA_TAR" ] || die "ERROR: data.tar.* not found in libnccl-dev .deb"
tar xf "$DATA_TAR"

# nccl.h lands in ./usr/include/ or ./usr/local/cuda-X.Y/targets/.../include/
NCCL_H="$(find . -name 'nccl.h' -type f 2>/dev/null | head -1)"
[ -n "$NCCL_H" ] || die "ERROR: nccl.h not found in libnccl-dev package"
NCCL_INCLUDE_DIR="$(pwd)/$(dirname "$NCCL_H")"
echo "nccl.h: $NCCL_H"

# Download nccl-tests source
SRC_TAR="${DOWNLOAD_CACHE_DIR}/nccl-tests-v${NCCL_TESTS_VERSION}.tar.gz"
SRC_URL="https://github.com/NVIDIA/nccl-tests/archive/refs/tags/v${NCCL_TESTS_VERSION}.tar.gz"

if [ ! -f "$SRC_TAR" ]; then
  echo "=== downloading nccl-tests v${NCCL_TESTS_VERSION} ==="
  fetch "$SRC_URL" "$SRC_TAR"
fi

# Extract and build
BUILD_TMP="$(mktemp -d)"
cd "$BUILD_TMP"
tar xf "$SRC_TAR"

# The archive unpacks into a single nccl-tests-* directory.
SRC_DIR=""
for entry in nccl-tests-*; do
  if [ -d "$entry" ]; then
    SRC_DIR="$entry"
    break
  fi
done
[ -n "$SRC_DIR" ] || die "ERROR: source directory not found in archive"
cd "$SRC_DIR"

echo "=== building all_reduce_perf ==="

# Default targets: Volta through Blackwell (sm_70..sm_100), as supported by
# CUDA 12.8. NOTE(review): other toolkit releases may reject some of these
# targets (e.g. sm_70 on newer majors) — confirm against the installed nvcc,
# or override the list with NCCL_TESTS_GENCODE.
DEFAULT_GENCODE="-gencode=arch=compute_70,code=sm_70"
DEFAULT_GENCODE="$DEFAULT_GENCODE -gencode=arch=compute_80,code=sm_80"
DEFAULT_GENCODE="$DEFAULT_GENCODE -gencode=arch=compute_86,code=sm_86"
DEFAULT_GENCODE="$DEFAULT_GENCODE -gencode=arch=compute_90,code=sm_90"
DEFAULT_GENCODE="$DEFAULT_GENCODE -gencode=arch=compute_100,code=sm_100"
GENCODE="${NCCL_TESTS_GENCODE:-$DEFAULT_GENCODE}"

# NCCL_HOME is the parent of the directory holding nccl.h.
make MPI=0 \
  NVCC="$NVCC" \
  CUDA_HOME="$CUDA_HOME" \
  NCCL_HOME="$NCCL_INCLUDE_DIR/.." \
  NVCC_GENCODE="$GENCODE" \
  BUILDDIR="./build"

[ -f "./build/all_reduce_perf" ] || die "ERROR: all_reduce_perf not found after build"

# Stage the binary under a temporary name and rename it into place, so an
# interrupted copy cannot leave a truncated file that the cache check at the
# top of the script would accept.
mkdir -p "${CACHE_DIR}/bin"
STAGED_BIN="${CACHE_DIR}/bin/all_reduce_perf.tmp.$$"
cp "./build/all_reduce_perf" "$STAGED_BIN"
chmod +x "$STAGED_BIN"
mv -f "$STAGED_BIN" "${CACHE_DIR}/bin/all_reduce_perf"

echo "=== nccl-tests build complete ==="
echo "binary: ${CACHE_DIR}/bin/all_reduce_perf"
ls -lh "${CACHE_DIR}/bin/all_reduce_perf"