#!/bin/sh
# build-nccl.sh — download and extract NCCL shared library for the LiveCD.
#
# Downloads libnccl2 .deb from NVIDIA's CUDA apt repository (Debian 12, x86_64)
# and extracts the shared library. Package integrity is verified via sha256
# when an expected checksum is passed as the fourth argument.
#
# Usage:
#   build-nccl.sh <nccl-version> <cuda-version> <dist-dir> [sha256]
#
# Environment:
#   BEE_CACHE_DIR — optional root of the download cache
#                   (default: <dist-dir>/cache)
#
# Output is cached in DIST_DIR/nccl-<nccl-version>+cuda<cuda-version>/ so
# subsequent builds are instant unless NCCL_VERSION or NCCL_CUDA_VERSION
# changes.
#
# Output layout:
#   $CACHE_DIR/lib/ — libnccl.so.* files
#
# Exit status: 0 on success (or cache hit), non-zero on any failure.

set -e

# Print an error message to stderr and abort.
die() {
    echo "ERROR: $*" >&2
    exit 1
}

# Print the command-line synopsis to stderr and abort.
usage() {
    echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]" >&2
    exit 1
}

# Succeed if directory $1 contains at least one libnccl.so.* entry.
# Uses a glob instead of parsing `ls`; an unmatched glob stays literal and
# fails the -e test, so a missing or empty directory returns 1.
has_nccl_libs() {
    for _lib in "$1"/libnccl.so.*; do
        [ -e "$_lib" ] && return 0
    done
    return 1
}

# Print the lower-case hex sha256 digest of file $1.
sha256_of() {
    sha256sum "$1" | awk '{print $1}'
}

NCCL_VERSION="$1"
NCCL_CUDA_VERSION="$2"
DIST_DIR="$3"
EXPECTED_SHA256="$4"

[ -n "$NCCL_VERSION" ] || usage
[ -n "$NCCL_CUDA_VERSION" ] || usage
[ -n "$DIST_DIR" ] || usage

echo "=== NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="

CACHE_DIR="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nccl-downloads"

if has_nccl_libs "$CACHE_DIR/lib"; then
    echo "=== NCCL cached, skipping download ==="
    echo "cache: $CACHE_DIR"
    echo "libs: $(ls "$CACHE_DIR/lib/" | wc -l) files"
    exit 0
fi

REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64"
PKG_NAME="libnccl2_${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}_amd64.deb"
PKG_URL="${REPO_BASE}/${PKG_NAME}"

mkdir -p "$DOWNLOAD_CACHE_DIR"
# Resolve to an absolute path: `ar` below runs from inside the temp directory,
# so a relative DIST_DIR / BEE_CACHE_DIR would otherwise point at nothing.
DOWNLOAD_CACHE_DIR=$(CDPATH= cd -- "$DOWNLOAD_CACHE_DIR" && pwd)
DEB_FILE="${DOWNLOAD_CACHE_DIR}/${PKG_NAME}"
DEB_PART="${DEB_FILE}.part"

echo "=== downloading NCCL package ==="
echo "URL: ${PKG_URL}"
# Download under a temporary name so an interrupted or failed transfer never
# leaves a truncated package under the final cache name.
if ! wget --show-progress -O "$DEB_PART" "$PKG_URL"; then
    rm -f -- "$DEB_PART"
    die "download failed: ${PKG_URL}"
fi

if [ -n "$EXPECTED_SHA256" ]; then
    echo "=== verifying sha256 ==="
    # sha256sum prints lower-case hex; accept an upper-case expected value too.
    EXPECTED_SHA256=$(printf '%s' "$EXPECTED_SHA256" | tr '[:upper:]' '[:lower:]')
    ACTUAL_SHA256=$(sha256_of "$DEB_PART")
    if [ "$ACTUAL_SHA256" != "$EXPECTED_SHA256" ]; then
        echo "ERROR: sha256 mismatch" >&2
        echo "  expected: $EXPECTED_SHA256" >&2
        echo "  actual:   $ACTUAL_SHA256" >&2
        rm -f -- "$DEB_PART" "$DEB_FILE"
        exit 1
    fi
    echo "sha256 OK"
fi

mv -f -- "$DEB_PART" "$DEB_FILE"

echo "=== extracting NCCL libraries ==="
EXTRACT_TMP=$(mktemp -d)
cleanup() {
    rm -rf -- "$EXTRACT_TMP"
}
# Clean up on every exit path. INT/TERM must exit explicitly, otherwise the
# script would carry on after the handler with its temp directory removed;
# the explicit exit also fires the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# .deb is an ar archive; data.tar.* contains the actual files.
# `ar x` can only unpack into the current directory, so run it in a subshell
# and leave the main shell's working directory (and thus relative paths) alone.
(cd "$EXTRACT_TMP" && ar x "$DEB_FILE")

# Extract data tarball (xz, gz, or zst)
DATA_TAR=""
for _candidate in "$EXTRACT_TMP"/data.tar.*; do
    if [ -f "$_candidate" ]; then
        DATA_TAR="$_candidate"
        break
    fi
done
[ -n "$DATA_TAR" ] || die "data.tar.* not found in .deb"
tar -xf "$DATA_TAR" -C "$EXTRACT_TMP"

# Library lands in ./usr/lib/x86_64-linux-gnu/ or ./usr/lib/
mkdir -p "$CACHE_DIR/lib"
# -exec ... {} + hands paths to cp without word-splitting and makes find exit
# non-zero if cp fails, so `set -e` still aborts on a copy error.
# Inside the helper shell, $0 is the destination directory and "$@" the files.
find "$EXTRACT_TMP" -name 'libnccl.so.*' ! -type d \
    -exec sh -c 'cp -- "$@" "$0"' "$CACHE_DIR/lib/" {} +
has_nccl_libs "$CACHE_DIR/lib" || die "libnccl.so.* not found in package"

# Create soname symlinks:
#   libnccl.so.2 -> libnccl.so.<full-version>, libnccl.so -> libnccl.so.2
# The pattern needs at least two characters after "libnccl.so.", so it picks
# the fully versioned file rather than a bare libnccl.so.2.
versioned=""
for _candidate in "$CACHE_DIR/lib/libnccl.so."[0-9][0-9.]*; do
    if [ -e "$_candidate" ]; then
        versioned="$_candidate"
        break
    fi
done
if [ -n "$versioned" ]; then
    base=$(basename "$versioned")
    # Best effort: a failed symlink is reported but does not fail the build.
    ln -sf "$base" "$CACHE_DIR/lib/libnccl.so.2" ||
        echo "WARNING: could not create libnccl.so.2 symlink" >&2
    ln -sf "libnccl.so.2" "$CACHE_DIR/lib/libnccl.so" ||
        echo "WARNING: could not create libnccl.so symlink" >&2
fi

echo "=== NCCL extraction complete ==="
echo "cache: $CACHE_DIR"
ls -lh "$CACHE_DIR/lib/"