fix(iso): fix NCCL version to 2.28.9+cuda13.0, add sha256 verification

NVIDIA's CUDA repo for Debian 12 only ships NCCL packages built for cuda13.x,
not cuda12.x. Update to the latest available version: 2.28.9-1+cuda13.0.
Also pass the sha256 from VERSIONS into build-nccl.sh so that the downloaded
package is checked for integrity.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-19 12:04:03 +03:00
parent 8233c9ee85
commit 2abe2ce3aa
3 changed files with 22 additions and 8 deletions

View File

@@ -1,7 +1,8 @@
DEBIAN_VERSION=12 DEBIAN_VERSION=12
DEBIAN_KERNEL_ABI=6.1.0-43 DEBIAN_KERNEL_ABI=6.1.0-43
NVIDIA_DRIVER_VERSION=590.48.01 NVIDIA_DRIVER_VERSION=590.48.01
NCCL_VERSION=2.26.2-1 NCCL_VERSION=2.28.9-1
NCCL_CUDA_VERSION=12.8 NCCL_CUDA_VERSION=13.0
NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
GO_VERSION=1.24.0 GO_VERSION=1.24.0
AUDIT_VERSION=1.0.0 AUDIT_VERSION=1.0.0

View File

@@ -2,8 +2,7 @@
# build-nccl.sh — download and extract NCCL shared library for the LiveCD. # build-nccl.sh — download and extract NCCL shared library for the LiveCD.
# #
# Downloads libnccl2 .deb from NVIDIA's CUDA apt repository (Debian 12, x86_64) # Downloads libnccl2 .deb from NVIDIA's CUDA apt repository (Debian 12, x86_64)
# and extracts the shared library. Transport security via HTTPS; package integrity # and extracts the shared library. Package integrity verified via sha256.
# verified by sha256 from NVIDIA's Packages index.
# #
# Output is cached in DIST_DIR/nccl-<version>+cuda<cuda>/ so subsequent builds # Output is cached in DIST_DIR/nccl-<version>+cuda<cuda>/ so subsequent builds
# are instant unless NCCL_VERSION or NCCL_CUDA_VERSION changes. # are instant unless NCCL_VERSION or NCCL_CUDA_VERSION changes.
@@ -16,10 +15,11 @@ set -e
NCCL_VERSION="$1" NCCL_VERSION="$1"
NCCL_CUDA_VERSION="$2" NCCL_CUDA_VERSION="$2"
DIST_DIR="$3" DIST_DIR="$3"
EXPECTED_SHA256="$4"
[ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; } [ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
[ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; } [ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; } [ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
echo "=== NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ===" echo "=== NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
@@ -45,6 +45,19 @@ echo "=== downloading NCCL package ==="
echo "URL: ${PKG_URL}" echo "URL: ${PKG_URL}"
wget --show-progress -O "$DEB_FILE" "$PKG_URL" wget --show-progress -O "$DEB_FILE" "$PKG_URL"
# Verify the downloaded .deb against the pinned digest, when one was supplied.
# EXPECTED_SHA256 is optional: an empty value skips the check, so say so on
# stderr rather than skipping in silence.
if [ -n "$EXPECTED_SHA256" ]; then
    echo "=== verifying sha256 ==="
    # Hash via stdin so the output never carries a (possibly escaped) file
    # name, and test sha256sum's own status: a failed or missing tool must not
    # be reported as a digest "mismatch".
    SHA256_LINE=$(sha256sum < "$DEB_FILE") || {
        echo "ERROR: sha256sum failed for $DEB_FILE" >&2
        rm -f "$DEB_FILE"
        exit 1
    }
    ACTUAL_SHA256=${SHA256_LINE%% *}
    # Hex digests are case-insensitive; sha256sum prints lower case, so fold
    # the pinned value to lower case before comparing.
    WANTED_SHA256=$(printf '%s' "$EXPECTED_SHA256" | tr 'A-F' 'a-f')
    if [ "$ACTUAL_SHA256" != "$WANTED_SHA256" ]; then
        echo "ERROR: sha256 mismatch" >&2
        echo " expected: $EXPECTED_SHA256" >&2
        echo " actual: $ACTUAL_SHA256" >&2
        # Drop the bad download so it cannot be picked up by a later run.
        rm -f "$DEB_FILE"
        exit 1
    fi
    echo "sha256 OK"
else
    echo "WARNING: no sha256 supplied; package integrity NOT verified" >&2
fi
echo "=== extracting NCCL libraries ===" echo "=== extracting NCCL libraries ==="
EXTRACT_TMP=$(mktemp -d) EXTRACT_TMP=$(mktemp -d)
trap 'rm -rf "$EXTRACT_TMP"' EXIT INT TERM trap 'rm -rf "$EXTRACT_TMP"' EXIT INT TERM

View File

@@ -189,7 +189,7 @@ fi
# --- build / download NCCL --- # --- build / download NCCL ---
echo "" echo ""
echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ===" echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}" NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"