fix(iso): fix NCCL version to 2.28.9+cuda13.0, add sha256 verification

NVIDIA's CUDA repo for Debian 12 only has NCCL packages for cuda13.x,
not cuda12.x. Update to the latest available: 2.28.9-1+cuda13.0.
Also pass sha256 from VERSIONS into build-nccl.sh for integrity check.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-19 12:04:03 +03:00
parent 8233c9ee85
commit 2abe2ce3aa
3 changed files with 22 additions and 8 deletions

View File

@@ -1,7 +1,8 @@
DEBIAN_VERSION=12
DEBIAN_KERNEL_ABI=6.1.0-43
NVIDIA_DRIVER_VERSION=590.48.01
NCCL_VERSION=2.26.2-1
NCCL_CUDA_VERSION=12.8
NCCL_VERSION=2.28.9-1
NCCL_CUDA_VERSION=13.0
NCCL_SHA256=2e6faafd2c19cffc7738d9283976a3200ea9db9895907f337f0c7e5a25563186
GO_VERSION=1.24.0
AUDIT_VERSION=1.0.0

View File

@@ -2,8 +2,7 @@
# build-nccl.sh — download and extract NCCL shared library for the LiveCD.
#
# Downloads libnccl2 .deb from NVIDIA's CUDA apt repository (Debian 12, x86_64)
# and extracts the shared library. Transport security via HTTPS; package integrity
# verified by sha256 from NVIDIA's Packages index.
# and extracts the shared library. Package integrity verified via sha256.
#
# Output is cached in DIST_DIR/nccl-<version>+cuda<cuda>/ so subsequent builds
# are instant unless NCCL_VERSION or NCCL_CUDA_VERSION changes.
@@ -16,10 +15,11 @@ set -e
NCCL_VERSION="$1"
NCCL_CUDA_VERSION="$2"
DIST_DIR="$3"
EXPECTED_SHA256="$4"
[ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
[ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir>"; exit 1; }
[ -n "$NCCL_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
[ -n "$NCCL_CUDA_VERSION" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nccl-version> <cuda-version> <dist-dir> [sha256]"; exit 1; }
echo "=== NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
@@ -45,6 +45,19 @@ echo "=== downloading NCCL package ==="
echo "URL: ${PKG_URL}"
wget --show-progress -O "$DEB_FILE" "$PKG_URL"
if [ -n "$EXPECTED_SHA256" ]; then
echo "=== verifying sha256 ==="
ACTUAL_SHA256=$(sha256sum "$DEB_FILE" | awk '{print $1}')
if [ "$ACTUAL_SHA256" != "$EXPECTED_SHA256" ]; then
echo "ERROR: sha256 mismatch"
echo " expected: $EXPECTED_SHA256"
echo " actual: $ACTUAL_SHA256"
rm -f "$DEB_FILE"
exit 1
fi
echo "sha256 OK"
fi
echo "=== extracting NCCL libraries ==="
EXTRACT_TMP=$(mktemp -d)
trap 'rm -rf "$EXTRACT_TMP"' EXIT INT TERM

View File

@@ -189,7 +189,7 @@ fi
# --- build / download NCCL ---
echo ""
echo "=== downloading NCCL ${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION} ==="
sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}"
sh "${BUILDER_DIR}/build-nccl.sh" "${NCCL_VERSION}" "${NCCL_CUDA_VERSION}" "${DIST_DIR}" "${NCCL_SHA256:-}"
NCCL_CACHE="${DIST_DIR}/nccl-${NCCL_VERSION}+cuda${NCCL_CUDA_VERSION}"