From 4110dbf8a6c477d1b6bb6b50490ca439232e9d03 Mon Sep 17 00:00:00 2001
From: Mikhail Chusavitin
Date: Wed, 15 Apr 2026 17:10:23 +0300
Subject: [PATCH] Pre-download DCGM/fabricmanager debs on host to bypass chroot apt

The NVIDIA CUDA HTTPS apt source (developer.download.nvidia.com) may be
unreachable from inside the live-build container chroot, causing
'E: Unable to locate package datacenter-gpu-manager-4-cuda13'.

Add build-dcgm.sh that downloads DCGM and nvidia-fabricmanager .deb
packages on the build host (verifying SHA256 against Packages.gz) and
caches them in BEE_CACHE_DIR. build.sh (step 25-dcgm, nvidia only) copies
them into LB_DIR/config/packages.chroot/ before lb build, so live-build
creates a local apt repo from them. The chroot installs the packages from
the local repo without ever contacting the NVIDIA CUDA HTTPS source.

The packages.chroot shortcut only triggers when the requested versions are
already present, and other versions of the same packages are removed, so a
version bump is never masked by stale .deb files.

Co-Authored-By: Claude Sonnet 4.6
---
 iso/builder/build-dcgm.sh | 151 ++++++++++++++++++++++++++++++++++++++
 iso/builder/build.sh      |  12 ++++
 2 files changed, 163 insertions(+)
 create mode 100755 iso/builder/build-dcgm.sh

diff --git a/iso/builder/build-dcgm.sh b/iso/builder/build-dcgm.sh
new file mode 100755
index 0000000..10f9a61
--- /dev/null
+++ b/iso/builder/build-dcgm.sh
@@ -0,0 +1,151 @@
+#!/bin/sh
+# build-dcgm.sh — pre-download DCGM and nvidia-fabricmanager .deb packages
+# from the NVIDIA CUDA apt repository (Debian 12, x86_64) on the build host,
+# then place them into config/packages.chroot/ so live-build creates a local
+# apt repository inside the chroot. This avoids requiring the NVIDIA CUDA
+# HTTPS source to be reachable from within the live-build container chroot.
+#
+# Usage: build-dcgm.sh <dcgm-version> <fabricmanager-version> <lb-dir>
+# Env:   BEE_CACHE_DIR — download cache root (default: <parent of lb-dir>/cache)
+
+set -e
+
+usage() {
+    echo "usage: $0 <dcgm-version> <fabricmanager-version> <lb-dir>" >&2
+    exit 1
+}
+
+DCGM_VERSION="$1"
+FABRICMANAGER_VERSION="$2"
+LB_DIR="$3"
+
+[ -n "$DCGM_VERSION" ] || usage
+[ -n "$FABRICMANAGER_VERSION" ] || usage
+[ -n "$LB_DIR" ] || usage
+
+REPO_BASE="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64"
+CACHE_ROOT="${BEE_CACHE_DIR:-$(dirname "$LB_DIR")/cache}"
+DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/dcgm-downloads"
+PACKAGES_GZ="${DOWNLOAD_CACHE_DIR}/Packages.gz"
+PKG_CHROOT_DIR="${LB_DIR}/config/packages.chroot"
+
+# One "name=version" entry per line; the DCGM entries carry epoch 1.
+PACKAGES="
+datacenter-gpu-manager-4-core=1:${DCGM_VERSION}
+datacenter-gpu-manager-4-cuda13=1:${DCGM_VERSION}
+datacenter-gpu-manager-4-proprietary=1:${DCGM_VERSION}
+datacenter-gpu-manager-4-proprietary-cuda13=1:${DCGM_VERSION}
+nvidia-fabricmanager=${FABRICMANAGER_VERSION}
+"
+
+echo "=== DCGM ${DCGM_VERSION} / nvidia-fabricmanager ${FABRICMANAGER_VERSION} ==="
+
+# Skip the network entirely when packages.chroot already holds every package
+# at the requested version. The version is matched against the file name,
+# which by Debian convention omits the epoch (assumed to hold for the NVIDIA
+# repo; if it does not, the check simply misses and the download path runs).
+all_cached=1
+for entry in $PACKAGES; do
+    pkg="${entry%%=*}"
+    ver="${entry#*=}"
+    if ! ls "${PKG_CHROOT_DIR}/${pkg}_${ver#*:}_"*.deb >/dev/null 2>&1; then
+        all_cached=0
+        break
+    fi
+done
+if [ "$all_cached" = "1" ]; then
+    echo "=== DCGM packages already in packages.chroot, skipping download ==="
+    ls "${PKG_CHROOT_DIR}/datacenter-gpu-manager-4"*.deb "${PKG_CHROOT_DIR}/nvidia-fabricmanager_"*.deb 2>/dev/null || true
+    exit 0
+fi
+
+mkdir -p "${DOWNLOAD_CACHE_DIR}" "${PKG_CHROOT_DIR}"
+
+echo "=== downloading Packages.gz ==="
+wget -q -O "${PACKAGES_GZ}" "${REPO_BASE}/Packages.gz"
+
+# lookup_pkg <package> [<version>]
+# Print "<Filename> <SHA256>" of the first Packages.gz stanza matching the
+# package name (and the exact version, when one is given). Prints nothing
+# when no stanza matches.
+lookup_pkg() {
+    pkg="$1"
+    ver="$2"
+    gzip -dc "${PACKAGES_GZ}" | awk -v pkg="$pkg" -v ver="$ver" '
+        /^Package: /  { cur_pkg=$2; gsub(/\r/, "", cur_pkg) }
+        /^Version: /  { cur_ver=$2; gsub(/\r/, "", cur_ver) }
+        /^Filename: / { cur_file=$2; gsub(/\r/, "", cur_file) }
+        /^SHA256: /   { cur_sha=$2; gsub(/\r/, "", cur_sha) }
+        /^$/ {
+            if (cur_pkg == pkg && (ver == "" || cur_ver == ver)) {
+                print cur_file " " cur_sha
+                printed=1
+                exit
+            }
+            cur_pkg=""; cur_ver=""; cur_file=""; cur_sha=""
+        }
+        END {
+            # "exit" above still runs END, hence the printed guard; this also
+            # covers a final stanza without a trailing blank line.
+            if (!printed && cur_pkg == pkg && (ver == "" || cur_ver == ver)) {
+                print cur_file " " cur_sha
+            }
+        }'
+}
+
+# download_deb <package> <version>
+# Fetch the package into DOWNLOAD_CACHE_DIR (reusing a cached file whose
+# SHA256 matches Packages.gz), verify it, and install it into PKG_CHROOT_DIR
+# as the only version of that package. Exits the script on any failure.
+download_deb() {
+    pkg="$1"
+    ver="$2"
+
+    meta="$(lookup_pkg "$pkg" "$ver")"
+    [ -n "$meta" ] || { echo "ERROR: package not found in repo: ${pkg} ${ver}" >&2; exit 1; }
+
+    repo_file="$(printf '%s\n' "$meta" | awk '{print $1}')"
+    repo_sha="$(printf '%s\n' "$meta" | awk '{print $2}')"
+    [ -n "$repo_file" ] || { echo "ERROR: filename missing for ${pkg}" >&2; exit 1; }
+    [ -n "$repo_sha" ] || { echo "ERROR: sha256 missing for ${pkg}" >&2; exit 1; }
+
+    deb_name="$(basename "$repo_file")"
+    cached="${DOWNLOAD_CACHE_DIR}/${deb_name}"
+
+    if [ -f "$cached" ]; then
+        actual_sha="$(sha256sum "$cached" | awk '{print $1}')"
+        if [ "$actual_sha" = "$repo_sha" ]; then
+            echo "=== cached: ${deb_name} ==="
+        else
+            echo "=== removing stale: ${deb_name} (sha256 mismatch) ==="
+            rm -f "$cached"
+        fi
+    fi
+    if [ ! -f "$cached" ]; then
+        wget --show-progress -O "$cached" "${REPO_BASE}/${deb_name}"
+    fi
+
+    actual_sha="$(sha256sum "$cached" | awk '{print $1}')"
+    if [ "$actual_sha" != "$repo_sha" ]; then
+        echo "ERROR: sha256 mismatch for ${deb_name}" >&2
+        echo "  expected: $repo_sha" >&2
+        echo "  actual:   $actual_sha" >&2
+        rm -f "$cached"
+        exit 1
+    fi
+    echo "sha256 OK: ${deb_name}"
+
+    # Drop other versions of this package so the local apt repo built from
+    # packages.chroot offers exactly the requested one.
+    rm -f "${PKG_CHROOT_DIR}/${pkg}_"*.deb
+    cp -f "$cached" "${PKG_CHROOT_DIR}/${deb_name}"
+}
+
+for entry in $PACKAGES; do
+    pkg="${entry%%=*}"
+    ver="${entry#*=}"
+    download_deb "$pkg" "$ver"
+done
+
+echo "=== DCGM packages ready in ${PKG_CHROOT_DIR} ==="
+ls "${PKG_CHROOT_DIR}/datacenter-gpu-manager-4"*.deb "${PKG_CHROOT_DIR}/nvidia-fabricmanager_"*.deb 2>/dev/null || true
diff --git a/iso/builder/build.sh b/iso/builder/build.sh
index d86b246..47f8682 100755
--- a/iso/builder/build.sh
+++ b/iso/builder/build.sh
@@ -1296,6 +1296,18 @@ if [ -f "${LB_INCLUDES}/root/.ssh/authorized_keys" ]; then
     chmod 600 "${LB_INCLUDES}/root/.ssh/authorized_keys"
 fi
 
+# --- pre-download NVIDIA apt packages into config/packages.chroot ---
+# live-build creates a local apt repo from config/packages.chroot/*.deb so the
+# chroot can install them without reaching the NVIDIA CUDA HTTPS source.
+if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then
+    run_step "download DCGM ${DCGM_VERSION} / fabricmanager ${NVIDIA_FABRICMANAGER_VERSION} packages" \
+        "25-dcgm" \
+        sh "${BUILDER_DIR}/build-dcgm.sh" \
+        "${DCGM_VERSION}" \
+        "${NVIDIA_FABRICMANAGER_VERSION}" \
+        "${LB_DIR}"
+fi
+
 # --- build ISO using live-build ---
 echo ""
 echo "=== building ISO (variant: ${BUILD_VARIANT}) ==="