diff --git a/iso/builder/build-in-container.sh b/iso/builder/build-in-container.sh index 2f2cdbb..28b3710 100755 --- a/iso/builder/build-in-container.sh +++ b/iso/builder/build-in-container.sh @@ -41,15 +41,15 @@ while [ $# -gt 0 ]; do ;; *) echo "unknown arg: $1" >&2 - echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|amd|all]" >&2 + echo "usage: $0 [--cache-dir /path] [--rebuild-image] [--clean-build] [--authorized-keys /path/to/authorized_keys] [--variant nvidia|nvidia-legacy|amd|nogpu|all]" >&2 exit 1 ;; esac done case "$VARIANT" in - nvidia|amd|nogpu|all) ;; - *) echo "unknown variant: $VARIANT (expected nvidia, amd, nogpu, or all)" >&2; exit 1 ;; + nvidia|nvidia-legacy|amd|nogpu|all) ;; + *) echo "unknown variant: $VARIANT (expected nvidia, nvidia-legacy, amd, nogpu, or all)" >&2; exit 1 ;; esac if [ "$CLEAN_CACHE" = "1" ]; then @@ -61,8 +61,13 @@ if [ "$CLEAN_CACHE" = "1" ]; then "${CACHE_DIR:?}/lb-packages" echo "=== cleaning live-build work dirs ===" rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia" + rm -rf "${REPO_ROOT}/dist/live-build-work-nvidia-legacy" rm -rf "${REPO_ROOT}/dist/live-build-work-amd" rm -rf "${REPO_ROOT}/dist/live-build-work-nogpu" + rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia" + rm -rf "${REPO_ROOT}/dist/overlay-stage-nvidia-legacy" + rm -rf "${REPO_ROOT}/dist/overlay-stage-amd" + rm -rf "${REPO_ROOT}/dist/overlay-stage-nogpu" echo "=== caches cleared, proceeding with build ===" fi @@ -180,6 +185,9 @@ case "$VARIANT" in nvidia) run_variant nvidia ;; + nvidia-legacy) + run_variant nvidia-legacy + ;; amd) run_variant amd ;; @@ -188,6 +196,7 @@ case "$VARIANT" in ;; all) run_variant nvidia + run_variant nvidia-legacy run_variant amd run_variant nogpu ;; diff --git a/iso/builder/build-nvidia-module.sh b/iso/builder/build-nvidia-module.sh index bfc311e..15067d7 100644 --- a/iso/builder/build-nvidia-module.sh +++ b/iso/builder/build-nvidia-module.sh @@ -1,8 +1,10 @@ #!/bin/sh -# build-nvidia-module.sh — compile NVIDIA proprietary driver modules for Debian 12 +# build-nvidia-module.sh — compile NVIDIA kernel modules for Debian 12 # # Downloads the official NVIDIA .run installer, extracts kernel modules and -# userspace tools (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA. +# userspace tools (nvidia-smi, libnvidia-ml). Supports both: +# - open -> kernel-open/ sources from the .run installer +# - proprietary -> traditional proprietary kernel sources from the .run installer # # Output is cached in DIST_DIR/nvidia--/ so subsequent builds # are instant unless NVIDIA_DRIVER_VERSION or kernel version changes. @@ -17,10 +19,19 @@ set -e NVIDIA_VERSION="$1" DIST_DIR="$2" DEBIAN_KERNEL_ABI="$3" +NVIDIA_FLAVOR="${4:-open}" -[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 "; exit 1; } -[ -n "$DIST_DIR" ] || { echo "usage: $0 "; exit 1; } -[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 "; exit 1; } +[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 [open|proprietary]"; exit 1; } +[ -n "$DIST_DIR" ] || { echo "usage: $0 [open|proprietary]"; exit 1; } +[ -n "$DEBIAN_KERNEL_ABI" ] || { echo "usage: $0 [open|proprietary]"; exit 1; } + +case "$NVIDIA_FLAVOR" in + open|proprietary) ;; + *) + echo "unsupported NVIDIA flavor: $NVIDIA_FLAVOR (expected open or proprietary)" >&2 + exit 1 + ;; +esac KVER="${DEBIAN_KERNEL_ABI}-amd64" # On Debian, kernel headers are split into two packages: @@ -31,22 +42,13 @@ KVER="${DEBIAN_KERNEL_ABI}-amd64" KDIR_ARCH="/usr/src/linux-headers-${KVER}" KDIR_COMMON="/usr/src/linux-headers-${DEBIAN_KERNEL_ABI}-common" -echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ===" +echo "=== NVIDIA ${NVIDIA_VERSION} (${NVIDIA_FLAVOR}) for kernel ${KVER} ===" -if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then - echo "=== installing linux-headers-${KVER} ===" - DEBIAN_FRONTEND=noninteractive apt-get install -y \ - "linux-headers-${KVER}" \ - gcc make perl -fi -echo "kernel headers (arch): $KDIR_ARCH" -echo "kernel headers (common): $KDIR_COMMON" - -CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}" +CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_FLAVOR}-${NVIDIA_VERSION}-${KVER}" CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}" DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads" EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract" -CACHE_LAYOUT_VERSION="2" +CACHE_LAYOUT_VERSION="3" CACHE_LAYOUT_MARKER="${CACHE_DIR}/.cache-layout-v${CACHE_LAYOUT_VERSION}" if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \ && [ -f "$CACHE_LAYOUT_MARKER" ] \ @@ -57,6 +59,15 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \ exit 0 fi +if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then + echo "=== installing linux-headers-${KVER} ===" + DEBIAN_FRONTEND=noninteractive apt-get install -y \ + "linux-headers-${KVER}" \ + gcc make perl +fi +echo "kernel headers (arch): $KDIR_ARCH" +echo "kernel headers (common): $KDIR_COMMON" + # Download official NVIDIA .run installer with sha256 verification BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}" mkdir -p "$DOWNLOAD_CACHE_DIR" "$EXTRACT_CACHE_DIR" @@ -90,12 +101,18 @@ EXTRACT_DIR="${EXTRACT_CACHE_DIR}/nvidia-extract-${NVIDIA_VERSION}" rm -rf "$EXTRACT_DIR" "$RUN_FILE" --extract-only --target "$EXTRACT_DIR" -# Find kernel source directory (proprietary: kernel/, open: kernel-open/) +# Find kernel source directory for the selected flavor. KERNEL_SRC="" -for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do - [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break -done -[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found in:"; ls "$EXTRACT_DIR/"; exit 1; } +if [ "$NVIDIA_FLAVOR" = "open" ]; then + for d in "$EXTRACT_DIR/kernel-open" "$EXTRACT_DIR/kernel-open/"*; do + [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break + done +else + for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do + [ -f "$d/Makefile" ] && KERNEL_SRC="$d" && break + done +fi +[ -n "$KERNEL_SRC" ] || { echo "ERROR: kernel source dir not found for flavor ${NVIDIA_FLAVOR} in:"; ls "$EXTRACT_DIR/"; exit 1; } echo "kernel source: $KERNEL_SRC" # Build kernel modules diff --git a/iso/builder/build.sh b/iso/builder/build.sh index 8be451d..a9895ce 100755 --- a/iso/builder/build.sh +++ b/iso/builder/build.sh @@ -15,26 +15,46 @@ DIST_DIR="${REPO_ROOT}/dist" VENDOR_DIR="${REPO_ROOT}/iso/vendor" CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}" AUTH_KEYS="" +BUILD_VARIANT="nvidia" BEE_GPU_VENDOR="nvidia" +BEE_NVIDIA_MODULE_FLAVOR="open" # parse args while [ $# -gt 0 ]; do case "$1" in --authorized-keys) AUTH_KEYS="$2"; shift 2 ;; - --variant) BEE_GPU_VENDOR="$2"; shift 2 ;; + --variant) BUILD_VARIANT="$2"; shift 2 ;; *) echo "unknown arg: $1"; exit 1 ;; esac done -case "$BEE_GPU_VENDOR" in - nvidia|amd|nogpu) ;; - *) echo "unknown variant: $BEE_GPU_VENDOR (expected nvidia, amd, or nogpu)" >&2; exit 1 ;; +case "$BUILD_VARIANT" in + nvidia) + BEE_GPU_VENDOR="nvidia" + BEE_NVIDIA_MODULE_FLAVOR="open" + ;; + nvidia-legacy) + BEE_GPU_VENDOR="nvidia" + BEE_NVIDIA_MODULE_FLAVOR="proprietary" + ;; + amd) + BEE_GPU_VENDOR="amd" + BEE_NVIDIA_MODULE_FLAVOR="" + ;; + nogpu) + BEE_GPU_VENDOR="nogpu" + BEE_NVIDIA_MODULE_FLAVOR="" + ;; + *) + echo "unknown variant: $BUILD_VARIANT (expected nvidia, nvidia-legacy, amd, or nogpu)" >&2 + exit 1 + ;; esac -BUILD_WORK_DIR="${DIST_DIR}/live-build-work-${BEE_GPU_VENDOR}" -OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BEE_GPU_VENDOR}" +BUILD_WORK_DIR="${DIST_DIR}/live-build-work-${BUILD_VARIANT}" +OVERLAY_STAGE_DIR="${DIST_DIR}/overlay-stage-${BUILD_VARIANT}" -export BEE_GPU_VENDOR +export BEE_GPU_VENDOR BEE_NVIDIA_MODULE_FLAVOR BUILD_VARIANT . "${BUILDER_DIR}/VERSIONS" export PATH="$PATH:/usr/local/go/bin" @@ -627,7 +647,7 @@ recover_iso_memtest() { AUDIT_VERSION_EFFECTIVE="$(resolve_audit_version)" ISO_VERSION_EFFECTIVE="$(resolve_iso_version)" -ISO_BASENAME="easy-bee-${BEE_GPU_VENDOR}-v${ISO_VERSION_EFFECTIVE}-amd64" +ISO_BASENAME="easy-bee-${BUILD_VARIANT}-v${ISO_VERSION_EFFECTIVE}-amd64" # Versioned output directory: dist/easy-bee-v4.1/ — all final artefacts live here. OUT_DIR="${DIST_DIR}/easy-bee-v${ISO_VERSION_EFFECTIVE}" mkdir -p "${OUT_DIR}" @@ -801,7 +821,7 @@ if [ ! -d "/usr/src/linux-headers-${KVER}" ]; then apt-get install -y "linux-headers-${KVER}" fi -echo "=== bee ISO build (variant: ${BEE_GPU_VENDOR}) ===" +echo "=== bee ISO build (variant: ${BUILD_VARIANT}) ===" echo "Debian: ${DEBIAN_VERSION}, Kernel ABI: ${DEBIAN_KERNEL_ABI}, Go: ${GO_VERSION}" echo "Audit version: ${AUDIT_VERSION_EFFECTIVE}, ISO version: ${ISO_VERSION_EFFECTIVE}" echo "" @@ -871,7 +891,7 @@ if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then fi fi -echo "=== preparing staged overlay (${BEE_GPU_VENDOR}) ===" +echo "=== preparing staged overlay (${BUILD_VARIANT}) ===" mkdir -p "${BUILD_WORK_DIR}" "${OVERLAY_STAGE_DIR}" # Sync builder config into variant work dir, preserving lb cache. @@ -981,10 +1001,10 @@ done # --- NVIDIA kernel modules and userspace libs --- if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then run_step "build NVIDIA ${NVIDIA_DRIVER_VERSION} modules" "40-nvidia-module" \ - sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}" + sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${DEBIAN_KERNEL_ABI}" "${BEE_NVIDIA_MODULE_FLAVOR}" KVER="${DEBIAN_KERNEL_ABI}-amd64" - NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}" + NVIDIA_CACHE="${DIST_DIR}/nvidia-${BEE_NVIDIA_MODULE_FLAVOR}-${NVIDIA_DRIVER_VERSION}-${KVER}" # Inject .ko files into overlay at /usr/local/lib/nvidia/ OVERLAY_KMOD_DIR="${OVERLAY_STAGE_DIR}/usr/local/lib/nvidia" @@ -1055,13 +1075,14 @@ GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo u if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then GPU_VERSION_LINE="NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION} +NVIDIA_KERNEL_MODULES_FLAVOR=${BEE_NVIDIA_MODULE_FLAVOR} NCCL_VERSION=${NCCL_VERSION} NCCL_CUDA_VERSION=${NCCL_CUDA_VERSION} CUBLAS_VERSION=${CUBLAS_VERSION} CUDA_USERSPACE_VERSION=${CUDA_USERSPACE_VERSION} NCCL_TESTS_VERSION=${NCCL_TESTS_VERSION} JOHN_JUMBO_COMMIT=${JOHN_JUMBO_COMMIT}" - GPU_BUILD_INFO="nvidia:${NVIDIA_DRIVER_VERSION}" + GPU_BUILD_INFO="nvidia-${BEE_NVIDIA_MODULE_FLAVOR}:${NVIDIA_DRIVER_VERSION}" elif [ "$BEE_GPU_VENDOR" = "amd" ]; then GPU_VERSION_LINE="ROCM_VERSION=${ROCM_VERSION}" GPU_BUILD_INFO="rocm:${ROCM_VERSION}" @@ -1073,6 +1094,7 @@ fi cat > "${OVERLAY_STAGE_DIR}/etc/bee-release" < "${OVERLAY_STAGE_DIR}/etc/bee-gpu-vendor" +if [ "$BEE_GPU_VENDOR" = "nvidia" ]; then + echo "${BEE_NVIDIA_MODULE_FLAVOR}" > "${OVERLAY_STAGE_DIR}/etc/bee-nvidia-modules-flavor" +else + rm -f "${OVERLAY_STAGE_DIR}/etc/bee-nvidia-modules-flavor" +fi # Patch motd with build info BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} debian:${DEBIAN_VERSION} ${GPU_BUILD_INFO}" @@ -1153,10 +1180,10 @@ fi # --- build ISO using live-build --- echo "" -echo "=== building ISO (live-build, variant: ${BEE_GPU_VENDOR}) ===" +echo "=== building ISO (variant: ${BUILD_VARIANT}) ===" # Export for auto/config -BEE_GPU_VENDOR_UPPER="$(echo "${BEE_GPU_VENDOR}" | tr 'a-z' 'A-Z')" +BEE_GPU_VENDOR_UPPER="$(echo "${BUILD_VARIANT}" | tr 'a-z-' 'A-Z_')" export BEE_GPU_VENDOR_UPPER cd "${LB_DIR}" @@ -1191,7 +1218,7 @@ if [ -f "$ISO_RAW" ]; then validate_iso_nvidia_runtime "$ISO_RAW" cp "$ISO_RAW" "$ISO_OUT" echo "" - echo "=== done (${BEE_GPU_VENDOR}) ===" + echo "=== done (${BUILD_VARIANT}) ===" echo "ISO: $ISO_OUT" if command -v stat >/dev/null 2>&1; then ISO_SIZE_BYTES="$(stat -c '%s' "$ISO_OUT" 2>/dev/null || stat -f '%z' "$ISO_OUT")"