#!/bin/sh
# build-nvidia-module.sh — compile NVIDIA proprietary driver modules for Debian 12
#
# Downloads the official NVIDIA .run installer, extracts kernel modules and
# userspace tools (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA.
#
# Usage:
#   build-nvidia-module.sh <nvidia-version> <dist-dir> <debian-kernel-abi>
#
# Environment:
#   BEE_CACHE_DIR — optional root for the download/extract caches
#                   (default: <dist-dir>/cache)
#
# Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
# are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
#
# Output layout:
#   $CACHE_DIR/modules/  — nvidia*.ko files
#   $CACHE_DIR/bin/      — nvidia-smi, nvidia-bug-report.sh (if shipped)
#   $CACHE_DIR/lib/      — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
#   $CACHE_DIR/firmware/ — GSP firmware copied from the installer (if shipped)
#
# The script never changes its own working directory (every `cd` happens in a
# subshell), so relative DIST_DIR / BEE_CACHE_DIR values keep working.

set -eu

NVIDIA_VERSION="${1:-}"
DIST_DIR="${2:-}"
DEBIAN_KERNEL_ABI="${3:-}"

# Print an error to stderr and abort.
die() {
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}

# Print the usage line to stderr and abort.
usage() {
    echo "usage: $0 <nvidia-version> <dist-dir> <debian-kernel-abi>" >&2
    exit 1
}

# Print how many of the given paths exist. Called with an unquoted-glob
# argument list, this counts matches without parsing `ls` (an unmatched glob
# stays literal, does not exist, and is therefore not counted).
count_existing() {
    _n=0
    for _p in "$@"; do
        if [ -e "$_p" ]; then
            _n=$((_n + 1))
        fi
    done
    echo "$_n"
}

[ -n "$NVIDIA_VERSION" ] || usage
[ -n "$DIST_DIR" ] || usage
[ -n "$DEBIAN_KERNEL_ABI" ] || usage

KVER="${DEBIAN_KERNEL_ABI}-amd64"

# On Debian, kernel headers are split into two packages:
#   linux-headers-<ABI>-amd64   — arch-specific (generated, Makefile)
#   linux-headers-<ABI>-common  — common source headers (linux/, asm-generic/, etc.)
# NVIDIA conftest needs SYSSRC pointing to common (for source headers like linux/mm.h)
# and SYSOUT pointing to amd64 (for generated headers like autoconf.h, asm/).
KDIR_ARCH="/usr/src/linux-headers-${KVER}"
KDIR_COMMON="/usr/src/linux-headers-${DEBIAN_KERNEL_ABI}-common"

echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="

if [ ! -d "$KDIR_ARCH" ] || [ ! -d "$KDIR_COMMON" ]; then
    echo "=== installing linux-headers-${KVER} ==="
    # Only the arch package is requested explicitly; the -common package is
    # presumably pulled in as a dependency — verified right below.
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        "linux-headers-${KVER}" \
        gcc make perl
fi
[ -d "$KDIR_ARCH" ] || die "kernel headers not found: $KDIR_ARCH"
[ -d "$KDIR_COMMON" ] || die "kernel headers not found: $KDIR_COMMON"
echo "kernel headers (arch):   $KDIR_ARCH"
echo "kernel headers (common): $KDIR_COMMON"

CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"

# Cache hit requires actual module files, not just the directory: a cache left
# behind by an interrupted run must not be mistaken for a finished build.
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
    cached_ko=$(count_existing "$CACHE_DIR/modules/"*.ko)
    if [ "$cached_ko" -gt 0 ]; then
        echo "=== NVIDIA cached, skipping build ==="
        echo "cache: $CACHE_DIR"
        echo "modules: $cached_ko .ko files"
        exit 0
    fi
    echo "=== NVIDIA cache at $CACHE_DIR has no .ko files, rebuilding ==="
fi

# Download official NVIDIA .run installer with sha256 verification
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
mkdir -p "$DOWNLOAD_CACHE_DIR" "$EXTRACT_CACHE_DIR"

RUN_NAME="NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
SHA_NAME="${RUN_NAME}.sha256sum"
RUN_FILE="${DOWNLOAD_CACHE_DIR}/${RUN_NAME}"
SHA_FILE="${DOWNLOAD_CACHE_DIR}/${SHA_NAME}"

# Return 0 when both the installer and its checksum file are present,
# non-empty, and the checksum matches. Runs sha256sum from inside the download
# directory (in a subshell) because the checksum file is checked against a
# file name relative to that directory.
verify_run() {
    [ -s "$SHA_FILE" ] || return 1
    [ -s "$RUN_FILE" ] || return 1
    (cd "$DOWNLOAD_CACHE_DIR" && sha256sum --status -c "$SHA_NAME") 2>/dev/null
}

if ! verify_run; then
    rm -f -- "$RUN_FILE" "$SHA_FILE"
    echo "=== downloading NVIDIA ${NVIDIA_VERSION} installer ==="
    wget -q -O "$SHA_FILE" "${BASE_URL}/${SHA_NAME}"
    echo "sha256: $(cat "$SHA_FILE")"
    wget --show-progress -O "$RUN_FILE" "${BASE_URL}/${RUN_NAME}"
    echo "=== verifying sha256 ==="
    if ! (cd "$DOWNLOAD_CACHE_DIR" && sha256sum -c "$SHA_NAME"); then
        rm -f -- "$RUN_FILE"
        die "sha256 mismatch"
    fi
    echo "sha256 OK"
else
    echo "=== NVIDIA installer verified from cache ==="
fi

# Extract installer contents (always from scratch, so no stale objects survive)
echo "=== extracting installer ==="
chmod +x "$RUN_FILE"
EXTRACT_DIR="${EXTRACT_CACHE_DIR}/nvidia-extract-${NVIDIA_VERSION}"
rm -rf -- "${EXTRACT_DIR:?}"
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR"

# Find the proprietary kernel source directory. Only the candidates listed
# here are probed; the open-source flavour (kernel-open/) is intentionally
# not considered by this script.
KERNEL_SRC=""
for d in "$EXTRACT_DIR/kernel" "$EXTRACT_DIR/kernel-modules-sources" "$EXTRACT_DIR/kernel-source"; do
    if [ -f "$d/Makefile" ]; then
        KERNEL_SRC="$d"
        break
    fi
done
if [ -z "$KERNEL_SRC" ]; then
    echo "ERROR: kernel source dir not found in:" >&2
    ls "$EXTRACT_DIR/" >&2
    exit 1
fi
echo "kernel source: $KERNEL_SRC"

# Build kernel modules
# NOTE(review): an earlier comment here described passing CFLAGS_MODULE with
# GCC's include dir (so nv_stdarg.h finds stdarg.h under -nostdinc), but no
# CFLAGS_MODULE is passed to make — confirm whether it is still needed.
#
# SYSSRC=common: conftest finds real kernel headers (linux/mm.h etc.)
# SYSOUT=amd64:  generated headers (autoconf.h, asm/) from arch package
# Without this split, conftest uses amd64/include/ which is nearly empty,
# all compile-tests fail silently, and NVIDIA assumes all APIs present → link errors.
JOBS=$(nproc)
BUILD_LOG="${EXTRACT_CACHE_DIR}/nvidia-build-${NVIDIA_VERSION}-${KVER}.log"
echo "=== building kernel modules (${JOBS} cores) ==="
# Output goes to a log file instead of `make | tail`: without pipefail the
# pipeline would report tail's status and hide a failed build.
if ! (
    cd "$KERNEL_SRC" && make -j"$JOBS" \
        KERNEL_UNAME="$KVER" \
        SYSSRC="$KDIR_COMMON" \
        SYSOUT="$KDIR_ARCH" \
        modules
) >"$BUILD_LOG" 2>&1; then
    tail -n 50 "$BUILD_LOG" >&2
    die "kernel module build failed (full log: $BUILD_LOG)"
fi
tail -n 10 "$BUILD_LOG"

# Collect outputs into a staging directory next to CACHE_DIR and rename it
# into place only when everything succeeded, so a failed run can never leave
# a half-populated cache that the next run would treat as complete.
STAGE_DIR="${CACHE_DIR}.partial.$$"
trap 'rm -rf -- "$STAGE_DIR"' EXIT
trap 'exit 1' HUP INT TERM
rm -rf -- "${STAGE_DIR:?}"
mkdir -p "$STAGE_DIR/modules" "$STAGE_DIR/bin" "$STAGE_DIR/lib" "$STAGE_DIR/firmware"

find "$KERNEL_SRC" -name '*.ko' -exec cp -- {} "$STAGE_DIR/modules/" \;

# Verify .ko files were built
ko_count=$(count_existing "$STAGE_DIR/modules/"*.ko)
[ "$ko_count" -gt 0 ] || die "no .ko files built in $KERNEL_SRC"

# Stripping debug info is best-effort; an unstripped module still works.
for ko in "$STAGE_DIR/modules/"*.ko; do
    strip --strip-debug "$ko" 2>/dev/null || true
done

cp "$EXTRACT_DIR/nvidia-smi" "$STAGE_DIR/bin/"
# Optional helper; not every installer is guaranteed to ship it.
cp "$EXTRACT_DIR/nvidia-bug-report.sh" "$STAGE_DIR/bin/" 2>/dev/null || true

# Copy GSP firmware (required for Hopper/Ada GPUs — H100, H800, etc.)
if [ -d "$EXTRACT_DIR/firmware" ]; then
    cp -r "$EXTRACT_DIR/firmware/." "$STAGE_DIR/firmware/"
    echo "firmware: $(find "$STAGE_DIR/firmware" -mindepth 1 -maxdepth 1 | wc -l) files"
else
    echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)" >&2
fi

# Copy ALL userspace library files (top level of the extracted installer only)
for lib in libnvidia-ml libcuda; do
    count=0
    for f in "$EXTRACT_DIR/${lib}.so."*; do
        [ -f "$f" ] || continue
        cp -- "$f" "$STAGE_DIR/lib/"
        count=$((count + 1))
    done
    if [ "$count" -eq 0 ]; then
        echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR" >&2
        ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 >&2 || true
        exit 1
    fi
done

# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
for lib in libnvidia-ml libcuda; do
    versioned=""
    for candidate in "$STAGE_DIR/lib/${lib}.so."[0-9][0-9]*; do
        if [ -e "$candidate" ]; then
            versioned="$candidate"
            break
        fi
    done
    [ -n "$versioned" ] || continue
    base=${versioned##*/}
    # Relative link targets keep the tree relocatable (it is renamed below).
    ln -sf "$base" "$STAGE_DIR/lib/${lib}.so.1"
    ln -sf "${lib}.so.1" "$STAGE_DIR/lib/${lib}.so" 2>/dev/null || true
    echo "${lib}: .so.1 -> $base"
done

# Publish: replace any previous (possibly incomplete) cache with the staged one.
rm -rf -- "${CACHE_DIR:?}"
mv -- "$STAGE_DIR" "$CACHE_DIR"

echo "=== NVIDIA build complete ==="
echo "cache:   $CACHE_DIR"
echo "modules: $ko_count .ko files"
ls -lh "$CACHE_DIR/bin/" "$CACHE_DIR/lib/"