fix: use proprietary NVIDIA .run installer instead of open kernel modules

Builds kernel modules from the official NVIDIA installer source tree,
same as a standard NVIDIA driver install. The open-gpu-kernel-modules
source is no longer used.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-05 18:05:57 +03:00
parent ec9c65e20e
commit d4a2d7fa55

View File

@@ -1,14 +1,16 @@
#!/bin/sh
# build-nvidia-module.sh — build NVIDIA open kernel modules and extract nvidia-smi
# build-nvidia-module.sh — install NVIDIA proprietary driver into ISO overlay
#
# Builds NVIDIA open-gpu-kernel-modules from source against the installed linux-lts
# kernel headers. Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so
# subsequent builds are instant unless NVIDIA_DRIVER_VERSION or kernel changes.
# Downloads the official NVIDIA .run installer, builds the kernel modules from
# the kernel/ source tree it contains, and copies the userspace tools
# (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA.
#
# Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
# are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
#
# Output layout:
# $CACHE_DIR/modules/ — nvidia*.ko files (stripped)
# $CACHE_DIR/bin/ — nvidia-smi
# $CACHE_DIR/lib/ — libnvidia-ml.so.1, libcuda stub (for nvidia-smi)
# $CACHE_DIR/modules/ — nvidia*.ko files (debug info stripped)
# $CACHE_DIR/bin/ — nvidia-smi
# $CACHE_DIR/lib/ — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
# Abort the script as soon as a command exits non-zero. For a pipeline only the
# last command's status counts, so a failure on the left of `|` is not caught.
set -e
@@ -26,71 +28,54 @@ if [ -z "$KVER" ]; then
KVER=$(ls /usr/src/ | grep '^linux-headers-' | sed 's/linux-headers-//' | head -1)
fi
# Kernel headers directory for the selected kernel version.
KDIR="/usr/src/linux-headers-${KVER}"
# NOTE(review): the two banner lines below differ only in "(proprietary)"; they
# look like the before/after variants of one statement from a diff view whose
# +/- markers were stripped — confirm which one is live.
echo "=== NVIDIA ${NVIDIA_VERSION} for kernel ${KVER} ==="
echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="
# The cache directory is keyed by driver version and kernel version.
CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
# Cache hit: the modules/ directory exists and bin/nvidia-smi is a regular file.
# The test does not check that modules/ actually contains any .ko file.
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
# NOTE(review): two variants of the "cached" message follow — confirm which is live.
echo "=== NVIDIA modules cached, skipping build ==="
echo "=== NVIDIA cached, skipping build ==="
echo "cache: $CACHE_DIR"
# Count cached modules; ls errors are discarded, so no match counts zero lines.
# The first variant leaves $CACHE_DIR unquoted, the second quotes it.
echo "modules: $(ls $CACHE_DIR/modules/*.ko 2>/dev/null | wc -l) .ko files"
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
# Nothing to rebuild: finish successfully.
exit 0
fi
# Install build dependencies
echo "=== installing build deps ==="
# NOTE(review): the two `apk add` lines below differ only in `tar`; they look
# like the before/after variants of one statement from a diff view whose
# +/- markers were stripped — confirm which one is live.
apk add --quiet gcc make perl linux-lts-dev wget tar
apk add --quiet gcc make perl linux-lts-dev wget
# Download and build open kernel modules
# NOTE(review): the rest of this section builds from the open-gpu-kernel-modules
# GitHub tarball and appears to be the removed side of the diff — confirm.
# The scratch directory is wiped and recreated on every run.
BUILD_TMP="/var/tmp/nvidia-build"
rm -rf "$BUILD_TMP"
mkdir -p "$BUILD_TMP"
# The source tarball is kept in /var/tmp and fetched only when the file is
# missing. wget writes straight to the final path.
# NOTE(review): a partial file left by an interrupted download would presumably
# pass the -f test on the next run — confirm.
SRC_TGZ="/var/tmp/nvidia-open-${NVIDIA_VERSION}.tar.gz"
if [ ! -f "$SRC_TGZ" ]; then
echo "=== downloading NVIDIA open kernel modules source ==="
wget -q -O "$SRC_TGZ" \
"https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${NVIDIA_VERSION}.tar.gz"
fi
# Unpack into the scratch directory. SRC_DIR is the top-level directory the
# tarball is expected to contain.
echo "=== extracting source ==="
tar -xzf "$SRC_TGZ" -C "$BUILD_TMP"
SRC_DIR="$BUILD_TMP/open-gpu-kernel-modules-${NVIDIA_VERSION}"
echo "=== building kernel modules ($(nproc) cores) ==="
# Build the `modules` target with KDIR passed as KERNEL_SOURCE_PATH, one job per
# core reported by nproc. Only the last five lines of output are printed.
# NOTE(review): the pipeline's status is tail's, so `set -e` will not stop the
# script when make fails; no pipefail is set in the visible lines.
cd "$SRC_DIR"
make -j$(nproc) \
KERNEL_SOURCE_PATH="$KDIR" \
IGNORE_MISSING_MODULE_SYMVERS=1 \
modules 2>&1 | tail -5
# Collect .ko files
mkdir -p "$CACHE_DIR/modules"
# Every .ko found anywhere under SRC_DIR is copied flat into modules/.
find "$SRC_DIR" -name '*.ko' -exec cp {} "$CACHE_DIR/modules/" \;
# Strip debug info from modules to reduce size
# Strip failures are ignored (stderr discarded, `|| true`).
for ko in "$CACHE_DIR"/modules/*.ko; do
strip --strip-debug "$ko" 2>/dev/null || true
done
# Report the module count; $CACHE_DIR is unquoted here.
echo "modules: $(ls $CACHE_DIR/modules/*.ko | wc -l) .ko files"
# NOTE(review): this section interleaves what look like the before/after
# variants of several statements from a diff view whose +/- markers were
# stripped (section comment, download message, wget call, extract call) —
# confirm which ones are live.
# Extract nvidia-smi and required libraries from the .run installer
# Download official NVIDIA .run installer (proprietary)
# The installer is kept in /var/tmp, named by driver version, and fetched only
# when the file is missing. wget writes straight to the final path.
# NOTE(review): a partial file left by an interrupted download would presumably
# pass the -f test on the next run — confirm.
RUN_FILE="/var/tmp/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
if [ ! -f "$RUN_FILE" ]; then
echo "=== downloading NVIDIA installer (for nvidia-smi) ==="
wget -q -O "$RUN_FILE" \
echo "=== downloading NVIDIA ${NVIDIA_VERSION} installer ==="
wget -q --show-progress \
-O "$RUN_FILE" \
"https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
fi
echo "=== extracting nvidia-smi ==="
# Extract installer contents
echo "=== extracting installer ==="
# The .run file is executed directly, so it must be executable.
chmod +x "$RUN_FILE"
# Any previous extraction directory is removed before unpacking.
EXTRACT_DIR="/var/tmp/nvidia-extract-${NVIDIA_VERSION}"
rm -rf "$EXTRACT_DIR"
# Unpack the installer payload into EXTRACT_DIR (--extract-only presumably
# unpacks without running the installer — confirm against the NVIDIA README).
# The first form discards stderr and ignores failure. The second lets a failed
# extraction abort the script under `set -e`.
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR" 2>/dev/null || true
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR"
mkdir -p "$CACHE_DIR/bin" "$CACHE_DIR/lib"
# nvidia-smi is required: this cp has no `|| true`, so a missing file aborts.
cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/"
# The library copies are best-effort: errors are silenced and ignored. The
# first form copies only the .so.1 name, the second any libnvidia-ml.so.* file.
cp "$EXTRACT_DIR/libnvidia-ml.so.1" "$CACHE_DIR/lib/" 2>/dev/null || true
cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
# Build kernel modules from extracted source
echo "=== building kernel modules ($(nproc) cores) ==="
# Build the `modules` target in the installer's kernel/ directory, passing the
# target kernel version as KERNEL_UNAME and the headers directory as SYSSRC.
# Only the last five lines of output are printed.
# NOTE(review): the pipeline's status is tail's, so `set -e` will not stop the
# script when make fails; no pipefail is set in the visible lines.
cd "$EXTRACT_DIR/kernel"
make -j$(nproc) KERNEL_UNAME="$KVER" SYSSRC="$KDIR" modules 2>&1 | tail -5
# Collect outputs
mkdir -p "$CACHE_DIR/modules" "$CACHE_DIR/bin" "$CACHE_DIR/lib"
# Every .ko found anywhere under kernel/ is copied flat into modules/.
find "$EXTRACT_DIR/kernel" -name '*.ko' -exec cp {} "$CACHE_DIR/modules/" \;
# Strip debug info from each module. Strip failures are ignored.
for ko in "$CACHE_DIR/modules/"*.ko; do
strip --strip-debug "$ko" 2>/dev/null || true
done
# nvidia-smi is required: this cp has no `|| true`, so a missing file aborts.
cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/"
# The library copies below are best-effort: errors are silenced and ignored.
cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
# libcuda stub needed by nvidia-smi at runtime
cp "$EXTRACT_DIR/libcuda.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
echo "=== NVIDIA build complete ==="
echo "cache: $CACHE_DIR"
# NOTE(review): the summary lines below look like before/after variants from a
# diff view whose +/- markers were stripped: one `ls -lh` over bin/ and
# modules/, versus a module count followed by `ls -lh` of bin/ only — confirm
# which is live.
ls -lh "$CACHE_DIR/bin/" "$CACHE_DIR/modules/"
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko | wc -l) .ko files"
ls -lh "$CACHE_DIR/bin/"