fix: use proprietary NVIDIA .run installer instead of open kernel modules
Builds the kernel modules from the source tree shipped inside the official NVIDIA .run installer, the same way a standard NVIDIA driver install does. The open-gpu-kernel-modules sources are no longer used.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,14 +1,16 @@
|
||||
#!/bin/sh
|
||||
# build-nvidia-module.sh — build NVIDIA open kernel modules and extract nvidia-smi
|
||||
# build-nvidia-module.sh — install NVIDIA proprietary driver into ISO overlay
|
||||
#
|
||||
# Builds NVIDIA open-gpu-kernel-modules from source against the installed linux-lts
|
||||
# kernel headers. Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so
|
||||
# subsequent builds are instant unless NVIDIA_DRIVER_VERSION or kernel changes.
|
||||
# Downloads the official NVIDIA .run installer, builds the kernel modules from its bundled kernel/ source, and extracts the
|
||||
# userspace tools (nvidia-smi, libnvidia-ml). Everything is proprietary NVIDIA.
|
||||
#
|
||||
# Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so subsequent builds
|
||||
# are instant unless NVIDIA_DRIVER_VERSION or kernel version changes.
|
||||
#
|
||||
# Output layout:
|
||||
# $CACHE_DIR/modules/ — nvidia*.ko files (stripped)
|
||||
# $CACHE_DIR/bin/ — nvidia-smi
|
||||
# $CACHE_DIR/lib/ — libnvidia-ml.so.1, libcuda stub (for nvidia-smi)
|
||||
# $CACHE_DIR/modules/ — nvidia*.ko files
|
||||
# $CACHE_DIR/bin/ — nvidia-smi (NOTE: nvidia-debugdump is listed here but is not copied by the visible build steps — confirm)
|
||||
# $CACHE_DIR/lib/ — libnvidia-ml.so*, libcuda.so* (for nvidia-smi)
|
||||
|
||||
set -e
|
||||
|
||||
@@ -26,71 +28,54 @@ if [ -z "$KVER" ]; then
|
||||
KVER=$(ls /usr/src/ | grep '^linux-headers-' | sed 's/linux-headers-//' | head -1)
|
||||
fi
|
||||
KDIR="/usr/src/linux-headers-${KVER}"
|
||||
echo "=== NVIDIA ${NVIDIA_VERSION} for kernel ${KVER} ==="
|
||||
echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="
|
||||
|
||||
CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
|
||||
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
||||
echo "=== NVIDIA modules cached, skipping build ==="
|
||||
echo "=== NVIDIA cached, skipping build ==="
|
||||
echo "cache: $CACHE_DIR"
|
||||
echo "modules: $(ls $CACHE_DIR/modules/*.ko 2>/dev/null | wc -l) .ko files"
|
||||
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) .ko files"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Install build dependencies
|
||||
echo "=== installing build deps ==="
|
||||
apk add --quiet gcc make perl linux-lts-dev wget tar
|
||||
apk add --quiet gcc make perl linux-lts-dev wget
|
||||
|
||||
# Download and build open kernel modules
|
||||
BUILD_TMP="/var/tmp/nvidia-build"
|
||||
rm -rf "$BUILD_TMP"
|
||||
mkdir -p "$BUILD_TMP"
|
||||
|
||||
SRC_TGZ="/var/tmp/nvidia-open-${NVIDIA_VERSION}.tar.gz"
|
||||
if [ ! -f "$SRC_TGZ" ]; then
|
||||
echo "=== downloading NVIDIA open kernel modules source ==="
|
||||
wget -q -O "$SRC_TGZ" \
|
||||
"https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${NVIDIA_VERSION}.tar.gz"
|
||||
fi
|
||||
|
||||
echo "=== extracting source ==="
|
||||
tar -xzf "$SRC_TGZ" -C "$BUILD_TMP"
|
||||
SRC_DIR="$BUILD_TMP/open-gpu-kernel-modules-${NVIDIA_VERSION}"
|
||||
|
||||
echo "=== building kernel modules ($(nproc) cores) ==="
|
||||
cd "$SRC_DIR"
|
||||
make -j$(nproc) \
|
||||
KERNEL_SOURCE_PATH="$KDIR" \
|
||||
IGNORE_MISSING_MODULE_SYMVERS=1 \
|
||||
modules 2>&1 | tail -5
|
||||
|
||||
# Collect .ko files
|
||||
mkdir -p "$CACHE_DIR/modules"
|
||||
find "$SRC_DIR" -name '*.ko' -exec cp {} "$CACHE_DIR/modules/" \;
|
||||
# Strip debug info from modules to reduce size
|
||||
for ko in "$CACHE_DIR"/modules/*.ko; do
|
||||
strip --strip-debug "$ko" 2>/dev/null || true
|
||||
done
|
||||
echo "modules: $(ls $CACHE_DIR/modules/*.ko | wc -l) .ko files"
|
||||
|
||||
# Extract nvidia-smi and required libraries from the .run installer
|
||||
# Download official NVIDIA .run installer (proprietary)
|
||||
RUN_FILE="/var/tmp/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
|
||||
if [ ! -f "$RUN_FILE" ]; then
|
||||
echo "=== downloading NVIDIA installer (for nvidia-smi) ==="
|
||||
wget -q -O "$RUN_FILE" \
|
||||
echo "=== downloading NVIDIA ${NVIDIA_VERSION} installer ==="
|
||||
wget -q --show-progress \
|
||||
-O "$RUN_FILE" \
|
||||
"https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
|
||||
fi
|
||||
|
||||
echo "=== extracting nvidia-smi ==="
|
||||
# Extract installer contents
|
||||
echo "=== extracting installer ==="
|
||||
chmod +x "$RUN_FILE"
|
||||
EXTRACT_DIR="/var/tmp/nvidia-extract-${NVIDIA_VERSION}"
|
||||
rm -rf "$EXTRACT_DIR"
|
||||
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR" 2>/dev/null || true
|
||||
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR"
|
||||
|
||||
mkdir -p "$CACHE_DIR/bin" "$CACHE_DIR/lib"
|
||||
cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/"
|
||||
cp "$EXTRACT_DIR/libnvidia-ml.so.1" "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
# Build kernel modules from extracted source
|
||||
echo "=== building kernel modules ($(nproc) cores) ==="
|
||||
cd "$EXTRACT_DIR/kernel"
|
||||
make -j$(nproc) KERNEL_UNAME="$KVER" SYSSRC="$KDIR" modules 2>&1 | tail -5
|
||||
|
||||
# Collect outputs
|
||||
mkdir -p "$CACHE_DIR/modules" "$CACHE_DIR/bin" "$CACHE_DIR/lib"
|
||||
|
||||
find "$EXTRACT_DIR/kernel" -name '*.ko' -exec cp {} "$CACHE_DIR/modules/" \;
|
||||
for ko in "$CACHE_DIR/modules/"*.ko; do
|
||||
strip --strip-debug "$ko" 2>/dev/null || true
|
||||
done
|
||||
|
||||
cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/"
|
||||
cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
# libcuda stub needed by nvidia-smi at runtime
|
||||
cp "$EXTRACT_DIR/libcuda.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
|
||||
echo "=== NVIDIA build complete ==="
|
||||
echo "cache: $CACHE_DIR"
|
||||
ls -lh "$CACHE_DIR/bin/" "$CACHE_DIR/modules/"
|
||||
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko | wc -l) .ko files"
|
||||
ls -lh "$CACHE_DIR/bin/"
|
||||
|
||||
Reference in New Issue
Block a user