feat: build NVIDIA open kernel modules during ISO build
- build-nvidia-module.sh: downloads nvidia open-gpu-kernel-modules source, builds against linux-lts headers, extracts nvidia-smi from .run installer - modules cached by driver version + kernel version (rebuild only on update) - .ko files injected into ISO overlay at /lib/modules/<kver>/extra/nvidia/ - bee-nvidia init script loads nvidia/nvidia-modeset/nvidia-uvm at boot - NVIDIA_DRIVER_VERSION=550.54.15 (Turing+, H100/A100 supported) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,8 +2,7 @@
|
||||
# build-debug.sh — build bee debug ISO with SSH access
|
||||
#
|
||||
# Debug ISO purpose: test audit binary on real hardware.
|
||||
# Includes dropbear SSH, all audit packages, audit binary.
|
||||
# Does NOT include NVIDIA driver (added in production build).
|
||||
# Includes dropbear SSH, all audit packages, audit binary, NVIDIA open kernel modules.
|
||||
#
|
||||
# Run on Alpine builder VM as root after setup-builder.sh.
|
||||
# Usage:
|
||||
@@ -101,6 +100,26 @@ mkdir -p "${OVERLAY_DIR}/usr/local/bin"
|
||||
cp "${DIST_DIR}/bee-audit-linux-amd64" "${OVERLAY_DIR}/usr/local/bin/audit"
|
||||
chmod +x "${OVERLAY_DIR}/usr/local/bin/audit"
|
||||
|
||||
# --- build NVIDIA kernel modules and inject into overlay ---
# Runs build-nvidia-module.sh (which caches its output under DIST_DIR), then
# copies the cached .ko files, nvidia-smi and its libraries into the overlay.
echo ""
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
# Fail explicitly so a broken module build cannot yield an ISO without drivers,
# regardless of whether this script runs under `set -e`.
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" || {
    echo "ERROR: build-nvidia-module.sh failed" >&2
    exit 1
}

# Determine kernel version (same as what goes into the ISO — both use linux-lts from same Alpine).
# This must use the same detection as build-nvidia-module.sh so both scripts
# agree on the cache directory name.
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | head -1)
if [ -z "${KVER}" ]; then
    # Without this guard the paths below would silently become
    # "nvidia-<ver>-" and "/lib/modules//extra/nvidia".
    echo "ERROR: no linux-headers-* directory found in /usr/src" >&2
    exit 1
fi
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"

# Inject .ko files into overlay at /lib/modules/<kver>/extra/nvidia/
OVERLAY_KMOD_DIR="${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia"
mkdir -p "${OVERLAY_KMOD_DIR}"
cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/" || {
    echo "ERROR: no kernel modules found in ${NVIDIA_CACHE}/modules" >&2
    exit 1
}

# Inject nvidia-smi and libnvidia-ml
mkdir -p "${OVERLAY_DIR}/usr/local/bin" "${OVERLAY_DIR}/usr/lib"
cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_DIR}/usr/local/bin/" || {
    echo "ERROR: nvidia-smi not found in ${NVIDIA_CACHE}/bin" >&2
    exit 1
}
chmod +x "${OVERLAY_DIR}/usr/local/bin/nvidia-smi"
# Libraries are best-effort: the cache's lib/ directory may be empty.
cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_DIR}/usr/lib/" 2>/dev/null || true
|
||||
|
||||
# --- build ISO using mkimage ---
|
||||
mkdir -p "${DIST_DIR}"
|
||||
echo ""
|
||||
|
||||
96
iso/builder/build-nvidia-module.sh
Normal file
96
iso/builder/build-nvidia-module.sh
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/bin/sh
# build-nvidia-module.sh — build NVIDIA open kernel modules and extract nvidia-smi
#
# Builds NVIDIA open-gpu-kernel-modules from source against the installed linux-lts
# kernel headers. Output is cached in DIST_DIR/nvidia-<version>-<kver>/ so
# subsequent builds are instant unless NVIDIA_DRIVER_VERSION or kernel changes.
#
# Usage: build-nvidia-module.sh <nvidia-version> <dist-dir>
#
# Output layout:
#   $CACHE_DIR/modules/  — nvidia*.ko files (stripped)
#   $CACHE_DIR/bin/      — nvidia-smi
#   $CACHE_DIR/lib/      — libnvidia-ml.so.* (for nvidia-smi)

set -e

NVIDIA_VERSION="$1"
DIST_DIR="$2"

[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }

# Print the kernel version taken from the first /usr/src/linux-headers-* entry
# (prints nothing when no headers are installed).
detect_kver() {
    ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | head -1
}

# Download $1 to $2 via a temporary ".part" file, so an interrupted or failed
# download never leaves a truncated file that a later run would treat as valid.
fetch() {
    rm -f "$2.part"
    if wget -q -O "$2.part" "$1"; then
        mv "$2.part" "$2"
    else
        rm -f "$2.part"
        echo "ERROR: download failed: $1" >&2
        exit 1
    fi
}

# Detect kernel version from installed headers
KVER=$(detect_kver)
if [ -z "$KVER" ]; then
    echo "=== installing linux-lts-dev ==="
    apk add --quiet linux-lts-dev
    KVER=$(detect_kver)
fi
if [ -z "$KVER" ]; then
    echo "ERROR: no linux-headers-* directory found in /usr/src" >&2
    exit 1
fi
KDIR="/usr/src/linux-headers-${KVER}"
echo "=== NVIDIA ${NVIDIA_VERSION} for kernel ${KVER} ==="

CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
# The cache is valid only if the core module and nvidia-smi are both present;
# an empty modules/ directory from an earlier failed run must not count.
if [ -f "$CACHE_DIR/modules/nvidia.ko" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
    echo "=== NVIDIA modules cached, skipping build ==="
    echo "cache: $CACHE_DIR"
    echo "modules: $(ls "$CACHE_DIR"/modules/*.ko 2>/dev/null | wc -l) .ko files"
    exit 0
fi

# Install build dependencies
echo "=== installing build deps ==="
apk add --quiet gcc make perl linux-lts-dev wget tar

# Download and build open kernel modules
BUILD_TMP="/var/tmp/nvidia-build"
rm -rf "$BUILD_TMP"
mkdir -p "$BUILD_TMP"

SRC_TGZ="/var/tmp/nvidia-open-${NVIDIA_VERSION}.tar.gz"
if [ ! -f "$SRC_TGZ" ]; then
    echo "=== downloading NVIDIA open kernel modules source ==="
    fetch "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${NVIDIA_VERSION}.tar.gz" "$SRC_TGZ"
fi

echo "=== extracting source ==="
tar -xzf "$SRC_TGZ" -C "$BUILD_TMP"
SRC_DIR="$BUILD_TMP/open-gpu-kernel-modules-${NVIDIA_VERSION}"

echo "=== building kernel modules ($(nproc) cores) ==="
cd "$SRC_DIR"
BUILD_LOG="$BUILD_TMP/build.log"
# The output goes to a log file instead of "make | tail": without pipefail the
# pipeline's status would be tail's, and `set -e` would never see a failed build.
# NOTE(review): the kernel-open Makefile appears to select the kernel tree via
# SYSSRC; KERNEL_SOURCE_PATH is kept as originally passed — confirm which one
# the targeted driver release actually reads.
if ! make -j"$(nproc)" \
        SYSSRC="$KDIR" \
        KERNEL_SOURCE_PATH="$KDIR" \
        IGNORE_MISSING_MODULE_SYMVERS=1 \
        modules > "$BUILD_LOG" 2>&1; then
    tail -50 "$BUILD_LOG" >&2
    echo "ERROR: NVIDIA kernel module build failed (full log: $BUILD_LOG)" >&2
    exit 1
fi
tail -5 "$BUILD_LOG"

# Collect .ko files
mkdir -p "$CACHE_DIR/modules"
find "$SRC_DIR" -name '*.ko' -exec cp {} "$CACHE_DIR/modules/" \;
if [ ! -f "$CACHE_DIR/modules/nvidia.ko" ]; then
    echo "ERROR: build produced no nvidia.ko (see $BUILD_LOG)" >&2
    exit 1
fi
# Strip debug info from modules to reduce size (best-effort)
for ko in "$CACHE_DIR"/modules/*.ko; do
    strip --strip-debug "$ko" 2>/dev/null || true
done
echo "modules: $(ls "$CACHE_DIR"/modules/*.ko | wc -l) .ko files"

# Extract nvidia-smi and required libraries from the .run installer
RUN_FILE="/var/tmp/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run"
if [ ! -f "$RUN_FILE" ]; then
    echo "=== downloading NVIDIA installer (for nvidia-smi) ==="
    fetch "https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}/NVIDIA-Linux-x86_64-${NVIDIA_VERSION}.run" "$RUN_FILE"
fi

echo "=== extracting nvidia-smi ==="
chmod +x "$RUN_FILE"
EXTRACT_DIR="/var/tmp/nvidia-extract-${NVIDIA_VERSION}"
rm -rf "$EXTRACT_DIR"
# The installer's own exit status is ignored as before; success is judged by
# whether the file we need was actually extracted.
"$RUN_FILE" --extract-only --target "$EXTRACT_DIR" 2>/dev/null || true
if [ ! -f "$EXTRACT_DIR/nvidia-smi" ]; then
    echo "ERROR: nvidia-smi not found in $EXTRACT_DIR (installer extraction failed?)" >&2
    exit 1
fi

mkdir -p "$CACHE_DIR/bin" "$CACHE_DIR/lib"
# Libraries are best-effort; the glob also matches libnvidia-ml.so.1 if shipped.
cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
# Provide the libnvidia-ml.so.1 name when only the fully versioned file exists.
if [ ! -e "$CACHE_DIR/lib/libnvidia-ml.so.1" ] \
        && [ -f "$CACHE_DIR/lib/libnvidia-ml.so.${NVIDIA_VERSION}" ]; then
    ln -s "libnvidia-ml.so.${NVIDIA_VERSION}" "$CACHE_DIR/lib/libnvidia-ml.so.1"
fi
# nvidia-smi is copied last: its presence is what marks the cache as complete.
cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/"

echo "=== NVIDIA build complete ==="
echo "cache: $CACHE_DIR"
ls -lh "$CACHE_DIR/bin/" "$CACHE_DIR/modules/"
|
||||
@@ -67,6 +67,7 @@ rc_add savecache shutdown
|
||||
rc_add bee-sshsetup default
|
||||
rc_add bee-network default
|
||||
rc_add dropbear default
|
||||
rc_add bee-nvidia default
|
||||
rc_add bee-audit-debug default
|
||||
|
||||
if [ -d "$OVERLAY/etc" ]; then cp -r "$OVERLAY/etc/." "$tmp/etc/"; fi
|
||||
|
||||
23
iso/overlay-debug/etc/init.d/bee-nvidia
Normal file
23
iso/overlay-debug/etc/init.d/bee-nvidia
Normal file
@@ -0,0 +1,23 @@
|
||||
#!/sbin/openrc-run
# bee-nvidia — OpenRC service that loads the NVIDIA kernel modules at boot.
# Loading is best-effort: a module that fails to load produces a warning and
# the service still reports success.

description="Bee: load NVIDIA kernel modules"

depend() {
    need localmount
    before bee-audit-debug
}

# Refresh module dependency data, then try to load each NVIDIA module in order.
# Always finishes with status 0.
start() {
    ebegin "Loading NVIDIA modules"
    # Run depmod so kernel can locate our modules in /lib/modules/.../extra/
    depmod -a 2>/dev/null || true

    for mod in nvidia nvidia-modeset nvidia-uvm; do
        # Capture modprobe's message instead of discarding it, so the boot log
        # shows why a module failed to load.
        if modprobe_err=$(modprobe "$mod" 2>&1); then
            einfo "loaded: $mod"
        else
            ewarn "failed to load: $mod"
            if [ -n "$modprobe_err" ]; then
                ewarn "$modprobe_err"
            fi
        fi
    done
    eend 0
}
|
||||
Reference in New Issue
Block a user