fix: remove kernel version pin — dynamic detection prevents KVER mismatch
The static KERNEL_PKG_VERSION pin was the root cause of nvidia-smi never working: the modules were compiled for the pinned version (e.g. 6.12.76-r0), but the ISO kernel was unpinned (the latest from the repo at build time). When Alpine updated linux-lts, the two diverged silently. Fix: both steps now use whatever linux-lts is current in Alpine 3.21 main at build time. build-nvidia-module.sh uses `apk add --update linux-lts-dev` (no version pin), and mkimage gets the same package from the same mirror. The module cache is still keyed by the detected KVER, so rebuilds remain fast. Removed: KERNEL_VERSION and KERNEL_PKG_VERSION from VERSIONS, and all pin references from build.sh and build-nvidia-module.sh. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,4 @@
|
||||
ALPINE_VERSION=3.21
|
||||
KERNEL_VERSION=6.12
|
||||
# Exact Alpine package version for linux-lts. Pin this to match builder headers with ISO kernel.
|
||||
# To update: check `apk info linux-lts` on the target Alpine 3.21 system, update both here and in
|
||||
# build-nvidia-module.sh + mkimg.bee.sh. Do NOT change without rebuilding NVIDIA modules cache.
|
||||
KERNEL_PKG_VERSION=6.12.76-r0
|
||||
NVIDIA_DRIVER_VERSION=590.48.01
|
||||
GO_VERSION=1.23.6
|
||||
AUDIT_VERSION=0.1.0
|
||||
|
||||
@@ -16,20 +16,19 @@ set -e
|
||||
|
||||
NVIDIA_VERSION="$1"
|
||||
DIST_DIR="$2"
|
||||
KERNEL_PKG_VERSION="$3"
|
||||
ALPINE_VERSION="$4"
|
||||
ALPINE_VERSION="$3"
|
||||
|
||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
||||
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
||||
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||
|
||||
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
|
||||
# Use dl-cdn.alpinelinux.org — same source as mkimage. If unavailable, apk fails loudly.
|
||||
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
|
||||
apk add --quiet \
|
||||
# Install linux-lts-dev (no version pin — always use whatever is current in the Alpine ${ALPINE_VERSION} main repo).
|
||||
# This ensures modules are compiled for the same kernel that mkimage will install in the ISO.
|
||||
# Both use dl-cdn.alpinelinux.org, so they see the same package state at build time.
|
||||
echo "=== installing linux-lts-dev (latest from dl-cdn) ==="
|
||||
apk add --quiet --update \
|
||||
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
||||
"linux-lts-dev=${KERNEL_PKG_VERSION}"
|
||||
linux-lts-dev
|
||||
|
||||
# Detect kernel version from installed headers (pick highest version if multiple).
|
||||
detect_kver() {
|
||||
@@ -52,10 +51,10 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Install build dependencies (linux-lts-dev pinned, same dl-cdn source)
|
||||
# Install build dependencies (linux-lts-dev already at correct version from above)
|
||||
apk add --quiet \
|
||||
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
||||
gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
|
||||
gcc make perl linux-lts-dev wget
|
||||
|
||||
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
||||
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
||||
|
||||
@@ -25,7 +25,6 @@ while [ $# -gt 0 ]; do
|
||||
done
|
||||
|
||||
. "${BUILDER_DIR}/VERSIONS"
|
||||
export KERNEL_PKG_VERSION
|
||||
export PATH="$PATH:/usr/local/go/bin"
|
||||
|
||||
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
||||
@@ -117,22 +116,12 @@ done
|
||||
# --- build NVIDIA kernel modules and inject into overlay ---
|
||||
echo ""
|
||||
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}" "${ALPINE_VERSION}"
|
||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${ALPINE_VERSION}"
|
||||
|
||||
# Determine kernel version from installed headers
|
||||
# Detect kernel version from the headers installed by build-nvidia-module.sh above
|
||||
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
||||
|
||||
# Build-time verification: headers must match the repo version we detected.
|
||||
PINNED_KVER="$(echo "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*//')"
|
||||
RUNNING_KVER="$(echo "${KVER}" | sed 's/-[0-9]*-lts//')"
|
||||
if [ "${PINNED_KVER}" != "${RUNNING_KVER}" ]; then
|
||||
echo "ERROR: kernel version mismatch!"
|
||||
echo " Repo version: ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER})"
|
||||
echo " Installed headers: ${KVER} (numeric: ${RUNNING_KVER})"
|
||||
echo " This should not happen — apk should have installed the repo version."
|
||||
exit 1
|
||||
fi
|
||||
echo "=== kernel version OK: ${KVER} ==="
|
||||
[ -n "$KVER" ] || { echo "ERROR: linux-lts-dev not installed — no headers in /usr/src/"; exit 1; }
|
||||
echo "=== kernel version: ${KVER} ==="
|
||||
|
||||
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user