fix: remove kernel version pin — dynamic detection prevents KVER mismatch

The static KERNEL_PKG_VERSION pin was the root cause of nvidia-smi never
working: the modules were compiled for the pinned version (e.g. 6.12.76-r0),
but the ISO kernel was unpinned (latest from the repo at build time). When
Alpine updated linux-lts, the two diverged silently.

Fix: both steps now use whatever linux-lts is current in Alpine 3.21 main
at build time. build-nvidia-module.sh uses `apk add --update linux-lts-dev`
(no version pin), and mkimage gets the same package from the same mirror.
The module cache is still keyed by the detected KVER, so rebuilds remain fast.

Removed: KERNEL_VERSION and KERNEL_PKG_VERSION from VERSIONS, and all pin
references from build.sh and build-nvidia-module.sh.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-07 12:11:05 +03:00
parent 18f377987f
commit 98f14b21c1
3 changed files with 16 additions and 33 deletions

View File

@@ -1,9 +1,4 @@
ALPINE_VERSION=3.21
KERNEL_VERSION=6.12
# Exact Alpine package version for linux-lts. Pin this to match builder headers with ISO kernel.
# To update: check `apk info linux-lts` on the target Alpine 3.21 system, update both here and in
# build-nvidia-module.sh + mkimg.bee.sh. Do NOT change without rebuilding NVIDIA modules cache.
KERNEL_PKG_VERSION=6.12.76-r0
NVIDIA_DRIVER_VERSION=590.48.01
GO_VERSION=1.23.6
AUDIT_VERSION=0.1.0

View File

@@ -16,20 +16,19 @@ set -e
NVIDIA_VERSION="$1"
DIST_DIR="$2"
KERNEL_PKG_VERSION="$3"
ALPINE_VERSION="$4"
ALPINE_VERSION="$3"
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
# Use dl-cdn.alpinelinux.org — same source as mkimage. If unavailable, apk fails loudly.
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
apk add --quiet \
# Install linux-lts-dev (no version pin — always use whatever is current in Alpine 3.21 main).
# This ensures modules are compiled for the same kernel that mkimage will install in the ISO.
# Both use dl-cdn.alpinelinux.org, so they see the same package state at build time.
echo "=== installing linux-lts-dev (latest from dl-cdn) ==="
apk add --quiet --update \
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
"linux-lts-dev=${KERNEL_PKG_VERSION}"
linux-lts-dev
# Detect kernel version from installed headers (pick highest version if multiple).
detect_kver() {
@@ -52,10 +51,10 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
exit 0
fi
# Install build dependencies (linux-lts-dev pinned, same dl-cdn source)
# Install build dependencies (linux-lts-dev already at correct version from above)
apk add --quiet \
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
gcc make perl linux-lts-dev wget
# Download official NVIDIA .run installer (proprietary) with sha256 verification
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"

View File

@@ -25,7 +25,6 @@ while [ $# -gt 0 ]; do
done
. "${BUILDER_DIR}/VERSIONS"
export KERNEL_PKG_VERSION
export PATH="$PATH:/usr/local/go/bin"
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
@@ -117,22 +116,12 @@ done
# --- build NVIDIA kernel modules and inject into overlay ---
echo ""
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}" "${ALPINE_VERSION}"
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${ALPINE_VERSION}"
# Determine kernel version from installed headers
# Detect kernel version from installed headers (set by build-nvidia-module.sh above)
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
# Build-time verification: headers must match the repo version we detected.
PINNED_KVER="$(echo "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*//')"
RUNNING_KVER="$(echo "${KVER}" | sed 's/-[0-9]*-lts//')"
if [ "${PINNED_KVER}" != "${RUNNING_KVER}" ]; then
echo "ERROR: kernel version mismatch!"
echo " Repo version: ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER})"
echo " Installed headers: ${KVER} (numeric: ${RUNNING_KVER})"
echo " This should not happen — apk should have installed the repo version."
exit 1
fi
echo "=== kernel version OK: ${KVER} ==="
[ -n "$KVER" ] || { echo "ERROR: linux-lts-dev not installed — no headers in /usr/src/"; exit 1; }
echo "=== kernel version: ${KVER} ==="
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"