fix: remove kernel version pin — dynamic detection prevents KVER mismatch
The static KERNEL_PKG_VERSION pin was the root cause of nvidia-smi never working: modules were compiled for the pinned version (e.g. 6.12.76-r0), but the ISO kernel was unpinned (latest from the repo at build time). When Alpine updated linux-lts, the two diverged silently. Fix: both steps now use whatever linux-lts is current in Alpine 3.21 main at build time. build-nvidia-module.sh uses `apk add --update linux-lts-dev` (no version pin), and mkimage gets the same package from the same mirror. The module cache is still keyed by the detected KVER, so rebuilds remain fast. Removed: KERNEL_VERSION and KERNEL_PKG_VERSION from VERSIONS, and all pin references from build.sh and build-nvidia-module.sh. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,4 @@
|
|||||||
ALPINE_VERSION=3.21
|
ALPINE_VERSION=3.21
|
||||||
KERNEL_VERSION=6.12
|
|
||||||
# Exact Alpine package version for linux-lts. Pin this to match builder headers with ISO kernel.
|
|
||||||
# To update: check `apk info linux-lts` on the target Alpine 3.21 system, update both here and in
|
|
||||||
# build-nvidia-module.sh + mkimg.bee.sh. Do NOT change without rebuilding NVIDIA modules cache.
|
|
||||||
KERNEL_PKG_VERSION=6.12.76-r0
|
|
||||||
NVIDIA_DRIVER_VERSION=590.48.01
|
NVIDIA_DRIVER_VERSION=590.48.01
|
||||||
GO_VERSION=1.23.6
|
GO_VERSION=1.23.6
|
||||||
AUDIT_VERSION=0.1.0
|
AUDIT_VERSION=0.1.0
|
||||||
|
|||||||
@@ -16,20 +16,19 @@ set -e
|
|||||||
|
|
||||||
NVIDIA_VERSION="$1"
|
NVIDIA_VERSION="$1"
|
||||||
DIST_DIR="$2"
|
DIST_DIR="$2"
|
||||||
KERNEL_PKG_VERSION="$3"
|
ALPINE_VERSION="$3"
|
||||||
ALPINE_VERSION="$4"
|
|
||||||
|
|
||||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||||
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <alpine-version>"; exit 1; }
|
||||||
[ -n "$ALPINE_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version> <alpine-version>"; exit 1; }
|
|
||||||
|
|
||||||
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
|
# Install linux-lts-dev (no version pin — always use whatever is current in Alpine 3.21 main).
|
||||||
# Use dl-cdn.alpinelinux.org — same source as mkimage. If unavailable, apk fails loudly.
|
# This ensures modules are compiled for the same kernel that mkimage will install in the ISO.
|
||||||
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
|
# Both use dl-cdn.alpinelinux.org, so they see the same package state at build time.
|
||||||
apk add --quiet \
|
echo "=== installing linux-lts-dev (latest from dl-cdn) ==="
|
||||||
|
apk add --quiet --update \
|
||||||
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
||||||
"linux-lts-dev=${KERNEL_PKG_VERSION}"
|
linux-lts-dev
|
||||||
|
|
||||||
# Detect kernel version from installed headers (pick highest version if multiple).
|
# Detect kernel version from installed headers (pick highest version if multiple).
|
||||||
detect_kver() {
|
detect_kver() {
|
||||||
@@ -52,10 +51,10 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install build dependencies (linux-lts-dev pinned, same dl-cdn source)
|
# Install build dependencies (linux-lts-dev already at correct version from above)
|
||||||
apk add --quiet \
|
apk add --quiet \
|
||||||
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
--repository "https://dl-cdn.alpinelinux.org/alpine/v${ALPINE_VERSION}/main" \
|
||||||
gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
|
gcc make perl linux-lts-dev wget
|
||||||
|
|
||||||
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
||||||
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ while [ $# -gt 0 ]; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
. "${BUILDER_DIR}/VERSIONS"
|
. "${BUILDER_DIR}/VERSIONS"
|
||||||
export KERNEL_PKG_VERSION
|
|
||||||
export PATH="$PATH:/usr/local/go/bin"
|
export PATH="$PATH:/usr/local/go/bin"
|
||||||
|
|
||||||
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
||||||
@@ -117,22 +116,12 @@ done
|
|||||||
# --- build NVIDIA kernel modules and inject into overlay ---
|
# --- build NVIDIA kernel modules and inject into overlay ---
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
||||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}" "${ALPINE_VERSION}"
|
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${ALPINE_VERSION}"
|
||||||
|
|
||||||
# Determine kernel version from installed headers
|
# Detect kernel version from the headers installed by build-nvidia-module.sh above
|
||||||
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
||||||
|
[ -n "$KVER" ] || { echo "ERROR: linux-lts-dev not installed — no headers in /usr/src/"; exit 1; }
|
||||||
# Build-time verification: headers must match the repo version we detected.
|
echo "=== kernel version: ${KVER} ==="
|
||||||
PINNED_KVER="$(echo "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*//')"
|
|
||||||
RUNNING_KVER="$(echo "${KVER}" | sed 's/-[0-9]*-lts//')"
|
|
||||||
if [ "${PINNED_KVER}" != "${RUNNING_KVER}" ]; then
|
|
||||||
echo "ERROR: kernel version mismatch!"
|
|
||||||
echo " Repo version: ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER})"
|
|
||||||
echo " Installed headers: ${KVER} (numeric: ${RUNNING_KVER})"
|
|
||||||
echo " This should not happen — apk should have installed the repo version."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "=== kernel version OK: ${KVER} ==="
|
|
||||||
|
|
||||||
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user