Fix critical ISO build bugs: kernel pinning, service registration, PATH, audit checks

- Pin linux-lts to exact KERNEL_PKG_VERSION=6.12.76-r0 in build and ISO package list
- Add build-time verification that compiled kernel version matches pin (fails loudly)
- Fix bee-audit-debug → bee-audit in genapkovl OpenRC registration (service was never starting)
- Add AUDIT_VERSION=0.1.0 to VERSIONS (was undefined, bee-release had empty fields)
- Pin linux-lts-dev version in second apk add in build-nvidia-module.sh
- Add /root/.profile to overlay so /usr/local/bin is in PATH for SSH sessions
- Remove "DEBUG MODE" from motd
- Fix smoketest: grep for slog "audit output written" instead of non-existent "audit completed"
- Document no-internet constraint in system-overview and runtime-flows
- Remove redundant genapkovl copy to /var/tmp (now found via ~/.mkimage/)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-07 10:52:54 +03:00
parent 493ccea415
commit ffc7e5c71a
10 changed files with 73 additions and 24 deletions

View File

@@ -1,4 +1,9 @@
ALPINE_VERSION=3.21
KERNEL_VERSION=6.12
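# Exact Alpine package version of linux-lts. Pinned so the builder's kernel headers match the ISO kernel.
# To update: check `apk info linux-lts` on the target Alpine 3.21 system and change the value here.
# The build script exports it for the ISO package list (linux-lts=${KERNEL_PKG_VERSION}) and passes it
# to build-nvidia-module.sh as its third argument, so those consumers pick up the new value from here.
# Do NOT change it without rebuilding the NVIDIA modules cache.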
KERNEL_PKG_VERSION=6.12.76-r0
NVIDIA_DRIVER_VERSION=590.48.01
GO_VERSION=1.23.6
AUDIT_VERSION=0.1.0

View File

@@ -16,14 +16,16 @@ set -e
NVIDIA_VERSION="$1"
DIST_DIR="$2"
KERNEL_PKG_VERSION="$3"
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
# Always install linux-lts-dev to ensure headers match the ISO's kernel (Alpine 3.21 = 6.12.x).
# Without this, a builder with stale 6.6.x headers produces modules for the wrong kernel version.
echo "=== installing linux-lts-dev ==="
apk add --quiet linux-lts-dev
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
# If this version is unavailable, apk will fail loudly — do NOT use a floating version here.
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
apk add --quiet "linux-lts-dev=${KERNEL_PKG_VERSION}"
# Detect kernel version from installed headers (pick highest version if multiple).
detect_kver() {
@@ -46,8 +48,8 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
exit 0
fi
# Install build dependencies
apk add --quiet gcc make perl linux-lts-dev wget
# Install build dependencies (linux-lts-dev pinned to same version as initial install above)
apk add --quiet gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
# Download official NVIDIA .run installer (proprietary) with sha256 verification
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"

View File

@@ -25,6 +25,7 @@ while [ $# -gt 0 ]; do
done
. "${BUILDER_DIR}/VERSIONS"
export KERNEL_PKG_VERSION
export PATH="$PATH:/usr/local/go/bin"
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
@@ -112,10 +113,25 @@ done
# --- build NVIDIA kernel modules and inject into overlay ---
echo ""
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}"
# Determine kernel version (same as what goes into the ISO — both use linux-lts from same Alpine)
# Determine kernel version from installed headers (must match KERNEL_PKG_VERSION)
# Highest linux-headers version found under /usr/src (empty when none is installed).
# Uses a glob instead of parsing `ls`; an unmatched glob stays literal, hence the -e guard.
KVER=$(
    for hdr_path in /usr/src/linux-headers-*; do
        [ -e "${hdr_path}" ] || continue
        printf '%s\n' "${hdr_path#/usr/src/linux-headers-}"
    done | sort -V | tail -1
)

# Build-time verification: ensure modules are compiled for the pinned kernel version.
# KERNEL_PKG_VERSION is like "6.12.76-r0"; KVER is like "6.12.76-0-lts".
# The package release (-rN) is part of the kernel release string (-N-lts), so it is
# compared too: stripping it from both sides would accept headers built for a different
# package release than the one pinned for the ISO.
EXPECTED_KVER="$(printf '%s\n' "${KERNEL_PKG_VERSION}" | sed 's/-r\([0-9][0-9]*\)$/-\1-lts/')"
# Numeric parts are kept for the diagnostic output below.
PINNED_KVER="$(printf '%s\n' "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*$//')"
RUNNING_KVER="$(printf '%s\n' "${KVER}" | sed 's/-[0-9]*-lts$//')"

if [ -z "${KVER}" ]; then
    echo "ERROR: no linux-headers-* found under /usr/src" >&2
    echo "  VERSIONS pins:     ${KERNEL_PKG_VERSION} (expected headers: ${EXPECTED_KVER})" >&2
    exit 1
fi

if [ "${KVER}" != "${EXPECTED_KVER}" ]; then
    echo "ERROR: kernel version mismatch!" >&2
    echo "  VERSIONS pins:     ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER}, expected headers: ${EXPECTED_KVER})" >&2
    echo "  Installed headers: ${KVER} (numeric: ${RUNNING_KVER})" >&2
    echo "  Update KERNEL_PKG_VERSION in iso/builder/VERSIONS to match installed headers." >&2
    exit 1
fi
echo "=== kernel version OK: ${KVER} matches pin ${KERNEL_PKG_VERSION} ==="
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
# Inject .ko files into overlay at /usr/local/lib/nvidia/ (not /lib/modules/ — modloop squashfs
@@ -176,12 +192,9 @@ if [ -d /var/tmp/bee-iso-work ]; then
-exec rm -rf {} + 2>/dev/null || true
fi
# Run from /var/tmp to avoid git repo context conflicts and to ensure enough scratch space.
# mkinitfs/update-kernel use TMPDIR for initramfs build; tmpfs /tmp is only ~1GB.
# mkimage.sh sources genapkovl-*.sh from CWD (not from ~/.mkimage), so copy it here too.
# genapkovl-bee.sh is found by mkimage via ~/.mkimage/ (copied above) — no CWD dependency.
export TMPDIR=/var/tmp
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
cd /var/tmp
sh /usr/share/aports/scripts/mkimage.sh \
--tag "v${ALPINE_VERSION}" \
--outdir "${DIST_DIR}" \

View File

@@ -69,7 +69,7 @@ rc_add bee-sshsetup default
rc_add bee-network default
rc_add dropbear default
rc_add bee-nvidia default
rc_add bee-audit-debug default
rc_add bee-audit default
if [ -d "$OVERLAY/etc" ]; then
cp -r "$OVERLAY/etc/." "$tmp/etc/"

View File

@@ -17,7 +17,7 @@ profile_bee() {
syslinux_serial="0 115200"
apks="
alpine-base
linux-lts
linux-lts=${KERNEL_PKG_VERSION}
linux-firmware-none
linux-firmware-rtl_nic
linux-firmware-bnx2

View File

@@ -126,20 +126,30 @@ fi
echo ""
echo "-- audit last run --"
# audit binary logs via slog to stderr (bee-audit.log); JSON output goes to bee-audit.json.
# slog format: time=... level=INFO msg="audit output written" path=...
# The audit JSON report must be a regular file with non-zero size; report its size when it is.
if ! [ -f /var/log/bee-audit.json ] || ! [ -s /var/log/bee-audit.json ]; then
    fail "audit: bee-audit.json missing or empty"
else
    ok "audit: bee-audit.json present and non-empty"
    info "size: $(du -sh /var/log/bee-audit.json | cut -f1)"
fi
if [ -f /var/log/bee-audit.log ]; then
last_line=$(tail -1 /var/log/bee-audit.log)
info "last log line: $last_line"
if grep -q "audit completed" /var/log/bee-audit.log 2>/dev/null; then
# slog writes: msg="audit output written" on success
if grep -q "audit output written" /var/log/bee-audit.log 2>/dev/null; then
ok "audit: completed successfully"
elif grep -q "audit started" /var/log/bee-audit.log 2>/dev/null; then
warn "audit: started but may not have completed"
else
warn "audit: 'audit output written' not found in log — may have failed"
fi
# check for nvidia enrichment
if grep -q "nvidia: enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
reason=$(grep "nvidia: enrichment skipped" /var/log/bee-audit.log | tail -1)
# check for nvidia enrichment skip (slog message from nvidia collector)
if grep -q "nvidia: enrichment skipped\|nvidia.*skipped\|enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
reason=$(grep -E "nvidia.*skipped|enrichment skipped" /var/log/bee-audit.log | tail -1)
fail "audit: nvidia enrichment skipped — $reason"
else
ok "audit: nvidia enrichment OK"
ok "audit: nvidia enrichment OK (no skip message)"
fi
else
warn "audit: no log found at /var/log/bee-audit.log"