Fix critical ISO build bugs: kernel pinning, service registration, PATH, audit checks
- Pin linux-lts to exact KERNEL_PKG_VERSION=6.12.76-r0 in build and ISO package list
- Add build-time verification that compiled kernel version matches pin (fails loudly)
- Fix bee-audit-debug → bee-audit in genapkovl OpenRC registration (service was never starting)
- Add AUDIT_VERSION=0.1.0 to VERSIONS (was undefined, bee-release had empty fields)
- Pin linux-lts-dev version in second apk add in build-nvidia-module.sh
- Add /root/.profile to overlay so /usr/local/bin is in PATH for SSH sessions
- Remove "DEBUG MODE" from motd
- Fix smoketest: grep for slog "audit output written" instead of non-existent "audit completed"
- Document no-internet constraint in system-overview and runtime-flows
- Remove redundant genapkovl copy to /var/tmp (now found via ~/.mkimage/)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,9 @@
|
||||
ALPINE_VERSION=3.21
|
||||
KERNEL_VERSION=6.12
|
||||
# Exact Alpine package version for linux-lts. Pin this to match builder headers with ISO kernel.
|
||||
# To update: check `apk info linux-lts` on the target Alpine 3.21 system and change the value here;
|
||||
# build-nvidia-module.sh and mkimg.bee.sh take it from this file. Do NOT change without rebuilding NVIDIA modules cache.
|
||||
KERNEL_PKG_VERSION=6.12.76-r0
|
||||
NVIDIA_DRIVER_VERSION=590.48.01
|
||||
GO_VERSION=1.23.6
|
||||
AUDIT_VERSION=0.1.0
|
||||
|
||||
@@ -16,14 +16,16 @@ set -e
|
||||
|
||||
NVIDIA_VERSION="$1"
|
||||
DIST_DIR="$2"
|
||||
KERNEL_PKG_VERSION="$3"
|
||||
|
||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||
|
||||
# Always install linux-lts-dev to ensure headers match the ISO's kernel (Alpine 3.21 = 6.12.x).
|
||||
# Without this, a builder with stale 6.6.x headers produces modules for the wrong kernel version.
|
||||
echo "=== installing linux-lts-dev ==="
|
||||
apk add --quiet linux-lts-dev
|
||||
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
|
||||
# If this version is unavailable, apk will fail loudly — do NOT use a floating version here.
|
||||
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
|
||||
apk add --quiet "linux-lts-dev=${KERNEL_PKG_VERSION}"
|
||||
|
||||
# Detect kernel version from installed headers (pick highest version if multiple).
|
||||
detect_kver() {
|
||||
@@ -46,8 +48,8 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Install build dependencies
|
||||
apk add --quiet gcc make perl linux-lts-dev wget
|
||||
# Install build dependencies (linux-lts-dev pinned to same version as initial install above)
|
||||
apk add --quiet gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
|
||||
|
||||
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
||||
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
||||
|
||||
@@ -25,6 +25,7 @@ while [ $# -gt 0 ]; do
|
||||
done
|
||||
|
||||
. "${BUILDER_DIR}/VERSIONS"
|
||||
export KERNEL_PKG_VERSION
|
||||
export PATH="$PATH:/usr/local/go/bin"
|
||||
|
||||
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
||||
@@ -112,10 +113,25 @@ done
|
||||
# --- build NVIDIA kernel modules and inject into overlay ---
|
||||
echo ""
|
||||
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"
|
||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}"
|
||||
|
||||
# Determine kernel version (same as what goes into the ISO — both use linux-lts from same Alpine)
|
||||
# Determine kernel version from installed headers (must match KERNEL_PKG_VERSION)
|
||||
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
||||
|
||||
# Build-time verification: ensure modules were compiled for the pinned kernel version.
|
||||
# KERNEL_PKG_VERSION is like "6.12.76-r0"; KVER is like "6.12.76-0-lts".
|
||||
# Extract numeric part from both and compare.
|
||||
PINNED_KVER="$(echo "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*//')"
|
||||
RUNNING_KVER="$(echo "${KVER}" | sed 's/-[0-9]*-lts//')"
|
||||
if [ "${PINNED_KVER}" != "${RUNNING_KVER}" ]; then
|
||||
echo "ERROR: kernel version mismatch!"
|
||||
echo " VERSIONS pins: ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER})"
|
||||
echo " Installed headers: ${KVER} (numeric: ${RUNNING_KVER})"
|
||||
echo " Update KERNEL_PKG_VERSION in iso/builder/VERSIONS to match installed headers."
|
||||
exit 1
|
||||
fi
|
||||
echo "=== kernel version OK: ${KVER} matches pin ${KERNEL_PKG_VERSION} ==="
|
||||
|
||||
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
||||
|
||||
# Inject .ko files into overlay at /usr/local/lib/nvidia/ (not /lib/modules/ — modloop squashfs
|
||||
@@ -176,12 +192,9 @@ if [ -d /var/tmp/bee-iso-work ]; then
|
||||
-exec rm -rf {} + 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Run from /var/tmp to avoid git repo context conflicts and to ensure enough scratch space.
|
||||
# mkinitfs/update-kernel use TMPDIR for initramfs build; tmpfs /tmp is only ~1GB.
|
||||
# mkimage.sh sources genapkovl-*.sh from CWD (not from ~/.mkimage), so copy it here too.
|
||||
# genapkovl-bee.sh is found by mkimage via ~/.mkimage/ (copied above) — no CWD dependency.
|
||||
export TMPDIR=/var/tmp
|
||||
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
|
||||
cd /var/tmp
|
||||
sh /usr/share/aports/scripts/mkimage.sh \
|
||||
--tag "v${ALPINE_VERSION}" \
|
||||
--outdir "${DIST_DIR}" \
|
||||
|
||||
@@ -69,7 +69,7 @@ rc_add bee-sshsetup default
|
||||
rc_add bee-network default
|
||||
rc_add dropbear default
|
||||
rc_add bee-nvidia default
|
||||
rc_add bee-audit-debug default
|
||||
rc_add bee-audit default
|
||||
|
||||
if [ -d "$OVERLAY/etc" ]; then
|
||||
cp -r "$OVERLAY/etc/." "$tmp/etc/"
|
||||
|
||||
@@ -17,7 +17,7 @@ profile_bee() {
|
||||
syslinux_serial="0 115200"
|
||||
apks="
|
||||
alpine-base
|
||||
linux-lts
|
||||
linux-lts=${KERNEL_PKG_VERSION}
|
||||
linux-firmware-none
|
||||
linux-firmware-rtl_nic
|
||||
linux-firmware-bnx2
|
||||
|
||||
@@ -126,20 +126,30 @@ fi
|
||||
|
||||
echo ""
|
||||
echo "-- audit last run --"
|
||||
# audit binary logs via slog to stderr (bee-audit.log); JSON output goes to bee-audit.json.
|
||||
# slog format: time=... level=INFO msg="audit output written" path=...
|
||||
if [ -f /var/log/bee-audit.json ] && [ -s /var/log/bee-audit.json ]; then
|
||||
ok "audit: bee-audit.json present and non-empty"
|
||||
info "size: $(du -sh /var/log/bee-audit.json | cut -f1)"
|
||||
else
|
||||
fail "audit: bee-audit.json missing or empty"
|
||||
fi
|
||||
|
||||
if [ -f /var/log/bee-audit.log ]; then
|
||||
last_line=$(tail -1 /var/log/bee-audit.log)
|
||||
info "last log line: $last_line"
|
||||
if grep -q "audit completed" /var/log/bee-audit.log 2>/dev/null; then
|
||||
# slog writes: msg="audit output written" on success
|
||||
if grep -q "audit output written" /var/log/bee-audit.log 2>/dev/null; then
|
||||
ok "audit: completed successfully"
|
||||
elif grep -q "audit started" /var/log/bee-audit.log 2>/dev/null; then
|
||||
warn "audit: started but may not have completed"
|
||||
else
|
||||
warn "audit: 'audit output written' not found in log — may have failed"
|
||||
fi
|
||||
# check for nvidia enrichment
|
||||
if grep -q "nvidia: enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
|
||||
reason=$(grep "nvidia: enrichment skipped" /var/log/bee-audit.log | tail -1)
|
||||
# check for nvidia enrichment skip (slog message from nvidia collector)
|
||||
if grep -q "nvidia: enrichment skipped\|nvidia.*skipped\|enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
|
||||
reason=$(grep -E "nvidia.*skipped|enrichment skipped" /var/log/bee-audit.log | tail -1)
|
||||
fail "audit: nvidia enrichment skipped — $reason"
|
||||
else
|
||||
ok "audit: nvidia enrichment OK"
|
||||
ok "audit: nvidia enrichment OK (no skip message)"
|
||||
fi
|
||||
else
|
||||
warn "audit: no log found at /var/log/bee-audit.log"
|
||||
|
||||
Reference in New Issue
Block a user