Fix critical ISO build bugs: kernel pinning, service registration, PATH, audit checks
- Pin linux-lts to exact KERNEL_PKG_VERSION=6.12.76-r0 in build and ISO package list
- Add build-time verification that compiled kernel version matches pin (fails loudly)
- Fix bee-audit-debug → bee-audit in genapkovl OpenRC registration (service was never starting)
- Add AUDIT_VERSION=0.1.0 to VERSIONS (was undefined, bee-release had empty fields)
- Pin linux-lts-dev version in second apk add in build-nvidia-module.sh
- Add /root/.profile to overlay so /usr/local/bin is in PATH for SSH sessions
- Remove "DEBUG MODE" from motd
- Fix smoketest: grep for slog "audit output written" instead of non-existent "audit completed"
- Document no-internet constraint in system-overview and runtime-flows
- Remove redundant genapkovl copy to /var/tmp (now found via ~/.mkimage/)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,12 @@
|
|||||||
# Runtime Flows — bee
|
# Runtime Flows — bee
|
||||||
|
|
||||||
|
## Network isolation — CRITICAL
|
||||||
|
|
||||||
|
**The live CD runs in an isolated network segment with no internet access.**
|
||||||
|
All binaries, kernel modules, and tools must be baked into the ISO at build time.
|
||||||
|
No `apk add`, no downloads, no package manager calls are allowed at boot.
|
||||||
|
DHCP is used only for LAN (operator SSH access). Internet is NOT available.
|
||||||
|
|
||||||
## Boot sequence (single ISO)
|
## Boot sequence (single ISO)
|
||||||
|
|
||||||
OpenRC default runlevel, service start order:
|
OpenRC default runlevel, service start order:
|
||||||
|
|||||||
@@ -25,6 +25,16 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- Interactive TUI (`bee-tui`) for network setup, service management, GPU tests
|
- Interactive TUI (`bee-tui`) for network setup, service management, GPU tests
|
||||||
- GPU stress testing via `gpu_burn` (vendor binary, optional)
|
- GPU stress testing via `gpu_burn` (vendor binary, optional)
|
||||||
|
|
||||||
|
## Network isolation — CRITICAL
|
||||||
|
|
||||||
|
**The live CD runs in an isolated network segment with no internet access.**
|
||||||
|
|
||||||
|
- All tools, drivers, and binaries MUST be pre-baked into the ISO at build time
|
||||||
|
- No `apk add` at boot — packages are installed during ISO creation, not at runtime
|
||||||
|
- No downloads at boot — NVIDIA modules, vendor tools, and all binaries come from the ISO overlay
|
||||||
|
- DHCP is used only for LAN access (SSH from operator laptop); internet is NOT assumed
|
||||||
|
- Any feature requiring network downloads cannot be added to the live CD
|
||||||
|
|
||||||
## Out of scope
|
## Out of scope
|
||||||
|
|
||||||
- Any writes to the server being audited
|
- Any writes to the server being audited
|
||||||
@@ -32,6 +42,7 @@ Fills gaps where Redfish/logpile is blind:
|
|||||||
- BMC/IPMI configuration
|
- BMC/IPMI configuration
|
||||||
- Anything requiring persistent storage on the audited machine
|
- Anything requiring persistent storage on the audited machine
|
||||||
- Windows support
|
- Windows support
|
||||||
|
- Any functionality requiring internet access at boot
|
||||||
|
|
||||||
## Tech stack
|
## Tech stack
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,9 @@
|
|||||||
ALPINE_VERSION=3.21
|
ALPINE_VERSION=3.21
|
||||||
KERNEL_VERSION=6.12
|
KERNEL_VERSION=6.12
|
||||||
|
# Exact Alpine package version for linux-lts. Pinned so the builder's linux-lts-dev headers match the kernel shipped in the ISO.
|
||||||
|
# To update: check `apk info linux-lts` on the target Alpine 3.21 system and change the value here;
|
||||||
|
# build-nvidia-module.sh and mkimg.bee.sh both take the version from this variable. Do NOT change without rebuilding NVIDIA modules cache.
|
||||||
|
KERNEL_PKG_VERSION=6.12.76-r0
|
||||||
NVIDIA_DRIVER_VERSION=590.48.01
|
NVIDIA_DRIVER_VERSION=590.48.01
|
||||||
GO_VERSION=1.23.6
|
GO_VERSION=1.23.6
|
||||||
|
AUDIT_VERSION=0.1.0
|
||||||
|
|||||||
@@ -16,14 +16,16 @@ set -e
|
|||||||
|
|
||||||
NVIDIA_VERSION="$1"
|
NVIDIA_VERSION="$1"
|
||||||
DIST_DIR="$2"
|
DIST_DIR="$2"
|
||||||
|
KERNEL_PKG_VERSION="$3"
|
||||||
|
|
||||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||||
|
[ -n "$KERNEL_PKG_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir> <kernel-pkg-version>"; exit 1; }
|
||||||
|
|
||||||
# Always install linux-lts-dev to ensure headers match the ISO's kernel (Alpine 3.21 = 6.12.x).
|
# Install the EXACT pinned linux-lts-dev version so builder headers always match ISO kernel.
|
||||||
# Without this, a builder with stale 6.6.x headers produces modules for the wrong kernel version.
|
# If this version is unavailable, apk will fail loudly — do NOT use a floating version here.
|
||||||
echo "=== installing linux-lts-dev ==="
|
echo "=== installing linux-lts-dev=${KERNEL_PKG_VERSION} ==="
|
||||||
apk add --quiet linux-lts-dev
|
apk add --quiet "linux-lts-dev=${KERNEL_PKG_VERSION}"
|
||||||
|
|
||||||
# Detect kernel version from installed headers (pick highest version if multiple).
|
# Detect kernel version from installed headers (pick highest version if multiple).
|
||||||
detect_kver() {
|
detect_kver() {
|
||||||
@@ -46,8 +48,8 @@ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ]; then
|
|||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies (linux-lts-dev pinned to same version as initial install above)
|
||||||
apk add --quiet gcc make perl linux-lts-dev wget
|
apk add --quiet gcc make perl "linux-lts-dev=${KERNEL_PKG_VERSION}" wget
|
||||||
|
|
||||||
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
# Download official NVIDIA .run installer (proprietary) with sha256 verification
|
||||||
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64/${NVIDIA_VERSION}"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ while [ $# -gt 0 ]; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
. "${BUILDER_DIR}/VERSIONS"
|
. "${BUILDER_DIR}/VERSIONS"
|
||||||
|
export KERNEL_PKG_VERSION
|
||||||
export PATH="$PATH:/usr/local/go/bin"
|
export PATH="$PATH:/usr/local/go/bin"
|
||||||
|
|
||||||
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
||||||
@@ -112,10 +113,25 @@ done
|
|||||||
# --- build NVIDIA kernel modules and inject into overlay ---
|
# --- build NVIDIA kernel modules and inject into overlay ---
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
||||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"
|
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}" "${KERNEL_PKG_VERSION}"
|
||||||
|
|
||||||
# Determine kernel version (same as what goes into the ISO — both use linux-lts from same Alpine)
|
# Determine kernel version from installed headers (must match KERNEL_PKG_VERSION)
|
||||||
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
||||||
|
|
||||||
|
# Build-time verification: ensure modules were compiled for the pinned kernel version.
|
||||||
|
# KERNEL_PKG_VERSION is like "6.12.76-r0"; KVER is like "6.12.76-0-lts".
|
||||||
|
# Strip the package-release suffix from each ("-r0" / "-0-lts") and compare the remaining upstream versions.
|
||||||
|
PINNED_KVER="$(echo "${KERNEL_PKG_VERSION}" | sed 's/-r[0-9]*//')"
|
||||||
|
RUNNING_KVER="$(echo "${KVER}" | sed 's/-[0-9]*-lts//')"
|
||||||
|
if [ "${PINNED_KVER}" != "${RUNNING_KVER}" ]; then
|
||||||
|
echo "ERROR: kernel version mismatch!"
|
||||||
|
echo " VERSIONS pins: ${KERNEL_PKG_VERSION} (numeric: ${PINNED_KVER})"
|
||||||
|
echo " Installed headers: ${KVER} (numeric: ${RUNNING_KVER})"
|
||||||
|
echo " Update KERNEL_PKG_VERSION in iso/builder/VERSIONS to match installed headers."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "=== kernel version OK: ${KVER} matches pin ${KERNEL_PKG_VERSION} ==="
|
||||||
|
|
||||||
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
||||||
|
|
||||||
# Inject .ko files into overlay at /usr/local/lib/nvidia/ (not /lib/modules/ — modloop squashfs
|
# Inject .ko files into overlay at /usr/local/lib/nvidia/ (not /lib/modules/ — modloop squashfs
|
||||||
@@ -176,12 +192,9 @@ if [ -d /var/tmp/bee-iso-work ]; then
|
|||||||
-exec rm -rf {} + 2>/dev/null || true
|
-exec rm -rf {} + 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Run from /var/tmp to avoid git repo context conflicts and to ensure enough scratch space.
|
|
||||||
# mkinitfs/update-kernel use TMPDIR for initramfs build; tmpfs /tmp is only ~1GB.
|
# mkinitfs/update-kernel use TMPDIR for initramfs build; tmpfs /tmp is only ~1GB.
|
||||||
# mkimage.sh sources genapkovl-*.sh from CWD (not from ~/.mkimage), so copy it here too.
|
# genapkovl-bee.sh is found by mkimage via ~/.mkimage/ (copied above) — no CWD dependency.
|
||||||
export TMPDIR=/var/tmp
|
export TMPDIR=/var/tmp
|
||||||
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
|
|
||||||
cd /var/tmp
|
|
||||||
sh /usr/share/aports/scripts/mkimage.sh \
|
sh /usr/share/aports/scripts/mkimage.sh \
|
||||||
--tag "v${ALPINE_VERSION}" \
|
--tag "v${ALPINE_VERSION}" \
|
||||||
--outdir "${DIST_DIR}" \
|
--outdir "${DIST_DIR}" \
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ rc_add bee-sshsetup default
|
|||||||
rc_add bee-network default
|
rc_add bee-network default
|
||||||
rc_add dropbear default
|
rc_add dropbear default
|
||||||
rc_add bee-nvidia default
|
rc_add bee-nvidia default
|
||||||
rc_add bee-audit-debug default
|
rc_add bee-audit default
|
||||||
|
|
||||||
if [ -d "$OVERLAY/etc" ]; then
|
if [ -d "$OVERLAY/etc" ]; then
|
||||||
cp -r "$OVERLAY/etc/." "$tmp/etc/"
|
cp -r "$OVERLAY/etc/." "$tmp/etc/"
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ profile_bee() {
|
|||||||
syslinux_serial="0 115200"
|
syslinux_serial="0 115200"
|
||||||
apks="
|
apks="
|
||||||
alpine-base
|
alpine-base
|
||||||
linux-lts
|
linux-lts=${KERNEL_PKG_VERSION}
|
||||||
linux-firmware-none
|
linux-firmware-none
|
||||||
linux-firmware-rtl_nic
|
linux-firmware-rtl_nic
|
||||||
linux-firmware-bnx2
|
linux-firmware-bnx2
|
||||||
|
|||||||
@@ -126,20 +126,30 @@ fi
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- audit last run --"
|
echo "-- audit last run --"
|
||||||
|
# audit binary logs via slog to stderr (bee-audit.log); JSON output goes to bee-audit.json.
|
||||||
|
# slog format: time=... level=INFO msg="audit output written" path=...
|
||||||
|
if [ -f /var/log/bee-audit.json ] && [ -s /var/log/bee-audit.json ]; then
|
||||||
|
ok "audit: bee-audit.json present and non-empty"
|
||||||
|
info "size: $(du -sh /var/log/bee-audit.json | cut -f1)"
|
||||||
|
else
|
||||||
|
fail "audit: bee-audit.json missing or empty"
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -f /var/log/bee-audit.log ]; then
|
if [ -f /var/log/bee-audit.log ]; then
|
||||||
last_line=$(tail -1 /var/log/bee-audit.log)
|
last_line=$(tail -1 /var/log/bee-audit.log)
|
||||||
info "last log line: $last_line"
|
info "last log line: $last_line"
|
||||||
if grep -q "audit completed" /var/log/bee-audit.log 2>/dev/null; then
|
# slog writes: msg="audit output written" on success
|
||||||
|
if grep -q "audit output written" /var/log/bee-audit.log 2>/dev/null; then
|
||||||
ok "audit: completed successfully"
|
ok "audit: completed successfully"
|
||||||
elif grep -q "audit started" /var/log/bee-audit.log 2>/dev/null; then
|
else
|
||||||
warn "audit: started but may not have completed"
|
warn "audit: 'audit output written' not found in log — may have failed"
|
||||||
fi
|
fi
|
||||||
# check for nvidia enrichment
|
# check for nvidia enrichment skip (slog message from nvidia collector)
|
||||||
if grep -q "nvidia: enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
|
if grep -q "nvidia: enrichment skipped\|nvidia.*skipped\|enrichment skipped" /var/log/bee-audit.log 2>/dev/null; then
|
||||||
reason=$(grep "nvidia: enrichment skipped" /var/log/bee-audit.log | tail -1)
|
reason=$(grep -E "nvidia.*skipped|enrichment skipped" /var/log/bee-audit.log | tail -1)
|
||||||
fail "audit: nvidia enrichment skipped — $reason"
|
fail "audit: nvidia enrichment skipped — $reason"
|
||||||
else
|
else
|
||||||
ok "audit: nvidia enrichment OK"
|
ok "audit: nvidia enrichment OK (no skip message)"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
warn "audit: no log found at /var/log/bee-audit.log"
|
warn "audit: no log found at /var/log/bee-audit.log"
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
██████╔╝███████╗███████╗ ██████╔╝███████╗██████╔╝╚██████╔╝╚██████╔╝
|
██████╔╝███████╗███████╗ ██████╔╝███████╗██████╔╝╚██████╔╝╚██████╔╝
|
||||||
╚═════╝ ╚══════╝╚══════╝ ╚═════╝ ╚══════╝╚═════╝ ╚═════╝ ╚═════╝
|
╚═════╝ ╚══════╝╚══════╝ ╚═════╝ ╚══════╝╚═════╝ ╚═════╝ ╚═════╝
|
||||||
|
|
||||||
Hardware Audit LiveCD — DEBUG MODE
|
Hardware Audit LiveCD
|
||||||
Build: %%BUILD_INFO%%
|
Build: %%BUILD_INFO%%
|
||||||
|
|
||||||
Logs: /var/log/bee-audit.json /var/log/bee-network.log
|
Logs: /var/log/bee-audit.json /var/log/bee-network.log
|
||||||
|
|||||||
1
iso/overlay/root/.profile
Normal file
1
iso/overlay/root/.profile
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export PATH="/usr/local/bin:$PATH"
|
||||||
Reference in New Issue
Block a user