Merge debug/prod into single ISO build, fix NVIDIA module loading
## ISO build consolidation

- Remove separate debug/prod split: overlay-debug/, build-debug.sh, mkimg.bee_debug.sh, genapkovl-bee_debug.sh all deleted
- Single overlay: iso/overlay/ (was overlay-debug content)
- Single build script: build.sh (SSH, TUI, NVIDIA, vendor tools, bee-release)
- Single mkimage profile: bee (with dropbear, dialog, strace, gcompat, etc.)

## NVIDIA fixes

- Modules now stored at /usr/local/lib/nvidia/ instead of /lib/modules/<kver>/extra/nvidia/ — the modloop squashfs mounts over that path at boot, making overlay content there inaccessible
- bee-nvidia init: load via insmod (absolute path), not modprobe
- bee-nvidia init: create libnvidia-ml.so.1/libcuda.so.1 symlinks in /usr/lib/
- build-nvidia-module.sh: always install linux-lts-dev (not conditional) — stale 6.6.x headers caused wrong-kernel modules that never loaded at runtime
- build-nvidia-module.sh: create soname symlinks in cache
- KERNEL_VERSION in VERSIONS updated 6.6 → 6.12
- gcompat added to ISO packages (nvidia-smi is a glibc binary on musl Alpine)

## Service ordering

- bee-audit: add `after bee-nvidia` so NVIDIA enrichment always succeeds

## New tooling

- iso/builder/smoketest.sh: SSH smoke test for post-boot ISO validation
- iso/builder/build-gpu-burn.sh: builds gpu_burn vendor binary (CUDA 12.8+)
- vendor/gpu_burn included automatically if placed in iso/vendor/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
|
||||
description="Bee: load NVIDIA kernel modules"
|
||||
|
||||
NVIDIA_KO_DIR="/usr/local/lib/nvidia"
|
||||
|
||||
depend() {
|
||||
need localmount
|
||||
before bee-audit
|
||||
@@ -9,23 +11,39 @@ depend() {
|
||||
|
||||
start() {
|
||||
ebegin "Loading NVIDIA modules"
|
||||
kver="$(uname -r)"
|
||||
einfo "kernel: ${kver}"
|
||||
if [ -d "/lib/modules/${kver}/extra/nvidia" ]; then
|
||||
einfo "module dir: /lib/modules/${kver}/extra/nvidia"
|
||||
ls "/lib/modules/${kver}/extra/nvidia"/*.ko 2>/dev/null | sed 's/^/ /' || true
|
||||
else
|
||||
ewarn "module dir missing: /lib/modules/${kver}/extra/nvidia"
|
||||
einfo "kernel: $(uname -r)"
|
||||
|
||||
if [ ! -d "$NVIDIA_KO_DIR" ]; then
|
||||
ewarn "NVIDIA module dir missing: $NVIDIA_KO_DIR"
|
||||
eend 1
|
||||
return 1
|
||||
fi
|
||||
|
||||
depmod -a 2>/dev/null || true
|
||||
einfo "module dir: $NVIDIA_KO_DIR"
|
||||
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
|
||||
|
||||
# Create libnvidia-ml and libcuda soname symlinks (libnvidia-ml is needed by nvidia-smi, a glibc binary on Alpine/musl)
|
||||
for lib in libnvidia-ml libcuda; do
|
||||
versioned=$(ls /usr/lib/${lib}.so.[0-9]* 2>/dev/null | head -1)
|
||||
[ -n "$versioned" ] || continue
|
||||
base=$(basename "$versioned")
|
||||
ln -sf "$base" "/usr/lib/${lib}.so.1" 2>/dev/null || true
|
||||
ln -sf "${lib}.so.1" "/usr/lib/${lib}.so" 2>/dev/null || true
|
||||
done
|
||||
|
||||
# Load modules via insmod (bypasses modules.dep — modloop squashfs is read-only)
|
||||
for mod in nvidia nvidia-modeset nvidia-uvm; do
|
||||
if modprobe "$mod" 2>/dev/null; then
|
||||
einfo "loaded: $mod"
|
||||
ko="$NVIDIA_KO_DIR/${mod}.ko"
|
||||
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
|
||||
if [ -f "$ko" ]; then
|
||||
if insmod "$ko" 2>/dev/null; then
|
||||
einfo "loaded: $mod"
|
||||
else
|
||||
ewarn "failed to load: $mod"
|
||||
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
|
||||
fi
|
||||
else
|
||||
ewarn "failed to load: $mod"
|
||||
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
|
||||
ewarn "not found: $ko"
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
Reference in New Issue
Block a user