Merge debug/prod into single ISO build, fix NVIDIA module loading

## ISO build consolidation
- Remove separate debug/prod split: overlay-debug/, build-debug.sh,
  mkimg.bee_debug.sh, genapkovl-bee_debug.sh all deleted
- Single overlay: iso/overlay/ (was overlay-debug content)
- Single build script: build.sh (SSH, TUI, NVIDIA, vendor tools, bee-release)
- Single mkimage profile: bee (with dropbear, dialog, strace, gcompat, etc.)

## NVIDIA fixes
- Modules now stored at /usr/local/lib/nvidia/ instead of
  /lib/modules/<kver>/extra/nvidia/ — modloop squashfs mounts over that
  path at boot making overlay content there inaccessible
- bee-nvidia init: load via insmod (absolute path), not modprobe
- bee-nvidia init: create libnvidia-ml.so.1/libcuda.so.1 symlinks in /usr/lib/
- build-nvidia-module.sh: always install linux-lts-dev (not conditional) —
  stale 6.6.x headers caused wrong-kernel modules that never loaded at runtime
- build-nvidia-module.sh: create soname symlinks in cache
- KERNEL_VERSION in VERSIONS updated 6.6 → 6.12
- gcompat added to ISO packages (nvidia-smi is a glibc binary on musl Alpine)

## Service ordering
- bee-audit: add `after bee-nvidia` so NVIDIA enrichment always succeeds

## New tooling
- iso/builder/smoketest.sh: SSH smoke test for post-boot ISO validation
- iso/builder/build-gpu-burn.sh: builds gpu_burn vendor binary (CUDA 12.8+)
- vendor/gpu_burn included automatically if placed in iso/vendor/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikhail Chusavitin
2026-03-06 20:14:18 +03:00
parent 0907ba07c3
commit 1768bb58dd
24 changed files with 1296 additions and 261 deletions

View File

@@ -1,20 +1,21 @@
#!/sbin/openrc-run
description="Bee: run hardware audit (production unattended mode)"
description="Bee: run hardware audit"
# OpenRC dependency declaration for the audit service: requires localmount and
# is ordered after the listed bee-* services.
# NOTE(review): this diff hunk is rendered without +/- markers, so two `after`
# lines appear together (one naming bee-update, one naming bee-network) —
# presumably only one of them exists in the committed file; confirm in the tree.
depend() {
need localmount
after bee-update bee-nvidia
after bee-network bee-nvidia
}
start() {
ebegin "Running hardware audit"
/usr/local/bin/audit --output usb > /var/log/bee-audit.json 2>/var/log/bee-audit.log
rc=$?
if [ "$rc" -eq 0 ]; then
einfo "Audit complete"
/usr/local/bin/audit --output stdout > /var/log/bee-audit.json 2>/var/log/bee-audit.log
local rc=$?
if [ $rc -eq 0 ]; then
einfo "Audit complete: /var/log/bee-audit.json"
einfo "SSH in and inspect results. Dropbear is running."
else
ewarn "Audit finished with errors"
ewarn "Audit finished with errors — check /var/log/bee-audit.log"
fi
eend 0
}

View File

@@ -4,7 +4,7 @@ description="Bee: bring up network interfaces via DHCP"
# OpenRC dependency declaration for the network service: requires localmount and
# is ordered before the listed bee-* services.
# NOTE(review): this diff hunk is rendered without +/- markers, so two `before`
# lines appear together (with and without bee-update) — presumably only one of
# them exists in the committed file; confirm in the tree.
depend() {
need localmount
before bee-update bee-audit
before bee-audit
}
start() {

View File

@@ -2,6 +2,8 @@
description="Bee: load NVIDIA kernel modules"
NVIDIA_KO_DIR="/usr/local/lib/nvidia"
depend() {
need localmount
before bee-audit
@@ -9,23 +11,39 @@ depend() {
start() {
ebegin "Loading NVIDIA modules"
kver="$(uname -r)"
einfo "kernel: ${kver}"
if [ -d "/lib/modules/${kver}/extra/nvidia" ]; then
einfo "module dir: /lib/modules/${kver}/extra/nvidia"
ls "/lib/modules/${kver}/extra/nvidia"/*.ko 2>/dev/null | sed 's/^/ /' || true
else
ewarn "module dir missing: /lib/modules/${kver}/extra/nvidia"
einfo "kernel: $(uname -r)"
if [ ! -d "$NVIDIA_KO_DIR" ]; then
ewarn "NVIDIA module dir missing: $NVIDIA_KO_DIR"
eend 1
return 1
fi
depmod -a 2>/dev/null || true
einfo "module dir: $NVIDIA_KO_DIR"
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
# Create libnvidia-ml and libcuda soname symlinks (.so.1 and .so) in /usr/lib; nvidia-smi is a glibc binary on Alpine/musl
for lib in libnvidia-ml libcuda; do
versioned=$(ls /usr/lib/${lib}.so.[0-9]* 2>/dev/null | head -1)
[ -n "$versioned" ] || continue
base=$(basename "$versioned")
ln -sf "$base" "/usr/lib/${lib}.so.1" 2>/dev/null || true
ln -sf "${lib}.so.1" "/usr/lib/${lib}.so" 2>/dev/null || true
done
# Load modules via insmod (bypasses modules.dep — modloop squashfs is read-only)
for mod in nvidia nvidia-modeset nvidia-uvm; do
if modprobe "$mod" 2>/dev/null; then
einfo "loaded: $mod"
ko="$NVIDIA_KO_DIR/${mod}.ko"
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
if [ -f "$ko" ]; then
if insmod "$ko" 2>/dev/null; then
einfo "loaded: $mod"
else
ewarn "failed to load: $mod"
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
fi
else
ewarn "failed to load: $mod"
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
ewarn "not found: $ko"
fi
done

View File

@@ -0,0 +1,28 @@
#!/sbin/openrc-run
description="Bee: configure SSH access (keys or password fallback)"
# OpenRC ordering for the SSH setup service: it must be scheduled ahead of
# dropbear, and it requires localmount.
depend() {
  before dropbear
  need localmount
}
# Provision the 'bee' login account and announce which SSH auth mode is active.
#
# The account and its well-known password (bee / eeb) are always provisioned so
# that password login remains available as a fallback. The presence of
# /etc/bee-ssh-password-fallback selects which status message is printed.
#
# Returns: 0 when the account and password were set up; 1 when adduser or
# passwd failed. Those failures used to be discarded, so the service reported
# success (and advertised the login) even when the account was unusable.
start() {
  local rc=0

  # Always create dedicated 'bee' user for password fallback.
  # If no SSH keys embedded: login with bee / eeb
  if ! id bee > /dev/null 2>&1; then
    if ! adduser -D -s /bin/sh bee > /dev/null 2>&1; then
      ewarn "failed to create user 'bee'"
      rc=1
    fi
  fi

  # Setting a password only makes sense once the account exists.
  if [ "$rc" -eq 0 ] && ! printf 'eeb\neeb\n' | passwd bee > /dev/null 2>&1; then
    ewarn "failed to set password for user 'bee'"
    rc=1
  fi

  if [ -f /etc/bee-ssh-password-fallback ]; then
    ebegin "SSH key auth unavailable — password fallback active"
    # Do not advertise credentials that were never actually set.
    if [ "$rc" -eq 0 ]; then
      ewarn "Login: bee / eeb"
    fi
    ewarn "Generate a key: sh keys/scripts/keygen.sh <name>"
  else
    # bee user exists but password login is less useful when keys work
    ebegin "SSH key auth configured"
  fi
  eend "$rc"
}

View File

@@ -1,15 +0,0 @@
#!/sbin/openrc-run
description="Bee: update audit binary from USB/network"
# OpenRC ordering for the updater: it sits between bee-network (earlier) and
# bee-audit (later), and requires localmount.
depend() {
  before bee-audit
  after bee-network
  need localmount
}
# Run the updater script, appending both of its output streams to
# /var/log/bee-update.log. The service always reports success, whatever the
# script's exit status.
start() {
  ebegin "Checking for audit binary update"
  {
    /usr/local/bin/bee-update.sh
  } >> /var/log/bee-update.log 2>&1
  eend 0
}

37
iso/overlay/etc/init.d/dropbear Executable file
View File

@@ -0,0 +1,37 @@
#!/sbin/openrc-run
description="Dropbear SSH server"
# OpenRC ordering for dropbear: requires localmount, uses logger when present,
# and is scheduled after bee-sshsetup.
depend() {
  use logger
  after bee-sshsetup
  need localmount
}
# Ensure RSA, ECDSA and ED25519 host keys exist, generating any that are missing.
#
# Environment (optional; defaults are the previously hard-coded paths):
#   DROPBEAR_KEYDIR  directory holding dropbear_<type>_host_key (default /etc/dropbear)
#   DROPBEAR_KEYGEN  key generator command (default /usr/bin/dropbearkey)
#   NOTE(review): if DROPBEAR_KEYDIR is overridden, dropbear presumably needs
#   matching -r options in DROPBEAR_OPTS to find the keys — confirm.
#
# Returns: 0 when every key is present afterwards; 1 if the directory could not
# be created or any key generation failed. Previously only the status of the
# last (ED25519) generation reached the caller, so RSA/ECDSA failures were
# silently ignored.
check_config() {
  local keydir="${DROPBEAR_KEYDIR:-/etc/dropbear}"
  local keygen="${DROPBEAR_KEYGEN:-/usr/bin/dropbearkey}"
  local spec type label keyfile rc=0

  # Create the key directory up front so a missing directory is reported once
  # instead of surfacing as three separate key-generation failures.
  if ! mkdir -p "$keydir"; then
    eerror "Cannot create host key directory: $keydir"
    return 1
  fi

  # Each entry is <dropbearkey type>:<label used in the log message>.
  for spec in rsa:RSA ecdsa:ECDSA ed25519:ED25519; do
    type=${spec%%:*}
    label=${spec#*:}
    keyfile="$keydir/dropbear_${type}_host_key"
    [ -e "$keyfile" ] && continue

    einfo "Generating ${label} host key..."
    if ! "$keygen" -t "$type" -f "$keyfile"; then
      # Keep going so the remaining key types are still attempted, but
      # remember the failure for the final status.
      eerror "Failed to generate ${label} host key"
      rc=1
    fi
  done

  return "$rc"
}
# Launch dropbear once check_config succeeds.
# Returns 1 without starting anything when check_config fails; otherwise returns
# whatever eend yields for dropbear's exit status.
start() {
  if ! check_config; then
    return 1
  fi

  local status=0
  ebegin "Starting dropbear"
  # DROPBEAR_OPTS is intentionally unquoted so it splits into separate options.
  /usr/sbin/dropbear ${DROPBEAR_OPTS} || status=$?
  eend "$status"
}
# Stop dropbear through start-stop-daemon using the pid file
# /var/run/dropbear.pid, and report that command's exit status via eend.
stop() {
  local status=0
  ebegin "Stopping dropbear"
  start-stop-daemon --stop --pidfile /var/run/dropbear.pid || status=$?
  eend "$status"
}