Merge debug/prod into single ISO build, fix NVIDIA module loading
## ISO build consolidation - Remove separate debug/prod split: overlay-debug/, build-debug.sh, mkimg.bee_debug.sh, genapkovl-bee_debug.sh all deleted - Single overlay: iso/overlay/ (was overlay-debug content) - Single build script: build.sh (SSH, TUI, NVIDIA, vendor tools, bee-release) - Single mkimage profile: bee (with dropbear, dialog, strace, gcompat, etc.) ## NVIDIA fixes - Modules now stored at /usr/local/lib/nvidia/ instead of /lib/modules/<kver>/extra/nvidia/ — modloop squashfs mounts over that path at boot making overlay content there inaccessible - bee-nvidia init: load via insmod (absolute path), not modprobe - bee-nvidia init: create libnvidia-ml.so.1/libcuda.so.1 symlinks in /usr/lib/ - build-nvidia-module.sh: always install linux-lts-dev (not conditional) — stale 6.6.x headers caused wrong-kernel modules that never loaded at runtime - build-nvidia-module.sh: create soname symlinks in cache - KERNEL_VERSION in VERSIONS updated 6.6 → 6.12 - gcompat added to ISO packages (nvidia-smi is a glibc binary on musl Alpine) ## Service ordering - bee-audit: add `after bee-nvidia` so NVIDIA enrichment always succeeds ## New tooling - iso/builder/smoketest.sh: SSH smoke test for post-boot ISO validation - iso/builder/build-gpu-burn.sh: builds gpu_burn vendor binary (CUDA 12.8+) - vendor/gpu_burn included automatically if placed in iso/vendor/ Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
ALPINE_VERSION=3.21
|
||||
KERNEL_VERSION=6.6
|
||||
KERNEL_VERSION=6.12
|
||||
NVIDIA_DRIVER_VERSION=590.48.01
|
||||
GO_VERSION=1.23.6
|
||||
|
||||
82
iso/builder/build-gpu-burn.sh
Normal file
82
iso/builder/build-gpu-burn.sh
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/bin/sh
|
||||
# build-gpu-burn.sh — build gpu_burn stress tool and output static-ish binary to DIST_DIR
|
||||
#
|
||||
# gpu_burn requires nvcc (CUDA toolkit). This script downloads a minimal CUDA toolkit
|
||||
# runfile, extracts only nvcc + headers, builds gpu_burn, then cleans up the toolkit.
|
||||
#
|
||||
# Output: $DIST_DIR/gpu_burn (ready to copy into ISO vendor/)
|
||||
#
|
||||
# Usage: sh build-gpu-burn.sh <dist-dir>
|
||||
|
||||
set -e

# First positional argument: directory that receives the built binary.
DIST_DIR="$1"
if [ -z "$DIST_DIR" ]; then
    echo "usage: $0 <dist-dir>"
    exit 1
fi
mkdir -p "$DIST_DIR"

# A non-empty regular file left by an earlier run counts as a cache hit.
OUTPUT="$DIST_DIR/gpu_burn"
if [ -s "$OUTPUT" ] && [ -f "$OUTPUT" ]; then
    echo "=== gpu_burn cached: $OUTPUT ==="
    exit 0
fi
|
||||
|
||||
# CUDA toolkit version for building — only nvcc + headers needed, not the full runtime.
|
||||
# Must be <= max CUDA version supported by the NVIDIA driver in VERSIONS.
|
||||
# Driver 590.48.01 supports up to CUDA 13.1; use 12.8.1 (stable, widely tested).
|
||||
# CUDA toolkit release to build with, and the build number that appears in the
# runfile name on NVIDIA's download server.
CUDA_VERSION="12.8.1"
CUDA_BUILD="570.124.06"
# Cached installer and extraction target; both live under /var/tmp so they
# survive between runs.
CUDA_RUN="/var/tmp/cuda-${CUDA_VERSION}.run"
CUDA_DIR="/var/tmp/cuda-toolkit-${CUDA_VERSION}"

echo "=== building gpu_burn (CUDA ${CUDA_VERSION}) ==="

# Install build dependencies
apk add --quiet gcc g++ make git wget libxml2

# Download CUDA toolkit runfile if not cached.
# The transfer goes to a temporary name and is renamed only after wget
# succeeds: the cache test is just "file is non-empty", so an interrupted
# download written straight to $CUDA_RUN would be taken for a valid cache
# entry on the next run.
if [ ! -s "$CUDA_RUN" ]; then
    echo "=== downloading CUDA ${CUDA_VERSION} toolkit ==="
    CUDA_RUN_PART="${CUDA_RUN}.part"
    rm -f "$CUDA_RUN_PART"
    if ! wget -q --show-progress -O "$CUDA_RUN_PART" \
        "https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/cuda_${CUDA_VERSION}_${CUDA_BUILD}_linux.run"; then
        rm -f "$CUDA_RUN_PART"
        echo "ERROR: failed to download CUDA ${CUDA_VERSION} toolkit"
        exit 1
    fi
    mv "$CUDA_RUN_PART" "$CUDA_RUN"
fi
|
||||
|
||||
# Extract toolkit (nvcc + headers only — skip driver, samples, docs to save time/space)
|
||||
# Path of the compiler the rest of the script relies on.
NVCC="$CUDA_DIR/bin/nvcc"

# Run the installer unless nvcc is already present as a regular file.
if [ ! -f "$NVCC" ]; then
    echo "=== extracting CUDA toolkit ==="
    rm -rf "$CUDA_DIR"
    # The pipeline's exit status is tail's, so an installer failure does not
    # abort here under `set -e`; it is caught by the nvcc check below.
    sh "$CUDA_RUN" \
        --silent \
        --toolkit \
        --toolkitpath="$CUDA_DIR" \
        --no-opengl-libs \
        --no-drm \
        --override 2>&1 | tail -5
fi

[ -f "$NVCC" ] || { echo "ERROR: nvcc not found after extraction: $NVCC"; exit 1; }
echo "nvcc: $("$NVCC" --version | head -1)"
|
||||
|
||||
# Clone gpu_burn source
|
||||
# Fetch the gpu-burn sources once; an existing checkout is reused as-is.
GPU_BURN_DIR="/var/tmp/gpu-burn-src"
if [ -d "$GPU_BURN_DIR/.git" ]; then
    echo "=== gpu-burn source already cloned ==="
else
    echo "=== cloning gpu-burn ==="
    git clone --depth=1 https://github.com/wilicc/gpu-burn.git "$GPU_BURN_DIR"
fi
|
||||
|
||||
# Build
|
||||
echo "=== building gpu_burn ==="
# Build inside a subshell so this script's working directory is untouched:
# a relative <dist-dir> must still resolve from where the script was started
# when the binary is copied below.
# NOTE(review): upstream gpu-burn's README documents CUDAPATH as the make
# variable that selects the toolkit; CUDA_PATH is passed too in case anything
# reads the conventional name — confirm against the cloned Makefile.
(
    cd "$GPU_BURN_DIR"
    make clean 2>/dev/null || true
    CUDAPATH="$CUDA_DIR" CUDA_PATH="$CUDA_DIR" make 2>&1
)

[ -f "$GPU_BURN_DIR/gpu_burn" ] || { echo "ERROR: gpu_burn binary not produced"; exit 1; }

cp "$GPU_BURN_DIR/gpu_burn" "$OUTPUT"
# compare.ptx is copied on a best-effort basis, but a failure is reported
# instead of being silently ignored.
if ! cp "$GPU_BURN_DIR/compare.ptx" "$(dirname "$OUTPUT")/compare.ptx" 2>/dev/null; then
    echo "WARNING: compare.ptx could not be copied next to $OUTPUT"
fi

echo "=== gpu_burn build complete ==="
ls -lh "$OUTPUT"
echo "NOTE: compare.ptx must be present in same dir as gpu_burn at runtime"
|
||||
@@ -20,6 +20,11 @@ DIST_DIR="$2"
|
||||
[ -n "$NVIDIA_VERSION" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
||||
[ -n "$DIST_DIR" ] || { echo "usage: $0 <nvidia-version> <dist-dir>"; exit 1; }
|
||||
|
||||
# Always install linux-lts-dev to ensure headers match the ISO's kernel (Alpine 3.21 = 6.12.x).
|
||||
# Without this, a builder with stale 6.6.x headers produces modules for the wrong kernel version.
|
||||
echo "=== installing linux-lts-dev ==="
|
||||
apk add --quiet linux-lts-dev
|
||||
|
||||
# Detect kernel version from installed headers (pick highest version if multiple).
|
||||
detect_kver() {
|
||||
ls /usr/src/ 2>/dev/null \
|
||||
@@ -30,11 +35,6 @@ detect_kver() {
|
||||
}
|
||||
|
||||
KVER="$(detect_kver)"
|
||||
if [ -z "$KVER" ]; then
|
||||
echo "=== installing linux-lts-dev ==="
|
||||
apk add --quiet linux-lts-dev
|
||||
KVER="$(detect_kver)"
|
||||
fi
|
||||
KDIR="/usr/src/linux-headers-${KVER}"
|
||||
echo "=== NVIDIA ${NVIDIA_VERSION} (proprietary) for kernel ${KVER} ==="
|
||||
|
||||
@@ -107,6 +107,15 @@ cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
# libcuda stub needed by nvidia-smi at runtime
|
||||
cp "$EXTRACT_DIR/libcuda.so."* "$CACHE_DIR/lib/" 2>/dev/null || true
|
||||
|
||||
# Create soname symlinks required by nvidia-smi on Alpine (musl/glibc via gcompat)
|
||||
for lib in libnvidia-ml libcuda; do
|
||||
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9]* 2>/dev/null | head -1)
|
||||
[ -n "$versioned" ] || continue
|
||||
base=$(basename "$versioned")
|
||||
ln -sf "$base" "$CACHE_DIR/lib/${lib}.so.1" 2>/dev/null || true
|
||||
ln -sf "${lib}.so.1" "$CACHE_DIR/lib/${lib}.so" 2>/dev/null || true
|
||||
done
|
||||
|
||||
echo "=== NVIDIA build complete ==="
|
||||
echo "cache: $CACHE_DIR"
|
||||
echo "modules: $(ls "$CACHE_DIR/modules/"*.ko | wc -l) .ko files"
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
#!/bin/sh
|
||||
# build.sh — production ISO build (unattended mode)
|
||||
# build.sh — build bee ISO
|
||||
#
|
||||
# Single build script. Produces a bootable live ISO with SSH access, TUI, NVIDIA drivers.
|
||||
#
|
||||
# Run on Alpine builder VM as root after setup-builder.sh.
|
||||
# Usage:
|
||||
# sh iso/builder/build.sh [--authorized-keys /path/to/authorized_keys]
|
||||
|
||||
set -e
|
||||
|
||||
@@ -8,29 +14,92 @@ BUILDER_DIR="${REPO_ROOT}/iso/builder"
|
||||
OVERLAY_DIR="${REPO_ROOT}/iso/overlay"
|
||||
DIST_DIR="${REPO_ROOT}/dist"
|
||||
VENDOR_DIR="${REPO_ROOT}/iso/vendor"
|
||||
AUTH_KEYS=""
|
||||
|
||||
# parse args
|
||||
# parse_args ARG... — process the script's command-line options.
#   --authorized-keys FILE   store FILE in AUTH_KEYS
# Exits with status 1 on an unknown option, or when --authorized-keys is given
# without a value (previously `shift 2` aborted the script without any message
# under `set -e`).
parse_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            --authorized-keys)
                if [ $# -lt 2 ]; then
                    echo "--authorized-keys requires a file argument"
                    exit 1
                fi
                AUTH_KEYS="$2"
                shift 2
                ;;
            *)
                echo "unknown arg: $1"
                exit 1
                ;;
        esac
    done
}
parse_args "$@"
|
||||
|
||||
. "${BUILDER_DIR}/VERSIONS"
|
||||
export PATH="$PATH:/usr/local/go/bin"
|
||||
|
||||
echo "=== bee production ISO build ==="
|
||||
echo "Alpine: ${ALPINE_VERSION}, Go: ${GO_VERSION}, NVIDIA: ${NVIDIA_DRIVER_VERSION}"
|
||||
# NOTE: lz4 compression for modloop is disabled — Alpine initramfs may not support lz4 squashfs.
|
||||
# Default xz compression is used until lz4 support is confirmed.
|
||||
|
||||
echo "=== bee ISO build ==="
|
||||
echo "Alpine: ${ALPINE_VERSION}, Go: ${GO_VERSION}"
|
||||
echo ""
|
||||
|
||||
# --- compile audit binary (static, Linux amd64) ---
|
||||
# Skip rebuild if binary is newer than all Go source files.
|
||||
AUDIT_BIN="${DIST_DIR}/bee-audit-linux-amd64"
|
||||
mkdir -p "$DIST_DIR"
|
||||
NEED_BUILD=1
|
||||
if [ -f "$AUDIT_BIN" ]; then
|
||||
NEWEST_SRC=$(find "${REPO_ROOT}/audit" -name '*.go' -newer "$AUDIT_BIN" | head -1)
|
||||
[ -z "$NEWEST_SRC" ] && NEED_BUILD=0
|
||||
fi
|
||||
|
||||
cd "${REPO_ROOT}/audit"
|
||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
||||
go build \
|
||||
-ldflags "-s -w -X main.Version=${AUDIT_VERSION}" \
|
||||
-o "$AUDIT_BIN" \
|
||||
./cmd/audit
|
||||
if [ "$NEED_BUILD" = "1" ]; then
|
||||
echo "=== building audit binary ==="
|
||||
cd "${REPO_ROOT}/audit"
|
||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
||||
go build \
|
||||
-ldflags "-s -w -X main.Version=${AUDIT_VERSION:-$(date +%Y%m%d)}" \
|
||||
-o "$AUDIT_BIN" \
|
||||
./cmd/audit
|
||||
echo "binary: $AUDIT_BIN"
|
||||
echo "size: $(du -sh "$AUDIT_BIN" | cut -f1)"
|
||||
else
|
||||
echo "=== audit binary up to date, skipping build ==="
|
||||
fi
|
||||
|
||||
# --- inject authorized_keys for SSH access ---
|
||||
# Uses the same Ed25519 keys as release signing (from git.mchus.pro/mchus/keys).
|
||||
# SSH public keys are stored alongside signing keys as ~/.keys/<name>.key.pub
|
||||
AUTHORIZED_KEYS_FILE="${OVERLAY_DIR}/root/.ssh/authorized_keys"
|
||||
mkdir -p "${OVERLAY_DIR}/root/.ssh"
|
||||
|
||||
if [ -n "$AUTH_KEYS" ]; then
|
||||
cp "$AUTH_KEYS" "$AUTHORIZED_KEYS_FILE"
|
||||
chmod 600 "$AUTHORIZED_KEYS_FILE"
|
||||
echo "SSH authorized_keys: installed from $AUTH_KEYS"
|
||||
else
|
||||
# auto-collect all developer SSH public keys from ~/.keys/*.key.pub
|
||||
> "$AUTHORIZED_KEYS_FILE"
|
||||
FOUND=0
|
||||
for ssh_pub in "$HOME"/.keys/*.key.pub; do
|
||||
[ -f "$ssh_pub" ] || continue
|
||||
cat "$ssh_pub" >> "$AUTHORIZED_KEYS_FILE"
|
||||
echo "SSH: added $(basename "$ssh_pub" .key.pub)"
|
||||
FOUND=$((FOUND + 1))
|
||||
done
|
||||
if [ "$FOUND" -gt 0 ]; then
|
||||
chmod 600 "$AUTHORIZED_KEYS_FILE"
|
||||
echo "SSH authorized_keys: $FOUND key(s) from ~/.keys/*.key.pub"
|
||||
else
|
||||
echo "WARNING: no SSH public keys found — falling back to password auth"
|
||||
echo " root password will be set to: bee / eeb"
|
||||
echo " (generate a key with: sh keys/scripts/keygen.sh <your-name>)"
|
||||
USE_PASSWORD_FALLBACK=1
|
||||
fi
|
||||
fi
|
||||
|
||||
# --- password fallback: write marker file read by init script ---
|
||||
# The marker lives in the overlay tree, which persists between builds, so it is
# removed explicitly when the fallback is not in effect; otherwise a marker
# left by an earlier key-less build would still be shipped.
FALLBACK_MARKER="${OVERLAY_DIR}/etc/bee-ssh-password-fallback"
if [ "${USE_PASSWORD_FALLBACK:-0}" = "1" ]; then
    # The overlay's etc/ directory may not exist yet at this point.
    mkdir -p "${OVERLAY_DIR}/etc"
    touch "$FALLBACK_MARKER"
else
    rm -f "$FALLBACK_MARKER"
fi
|
||||
|
||||
# --- copy audit binary into overlay ---
|
||||
mkdir -p "${OVERLAY_DIR}/usr/local/bin"
|
||||
cp "$AUDIT_BIN" "${OVERLAY_DIR}/usr/local/bin/audit"
|
||||
cp "${DIST_DIR}/bee-audit-linux-amd64" "${OVERLAY_DIR}/usr/local/bin/audit"
|
||||
chmod +x "${OVERLAY_DIR}/usr/local/bin/audit"
|
||||
|
||||
# Copy optional vendor utilities if already fetched.
|
||||
for tool in storcli64 sas2ircu sas3ircu mstflint; do
|
||||
# --- vendor utilities (optional pre-fetched binaries) ---
|
||||
for tool in storcli64 sas2ircu sas3ircu mstflint gpu_burn; do
|
||||
if [ -f "${VENDOR_DIR}/${tool}" ]; then
|
||||
cp "${VENDOR_DIR}/${tool}" "${OVERLAY_DIR}/usr/local/bin/${tool}"
|
||||
chmod +x "${OVERLAY_DIR}/usr/local/bin/${tool}" || true
|
||||
@@ -40,21 +109,29 @@ for tool in storcli64 sas2ircu sas3ircu mstflint; do
|
||||
fi
|
||||
done
|
||||
|
||||
# Build and inject NVIDIA proprietary modules + userspace tools.
|
||||
echo "=== building NVIDIA modules ==="
|
||||
# --- build NVIDIA kernel modules and inject into overlay ---
|
||||
echo ""
|
||||
echo "=== building NVIDIA ${NVIDIA_DRIVER_VERSION} modules ==="
|
||||
sh "${BUILDER_DIR}/build-nvidia-module.sh" "${NVIDIA_DRIVER_VERSION}" "${DIST_DIR}"
|
||||
KVER="$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)"
|
||||
|
||||
# Determine kernel version (same as what goes into the ISO — both use linux-lts from same Alpine)
|
||||
KVER=$(ls /usr/src/ 2>/dev/null | grep '^linux-headers-' | sed 's/linux-headers-//' | sort -V | tail -1)
|
||||
NVIDIA_CACHE="${DIST_DIR}/nvidia-${NVIDIA_DRIVER_VERSION}-${KVER}"
|
||||
|
||||
mkdir -p "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia"
|
||||
cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_DIR}/lib/modules/${KVER}/extra/nvidia/"
|
||||
# Inject .ko files into overlay at /usr/local/lib/nvidia/ (not /lib/modules/ — modloop squashfs
|
||||
# mounts over that path at boot and makes it read-only, so overlay content there is inaccessible)
|
||||
OVERLAY_KMOD_DIR="${OVERLAY_DIR}/usr/local/lib/nvidia"
|
||||
mkdir -p "${OVERLAY_KMOD_DIR}"
|
||||
cp "${NVIDIA_CACHE}/modules/"*.ko "${OVERLAY_KMOD_DIR}/"
|
||||
|
||||
# Inject nvidia-smi and libnvidia-ml
|
||||
mkdir -p "${OVERLAY_DIR}/usr/local/bin" "${OVERLAY_DIR}/usr/lib"
|
||||
cp "${NVIDIA_CACHE}/bin/nvidia-smi" "${OVERLAY_DIR}/usr/local/bin/"
|
||||
chmod +x "${OVERLAY_DIR}/usr/local/bin/nvidia-smi"
|
||||
cp "${NVIDIA_CACHE}/lib/"* "${OVERLAY_DIR}/usr/lib/" 2>/dev/null || true
|
||||
|
||||
# Embed build metadata used at runtime.
|
||||
|
||||
# --- embed build metadata ---
|
||||
mkdir -p "${OVERLAY_DIR}/etc"
|
||||
BUILD_DATE="$(date +%Y-%m-%d)"
|
||||
GIT_COMMIT="$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo unknown)"
|
||||
@@ -67,12 +144,27 @@ ALPINE_VERSION=${ALPINE_VERSION}
|
||||
NVIDIA_DRIVER_VERSION=${NVIDIA_DRIVER_VERSION}
|
||||
EOF
|
||||
|
||||
# --- export build info for genapkovl to inject into motd ---
|
||||
BUILD_DATE=$(date +%Y-%m-%d)
|
||||
GIT_COMMIT=$(git -C "${REPO_ROOT}" rev-parse --short HEAD 2>/dev/null || echo "unknown")
|
||||
export BEE_BUILD_INFO="${BUILD_DATE} git:${GIT_COMMIT} alpine:${ALPINE_VERSION} nvidia:${NVIDIA_DRIVER_VERSION}"
|
||||
|
||||
# --- build ISO using mkimage ---
|
||||
mkdir -p "${DIST_DIR}"
|
||||
echo ""
|
||||
echo "=== building ISO ==="
|
||||
|
||||
# Install our mkimage profile where mkimage.sh can find it.
|
||||
# ~/.mkimage is the user plugin directory loaded by mkimage.sh.
|
||||
mkdir -p "${HOME}/.mkimage"
|
||||
cp "${BUILDER_DIR}/mkimg.bee.sh" "${HOME}/.mkimage/"
|
||||
cp "${BUILDER_DIR}/genapkovl-bee.sh" "${HOME}/.mkimage/"
|
||||
|
||||
# Export overlay dir so the profile script can find it regardless of SRCDIR.
|
||||
export BEE_OVERLAY_DIR="${OVERLAY_DIR}"
|
||||
|
||||
# Clean workdir selectively: remove everything except apks cache so packages aren't re-downloaded.
|
||||
# mkimage stores each section in a hash-named subdir; apks_* dirs contain downloaded packages.
|
||||
if [ -d /var/tmp/bee-iso-work ]; then
|
||||
find /var/tmp/bee-iso-work -maxdepth 1 -mindepth 1 \
|
||||
-not -name 'apks_*' -not -name 'kernel_*' \
|
||||
@@ -80,6 +172,9 @@ if [ -d /var/tmp/bee-iso-work ]; then
|
||||
-exec rm -rf {} + 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Run from /var/tmp to avoid git repo context conflicts and to ensure enough scratch space.
|
||||
# mkinitfs/update-kernel use TMPDIR for initramfs build; tmpfs /tmp is only ~1GB.
|
||||
# mkimage.sh sources genapkovl-*.sh from CWD (not from ~/.mkimage), so copy it here too.
|
||||
export TMPDIR=/var/tmp
|
||||
cp "${BUILDER_DIR}/genapkovl-bee.sh" /var/tmp/
|
||||
cd /var/tmp
|
||||
@@ -93,5 +188,9 @@ sh /usr/share/aports/scripts/mkimage.sh \
|
||||
--profile bee
|
||||
|
||||
ISO="${DIST_DIR}/alpine-bee-${ALPINE_VERSION}-x86_64.iso"
|
||||
echo ""
|
||||
echo "=== done ==="
|
||||
echo "ISO: $ISO"
|
||||
echo "Size: $(du -sh "$ISO" 2>/dev/null | cut -f1 || echo 'not found')"
|
||||
echo ""
|
||||
echo "Boot via BMC virtual media and SSH to the server IP on port 22 as root."
|
||||
|
||||
@@ -12,18 +12,19 @@ makefile() { OWNER="$1" PERMS="$2" FILENAME="$3"; cat > "$FILENAME"; chown "$OWN
|
||||
# rc_add SERVICE RUNLEVEL — create the RUNLEVEL directory under $tmp and
# symlink /etc/init.d/SERVICE into it.
rc_add() {
    mkdir -p "$tmp/etc/runlevels/$2"
    ln -sf "/etc/init.d/$1" "$tmp/etc/runlevels/$2/$1"
}
|
||||
|
||||
mkdir -p "$tmp/etc"
|
||||
makefile root:root 0644 "$tmp/etc/hostname" <<EOT
|
||||
makefile root:root 0644 "$tmp/etc/hostname" <<EOF
|
||||
$HOSTNAME
|
||||
EOT
|
||||
EOF
|
||||
|
||||
# Empty interfaces file — prevents ifupdown from erroring, bee-network handles DHCP
|
||||
mkdir -p "$tmp/etc/network"
|
||||
makefile root:root 0644 "$tmp/etc/network/interfaces" <<EOT
|
||||
makefile root:root 0644 "$tmp/etc/network/interfaces" <<EOF
|
||||
auto lo
|
||||
iface lo inet loopback
|
||||
EOT
|
||||
EOF
|
||||
|
||||
mkdir -p "$tmp/etc/apk"
|
||||
makefile root:root 0644 "$tmp/etc/apk/world" <<EOT
|
||||
makefile root:root 0644 "$tmp/etc/apk/world" <<EOF
|
||||
alpine-base
|
||||
dmidecode
|
||||
smartmontools
|
||||
@@ -34,12 +35,18 @@ util-linux
|
||||
lsblk
|
||||
e2fsprogs
|
||||
lshw
|
||||
openrc
|
||||
ca-certificates
|
||||
dropbear
|
||||
libqrencode-tools
|
||||
tzdata
|
||||
jq
|
||||
wget
|
||||
EOT
|
||||
ca-certificates
|
||||
strace
|
||||
procps
|
||||
lsof
|
||||
file
|
||||
less
|
||||
vim
|
||||
dialog
|
||||
EOF
|
||||
|
||||
rc_add devfs sysinit
|
||||
rc_add dmesg sysinit
|
||||
@@ -58,14 +65,16 @@ rc_add mount-ro shutdown
|
||||
rc_add killprocs shutdown
|
||||
rc_add savecache shutdown
|
||||
|
||||
rc_add bee-sshsetup default
|
||||
rc_add bee-network default
|
||||
rc_add bee-update default
|
||||
rc_add dropbear default
|
||||
rc_add bee-nvidia default
|
||||
rc_add bee-audit default
|
||||
rc_add bee-audit-debug default
|
||||
|
||||
if [ -d "$OVERLAY/etc" ]; then
|
||||
cp -r "$OVERLAY/etc/." "$tmp/etc/"
|
||||
chmod +x "$tmp/etc/init.d/"* 2>/dev/null || true
|
||||
[ -n "$BEE_BUILD_INFO" ] && sed -i "s/%%BUILD_INFO%%/${BEE_BUILD_INFO}/" "$tmp/etc/motd" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
mkdir -p "$tmp/usr"
|
||||
@@ -74,9 +83,24 @@ if [ -d "$OVERLAY/usr" ]; then
|
||||
chmod +x "$tmp/usr/local/bin/"* 2>/dev/null || true
|
||||
fi
|
||||
|
||||
if [ -d "$OVERLAY/root" ]; then
|
||||
mkdir -p "$tmp/root"
|
||||
cp -r "$OVERLAY/root/." "$tmp/root/"
|
||||
chmod 700 "$tmp/root/.ssh" 2>/dev/null || true
|
||||
chmod 600 "$tmp/root/.ssh/authorized_keys" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
if [ -d "$OVERLAY/lib" ]; then
|
||||
mkdir -p "$tmp/lib"
|
||||
cp -r "$OVERLAY/lib/." "$tmp/lib/"
|
||||
fi
|
||||
|
||||
tar -c -C "$tmp" etc usr lib 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
|
||||
mkdir -p "$tmp/etc/dropbear" "$tmp/etc/conf.d"
|
||||
# -R: auto-generate host keys if missing; -B: allow logins with a blank password
|
||||
# no dependency on networking service — bee-network handles DHCP independently
|
||||
makefile root:root 0644 "$tmp/etc/conf.d/dropbear" <<EOF
|
||||
DROPBEAR_OPTS="-R -B"
|
||||
EOF
|
||||
|
||||
|
||||
tar -c -C "$tmp" etc usr root lib 2>/dev/null | gzip -9n > "$HOSTNAME.apkovl.tar.gz"
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
#!/bin/sh
|
||||
# Alpine mkimage profile: bee (production)
|
||||
# Alpine mkimage profile: bee
|
||||
|
||||
profile_bee() {
|
||||
title="Bee Hardware Audit"
|
||||
desc="Hardware audit LiveCD (production unattended mode)"
|
||||
desc="Hardware audit LiveCD"
|
||||
arch="x86_64"
|
||||
hostname="alpine-bee"
|
||||
apkovl="genapkovl-bee.sh"
|
||||
@@ -13,7 +13,8 @@ profile_bee() {
|
||||
kernel_addons=""
|
||||
initfs_cmdline="modules=loop,squashfs,sd-mod,usb-storage modloop=/boot/modloop-lts quiet"
|
||||
initfs_features="ata base cdrom ext4 mmc nvme raid scsi squashfs usb virtio nfit"
|
||||
|
||||
grub_mod="all_video disk part_gpt part_msdos linux normal configfile search search_label efi_gop fat iso9660 cat echo ls test true help gzio multiboot2 efi_uga"
|
||||
syslinux_serial="0 115200"
|
||||
apks="
|
||||
alpine-base
|
||||
linux-lts
|
||||
@@ -38,10 +39,20 @@ profile_bee() {
|
||||
e2fsprogs
|
||||
lshw
|
||||
|
||||
dropbear
|
||||
openrc
|
||||
ca-certificates
|
||||
libqrencode-tools
|
||||
tzdata
|
||||
jq
|
||||
wget
|
||||
ca-certificates
|
||||
|
||||
strace
|
||||
procps
|
||||
lsof
|
||||
file
|
||||
less
|
||||
vim
|
||||
dialog
|
||||
|
||||
gcompat
|
||||
"
|
||||
}
|
||||
|
||||
176
iso/builder/smoketest.sh
Normal file
176
iso/builder/smoketest.sh
Normal file
@@ -0,0 +1,176 @@
|
||||
#!/bin/sh
|
||||
# smoketest.sh — run on a live ISO via SSH to verify all critical components.
|
||||
#
|
||||
# Usage:
|
||||
# ssh root@<ip> 'sh -s' < smoketest.sh
|
||||
# or: scp smoketest.sh root@<ip>:/var/tmp/ && ssh root@<ip> sh /var/tmp/smoketest.sh
|
||||
#
|
||||
# Exit code: 0 = all required checks passed, 1 = at least one required check failed.
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
WARN=0
|
||||
|
||||
# ok MSG... — print a passed-check line and increment the PASS counter.
ok() {
    echo "[ OK ] $*"
    PASS=$((PASS + 1))
}
|
||||
# fail MSG... — print a failed-check line and increment the FAIL counter.
fail() {
    echo "[ FAIL ] $*"
    FAIL=$((FAIL + 1))
}
|
||||
# warn MSG... — print a warning line and increment the WARN counter.
warn() {
    echo "[ WARN ] $*"
    WARN=$((WARN + 1))
}
|
||||
# info MSG... — print an informational line; no counter is touched.
info() {
    echo "[ INFO ] $*"
}
|
||||
|
||||
# Banner identifying the machine under test, followed by one blank line.
cat <<EOF
========================================
 bee live ISO smoketest
 host: $(uname -n)
 kernel: $(uname -r)
 date: $(date -u)
========================================

EOF

# --- kernel version ---
KVER=$(uname -r)
info "kernel: $KVER"
|
||||
|
||||
# --- PATH ---
|
||||
echo "-- PATH & binaries --"

# locate_tool NAME — print where NAME resolves, searching /usr/local/bin first.
locate_tool() { PATH="/usr/local/bin:$PATH" command -v "$1" 2>/dev/null; }

# Required tools: a missing one is a failure.
for tool in dmidecode smartctl nvme ipmitool lspci audit; do
    if ! p=$(locate_tool "$tool"); then
        fail "$tool: NOT FOUND"
        continue
    fi
    ok "$tool found: $p"
done

# GPU tools: a missing one only produces a warning.
for tool in nvidia-smi gpu_burn; do
    if ! p=$(locate_tool "$tool"); then
        warn "$tool: NOT FOUND (optional but expected)"
        continue
    fi
    ok "$tool found: $p"
done
|
||||
|
||||
echo ""
|
||||
echo "-- NVIDIA modules --"
# Directory the ISO is expected to carry the NVIDIA .ko files in.
KO_DIR="/usr/local/lib/nvidia"
if [ -d "$KO_DIR" ]; then
    # Count via the glob itself rather than parsing ls output; an unmatched
    # pattern stays literal and is filtered out by the -e test.
    ko_count=0
    for ko in "$KO_DIR"/*.ko; do
        if [ -e "$ko" ]; then
            ko_count=$((ko_count + 1))
        fi
    done
    # An existing but empty directory is a failure: there is nothing to load.
    if [ "$ko_count" -gt 0 ]; then
        ok "NVIDIA ko dir exists: $KO_DIR ($ko_count .ko files)"
    else
        fail "NVIDIA ko dir exists but contains no .ko files: $KO_DIR"
    fi
else
    fail "NVIDIA ko dir missing: $KO_DIR"
fi

# Each module must appear at the start of a line in lsmod output.
for mod in nvidia nvidia_modeset nvidia_uvm; do
    if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
        ok "module loaded: $mod"
    else
        fail "module NOT loaded: $mod"
    fi
done
|
||||
|
||||
echo ""
|
||||
echo "-- nvidia-smi --"
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
    # Query the GPU list once and reuse the output for both the presence
    # check and the count.
    gpu_list=$(PATH="/usr/local/bin:$PATH" nvidia-smi -L 2>/dev/null)
    gpu_count=$(printf '%s\n' "$gpu_list" | grep -c "GPU")
    if [ "$gpu_count" -gt 0 ]; then
        ok "nvidia-smi: $gpu_count GPU(s) found"
    else
        fail "nvidia-smi: runs but no GPUs detected"
    fi
else
    fail "nvidia-smi: not found in PATH"
fi
|
||||
|
||||
echo ""
|
||||
echo "-- lib symlinks --"
# Each soname must exist as a regular file or as a symlink (even a dangling one).
for lib in libnvidia-ml libcuda; do
    soname="/usr/lib/${lib}.so.1"
    if [ ! -f "$soname" ] && [ ! -L "$soname" ]; then
        fail "$soname MISSING (nvidia-smi will fail)"
    else
        ok "$soname exists"
    fi
done
|
||||
|
||||
echo ""
|
||||
echo "-- gcompat (glibc compat for nvidia-smi) --"
# Loader path checked as evidence that gcompat is installed.
GLIBC_LOADER=/lib64/ld-linux-x86-64.so.2
if [ -f "$GLIBC_LOADER" ] || [ -L "$GLIBC_LOADER" ]; then
    ok "gcompat: $GLIBC_LOADER present"
else
    fail "gcompat: $GLIBC_LOADER MISSING — nvidia-smi will fail to exec"
fi
|
||||
|
||||
echo ""
|
||||
echo "-- openrc services --"
# Services whose status must be OK.
for svc in bee-nvidia bee-network; do
    if ! rc-service "$svc" status >/dev/null 2>&1; then
        fail "service NOT running: $svc"
    else
        ok "service running: $svc"
    fi
done

# Services checked only when their init script is installed; a bad status is
# reported as a warning rather than a failure.
for svc in bee-audit-debug dropbear bee-sshsetup; do
    [ -f "/etc/init.d/$svc" ] || continue
    if ! rc-service "$svc" status >/dev/null 2>&1; then
        warn "service not running: $svc (may be one-shot)"
    else
        ok "service running: $svc"
    fi
done
|
||||
|
||||
echo ""
|
||||
echo "-- audit binary --"
AUDIT=/usr/local/bin/audit
if [ ! -x "$AUDIT" ]; then
    fail "audit binary: NOT FOUND at $AUDIT"
else
    ok "audit binary: present"
    # Try the flag form first, then the subcommand form, then give up.
    ver=$("$AUDIT" --version 2>/dev/null || "$AUDIT" version 2>/dev/null || echo "unknown")
    info "audit version: $ver"
fi
|
||||
|
||||
echo ""
|
||||
echo "-- audit last run --"
AUDIT_LOG=/var/log/bee-audit.log
if [ ! -f "$AUDIT_LOG" ]; then
    warn "audit: no log found at $AUDIT_LOG"
else
    last_line=$(tail -1 "$AUDIT_LOG")
    info "last log line: $last_line"

    # Completion state: "completed" wins over "started"; neither prints nothing.
    if grep -q "audit completed" "$AUDIT_LOG" 2>/dev/null; then
        ok "audit: completed successfully"
    elif grep -q "audit started" "$AUDIT_LOG" 2>/dev/null; then
        warn "audit: started but may not have completed"
    fi

    # NVIDIA enrichment: any "skipped" line in the log is a failure, and the
    # most recent one is quoted as the reason.
    if grep -q "nvidia: enrichment skipped" "$AUDIT_LOG" 2>/dev/null; then
        reason=$(grep "nvidia: enrichment skipped" "$AUDIT_LOG" | tail -1)
        fail "audit: nvidia enrichment skipped — $reason"
    else
        ok "audit: nvidia enrichment OK"
    fi
fi
|
||||
|
||||
echo ""
|
||||
echo "-- network --"
# Default route: report the third field of the first matching route line.
if ip route show default 2>/dev/null | grep -q "default"; then
    gw=$(ip route show default | awk '{print $3; exit}')
    ok "default route: $gw"
else
    fail "no default route"
fi

# Outbound reachability: a single ping with a 3-second wait.
if ! ping -c1 -W3 1.1.1.1 >/dev/null 2>&1; then
    fail "internet: unreachable"
else
    ok "internet: reachable (1.1.1.1)"
fi
|
||||
|
||||
echo ""
|
||||
echo "-- /etc/profile.d/bee.sh PATH --"
# The profile snippet passes if it mentions /usr/local/bin anywhere.
if ! grep -q "/usr/local/bin" /etc/profile.d/bee.sh 2>/dev/null; then
    fail "/etc/profile.d/bee.sh does not add /usr/local/bin to PATH"
else
    ok "/etc/profile.d/bee.sh exports /usr/local/bin"
fi
|
||||
|
||||
echo ""
|
||||
# Totals, then the exit status: 0 only when no check failed.
echo "========================================"
echo " Results: OK=$PASS FAIL=$FAIL WARN=$WARN"
echo "========================================"
if [ "$FAIL" -eq 0 ]; then
    exit 0
fi
exit 1
|
||||
Reference in New Issue
Block a user