Files
bee/iso/overlay/usr/local/bin/bee-nvidia-load
Michael Chus 5ee120158e fix(build): remove unused variant package lists before lb build
live-build picks up ALL .list.chroot files in config/package-lists/.
After rsync, bee-nvidia.list.chroot, bee-amd.list.chroot, and
bee-nogpu.list.chroot all end up in BUILD_WORK_DIR — causing lb to
try installing packages from every variant (and leaving version
placeholders unsubstituted in the unused lists).

Fix: after copying bee-${BEE_GPU_VENDOR}.list.chroot → bee-gpu.list.chroot,
delete all other bee-{nvidia,amd,nogpu}.list.chroot from BUILD_WORK_DIR.

Also includes nomsi boot mode changes (bee-nvidia-load + grub.cfg).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-30 23:03:42 +03:00

118 lines
3.6 KiB
Bash
Executable File

#!/bin/sh
# bee-nvidia-load — load NVIDIA kernel modules and create device nodes
# Called by bee-nvidia.service at boot.
# Directory that holds the NVIDIA .ko files this script loads.
NVIDIA_KO_DIR="/usr/local/lib/nvidia"

# log MESSAGE...
# Print all arguments on stdout as one line prefixed with "[bee-nvidia] ".
log() {
  echo "[bee-nvidia] $*"
}
log "kernel: $(uname -r)"

# Nothing to do on machines without an NVIDIA PCI device (vendor ID 10de):
# leave with success so the caller does not see a failure.
lspci -nn 2>/dev/null | grep -qi '10de:' || {
  log "no NVIDIA GPU detected — skipping module load"
  exit 0
}

# A GPU is present, so a missing module directory is a hard error.
[ -d "$NVIDIA_KO_DIR" ] || {
  log "ERROR: NVIDIA module dir missing: $NVIDIA_KO_DIR"
  exit 1
}

log "module dir: $NVIDIA_KO_DIR"
# Informational listing of the available .ko files; an empty directory is
# tolerated (errors are hidden and the pipeline status is ignored).
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
# cmdline_param KEY [FILE]
# Print the value of the first whitespace-separated KEY=VALUE token in the
# kernel command line and return 0. Return 1, printing nothing, when no token
# matches or the file cannot be read.
#   KEY   parameter name, matched literally (everything before the first '=')
#   FILE  command-line source; defaults to /proc/cmdline
cmdline_param() {
  key="$1"
  cmdline_src="${2:-/proc/cmdline}"
  # Split into one token per line and consume them with `read -r`. Unlike an
  # unquoted $(cat ...) word list, this never subjects a token such as
  # "foo=*" to pathname expansion.
  cmdline_tokens="$(cat "$cmdline_src" 2>/dev/null | tr -s ' \t' '\n\n')"
  while IFS= read -r token; do
    case "$token" in
      "$key"=*)
        # printf instead of echo: the value is arbitrary text and must not be
        # interpreted as an echo option or escape sequence.
        printf '%s\n' "${token#*=}"
        return 0
        ;;
    esac
  done <<EOF
$cmdline_tokens
EOF
  return 1
}
# Boot mode comes from the bee.nvidia.mode= kernel parameter; fall back to
# "normal" when the parameter is absent or has an empty value.
nvidia_mode="$(cmdline_param bee.nvidia.mode || true)"
: "${nvidia_mode:=normal}"
log "boot mode: $nvidia_mode"
# load_module MODULE [PARAM=VALUE ...]
# Insert $NVIDIA_KO_DIR/MODULE.ko with insmod, passing any remaining
# arguments through as module parameters. When MODULE.ko does not exist, the
# same name with every '-' replaced by '_' is tried instead.
# Returns 0 when insmod succeeds. Returns 1 when no .ko file is found or when
# insmod fails; on insmod failure the last 10 dmesg lines are printed.
load_module() {
  mod="$1"
  shift
  ko="$NVIDIA_KO_DIR/${mod}.ko"
  if [ ! -f "$ko" ]; then
    # POSIX replacement for the bash-only ${mod//-/_}: this script runs under
    # #!/bin/sh, where that expansion is a "Bad substitution" error in shells
    # such as dash.
    ko="$NVIDIA_KO_DIR/$(printf '%s\n' "$mod" | sed 's/-/_/g').ko"
  fi
  if [ ! -f "$ko" ]; then
    log "WARN: not found: $ko"
    return 1
  fi
  if insmod "$ko" "$@"; then
    log "loaded: $mod $*"
    return 0
  fi
  log "WARN: failed to load: $mod"
  # Show the kernel's most recent messages to explain the failure; this is
  # diagnostic only, so its own status is ignored.
  dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true
  return 1
}
# Load the driver according to the requested boot mode. In every mode a
# failure to load the core "nvidia" module aborts the script with status 1.
case "$nvidia_mode" in
  normal|full)
    load_module nvidia || exit 1
    # The companion modules are best-effort: a failure is logged by
    # load_module but does not stop the script.
    load_module nvidia-modeset || true
    load_module nvidia-uvm || true
    ;;
  gsp-off|safe)
    # NVIDIA documents that GSP firmware is enabled by default on newer GPUs
    # and can be disabled via NVreg_EnableGpuFirmware=0. Safe mode keeps the
    # live ISO on the conservative path for platforms where full boot-time
    # GSP init is unstable.
    load_module nvidia NVreg_EnableGpuFirmware=0 || exit 1
    log "GSP-off mode: skipping nvidia-modeset and nvidia-uvm during boot"
    ;;
  nomsi|*)
    # nomsi: disable MSI-X/MSI interrupts — use when RmInitAdapter fails with
    # "Failed to enable MSI-X" on one or more GPUs (IOMMU group interrupt
    # limits). NVreg_EnableMSI=0 forces legacy INTx interrupts for all GPUs.
    # Any mode value not matched above also takes this branch.
    load_module nvidia NVreg_EnableGpuFirmware=0 NVreg_EnableMSI=0 || exit 1
    log "nomsi mode: MSI-X disabled (NVreg_EnableMSI=0), skipping nvidia-modeset and nvidia-uvm"
    ;;
esac
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)

# ensure_char_node PATH MAJOR MINOR
# Make sure a character device node exists at PATH. Succeeds immediately when
# PATH is already a character device, so re-running the script does not fail
# with "File exists"; otherwise creates the node with mode 666 and returns
# mknod's status. The major/minor of a pre-existing node are not verified.
ensure_char_node() {
  if [ -c "$1" ]; then
    return 0
  fi
  mknod -m 666 "$1" c "$2" "$3"
}

# Major number registered for "nvidiactl" in /proc/devices (empty if absent).
nvidia_major=$(awk '$2 == "nvidiactl" { print $1; exit }' /proc/devices)
if [ -n "$nvidia_major" ]; then
  # The control device uses minor 255.
  if ensure_char_node /dev/nvidiactl "$nvidia_major" 255; then
    log "ready: /dev/nvidiactl (major $nvidia_major)"
  else
    log "WARN: could not create /dev/nvidiactl"
  fi
  # Per-GPU nodes: minor N for /dev/nvidiaN. Count the nodes that are really
  # present so the log line reports the actual result.
  gpu_nodes_ready=0
  for i in 0 1 2 3 4 5 6 7; do
    if ensure_char_node "/dev/nvidia$i" "$nvidia_major" "$i"; then
      gpu_nodes_ready=$((gpu_nodes_ready + 1))
    fi
  done
  log "ready: $gpu_nodes_ready of 8 nodes in /dev/nvidia{0-7}"
else
  log "WARN: nvidiactl not in /proc/devices — no GPU hardware present?"
fi

# nvidia-uvm only appears in /proc/devices when that module was loaded, so
# the UVM nodes are simply skipped otherwise.
uvm_major=$(awk '$2 == "nvidia-uvm" { print $1; exit }' /proc/devices)
if [ -n "$uvm_major" ]; then
  if ensure_char_node /dev/nvidia-uvm "$uvm_major" 0; then
    log "ready: /dev/nvidia-uvm (major $uvm_major)"
  else
    log "WARN: could not create /dev/nvidia-uvm"
  fi
  ensure_char_node /dev/nvidia-uvm-tools "$uvm_major" 1 \
    || log "WARN: could not create /dev/nvidia-uvm-tools"
fi
# Rebuild the dynamic linker cache so the NVIDIA/NCCL libraries injected into
# /usr/lib/ can be found by dlopen() (libcuda, libnvidia-ptxjitcompiler,
# libnccl, etc.). Best-effort: errors are hidden and the status is ignored.
ldconfig 2>/dev/null || :
log "ldconfig refreshed"

log "done"