Add bootloader choice for safe vs full NVIDIA boot

This commit is contained in:
Mikhail Chusavitin
2026-03-25 19:11:15 +03:00
parent d36e8442a9
commit 27d478aed6
4 changed files with 77 additions and 13 deletions

View File

@@ -9,13 +9,18 @@ echo " ███████╗██║ ██║███████║
echo " ╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝"
echo ""
menuentry "EASY-BEE" {
linux @KERNEL_LIVE@ @APPEND_LIVE@
menuentry "EASY-BEE (safe NVIDIA boot, default)" {
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=safe
initrd @INITRD_LIVE@
}
menuentry "EASY-BEE (full NVIDIA / GSP)" {
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=full
initrd @INITRD_LIVE@
}
menuentry "EASY-BEE (fail-safe)" {
linux @KERNEL_LIVE@ @APPEND_LIVE@ memtest noapic noapm nodma nomce nolapic nosmp vga=normal
linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=safe memtest noapic noapm nodma nomce nolapic nosmp vga=normal
initrd @INITRD_LIVE@
}

View File

@@ -0,0 +1,18 @@
# Boot menu entries. Every entry appends bee.nvidia.mode=<safe|full> to the
# kernel command line.
# NOTE(review): the @...@ tokens look like build-time placeholders — confirm
# against the image build tooling.

# Default entry: safe NVIDIA boot.
label live-@FLAVOUR@-safe
menu label ^EASY-BEE (safe NVIDIA boot, default)
menu default
linux @LINUX@
initrd @INITRD@
append @APPEND_LIVE@ bee.nvidia.mode=safe
# Opt-in entry: full NVIDIA / GSP boot.
label live-@FLAVOUR@-full
menu label ^EASY-BEE (full NVIDIA / GSP)
linux @LINUX@
initrd @INITRD@
append @APPEND_LIVE@ bee.nvidia.mode=full
# Fail-safe entry: safe NVIDIA mode plus the conservative kernel options below.
label live-@FLAVOUR@-failsafe
menu label EASY-BEE (^fail-safe)
linux @LINUX@
initrd @INITRD@
append @APPEND_LIVE@ bee.nvidia.mode=safe memtest noapic noapm nodma nomce nolapic nosmp vga=normal

View File

@@ -26,6 +26,15 @@ echo ""
KVER=$(uname -r)
info "kernel: $KVER"
# Resolve the NVIDIA boot mode from the kernel command line.
# Defaults to "safe"; when bee.nvidia.mode= appears more than once, the last
# occurrence is the one that sticks.
NVIDIA_BOOT_MODE="safe"
for arg in $(cat /proc/cmdline 2>/dev/null); do
    # Stripping the literal prefix changes the word only when the word
    # actually starts with "bee.nvidia.mode=".
    if [ "${arg#bee.nvidia.mode=}" != "$arg" ]; then
        NVIDIA_BOOT_MODE="${arg#bee.nvidia.mode=}"
    fi
done
info "nvidia boot mode: ${NVIDIA_BOOT_MODE}"
# --- PATH & binaries ---
echo "-- PATH & binaries --"
@@ -62,8 +71,10 @@ fi
# Report whether each of nvidia_modeset / nvidia_uvm is currently loaded.
# A missing module is reported with `fail` only when NVIDIA_BOOT_MODE is
# "full"; in every other mode it is reported with `warn`.
for mod in nvidia_modeset nvidia_uvm; do
# Match only lsmod lines that begin with the module name followed by a space,
# so a longer name sharing the same prefix does not count.
if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
ok "module loaded: $mod"
elif [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
fail "module NOT loaded in full mode: $mod"
else
# NOTE(review): two consecutive warn lines — these look like the old and the
# new wording of the same message; confirm that only one belongs here.
warn "module not loaded at boot: $mod"
warn "module not loaded in safe mode: $mod"
fi
done
@@ -79,6 +90,8 @@ done
# /dev/nvidia-uvm presence check. A missing node is reported with `fail` in
# full mode and with `warn` in every other mode.
if [ ! -e /dev/nvidia-uvm ]; then
    if [ "${NVIDIA_BOOT_MODE}" = "full" ]; then
        fail "/dev/nvidia-uvm missing in full mode"
    else
        warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand"
    fi
else
    ok "/dev/nvidia-uvm exists"
fi

View File

@@ -22,6 +22,25 @@ fi
log "module dir: $NVIDIA_KO_DIR"
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
# cmdline_param KEY [CMDLINE_FILE]
#
# Print the value of KEY=value taken from a kernel command line.
#
# Arguments:
#   $1 - parameter name, matched literally (e.g. bee.nvidia.mode)
#   $2 - optional file to read instead of /proc/cmdline
# Outputs:
#   the value (possibly empty) followed by a newline, on stdout
# Returns:
#   0 when KEY=... was found, 1 otherwise (including an unreadable file)
#
# When KEY appears several times the LAST occurrence wins, so a value appended
# at the boot prompt overrides the one baked into the menu entry.
#
# The body is a subshell "( ... )" rather than a group "{ ... }": the helper
# variables and `set -f` therefore never leak into the calling script, without
# relying on the non-POSIX `local`.
cmdline_param() (
    key="$1"
    cmdline_file="${2:-/proc/cmdline}"
    value=""
    found="no"

    # Keep word splitting but disable pathname expansion, so a token such as
    # "foo=*" is never expanded against the current directory.
    set -f
    for token in $(cat "$cmdline_file" 2>/dev/null); do
        case "$token" in
            "$key"=*)
                value="${token#"$key"=}"
                found="yes"
                ;;
        esac
    done

    [ "$found" = "yes" ] || return 1
    # printf instead of echo: the value is arbitrary text and may look like an
    # echo option (for example "-n").
    printf '%s\n' "$value"
)
# Boot mode requested via bee.nvidia.mode= on the kernel command line.
# A missing or empty value falls back to "safe".
nvidia_mode="$(cmdline_param bee.nvidia.mode || true)"
: "${nvidia_mode:=safe}"
log "boot mode: $nvidia_mode"
load_module() {
mod="$1"
shift
@@ -40,15 +59,24 @@ load_module() {
return 1
}
# Load only the base NVIDIA kernel module on boot.
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the
# conservative path until we have a stable repro for the observed boot crash.
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
exit 1
fi
log "skipping nvidia-modeset and nvidia-uvm during boot for stability"
# Pick the module set to load according to the boot mode. Only the exact value
# "full" selects the full path; "safe" and every other value take the
# conservative path.
if [ "$nvidia_mode" = "full" ]; then
    # Base module with its default parameters is mandatory; nvidia-modeset and
    # nvidia-uvm are attempted afterwards and their failure is tolerated.
    load_module nvidia || exit 1
    load_module nvidia-modeset || true
    load_module nvidia-uvm || true
else
    # NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
    # be disabled via NVreg_EnableGpuFirmware=0. Safe mode keeps the live ISO on the
    # conservative path for platforms where full boot-time GSP init is unstable.
    load_module nvidia NVreg_EnableGpuFirmware=0 || exit 1
    log "safe mode: skipping nvidia-modeset and nvidia-uvm during boot"
fi
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')