From 27d478aed613ebdc18dd2d2b4480b5167f409465 Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 25 Mar 2026 19:11:15 +0300 Subject: [PATCH] Add bootloader choice for safe vs full NVIDIA boot --- .../config/bootloaders/grub-pc/grub.cfg | 11 +++-- .../config/bootloaders/isolinux/live.cfg.in | 18 ++++++++ iso/builder/smoketest.sh | 15 +++++- iso/overlay/usr/local/bin/bee-nvidia-load | 46 +++++++++++++++---- 4 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 iso/builder/config/bootloaders/isolinux/live.cfg.in diff --git a/iso/builder/config/bootloaders/grub-pc/grub.cfg b/iso/builder/config/bootloaders/grub-pc/grub.cfg index 07671e4..5b45d74 100644 --- a/iso/builder/config/bootloaders/grub-pc/grub.cfg +++ b/iso/builder/config/bootloaders/grub-pc/grub.cfg @@ -9,13 +9,18 @@ echo " ███████╗██║ ██║███████║ echo " ╚══════╝╚═╝ ╚═╝╚══════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝" echo "" -menuentry "EASY-BEE" { - linux @KERNEL_LIVE@ @APPEND_LIVE@ +menuentry "EASY-BEE (safe NVIDIA boot, default)" { + linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=safe + initrd @INITRD_LIVE@ +} + +menuentry "EASY-BEE (full NVIDIA / GSP)" { + linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=full initrd @INITRD_LIVE@ } menuentry "EASY-BEE (fail-safe)" { - linux @KERNEL_LIVE@ @APPEND_LIVE@ memtest noapic noapm nodma nomce nolapic nosmp vga=normal + linux @KERNEL_LIVE@ @APPEND_LIVE@ bee.nvidia.mode=safe memtest noapic noapm nodma nomce nolapic nosmp vga=normal initrd @INITRD_LIVE@ } diff --git a/iso/builder/config/bootloaders/isolinux/live.cfg.in b/iso/builder/config/bootloaders/isolinux/live.cfg.in new file mode 100644 index 0000000..758d562 --- /dev/null +++ b/iso/builder/config/bootloaders/isolinux/live.cfg.in @@ -0,0 +1,18 @@ +label live-@FLAVOUR@-safe + menu label ^EASY-BEE (safe NVIDIA boot, default) + menu default + linux @LINUX@ + initrd @INITRD@ + append @APPEND_LIVE@ bee.nvidia.mode=safe + +label live-@FLAVOUR@-full + menu label ^EASY-BEE (full NVIDIA / GSP) + linux @LINUX@ + initrd @INITRD@ + append @APPEND_LIVE@ bee.nvidia.mode=full + +label live-@FLAVOUR@-failsafe + menu label EASY-BEE (^fail-safe) + linux @LINUX@ + initrd @INITRD@ + append @APPEND_LIVE@ bee.nvidia.mode=safe memtest noapic noapm nodma nomce nolapic nosmp vga=normal diff --git a/iso/builder/smoketest.sh b/iso/builder/smoketest.sh index e3be0d0..febab91 100644 --- a/iso/builder/smoketest.sh +++ b/iso/builder/smoketest.sh @@ -26,6 +26,15 @@ echo "" KVER=$(uname -r) info "kernel: $KVER" +NVIDIA_BOOT_MODE="safe" +for arg in $(cat /proc/cmdline 2>/dev/null); do + case "$arg" in + bee.nvidia.mode=*) + NVIDIA_BOOT_MODE="${arg#*=}" + ;; + esac +done +info "nvidia boot mode: ${NVIDIA_BOOT_MODE}" # --- PATH & binaries --- echo "-- PATH & binaries --" @@ -62,8 +71,10 @@ fi for mod in nvidia_modeset nvidia_uvm; do if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then ok "module loaded: $mod" + elif [ "${NVIDIA_BOOT_MODE}" = "full" ]; then + fail "module NOT loaded in full mode: $mod" else - warn "module not loaded at boot: $mod" + warn "module not loaded in safe mode: $mod" fi done @@ -79,6 +90,8 @@ done if [ -e /dev/nvidia-uvm ]; then ok "/dev/nvidia-uvm exists" +elif [ "${NVIDIA_BOOT_MODE}" = "full" ]; then + fail "/dev/nvidia-uvm missing in full mode" else warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand" fi diff --git a/iso/overlay/usr/local/bin/bee-nvidia-load b/iso/overlay/usr/local/bin/bee-nvidia-load index 8982ac9..c830788 100755 --- a/iso/overlay/usr/local/bin/bee-nvidia-load +++ b/iso/overlay/usr/local/bin/bee-nvidia-load @@ -22,6 +22,25 @@ fi log "module dir: $NVIDIA_KO_DIR" ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true +cmdline_param() { + key="$1" + for token in $(cat /proc/cmdline 2>/dev/null); do + case "$token" in + "$key"=*) + echo "${token#*=}" + return 0 + ;; + esac + done + return 1 +} + +nvidia_mode="$(cmdline_param bee.nvidia.mode || true)" +if [ -z "$nvidia_mode" ]; then + nvidia_mode="safe" +fi +log "boot mode: $nvidia_mode" + load_module() { mod="$1" shift @@ -40,15 +59,24 @@ load_module() { return 1 } -# Load only the base NVIDIA kernel module on boot. -# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can -# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the -# conservative path until we have a stable repro for the observed boot crash. -if ! load_module nvidia NVreg_EnableGpuFirmware=0; then - exit 1 -fi - -log "skipping nvidia-modeset and nvidia-uvm during boot for stability" +case "$nvidia_mode" in + full) + if ! load_module nvidia; then + exit 1 + fi + load_module nvidia-modeset || true + load_module nvidia-uvm || true + ;; + safe|*) + # NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can + # be disabled via NVreg_EnableGpuFirmware=0. Safe mode keeps the live ISO on the + # conservative path for platforms where full boot-time GSP init is unstable. + if ! load_module nvidia NVreg_EnableGpuFirmware=0; then + exit 1 + fi + log "safe mode: skipping nvidia-modeset and nvidia-uvm during boot" + ;; +esac # Create /dev/nvidia* device nodes (udev rules absent since we use .run installer) nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')