Stabilize live ISO consoles and NVIDIA boot path

This commit is contained in:
Mikhail Chusavitin
2026-03-25 19:05:18 +03:00
parent b345b0d14d
commit d36e8442a9
13 changed files with 124 additions and 38 deletions

View File

@@ -0,0 +1,29 @@
#!/bin/bash
# bee-log-run — run a command, append its output to a file, and keep stdout/stderr
# connected to systemd so journald and the serial console also receive the logs.
set -o pipefail
log_file="$1"
shift
if [ -z "$log_file" ] || [ "$#" -eq 0 ]; then
echo "usage: $0 <log-file> <command> [args...]" >&2
exit 2
fi
mkdir -p "$(dirname "$log_file")"
serial_sink() {
local tty="$1"
if [ -w "$tty" ]; then
cat > "$tty"
else
cat > /dev/null
fi
}
"$@" 2>&1 | tee -a "$log_file" \
>(serial_sink /dev/ttyS0) \
>(serial_sink /dev/ttyS1)
exit "${PIPESTATUS[0]}"

View File

@@ -22,24 +22,33 @@ fi
log "module dir: $NVIDIA_KO_DIR"
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
# Some kernels expose backlight helper symbols only after loading `video`.
modprobe video >/dev/null 2>&1 && log "loaded helper module: video" || log "helper module unavailable: video"
# Load modules via insmod (direct load — no depmod needed)
for mod in nvidia nvidia-modeset nvidia-uvm; do
load_module() {
mod="$1"
shift
ko="$NVIDIA_KO_DIR/${mod}.ko"
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
if [ -f "$ko" ]; then
if insmod "$ko"; then
log "loaded: $mod"
else
log "WARN: failed to load: $mod"
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
fi
else
if [ ! -f "$ko" ]; then
log "WARN: not found: $ko"
return 1
fi
done
if insmod "$ko" "$@"; then
log "loaded: $mod $*"
return 0
fi
log "WARN: failed to load: $mod"
dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true
return 1
}
# Load only the base NVIDIA kernel module on boot.
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the
# conservative path until we have a stable repro for the observed boot crash.
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
exit 1
fi
log "skipping nvidia-modeset and nvidia-uvm during boot for stability"
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
@@ -61,8 +70,6 @@ if [ -n "$uvm_major" ]; then
&& log "created /dev/nvidia-uvm (major $uvm_major)" \
|| log "WARN: /dev/nvidia-uvm already exists"
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
else
log "WARN: nvidia-uvm not in /proc/devices"
fi
log "done"