Stabilize live ISO consoles and NVIDIA boot path

This commit is contained in:
Mikhail Chusavitin
2026-03-25 19:05:18 +03:00
parent b345b0d14d
commit d36e8442a9
13 changed files with 124 additions and 38 deletions

View File

@@ -0,0 +1,4 @@
# journald drop-in: copy journal entries to the first serial port.
[Journal]
# Forward journal messages to the console device named by TTYPath below.
ForwardToConsole=yes
TTYPath=/dev/ttyS0
# Forward messages of every priority, up to and including debug.
MaxLevelConsole=debug

View File

@@ -5,9 +5,9 @@ Before=bee-web.service
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
StandardOutput=append:/appdata/bee/export/bee-audit.log
StandardError=append:/appdata/bee/export/bee-audit.log
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
StandardOutput=journal
StandardError=journal
RemainAfterExit=yes
[Install]

View File

@@ -0,0 +1,16 @@
# Template unit that streams the system journal to the device /dev/<instance>.
# %I expands to the unescaped instance name the template is started with.
[Unit]
Description=Bee: mirror system journal to %I
After=systemd-journald.service
Requires=systemd-journald.service
# Skip the unit when the target device node does not exist.
ConditionPathExists=/dev/%I
[Service]
Type=simple
# Print the last 200 journal entries, then follow new ones, using monotonic
# timestamps. A shell is used so the output can be redirected to the device.
# NOTE(review): every restart replays the last 200 entries again — confirm
# this is acceptable on a console that is also fed by other log sources.
ExecStart=/bin/sh -c 'exec journalctl -f -n 200 -o short-monotonic > /dev/%I'
# Restart the stream one second after it exits, whatever the reason.
Restart=always
RestartSec=1
# stdout is already redirected by the shell command above; anything written
# to stderr is sent to the journal.
StandardOutput=null
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -5,9 +5,9 @@ Before=network-online.target bee-audit.service
[Service]
Type=oneshot
ExecStart=/usr/local/bin/bee-network.sh
StandardOutput=append:/appdata/bee/export/bee-network.log
StandardError=append:/appdata/bee/export/bee-network.log
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-network.log /usr/local/bin/bee-network.sh
StandardOutput=journal
StandardError=journal
RemainAfterExit=yes
[Install]

View File

@@ -5,9 +5,9 @@ Before=bee-audit.service
[Service]
Type=oneshot
ExecStart=/usr/local/bin/bee-nvidia-load
StandardOutput=append:/appdata/bee/export/bee-nvidia.log
StandardError=append:/appdata/bee/export/bee-nvidia.log
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-nvidia.log /usr/local/bin/bee-nvidia-load
StandardOutput=journal
StandardError=journal
RemainAfterExit=yes
[Install]

View File

@@ -5,9 +5,9 @@ Before=bee-audit.service
[Service]
Type=oneshot
ExecStart=/bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
StandardOutput=append:/appdata/bee/export/runtime-health.log
StandardError=append:/appdata/bee/export/runtime-health.log
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/runtime-health.log /bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
StandardOutput=journal
StandardError=journal
RemainAfterExit=yes
[Install]

View File

@@ -5,9 +5,9 @@ Before=ssh.service
[Service]
Type=oneshot
ExecStart=/usr/local/bin/bee-sshsetup
StandardOutput=append:/appdata/bee/export/bee-sshsetup.log
StandardError=append:/appdata/bee/export/bee-sshsetup.log
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-sshsetup.log /usr/local/bin/bee-sshsetup
StandardOutput=journal
StandardError=journal
RemainAfterExit=yes
[Install]

View File

@@ -5,11 +5,11 @@ Wants=bee-audit.service
[Service]
Type=simple
ExecStart=/usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
Restart=always
RestartSec=2
StandardOutput=append:/appdata/bee/export/bee-web.log
StandardError=append:/appdata/bee/export/bee-web.log
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -0,0 +1,29 @@
#!/bin/bash
# bee-log-run: wrapper that runs a command and appends its combined output to
# a log file while still passing that output on to the wrapper's own stdout,
# so systemd/journald and the serial console receive it as well.
#
# Usage: bee-log-run <log-file> <command> [args...]
# Exits with status 2 when the log file or the command is missing.
set -o pipefail

# Require a non-empty log path followed by at least one command word.
if [ "$#" -lt 2 ] || [ -z "$1" ]; then
  echo "usage: $0 <log-file> <command> [args...]" >&2
  exit 2
fi

# After the shift, "$@" holds only the command and its arguments.
log_file="$1"
shift

# Make sure the directory that will hold the log file exists.
log_dir="$(dirname "$log_file")"
mkdir -p "$log_dir"
#######################################
# Copy stdin to a serial device on a best-effort basis.
#
# The function ALWAYS consumes stdin up to EOF, even when the device is
# missing, cannot be opened, or starts failing in the middle of the stream.
# This matters because the function is used as a `tee` output through process
# substitution: if it returned early, the pipe would close, `tee` would hit a
# broken pipe and exit, and the wrapped command would lose its output pipe.
#
# Arguments: $1 - path of the tty device to write to (e.g. /dev/ttyS0)
# Inputs:    stdin - the stream to copy
# Returns:   exit status of the final drain to /dev/null (normally 0)
#######################################
serial_sink() {
  local tty="$1"
  if [ -w "$tty" ]; then
    # A node can pass the -w check and still fail on open() or write();
    # ignore that failure and fall through to the drain below.
    cat > "$tty" || true
  fi
  # Discard whatever was not delivered. If the copy above reached EOF this
  # returns immediately.
  cat > /dev/null
}
"$@" 2>&1 | tee -a "$log_file" \
>(serial_sink /dev/ttyS0) \
>(serial_sink /dev/ttyS1)
exit "${PIPESTATUS[0]}"

View File

@@ -22,24 +22,33 @@ fi
log "module dir: $NVIDIA_KO_DIR"
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
# Some kernels expose backlight helper symbols only after loading `video`.
modprobe video >/dev/null 2>&1 && log "loaded helper module: video" || log "helper module unavailable: video"
# Load modules via insmod (direct load — no depmod needed)
for mod in nvidia nvidia-modeset nvidia-uvm; do
load_module() {
mod="$1"
shift
ko="$NVIDIA_KO_DIR/${mod}.ko"
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
if [ -f "$ko" ]; then
if insmod "$ko"; then
log "loaded: $mod"
else
log "WARN: failed to load: $mod"
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
fi
else
if [ ! -f "$ko" ]; then
log "WARN: not found: $ko"
return 1
fi
done
if insmod "$ko" "$@"; then
log "loaded: $mod $*"
return 0
fi
log "WARN: failed to load: $mod"
dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true
return 1
}
# Load only the base NVIDIA kernel module on boot.
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the
# conservative path until we have a stable repro for the observed boot crash.
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
exit 1
fi
log "skipping nvidia-modeset and nvidia-uvm during boot for stability"
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
@@ -61,8 +70,6 @@ if [ -n "$uvm_major" ]; then
&& log "created /dev/nvidia-uvm (major $uvm_major)" \
|| log "WARN: /dev/nvidia-uvm already exists"
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
else
log "WARN: nvidia-uvm not in /proc/devices"
fi
log "done"