From d36e8442a97e5ae415921af9e50510b75b4fe51a Mon Sep 17 00:00:00 2001 From: Mikhail Chusavitin Date: Wed, 25 Mar 2026 19:05:18 +0300 Subject: [PATCH] Stabilize live ISO consoles and NVIDIA boot path --- iso/builder/auto/config | 2 +- .../hooks/normal/9000-bee-setup.hook.chroot | 18 +++++++++ iso/builder/smoketest.sh | 18 +++++++-- .../journald.conf.d/console-to-sol.conf | 4 ++ .../etc/systemd/system/bee-audit.service | 6 +-- .../system/bee-journal-mirror@.service | 16 ++++++++ .../etc/systemd/system/bee-network.service | 6 +-- .../etc/systemd/system/bee-nvidia.service | 6 +-- .../etc/systemd/system/bee-preflight.service | 6 +-- .../etc/systemd/system/bee-sshsetup.service | 6 +-- .../etc/systemd/system/bee-web.service | 6 +-- iso/overlay/usr/local/bin/bee-log-run | 29 ++++++++++++++ iso/overlay/usr/local/bin/bee-nvidia-load | 39 +++++++++++-------- 13 files changed, 124 insertions(+), 38 deletions(-) create mode 100644 iso/overlay/etc/systemd/journald.conf.d/console-to-sol.conf create mode 100644 iso/overlay/etc/systemd/system/bee-journal-mirror@.service create mode 100644 iso/overlay/usr/local/bin/bee-log-run diff --git a/iso/builder/auto/config b/iso/builder/auto/config index 50ba931..41c05b1 100755 --- a/iso/builder/auto/config +++ b/iso/builder/auto/config @@ -32,6 +32,6 @@ lb config noauto \ --memtest none \ --iso-volume "EASY-BEE" \ --iso-application "EASY-BEE" \ - --bootappend-live "boot=live components console=tty0 console=ttyS0,115200n8 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \ + --bootappend-live "boot=live components console=ttyS0,115200n8 console=ttyS1,115200n8 loglevel=7 systemd.log_target=console systemd.journald.forward_to_console=1 systemd.journald.max_level_console=debug username=bee user-fullname=Bee modprobe.blacklist=nouveau" \ --apt-recommends false \ "${@}" diff --git a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot index 2590ed9..a83eed1 100755 --- a/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot +++ b/iso/builder/config/hooks/normal/9000-bee-setup.hook.chroot @@ -5,6 +5,21 @@ set -e echo "=== bee chroot setup ===" +ensure_bee_console_user() { + if id bee >/dev/null 2>&1; then + usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true + else + useradd -d /home/bee -m -s /bin/sh -U bee + fi + + mkdir -p /home/bee + chown -R bee:bee /home/bee + echo "bee:eeb" | chpasswd + usermod -aG sudo bee 2>/dev/null || true +} + +ensure_bee_console_user + # Enable bee services systemctl enable bee-network.service systemctl enable bee-nvidia.service @@ -15,6 +30,8 @@ systemctl enable bee-sshsetup.service systemctl enable ssh.service systemctl enable qemu-guest-agent.service 2>/dev/null || true systemctl enable serial-getty@ttyS0.service 2>/dev/null || true +systemctl enable serial-getty@ttyS1.service 2>/dev/null || true +systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true # Ensure scripts are executable chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true @@ -23,6 +40,7 @@ chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true chmod +x /usr/local/bin/bee-tui 2>/dev/null || true chmod +x /usr/local/bin/bee 2>/dev/null || true +chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true # Reload udev rules udevadm control --reload-rules 2>/dev/null || true diff --git a/iso/builder/smoketest.sh b/iso/builder/smoketest.sh index 261426d..e3be0d0 100644 --- a/iso/builder/smoketest.sh +++ b/iso/builder/smoketest.sh @@ -53,17 +53,23 @@ else fail "NVIDIA ko dir missing: $KO_DIR" fi -for mod in nvidia nvidia_modeset nvidia_uvm; do +if /sbin/lsmod 2>/dev/null | grep -q "^nvidia "; then + ok "module loaded: nvidia" +else + fail "module NOT loaded: nvidia" +fi + +for mod in nvidia_modeset nvidia_uvm; do if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then ok "module loaded: $mod" else - fail "module NOT loaded: $mod" + warn "module not loaded at boot: $mod" fi done echo "" echo "-- NVIDIA device nodes --" -for dev in nvidiactl nvidia0 nvidia-uvm; do +for dev in nvidiactl nvidia0; do if [ -e "/dev/$dev" ]; then ok "/dev/$dev exists" else @@ -71,6 +77,12 @@ for dev in nvidiactl nvidia0 nvidia-uvm; do fi done +if [ -e /dev/nvidia-uvm ]; then + ok "/dev/nvidia-uvm exists" +else + warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand" +fi + echo "" echo "-- nvidia-smi --" if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then diff --git a/iso/overlay/etc/systemd/journald.conf.d/console-to-sol.conf b/iso/overlay/etc/systemd/journald.conf.d/console-to-sol.conf new file mode 100644 index 0000000..43cc614 --- /dev/null +++ b/iso/overlay/etc/systemd/journald.conf.d/console-to-sol.conf @@ -0,0 +1,4 @@ +[Journal] +ForwardToConsole=yes +TTYPath=/dev/ttyS0 +MaxLevelConsole=debug diff --git a/iso/overlay/etc/systemd/system/bee-audit.service b/iso/overlay/etc/systemd/system/bee-audit.service index 1594063..dd7417a 100644 --- a/iso/overlay/etc/systemd/system/bee-audit.service +++ b/iso/overlay/etc/systemd/system/bee-audit.service @@ -5,9 +5,9 @@ Before=bee-web.service [Service] Type=oneshot -ExecStart=/bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0' -StandardOutput=append:/appdata/bee/export/bee-audit.log -StandardError=append:/appdata/bee/export/bee-audit.log +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0' +StandardOutput=journal +StandardError=journal RemainAfterExit=yes [Install] diff --git a/iso/overlay/etc/systemd/system/bee-journal-mirror@.service b/iso/overlay/etc/systemd/system/bee-journal-mirror@.service new file mode 100644 index 0000000..b386556 --- /dev/null +++ b/iso/overlay/etc/systemd/system/bee-journal-mirror@.service @@ -0,0 +1,16 @@ +[Unit] +Description=Bee: mirror system journal to %I +After=systemd-journald.service +Requires=systemd-journald.service +ConditionPathExists=/dev/%I + +[Service] +Type=simple +ExecStart=/bin/sh -c 'exec journalctl -f -n 200 -o short-monotonic > /dev/%I' +Restart=always +RestartSec=1 +StandardOutput=null +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/iso/overlay/etc/systemd/system/bee-network.service b/iso/overlay/etc/systemd/system/bee-network.service index a3cbcd2..080ad3a 100644 --- a/iso/overlay/etc/systemd/system/bee-network.service +++ b/iso/overlay/etc/systemd/system/bee-network.service @@ -5,9 +5,9 @@ Before=network-online.target bee-audit.service [Service] Type=oneshot -ExecStart=/usr/local/bin/bee-network.sh -StandardOutput=append:/appdata/bee/export/bee-network.log -StandardError=append:/appdata/bee/export/bee-network.log +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-network.log /usr/local/bin/bee-network.sh +StandardOutput=journal +StandardError=journal RemainAfterExit=yes [Install] diff --git a/iso/overlay/etc/systemd/system/bee-nvidia.service b/iso/overlay/etc/systemd/system/bee-nvidia.service index b2278f6..1bddb2f 100644 --- a/iso/overlay/etc/systemd/system/bee-nvidia.service +++ b/iso/overlay/etc/systemd/system/bee-nvidia.service @@ -5,9 +5,9 @@ Before=bee-audit.service [Service] Type=oneshot -ExecStart=/usr/local/bin/bee-nvidia-load -StandardOutput=append:/appdata/bee/export/bee-nvidia.log -StandardError=append:/appdata/bee/export/bee-nvidia.log +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-nvidia.log /usr/local/bin/bee-nvidia-load +StandardOutput=journal +StandardError=journal RemainAfterExit=yes [Install] diff --git a/iso/overlay/etc/systemd/system/bee-preflight.service b/iso/overlay/etc/systemd/system/bee-preflight.service index 9c3781a..b6c73da 100644 --- a/iso/overlay/etc/systemd/system/bee-preflight.service +++ b/iso/overlay/etc/systemd/system/bee-preflight.service @@ -5,9 +5,9 @@ Before=bee-audit.service [Service] Type=oneshot -ExecStart=/bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0' -StandardOutput=append:/appdata/bee/export/runtime-health.log -StandardError=append:/appdata/bee/export/runtime-health.log +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/runtime-health.log /bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0' +StandardOutput=journal +StandardError=journal RemainAfterExit=yes [Install] diff --git a/iso/overlay/etc/systemd/system/bee-sshsetup.service b/iso/overlay/etc/systemd/system/bee-sshsetup.service index 44c7c90..4be2391 100644 --- a/iso/overlay/etc/systemd/system/bee-sshsetup.service +++ b/iso/overlay/etc/systemd/system/bee-sshsetup.service @@ -5,9 +5,9 @@ Before=ssh.service [Service] Type=oneshot -ExecStart=/usr/local/bin/bee-sshsetup -StandardOutput=append:/appdata/bee/export/bee-sshsetup.log -StandardError=append:/appdata/bee/export/bee-sshsetup.log +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-sshsetup.log /usr/local/bin/bee-sshsetup +StandardOutput=journal +StandardError=journal RemainAfterExit=yes [Install] diff --git a/iso/overlay/etc/systemd/system/bee-web.service b/iso/overlay/etc/systemd/system/bee-web.service index 247da2a..0fc2252 100644 --- a/iso/overlay/etc/systemd/system/bee-web.service +++ b/iso/overlay/etc/systemd/system/bee-web.service @@ -5,11 +5,11 @@ Wants=bee-audit.service [Service] Type=simple -ExecStart=/usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit" +ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit" Restart=always RestartSec=2 -StandardOutput=append:/appdata/bee/export/bee-web.log -StandardError=append:/appdata/bee/export/bee-web.log +StandardOutput=journal +StandardError=journal [Install] WantedBy=multi-user.target diff --git a/iso/overlay/usr/local/bin/bee-log-run b/iso/overlay/usr/local/bin/bee-log-run new file mode 100644 index 0000000..62fd621 --- /dev/null +++ b/iso/overlay/usr/local/bin/bee-log-run @@ -0,0 +1,29 @@ +#!/bin/bash +# bee-log-run — run a command, append its output to a file, and keep stdout/stderr +# connected to systemd so journald and the serial console also receive the logs. + +set -o pipefail + +log_file="$1" +shift + +if [ -z "$log_file" ] || [ "$#" -eq 0 ]; then + echo "usage: $0 [args...]" >&2 + exit 2 +fi + +mkdir -p "$(dirname "$log_file")" + +serial_sink() { + local tty="$1" + if [ -w "$tty" ]; then + cat > "$tty" + else + cat > /dev/null + fi +} + +"$@" 2>&1 | tee -a "$log_file" \ + >(serial_sink /dev/ttyS0) \ + >(serial_sink /dev/ttyS1) +exit "${PIPESTATUS[0]}" diff --git a/iso/overlay/usr/local/bin/bee-nvidia-load b/iso/overlay/usr/local/bin/bee-nvidia-load index 0226ebe..8982ac9 100755 --- a/iso/overlay/usr/local/bin/bee-nvidia-load +++ b/iso/overlay/usr/local/bin/bee-nvidia-load @@ -22,24 +22,33 @@ fi log "module dir: $NVIDIA_KO_DIR" ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true -# Some kernels expose backlight helper symbols only after loading `video`. -modprobe video >/dev/null 2>&1 && log "loaded helper module: video" || log "helper module unavailable: video" - -# Load modules via insmod (direct load — no depmod needed) -for mod in nvidia nvidia-modeset nvidia-uvm; do +load_module() { + mod="$1" + shift ko="$NVIDIA_KO_DIR/${mod}.ko" [ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko" - if [ -f "$ko" ]; then - if insmod "$ko"; then - log "loaded: $mod" - else - log "WARN: failed to load: $mod" - dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true - fi - else + if [ ! -f "$ko" ]; then log "WARN: not found: $ko" + return 1 fi -done + if insmod "$ko" "$@"; then + log "loaded: $mod $*" + return 0 + fi + log "WARN: failed to load: $mod" + dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true + return 1 +} + +# Load only the base NVIDIA kernel module on boot. +# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can +# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the +# conservative path until we have a stable repro for the observed boot crash. +if ! load_module nvidia NVreg_EnableGpuFirmware=0; then + exit 1 +fi + +log "skipping nvidia-modeset and nvidia-uvm during boot for stability" # Create /dev/nvidia* device nodes (udev rules absent since we use .run installer) nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}') @@ -61,8 +70,6 @@ if [ -n "$uvm_major" ]; then && log "created /dev/nvidia-uvm (major $uvm_major)" \ || log "WARN: /dev/nvidia-uvm already exists" mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true -else - log "WARN: nvidia-uvm not in /proc/devices" fi log "done"