Stabilize live ISO consoles and NVIDIA boot path
This commit is contained in:
@@ -32,6 +32,6 @@ lb config noauto \
|
|||||||
--memtest none \
|
--memtest none \
|
||||||
--iso-volume "EASY-BEE" \
|
--iso-volume "EASY-BEE" \
|
||||||
--iso-application "EASY-BEE" \
|
--iso-application "EASY-BEE" \
|
||||||
--bootappend-live "boot=live components console=tty0 console=ttyS0,115200n8 username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
--bootappend-live "boot=live components console=ttyS0,115200n8 console=ttyS1,115200n8 loglevel=7 systemd.log_target=console systemd.journald.forward_to_console=1 systemd.journald.max_level_console=debug username=bee user-fullname=Bee modprobe.blacklist=nouveau" \
|
||||||
--apt-recommends false \
|
--apt-recommends false \
|
||||||
"${@}"
|
"${@}"
|
||||||
|
|||||||
@@ -5,6 +5,21 @@ set -e
|
|||||||
|
|
||||||
echo "=== bee chroot setup ==="
|
echo "=== bee chroot setup ==="
|
||||||
|
|
||||||
|
# ensure_bee_console_user — guarantee a usable `bee` login account for the console.
#
# Steps:
#   1. If the account exists, point it at /home/bee with /bin/sh (best effort);
#      otherwise create it with a home directory, /bin/sh and its own group.
#   2. Make sure /home/bee exists and is owned by bee.
#   3. Set the account password and add bee to the sudo group (best effort).
#
# Arguments: none.
# Returns:   0 on success; a failing useradd/mkdir/chown/chpasswd returns
#            non-zero from that command (fatal when the caller runs `set -e`).
ensure_bee_console_user() {
  if id bee >/dev/null 2>&1; then
    # Pre-existing account: repair home/shell, but do not fail the build if
    # usermod refuses (e.g. the account is already configured this way).
    usermod -d /home/bee -s /bin/sh bee 2>/dev/null || true
  else
    useradd -d /home/bee -m -s /bin/sh -U bee
  fi

  # usermod -d (without -m) does not create the directory, so do it explicitly.
  mkdir -p /home/bee

  # "bee:" hands the tree to bee and bee's *login group*. The previous
  # "bee:bee" form required a group literally named `bee`, which is only
  # guaranteed on the useradd -U path above and made chown fail for a
  # pre-existing account with a different primary group.
  chown -R bee: /home/bee

  # NOTE(review): fixed, well-known credential baked into the image —
  # presumably intentional for a live ISO console login; confirm.
  printf '%s\n' 'bee:eeb' | chpasswd

  # The sudo group may be absent in minimal images; membership is optional.
  usermod -aG sudo bee 2>/dev/null || true
}
|
||||||
|
|
||||||
|
ensure_bee_console_user
|
||||||
|
|
||||||
# Enable bee services
|
# Enable bee services
|
||||||
systemctl enable bee-network.service
|
systemctl enable bee-network.service
|
||||||
systemctl enable bee-nvidia.service
|
systemctl enable bee-nvidia.service
|
||||||
@@ -15,6 +30,8 @@ systemctl enable bee-sshsetup.service
|
|||||||
systemctl enable ssh.service
|
systemctl enable ssh.service
|
||||||
systemctl enable qemu-guest-agent.service 2>/dev/null || true
|
systemctl enable qemu-guest-agent.service 2>/dev/null || true
|
||||||
systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
|
systemctl enable serial-getty@ttyS0.service 2>/dev/null || true
|
||||||
|
systemctl enable serial-getty@ttyS1.service 2>/dev/null || true
|
||||||
|
systemctl enable bee-journal-mirror@ttyS1.service 2>/dev/null || true
|
||||||
|
|
||||||
# Ensure scripts are executable
|
# Ensure scripts are executable
|
||||||
chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-network.sh 2>/dev/null || true
|
||||||
@@ -23,6 +40,7 @@ chmod +x /usr/local/bin/bee-sshsetup 2>/dev/null || true
|
|||||||
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-smoketest 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee-tui 2>/dev/null || true
|
chmod +x /usr/local/bin/bee-tui 2>/dev/null || true
|
||||||
chmod +x /usr/local/bin/bee 2>/dev/null || true
|
chmod +x /usr/local/bin/bee 2>/dev/null || true
|
||||||
|
chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
||||||
|
|
||||||
# Reload udev rules
|
# Reload udev rules
|
||||||
udevadm control --reload-rules 2>/dev/null || true
|
udevadm control --reload-rules 2>/dev/null || true
|
||||||
|
|||||||
@@ -53,17 +53,23 @@ else
|
|||||||
fail "NVIDIA ko dir missing: $KO_DIR"
|
fail "NVIDIA ko dir missing: $KO_DIR"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for mod in nvidia nvidia_modeset nvidia_uvm; do
|
if /sbin/lsmod 2>/dev/null | grep -q "^nvidia "; then
|
||||||
|
ok "module loaded: nvidia"
|
||||||
|
else
|
||||||
|
fail "module NOT loaded: nvidia"
|
||||||
|
fi
|
||||||
|
|
||||||
|
for mod in nvidia_modeset nvidia_uvm; do
|
||||||
if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
|
if /sbin/lsmod 2>/dev/null | grep -q "^$mod "; then
|
||||||
ok "module loaded: $mod"
|
ok "module loaded: $mod"
|
||||||
else
|
else
|
||||||
fail "module NOT loaded: $mod"
|
warn "module not loaded at boot: $mod"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- NVIDIA device nodes --"
|
echo "-- NVIDIA device nodes --"
|
||||||
for dev in nvidiactl nvidia0 nvidia-uvm; do
|
for dev in nvidiactl nvidia0; do
|
||||||
if [ -e "/dev/$dev" ]; then
|
if [ -e "/dev/$dev" ]; then
|
||||||
ok "/dev/$dev exists"
|
ok "/dev/$dev exists"
|
||||||
else
|
else
|
||||||
@@ -71,6 +77,12 @@ for dev in nvidiactl nvidia0 nvidia-uvm; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if [ -e /dev/nvidia-uvm ]; then
|
||||||
|
ok "/dev/nvidia-uvm exists"
|
||||||
|
else
|
||||||
|
warn "/dev/nvidia-uvm missing — CUDA stress path may be unavailable until loaded on demand"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- nvidia-smi --"
|
echo "-- nvidia-smi --"
|
||||||
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
|
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
[Journal]
|
||||||
|
ForwardToConsole=yes
|
||||||
|
TTYPath=/dev/ttyS0
|
||||||
|
MaxLevelConsole=debug
|
||||||
@@ -5,9 +5,9 @@ Before=bee-web.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-audit.log /bin/sh -c '/usr/local/bin/bee audit --runtime livecd --output file:/appdata/bee/export/bee-audit.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-audit] WARN: audit exited with rc=$rc"; fi; exit 0'
|
||||||
StandardOutput=append:/appdata/bee/export/bee-audit.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/bee-audit.log
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
16
iso/overlay/etc/systemd/system/bee-journal-mirror@.service
Normal file
16
iso/overlay/etc/systemd/system/bee-journal-mirror@.service
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Bee: mirror system journal to %I
|
||||||
|
After=systemd-journald.service
|
||||||
|
Requires=systemd-journald.service
|
||||||
|
ConditionPathExists=/dev/%I
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
ExecStart=/bin/sh -c 'exec journalctl -f -n 200 -o short-monotonic > /dev/%I'
|
||||||
|
Restart=always
|
||||||
|
RestartSec=1
|
||||||
|
StandardOutput=null
|
||||||
|
StandardError=journal
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
@@ -5,9 +5,9 @@ Before=network-online.target bee-audit.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/usr/local/bin/bee-network.sh
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-network.log /usr/local/bin/bee-network.sh
|
||||||
StandardOutput=append:/appdata/bee/export/bee-network.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/bee-network.log
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ Before=bee-audit.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/usr/local/bin/bee-nvidia-load
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-nvidia.log /usr/local/bin/bee-nvidia-load
|
||||||
StandardOutput=append:/appdata/bee/export/bee-nvidia.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/bee-nvidia.log
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ Before=bee-audit.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/runtime-health.log /bin/sh -c '/usr/local/bin/bee preflight --output file:/appdata/bee/export/runtime-health.json; rc=$?; if [ "$rc" -ne 0 ]; then echo "[bee-preflight] WARN: preflight exited with rc=$rc"; fi; exit 0'
|
||||||
StandardOutput=append:/appdata/bee/export/runtime-health.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/runtime-health.log
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ Before=ssh.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
ExecStart=/usr/local/bin/bee-sshsetup
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-sshsetup.log /usr/local/bin/bee-sshsetup
|
||||||
StandardOutput=append:/appdata/bee/export/bee-sshsetup.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/bee-sshsetup.log
|
StandardError=journal
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
@@ -5,11 +5,11 @@ Wants=bee-audit.service
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Type=simple
|
Type=simple
|
||||||
ExecStart=/usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
ExecStart=/usr/local/bin/bee-log-run /appdata/bee/export/bee-web.log /usr/local/bin/bee web --listen :80 --audit-path /appdata/bee/export/bee-audit.json --export-dir /appdata/bee/export --title "Bee Hardware Audit"
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=2
|
RestartSec=2
|
||||||
StandardOutput=append:/appdata/bee/export/bee-web.log
|
StandardOutput=journal
|
||||||
StandardError=append:/appdata/bee/export/bee-web.log
|
StandardError=journal
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
29
iso/overlay/usr/local/bin/bee-log-run
Normal file
29
iso/overlay/usr/local/bin/bee-log-run
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# bee-log-run — run a command, append its output to a file, and keep stdout/stderr
|
||||||
|
# connected to systemd so journald and the serial console also receive the logs.
|
||||||
|
|
||||||
|
set -o pipefail
|
||||||
|
|
||||||
|
log_file="$1"
|
||||||
|
shift
|
||||||
|
|
||||||
|
if [ -z "$log_file" ] || [ "$#" -eq 0 ]; then
|
||||||
|
echo "usage: $0 <log-file> <command> [args...]" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p "$(dirname "$log_file")"
|
||||||
|
|
||||||
|
# serial_sink TTY — copy stdin to the device TTY; discard it if that is not possible.
#
# Arguments: $1 - path of the device (or file) to copy stdin into.
# Outputs:   whatever arrives on stdin is written to $1 when it is writable.
# Returns:   0 once stdin has been read to EOF.
#
# This function is meant to sit behind a process substitution fed by `tee`.
# It must therefore read stdin until EOF in every case: if the reader went
# away early, tee would be hit by SIGPIPE and the command being logged would
# be killed on its next write.
serial_sink() {
  local tty="$1"

  if [ -w "$tty" ]; then
    # -w only checks permissions on the node; the write itself can still
    # fail (e.g. a serial node with no working port behind it). cat's own
    # error message is left on stderr so the cause stays visible.
    cat > "$tty" && return 0
  fi

  # Not writable, or the device rejected the data: keep draining.
  cat > /dev/null
}
|
||||||
|
|
||||||
|
"$@" 2>&1 | tee -a "$log_file" \
|
||||||
|
>(serial_sink /dev/ttyS0) \
|
||||||
|
>(serial_sink /dev/ttyS1)
|
||||||
|
exit "${PIPESTATUS[0]}"
|
||||||
@@ -22,24 +22,33 @@ fi
|
|||||||
log "module dir: $NVIDIA_KO_DIR"
|
log "module dir: $NVIDIA_KO_DIR"
|
||||||
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
|
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/ /' || true
|
||||||
|
|
||||||
# Some kernels expose backlight helper symbols only after loading `video`.
|
load_module() {
|
||||||
modprobe video >/dev/null 2>&1 && log "loaded helper module: video" || log "helper module unavailable: video"
|
mod="$1"
|
||||||
|
shift
|
||||||
# Load modules via insmod (direct load — no depmod needed)
|
|
||||||
for mod in nvidia nvidia-modeset nvidia-uvm; do
|
|
||||||
ko="$NVIDIA_KO_DIR/${mod}.ko"
|
ko="$NVIDIA_KO_DIR/${mod}.ko"
|
||||||
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
|
[ -f "$ko" ] || ko="$NVIDIA_KO_DIR/${mod//-/_}.ko"
|
||||||
if [ -f "$ko" ]; then
|
if [ ! -f "$ko" ]; then
|
||||||
if insmod "$ko"; then
|
|
||||||
log "loaded: $mod"
|
|
||||||
else
|
|
||||||
log "WARN: failed to load: $mod"
|
|
||||||
dmesg | tail -n 5 | sed 's/^/ dmesg: /' || true
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
log "WARN: not found: $ko"
|
log "WARN: not found: $ko"
|
||||||
|
return 1
|
||||||
fi
|
fi
|
||||||
done
|
if insmod "$ko" "$@"; then
|
||||||
|
log "loaded: $mod $*"
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
log "WARN: failed to load: $mod"
|
||||||
|
dmesg | tail -n 10 | sed 's/^/ dmesg: /' || true
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
# Load only the base NVIDIA kernel module on boot.
|
||||||
|
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
|
||||||
|
# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the
|
||||||
|
# conservative path until we have a stable repro for the observed boot crash.
|
||||||
|
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
log "skipping nvidia-modeset and nvidia-uvm during boot for stability"
|
||||||
|
|
||||||
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
|
# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer)
|
||||||
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
|
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
|
||||||
@@ -61,8 +70,6 @@ if [ -n "$uvm_major" ]; then
|
|||||||
&& log "created /dev/nvidia-uvm (major $uvm_major)" \
|
&& log "created /dev/nvidia-uvm (major $uvm_major)" \
|
||||||
|| log "WARN: /dev/nvidia-uvm already exists"
|
|| log "WARN: /dev/nvidia-uvm already exists"
|
||||||
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
|
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 || true
|
||||||
else
|
|
||||||
log "WARN: nvidia-uvm not in /proc/devices"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "done"
|
log "done"
|
||||||
|
|||||||
Reference in New Issue
Block a user