#!/bin/sh
# bee-nvidia-load — load NVIDIA kernel modules and create device nodes
# Called by bee-nvidia.service at boot.

# Directory holding the NVIDIA kernel module (.ko) files.
NVIDIA_KO_DIR="/usr/local/lib/nvidia"

# log MESSAGE...
# Write the arguments, joined by the first character of IFS (a space by
# default) and prefixed with "[bee-nvidia] ", to stdout as a single line.
# printf is used instead of echo because echo's treatment of backslashes
# differs between /bin/sh implementations.
log() { printf '%s\n' "[bee-nvidia] $*"; }

# Record the running kernel release in the log.
log "kernel: $(uname -r)"

# Nothing to do on machines without an NVIDIA PCI device (vendor ID 10de).
# lspci's stderr is discarded, so a missing or failing lspci yields no match
# and takes the same "skip" path.
lspci -nn 2>/dev/null | grep -qi '10de:' || {
    log "no NVIDIA GPU detected — skipping module load"
    exit 0
}

# A GPU is present, so a missing module directory is a hard error.
[ -d "$NVIDIA_KO_DIR" ] || {
    log "ERROR: NVIDIA module dir missing: $NVIDIA_KO_DIR"
    exit 1
}

# Show which .ko files are available, each indented by two spaces.
# Prints nothing when the directory contains no .ko files.
log "module dir: $NVIDIA_KO_DIR"
ls "$NVIDIA_KO_DIR"/*.ko 2>/dev/null | sed 's/^/  /' || true

# load_module MODULE [PARAM=VALUE ...]
#
# Insert one kernel module from $NVIDIA_KO_DIR with insmod.
#
# Arguments:
#   $1    module name; "<name>.ko" is tried first, then the same name with
#         every "-" replaced by "_"
#   $2..  passed through to insmod unchanged (module parameters)
# Globals:
#   NVIDIA_KO_DIR (read); mod and ko are assigned and are not function-local
# Outputs:
#   a "loaded:" or "WARN:" line via log; after an insmod failure, the last
#   10 lines of dmesg, each prefixed with "  dmesg: "
# Returns:
#   0 when insmod succeeds; 1 when no matching .ko exists or insmod fails
load_module() {
    mod="$1"
    shift
    ko="$NVIDIA_KO_DIR/${mod}.ko"
    if [ ! -f "$ko" ]; then
        # Fall back to the underscore spelling of the module name. sed is used
        # rather than ${mod//-/_}: that expansion is not POSIX and fails with
        # "Bad substitution" under shells such as dash.
        ko="$NVIDIA_KO_DIR/$(printf '%s\n' "$mod" | sed 's/-/_/g').ko"
    fi
    if [ ! -f "$ko" ]; then
        log "WARN: not found: $ko"
        return 1
    fi
    if insmod "$ko" "$@"; then
        log "loaded: $mod $*"
        return 0
    fi
    log "WARN: failed to load: $mod"
    # Surface the most recent kernel messages to help diagnose the failure.
    dmesg | tail -n 10 | sed 's/^/  dmesg: /' || true
    return 1
}

# Load only the base NVIDIA kernel module on boot.
# NVIDIA documents that GSP firmware is enabled by default on newer GPUs and can
# be disabled via NVreg_EnableGpuFirmware=0. This keeps the live ISO on the
# conservative path until we have a stable repro for the observed boot crash.
if ! load_module nvidia NVreg_EnableGpuFirmware=0; then
    exit 1
fi

log "skipping nvidia-modeset and nvidia-uvm during boot for stability"

# Create /dev/nvidia* device nodes (udev rules absent since we use .run installer).
# The character-device major number is read from the "nvidiactl" entry in
# /proc/devices; the control node uses minor 255 and GPU nodes use minors 0-7.
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices | awk '{print $1}')
if [ -n "$nvidia_major" ]; then
    # Explicit if/else instead of "A && B || C", so the warning is tied to
    # mknod's result only.
    if mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255; then
        log "created /dev/nvidiactl (major $nvidia_major)"
    else
        log "WARN: /dev/nvidiactl already exists or mknod failed"
    fi

    # Best effort: keep going after a failure, but remember that it happened
    # so the summary line below does not claim success unconditionally.
    gpu_nodes_ok=1
    for i in 0 1 2 3 4 5 6 7; do
        mknod -m 666 "/dev/nvidia$i" c "$nvidia_major" "$i" || gpu_nodes_ok=0
    done
    if [ "$gpu_nodes_ok" -eq 1 ]; then
        log "created /dev/nvidia{0-7}"
    else
        log "WARN: some /dev/nvidia{0-7} nodes already exist or mknod failed"
    fi
else
    log "WARN: nvidiactl not in /proc/devices — no GPU hardware present?"
fi

# nvidia-uvm is not loaded by the code above, so this section only creates
# nodes when an "nvidia-uvm" entry is already present in /proc/devices.
uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices | awk '{print $1}')
if [ -n "$uvm_major" ]; then
    if mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0; then
        log "created /dev/nvidia-uvm (major $uvm_major)"
    else
        log "WARN: /dev/nvidia-uvm already exists"
    fi
    mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 \
        || log "WARN: /dev/nvidia-uvm-tools already exists or mknod failed"
fi

log "done"
