# Patch summary (unified git diff; the payload below is kept exactly as found).
# NOTE(review): the hunk headers declare multi-line hunks (22/38 and 20/27
# lines) but the text sits on two physical lines, so line breaks appear to have
# been collapsed in this copy -- confirm against the original commit before
# trying to apply it.
#
# iso/builder/build-nvidia-module.sh
#   * Replaces the best-effort `cp ... 2>/dev/null || true` of libnvidia-ml and
#     libcuda with a `find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*"`
#     lookup; when nothing is found it prints an ERROR line, lists up to 20
#     *.so* files from $EXTRACT_DIR and exits 1.
#   * Adds a check that exits 1 when no .ko file exists in $CACHE_DIR/modules/,
#     and reuses the resulting $ko_count in the final summary line.
#   * Soname symlink loop: prefers a library file whose name does not end in
#     `.so.1`, falls back to any `${lib}.so.*`, and only creates the `.so.1`
#     link when the chosen file is not itself named `${lib}.so.1`.
#   * Final listing now shows $CACHE_DIR/lib/ in addition to $CACHE_DIR/bin/.
#
# iso/overlay/etc/init.d/bee-nvidia (start)
#   * Reports the outcome of mknod for /dev/nvidiactl and /dev/nvidia-uvm
#     (einfo on success, ewarn otherwise) instead of discarding it.
#   * When nvidiactl is absent from /proc/devices: warns, calls `eend 0` and
#     returns 0 before the nvidia-uvm section.
#   * Adds a warning when nvidia-uvm is absent from /proc/devices.
#
# NOTE(review): `mknod ... && einfo ... || ewarn ...` is not a strict if/else;
#   ewarn would also run if einfo itself returned non-zero.
# NOTE(review): `einfo "created /dev/nvidia{0-7}"` is printed unconditionally,
#   while each per-device mknod is silenced with `2>/dev/null || true`.
# NOTE(review): the new ERROR messages are written with plain `echo` (stdout).
# NOTE(review): `find ... | head -1` copies only the first match per library;
#   no ordering is specified -- confirm a single match is expected.
# NOTE(review): ko_count is derived from `ls ... | wc -l`.
diff --git a/iso/builder/build-nvidia-module.sh b/iso/builder/build-nvidia-module.sh index 521edb9..fafde2d 100644 --- a/iso/builder/build-nvidia-module.sh +++ b/iso/builder/build-nvidia-module.sh @@ -109,22 +109,38 @@ for ko in "$CACHE_DIR/modules/"*.ko; do strip --strip-debug "$ko" 2>/dev/null || true done -cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/" +cp "$EXTRACT_DIR/nvidia-smi" "$CACHE_DIR/bin/" cp "$EXTRACT_DIR/nvidia-bug-report.sh" "$CACHE_DIR/bin/" 2>/dev/null || true -cp "$EXTRACT_DIR/libnvidia-ml.so."* "$CACHE_DIR/lib/" 2>/dev/null || true -# libcuda stub needed by nvidia-smi at runtime -cp "$EXTRACT_DIR/libcuda.so."* "$CACHE_DIR/lib/" 2>/dev/null || true -# Create soname symlinks required by nvidia-smi on Alpine (musl/glibc via gcompat) +# Copy userspace libraries — use find to handle any versioning scheme (libnvidia-ml.so.X.Y.Z or .so.1) for lib in libnvidia-ml libcuda; do - versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9]* 2>/dev/null | head -1) + found=$(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" | head -1) + if [ -z "$found" ]; then + echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR" + ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true + exit 1 + fi + cp "$found" "$CACHE_DIR/lib/" +done + +# Verify .ko files were actually built +ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) +[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; } + +# Create soname symlinks required by nvidia-smi on Alpine (musl/glibc via gcompat + libc6-compat) +for lib in libnvidia-ml libcuda; do + versioned=$(ls "$CACHE_DIR/lib/${lib}.so."* 2>/dev/null | grep -v '\.so\.1$' | head -1) + [ -n "$versioned" ] || versioned=$(ls "$CACHE_DIR/lib/${lib}.so."* 2>/dev/null | head -1) [ -n "$versioned" ] || continue base=$(basename "$versioned") - ln -sf "$base" "$CACHE_DIR/lib/${lib}.so.1" 2>/dev/null || true + # Only create .so.1 if versioned file is not already named .so.1 + if [ "$base" != "${lib}.so.1" ]; then + ln -sf 
"$base" "$CACHE_DIR/lib/${lib}.so.1" + fi ln -sf "${lib}.so.1" "$CACHE_DIR/lib/${lib}.so" 2>/dev/null || true done echo "=== NVIDIA build complete ===" echo "cache: $CACHE_DIR" -echo "modules: $(ls "$CACHE_DIR/modules/"*.ko | wc -l) .ko files" -ls -lh "$CACHE_DIR/bin/" +echo "modules: $ko_count .ko files" +ls -lh "$CACHE_DIR/bin/" "$CACHE_DIR/lib/" diff --git a/iso/overlay/etc/init.d/bee-nvidia b/iso/overlay/etc/init.d/bee-nvidia index 9f129ae..637ffb4 100755 --- a/iso/overlay/etc/init.d/bee-nvidia +++ b/iso/overlay/etc/init.d/bee-nvidia @@ -52,20 +52,27 @@ start() { # Without /dev/nvidiactl nvidia-smi returns NVML_ERROR_LIBRARY_NOT_FOUND (exit 12). nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices 2>/dev/null | awk '{print $1}') if [ -n "$nvidia_major" ]; then - mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255 2>/dev/null || true + mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255 2>/dev/null \ + && einfo "created /dev/nvidiactl (major $nvidia_major)" \ + || ewarn "/dev/nvidiactl already exists or mknod failed" for i in 0 1 2 3 4 5 6 7; do mknod -m 666 "/dev/nvidia$i" c "$nvidia_major" "$i" 2>/dev/null || true done - einfo "created /dev/nvidiactl and /dev/nvidia{0-7} (major $nvidia_major)" + einfo "created /dev/nvidia{0-7}" else - ewarn "/dev/nvidiactl: nvidia not in /proc/devices — no GPU hardware?" + ewarn "/dev/nvidiactl: nvidia not in /proc/devices — no GPU hardware present?" + eend 0 + return 0 fi uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices 2>/dev/null | awk '{print $1}') if [ -n "$uvm_major" ]; then - mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0 2>/dev/null || true + mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0 2>/dev/null \ + && einfo "created /dev/nvidia-uvm (major $uvm_major)" \ + || ewarn "/dev/nvidia-uvm already exists or mknod failed" mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 2>/dev/null || true - einfo "created /dev/nvidia-uvm (major $uvm_major)" + else + ewarn "/dev/nvidia-uvm: nvidia-uvm not in /proc/devices" fi eend 0