fix: fail loudly on missing NVIDIA libs and .ko, improve mknod logging
build-nvidia-module.sh:
- Replace silent glob cp for libnvidia-ml/libcuda with find + explicit error if library not found in extract dir (catches installer layout changes)
- Fix circular symlink bug: don't create .so.1 -> .so.1 if versioned file is already named .so.1
- Verify .ko count > 0 after build, fail loudly if none produced
- Show lib cache in final summary

bee-nvidia:
- mknod failures are now logged with ewarn instead of silently suppressed
- If nvidia not in /proc/devices (no GPU hardware), log clearly and exit clean

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -52,20 +52,27 @@ start() {
|
||||
# Without /dev/nvidiactl nvidia-smi returns NVML_ERROR_LIBRARY_NOT_FOUND (exit 12).
|
||||
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices 2>/dev/null | awk '{print $1}')
|
||||
if [ -n "$nvidia_major" ]; then
|
||||
mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255 2>/dev/null || true
|
||||
mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255 2>/dev/null \
|
||||
&& einfo "created /dev/nvidiactl (major $nvidia_major)" \
|
||||
|| ewarn "/dev/nvidiactl already exists or mknod failed"
|
||||
for i in 0 1 2 3 4 5 6 7; do
|
||||
mknod -m 666 "/dev/nvidia$i" c "$nvidia_major" "$i" 2>/dev/null || true
|
||||
done
|
||||
einfo "created /dev/nvidiactl and /dev/nvidia{0-7} (major $nvidia_major)"
|
||||
einfo "created /dev/nvidia{0-7}"
|
||||
else
|
||||
ewarn "/dev/nvidiactl: nvidia not in /proc/devices — no GPU hardware?"
|
||||
ewarn "/dev/nvidiactl: nvidia not in /proc/devices — no GPU hardware present?"
|
||||
eend 0
|
||||
return 0
|
||||
fi
|
||||
|
||||
uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices 2>/dev/null | awk '{print $1}')
|
||||
if [ -n "$uvm_major" ]; then
|
||||
mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0 2>/dev/null || true
|
||||
mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0 2>/dev/null \
|
||||
&& einfo "created /dev/nvidia-uvm (major $uvm_major)" \
|
||||
|| ewarn "/dev/nvidia-uvm already exists or mknod failed"
|
||||
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 2>/dev/null || true
|
||||
einfo "created /dev/nvidia-uvm (major $uvm_major)"
|
||||
else
|
||||
ewarn "/dev/nvidia-uvm: nvidia-uvm not in /proc/devices"
|
||||
fi
|
||||
|
||||
eend 0
|
||||
|
||||
Reference in New Issue
Block a user