fix: create /dev/nvidia* nodes in bee-nvidia — mdev has no NVIDIA rules
Alpine uses mdev, which has no rules for NVIDIA devices. Without /dev/nvidiactl
and /dev/nvidia{0-7}, nvidia-smi returns NVML_ERROR_LIBRARY_NOT_FOUND (exit 12)
even though the kernel modules are loaded and the libraries are present.
Fix: after insmod, read the major numbers from /proc/devices and mknod the required
character devices (/dev/nvidiactl, /dev/nvidia{0-7}, /dev/nvidia-uvm, /dev/nvidia-uvm-tools).
Add /dev/nvidia* node checks to smoketest for earlier failure detection.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -62,6 +62,16 @@ for mod in nvidia nvidia_modeset nvidia_uvm; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "-- NVIDIA device nodes --"
|
||||||
|
for dev in nvidiactl nvidia0 nvidia-uvm; do
|
||||||
|
if [ -e "/dev/$dev" ]; then
|
||||||
|
ok "/dev/$dev exists"
|
||||||
|
else
|
||||||
|
fail "/dev/$dev missing — nvidia-smi will return NVML_ERROR_LIBRARY_NOT_FOUND"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- nvidia-smi --"
|
echo "-- nvidia-smi --"
|
||||||
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
|
if PATH="/usr/local/bin:$PATH" command -v nvidia-smi >/dev/null 2>&1; then
|
||||||
|
|||||||
@@ -47,5 +47,26 @@ start() {
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Create /dev/nvidia* device nodes — mdev on Alpine does not have NVIDIA rules,
|
||||||
|
# so the kernel hotplug events are not handled and nodes are never created.
|
||||||
|
# Without /dev/nvidiactl nvidia-smi returns NVML_ERROR_LIBRARY_NOT_FOUND (exit 12).
|
||||||
|
nvidia_major=$(grep -m1 ' nvidiactl$' /proc/devices 2>/dev/null | awk '{print $1}')
|
||||||
|
if [ -n "$nvidia_major" ]; then
|
||||||
|
mknod -m 666 /dev/nvidiactl c "$nvidia_major" 255 2>/dev/null || true
|
||||||
|
for i in 0 1 2 3 4 5 6 7; do
|
||||||
|
mknod -m 666 "/dev/nvidia$i" c "$nvidia_major" "$i" 2>/dev/null || true
|
||||||
|
done
|
||||||
|
einfo "created /dev/nvidiactl and /dev/nvidia{0-7} (major $nvidia_major)"
|
||||||
|
else
|
||||||
|
ewarn "/dev/nvidiactl: nvidia not in /proc/devices — no GPU hardware?"
|
||||||
|
fi
|
||||||
|
|
||||||
|
uvm_major=$(grep -m1 ' nvidia-uvm$' /proc/devices 2>/dev/null | awk '{print $1}')
|
||||||
|
if [ -n "$uvm_major" ]; then
|
||||||
|
mknod -m 666 /dev/nvidia-uvm c "$uvm_major" 0 2>/dev/null || true
|
||||||
|
mknod -m 666 /dev/nvidia-uvm-tools c "$uvm_major" 1 2>/dev/null || true
|
||||||
|
einfo "created /dev/nvidia-uvm (major $uvm_major)"
|
||||||
|
fi
|
||||||
|
|
||||||
eend 0
|
eend 0
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user