fix(iso): include full nvidia opencl runtime

This commit is contained in:
Mikhail Chusavitin
2026-04-01 09:16:06 +03:00
parent b447717a5a
commit 5839f870b7
2 changed files with 65 additions and 28 deletions

View File

@@ -46,7 +46,10 @@ CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}" CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads" DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract" EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
# Cache layout marker: a file inside CACHE_DIR whose name embeds the layout
# version. The cache-hit check below only accepts a cache that contains this
# file, and the build creates it with `touch` once it has finished.
# NOTE(review): presumably the version is bumped whenever the set of cached
# files changes, so caches written by an older script are rebuilt — confirm.
CACHE_LAYOUT_VERSION="2"
CACHE_LAYOUT_MARKER="${CACHE_DIR}/.cache-layout-v${CACHE_LAYOUT_VERSION}"
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \ if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
&& [ -f "$CACHE_LAYOUT_MARKER" ] \
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then && [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
echo "=== NVIDIA cached, skipping build ===" echo "=== NVIDIA cached, skipping build ==="
echo "cache: $CACHE_DIR" echo "cache: $CACHE_DIR"
@@ -130,24 +133,30 @@ else
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)" echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
fi fi
# Copy ALL userspace library files. # Copy NVIDIA userspace libraries broadly instead of whitelisting a few names.
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation # Newer driver branches add extra runtime deps (for example OpenCL/compiler side
# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND. # libraries). If we only copy a narrow allowlist, clinfo/John can see nvidia.icd
# but still fail with "no OpenCL platforms" because one dependent .so is absent.
# Copy every NVIDIA userspace library found directly in EXTRACT_DIR
# (libnvidia*.so.* and libcuda.so.*) into the cache lib directory.
# Quoted globs replace `for f in $(find ...)`, so a path containing whitespace
# or glob characters is never word-split. Symlinks and directories are skipped,
# which keeps the selection equal to `find -maxdepth 1 -type f`.
copied_libs=0
for f in "$EXTRACT_DIR"/libnvidia*.so.* "$EXTRACT_DIR"/libcuda.so.*; do
    # An unmatched glob stays literal and is rejected by the -f test.
    if [ ! -f "$f" ] || [ -L "$f" ]; then
        continue
    fi
    # Stop on a failed copy instead of leaving a partial runtime in the cache.
    cp "$f" "$CACHE_DIR/lib/" || exit 1
    copied_libs=$((copied_libs+1))
done
if [ "$copied_libs" -eq 0 ]; then
    echo "ERROR: no NVIDIA userspace libraries found in $EXTRACT_DIR"
    # Best-effort listing to help diagnose an unexpected installer layout.
    ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -40 || true
    exit 1
fi
for lib in \ for lib in \
libnvidia-ml \ libnvidia-ml \
libcuda \ libcuda \
libnvidia-ptxjitcompiler \ libnvidia-ptxjitcompiler \
libnvidia-opencl \ libnvidia-opencl; do
libnvidia-compiler \ if ! ls "$CACHE_DIR/lib/${lib}.so."* >/dev/null 2>&1; then
libnvidia-nvvm \ echo "ERROR: required ${lib}.so.* not found in extracted userspace libs"
libnvidia-fatbinaryloader; do ls "$CACHE_DIR/lib/" | sort >&2 || true
count=0
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
done
if [ "$count" -eq 0 ]; then
echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR"
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true
exit 1 exit 1
fi fi
done done
@@ -156,23 +165,17 @@ done
ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l) ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; } [ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
# Create soname symlinks (<stem>.so.1 -> versioned file, <stem>.so -> <stem>.so.1)
# for every copied versioned library.
# A real library file is never replaced by a link: when the copied file is
# itself named <stem>.so.1, `ln -sf "$base" "<stem>.so.1"` would overwrite it
# with a symlink pointing at itself and destroy the library.
for versioned in "$CACHE_DIR"/lib/*.so.*; do
    # Skip an unmatched glob, directories, and links left by an earlier pass.
    if [ ! -f "$versioned" ] || [ -L "$versioned" ]; then
        continue
    fi
    base=$(basename "$versioned")
    stem=${base%%.so.*}
    soname="$CACHE_DIR/lib/${stem}.so.1"
    # Only create or refresh the soname when it is absent or already a link.
    if [ -L "$soname" ] || [ ! -e "$soname" ]; then
        ln -sf "$base" "$soname"
    fi
    # The unversioned dev link is best-effort, as before.
    ln -sf "${stem}.so.1" "$CACHE_DIR/lib/${stem}.so" 2>/dev/null || true
done
touch "$CACHE_LAYOUT_MARKER"
echo "=== NVIDIA build complete ===" echo "=== NVIDIA build complete ==="
echo "cache: $CACHE_DIR" echo "cache: $CACHE_DIR"
echo "modules: $ko_count .ko files" echo "modules: $ko_count .ko files"

View File

@@ -109,6 +109,40 @@ else
fail "nvidia-smi: not found in PATH" fail "nvidia-smi: not found in PATH"
fi fi
echo ""
echo "-- OpenCL / John --"

# The vendor ICD file tells the OpenCL loader which NVIDIA library to load.
if [ ! -f /etc/OpenCL/vendors/nvidia.icd ]; then
    fail "OpenCL ICD missing: /etc/OpenCL/vendors/nvidia.icd"
else
    ok "OpenCL ICD present: /etc/OpenCL/vendors/nvidia.icd"
fi

# The NVIDIA OpenCL library must be listed in the dynamic linker cache.
if ! ldconfig -p 2>/dev/null | grep -q "libnvidia-opencl.so.1"; then
    fail "libnvidia-opencl.so.1 missing from linker cache"
else
    ok "libnvidia-opencl.so.1 present in linker cache"
fi

# clinfo must be installed and must list at least one platform.
if ! command -v clinfo >/dev/null 2>&1; then
    fail "clinfo: not found in PATH"
elif clinfo -l 2>/dev/null | grep -q "Platform"; then
    ok "clinfo: OpenCL platform detected"
else
    fail "clinfo: no OpenCL platform detected"
fi

# john must be installed and must list at least one OpenCL device.
if ! command -v john >/dev/null 2>&1; then
    fail "john: not found in PATH"
elif john --list=opencl-devices 2>/dev/null | grep -q "Device #"; then
    ok "john: OpenCL devices detected"
else
    fail "john: no OpenCL devices detected"
fi
echo "" echo ""
echo "-- lib symlinks --" echo "-- lib symlinks --"
for lib in libnvidia-ml libcuda; do for lib in libnvidia-ml libcuda; do