fix(iso): include full nvidia opencl runtime

This commit is contained in:
Mikhail Chusavitin
2026-04-01 09:16:06 +03:00
parent b447717a5a
commit 5839f870b7
2 changed files with 65 additions and 28 deletions

View File

@@ -46,7 +46,10 @@ CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
# Shared cache root: BEE_CACHE_DIR wins when set and non-empty, otherwise the
# cache lives under DIST_DIR.
CACHE_ROOT="${BEE_CACHE_DIR:-$DIST_DIR/cache}"
# Sub-directories of the shared cache root.
DOWNLOAD_CACHE_DIR="$CACHE_ROOT/nvidia-downloads"
EXTRACT_CACHE_DIR="$CACHE_ROOT/nvidia-extract"
# Marker file inside CACHE_DIR whose name embeds the layout version, so
# changing the version number yields a different marker path.
CACHE_LAYOUT_VERSION=2
CACHE_LAYOUT_MARKER="$CACHE_DIR/.cache-layout-v$CACHE_LAYOUT_VERSION"
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
&& [ -f "$CACHE_LAYOUT_MARKER" ] \
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
echo "=== NVIDIA cached, skipping build ==="
echo "cache: $CACHE_DIR"
@@ -130,24 +133,30 @@ else
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
fi
# Copy ALL userspace library files.
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
# (cuModuleLoadDataEx with PTX source); without it, loading fails with
# CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
# Copy NVIDIA userspace libraries broadly instead of whitelisting a few names.
# Newer driver branches add extra runtime deps (for example OpenCL/compiler side
# libraries). If we only copy a narrow allowlist, clinfo/John can see nvidia.icd
# but still fail with "no OpenCL platforms" because one dependent .so is absent.
#
# The candidates are enumerated with shell globs rather than by word-splitting
# the output of `find`, so a path containing whitespace or glob characters is
# never split or re-expanded.
copied_libs=0
for f in "$EXTRACT_DIR"/libnvidia*.so.* "$EXTRACT_DIR"/libcuda.so.*; do
    # Regular files only (the equivalent of `find -type f`): this skips
    # symlinks as well as the literal pattern left behind by an empty glob.
    if [ -L "$f" ] || [ ! -f "$f" ]; then
        continue
    fi
    cp "$f" "$CACHE_DIR/lib/"
    copied_libs=$((copied_libs+1))
done
if [ "$copied_libs" -eq 0 ]; then
    # Nothing matched: report on stderr and show what the directory does hold.
    echo "ERROR: no NVIDIA userspace libraries found in $EXTRACT_DIR" >&2
    ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -40 >&2 || true
    exit 1
fi
# Required-library handling.
# NOTE(review): this listing carries no +/- markers, so two versions of the
# loop appear to be interleaved below: a per-library copy loop (find + cp +
# count) and a presence check against $CACHE_DIR/lib. The text does not parse
# as shown; confirm the final form against the checked-out script.
for lib in \
libnvidia-ml \
libcuda \
libnvidia-ptxjitcompiler \
libnvidia-opencl \
libnvidia-compiler \
libnvidia-nvvm \
libnvidia-fatbinaryloader; do
# Copy-loop variant: copy every ${lib}.so.* found directly in $EXTRACT_DIR and
# count the copies that succeeded.
count=0
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
done
# Nothing was copied for this library: report it and list up to 20 of the
# .so files that are present in $EXTRACT_DIR.
if [ "$count" -eq 0 ]; then
echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR"
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true
# Presence-check variant: the library list appears to end here, with
# libnvidia-opencl as its last entry (TODO confirm).
libnvidia-opencl; do
# Fail when no ${lib}.so.* exists in $CACHE_DIR/lib; the sorted directory
# listing is sent to stderr to show what is actually there.
if ! ls "$CACHE_DIR/lib/${lib}.so."* >/dev/null 2>&1; then
echo "ERROR: required ${lib}.so.* not found in extracted userspace libs"
ls "$CACHE_DIR/lib/" | sort >&2 || true
exit 1
fi
done
@@ -156,23 +165,17 @@ done
# Count the kernel modules present in the cache; finding none is fatal.
ko_count=$(
    ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l
)
if ! [ "$ko_count" -gt 0 ]; then
    echo "ERROR: no .ko files built in $CACHE_DIR/modules/"
    exit 1
fi
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
for lib in \
libnvidia-ml \
libcuda \
libnvidia-ptxjitcompiler \
libnvidia-opencl \
libnvidia-compiler \
libnvidia-nvvm \
libnvidia-fatbinaryloader; do
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
[ -n "$versioned" ] || continue
# Create soname symlinks for every copied versioned library:
#   <stem>.so  ->  <stem>.so.1  ->  <stem>.so.<version>
# The glob below also matches symlinks and any file whose name is already
# <stem>.so.1, so both cases are filtered out explicitly.
for versioned in "$CACHE_DIR"/lib/*.so.*; do
    # Real files only: skip symlinks (a link is not a library to link to) and
    # the unexpanded pattern left behind when the glob matches nothing.
    if [ -L "$versioned" ] || [ ! -f "$versioned" ]; then
        continue
    fi
    base=${versioned##*/}
    stem=${base%%.so.*}
    soname="$CACHE_DIR/lib/${stem}.so.1"
    # Only create or refresh <stem>.so.1 when that path is free or is itself a
    # symlink. If it is a real file (including $versioned itself), `ln -sf`
    # would delete the library and could leave a link that points to itself.
    if [ -L "$soname" ] || [ ! -e "$soname" ]; then
        ln -sf "$base" "$soname"
    fi
    # Best effort: the unversioned name is a convenience link only.
    ln -sf "${stem}.so.1" "$CACHE_DIR/lib/${stem}.so" 2>/dev/null || true
done
# Create (or refresh) the cache layout marker file.
touch "$CACHE_LAYOUT_MARKER"

# Build summary, one line per item.
printf '%s\n' \
    "=== NVIDIA build complete ===" \
    "cache: $CACHE_DIR" \
    "modules: $ko_count .ko files"