fix(iso): include full nvidia opencl runtime
This commit is contained in:
@@ -46,7 +46,10 @@ CACHE_DIR="${DIST_DIR}/nvidia-${NVIDIA_VERSION}-${KVER}"
|
|||||||
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
CACHE_ROOT="${BEE_CACHE_DIR:-${DIST_DIR}/cache}"
|
||||||
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
DOWNLOAD_CACHE_DIR="${CACHE_ROOT}/nvidia-downloads"
|
||||||
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
EXTRACT_CACHE_DIR="${CACHE_ROOT}/nvidia-extract"
|
||||||
|
CACHE_LAYOUT_VERSION="2"
|
||||||
|
CACHE_LAYOUT_MARKER="${CACHE_DIR}/.cache-layout-v${CACHE_LAYOUT_VERSION}"
|
||||||
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
|
if [ -d "$CACHE_DIR/modules" ] && [ -f "$CACHE_DIR/bin/nvidia-smi" ] \
|
||||||
|
&& [ -f "$CACHE_LAYOUT_MARKER" ] \
|
||||||
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
|
&& [ "$(ls "$CACHE_DIR/lib/libnvidia-ptxjitcompiler.so."* 2>/dev/null | wc -l)" -gt 0 ]; then
|
||||||
echo "=== NVIDIA cached, skipping build ==="
|
echo "=== NVIDIA cached, skipping build ==="
|
||||||
echo "cache: $CACHE_DIR"
|
echo "cache: $CACHE_DIR"
|
||||||
@@ -130,24 +133,30 @@ else
|
|||||||
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
echo "WARNING: no firmware/ dir found in installer (may be needed for Hopper GPUs)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Copy ALL userspace library files.
|
# Copy NVIDIA userspace libraries broadly instead of whitelisting a few names.
|
||||||
# libnvidia-ptxjitcompiler is required by libcuda for PTX JIT compilation
|
# Newer driver branches add extra runtime deps (for example OpenCL/compiler side
|
||||||
# (cuModuleLoadDataEx with PTX source) — without it CUDA_ERROR_JIT_COMPILER_NOT_FOUND.
|
# libraries). If we only copy a narrow allowlist, clinfo/John can see nvidia.icd
|
||||||
|
# but still fail with "no OpenCL platforms" because one dependent .so is absent.
|
||||||
|
copied_libs=0
|
||||||
|
for f in $(find "$EXTRACT_DIR" -maxdepth 1 \( -name 'libnvidia*.so.*' -o -name 'libcuda.so.*' \) -type f 2>/dev/null | sort); do
|
||||||
|
cp "$f" "$CACHE_DIR/lib/"
|
||||||
|
copied_libs=$((copied_libs+1))
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ "$copied_libs" -eq 0 ]; then
|
||||||
|
echo "ERROR: no NVIDIA userspace libraries found in $EXTRACT_DIR"
|
||||||
|
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -40 || true
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
for lib in \
|
for lib in \
|
||||||
libnvidia-ml \
|
libnvidia-ml \
|
||||||
libcuda \
|
libcuda \
|
||||||
libnvidia-ptxjitcompiler \
|
libnvidia-ptxjitcompiler \
|
||||||
libnvidia-opencl \
|
libnvidia-opencl; do
|
||||||
libnvidia-compiler \
|
if ! ls "$CACHE_DIR/lib/${lib}.so."* >/dev/null 2>&1; then
|
||||||
libnvidia-nvvm \
|
echo "ERROR: required ${lib}.so.* not found in extracted userspace libs"
|
||||||
libnvidia-fatbinaryloader; do
|
ls "$CACHE_DIR/lib/" | sort >&2 || true
|
||||||
count=0
|
|
||||||
for f in $(find "$EXTRACT_DIR" -maxdepth 1 -name "${lib}.so.*" 2>/dev/null); do
|
|
||||||
cp "$f" "$CACHE_DIR/lib/" && count=$((count+1))
|
|
||||||
done
|
|
||||||
if [ "$count" -eq 0 ]; then
|
|
||||||
echo "ERROR: ${lib}.so.* not found in $EXTRACT_DIR"
|
|
||||||
ls "$EXTRACT_DIR/"*.so* 2>/dev/null | head -20 || true
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
@@ -156,23 +165,17 @@ done
|
|||||||
ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
|
ko_count=$(ls "$CACHE_DIR/modules/"*.ko 2>/dev/null | wc -l)
|
||||||
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
[ "$ko_count" -gt 0 ] || { echo "ERROR: no .ko files built in $CACHE_DIR/modules/"; exit 1; }
|
||||||
|
|
||||||
# Create soname symlinks: use [0-9][0-9]* to avoid circular symlink (.so.1 has single digit)
|
# Create soname symlinks for every copied versioned library.
|
||||||
for lib in \
|
for versioned in "$CACHE_DIR"/lib/*.so.*; do
|
||||||
libnvidia-ml \
|
[ -f "$versioned" ] || continue
|
||||||
libcuda \
|
|
||||||
libnvidia-ptxjitcompiler \
|
|
||||||
libnvidia-opencl \
|
|
||||||
libnvidia-compiler \
|
|
||||||
libnvidia-nvvm \
|
|
||||||
libnvidia-fatbinaryloader; do
|
|
||||||
versioned=$(ls "$CACHE_DIR/lib/${lib}.so."[0-9][0-9]* 2>/dev/null | head -1)
|
|
||||||
[ -n "$versioned" ] || continue
|
|
||||||
base=$(basename "$versioned")
|
base=$(basename "$versioned")
|
||||||
ln -sf "$base" "$CACHE_DIR/lib/${lib}.so.1"
|
stem=${base%%.so.*}
|
||||||
ln -sf "${lib}.so.1" "$CACHE_DIR/lib/${lib}.so" 2>/dev/null || true
|
ln -sf "$base" "$CACHE_DIR/lib/${stem}.so.1"
|
||||||
echo "${lib}: .so.1 -> $base"
|
ln -sf "${stem}.so.1" "$CACHE_DIR/lib/${stem}.so" 2>/dev/null || true
|
||||||
done
|
done
|
||||||
|
|
||||||
|
touch "$CACHE_LAYOUT_MARKER"
|
||||||
|
|
||||||
echo "=== NVIDIA build complete ==="
|
echo "=== NVIDIA build complete ==="
|
||||||
echo "cache: $CACHE_DIR"
|
echo "cache: $CACHE_DIR"
|
||||||
echo "modules: $ko_count .ko files"
|
echo "modules: $ko_count .ko files"
|
||||||
|
|||||||
@@ -109,6 +109,40 @@ else
|
|||||||
fail "nvidia-smi: not found in PATH"
|
fail "nvidia-smi: not found in PATH"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
echo ""
|
||||||
|
echo "-- OpenCL / John --"
|
||||||
|
if [ -f /etc/OpenCL/vendors/nvidia.icd ]; then
|
||||||
|
ok "OpenCL ICD present: /etc/OpenCL/vendors/nvidia.icd"
|
||||||
|
else
|
||||||
|
fail "OpenCL ICD missing: /etc/OpenCL/vendors/nvidia.icd"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if ldconfig -p 2>/dev/null | grep -q "libnvidia-opencl.so.1"; then
|
||||||
|
ok "libnvidia-opencl.so.1 present in linker cache"
|
||||||
|
else
|
||||||
|
fail "libnvidia-opencl.so.1 missing from linker cache"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v clinfo >/dev/null 2>&1; then
|
||||||
|
if clinfo -l 2>/dev/null | grep -q "Platform"; then
|
||||||
|
ok "clinfo: OpenCL platform detected"
|
||||||
|
else
|
||||||
|
fail "clinfo: no OpenCL platform detected"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
fail "clinfo: not found in PATH"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v john >/dev/null 2>&1; then
|
||||||
|
if john --list=opencl-devices 2>/dev/null | grep -q "Device #"; then
|
||||||
|
ok "john: OpenCL devices detected"
|
||||||
|
else
|
||||||
|
fail "john: no OpenCL devices detected"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
fail "john: not found in PATH"
|
||||||
|
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "-- lib symlinks --"
|
echo "-- lib symlinks --"
|
||||||
for lib in libnvidia-ml libcuda; do
|
for lib in libnvidia-ml libcuda; do
|
||||||
|
|||||||
Reference in New Issue
Block a user