Tighten support bundles and fix AMD runtime checks

This commit is contained in:
Mikhail Chusavitin
2026-03-25 19:35:25 +03:00
parent 30cf014d58
commit 9a1df9b1ba
12 changed files with 663 additions and 79 deletions

View File

@@ -60,6 +60,14 @@ apt-get update -qq
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
if apt-get install -y --no-install-recommends rocm-smi-lib 2>/dev/null; then
echo "=== AMD ROCm: rocm-smi installed ==="
# Expose rocm-smi on PATH through /usr/local/bin. Prefer the /opt/rocm
# location; otherwise fall back to searching /opt for a bin/rocm-smi file.
if [ -x /opt/rocm/bin/rocm-smi ]; then
  ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
else
  # Version-aware sort so that e.g. rocm-5.10.0 ranks above rocm-5.9.0; a
  # plain lexicographic sort would select the older 5.9.0 path. LC_ALL=C keeps
  # the ordering independent of the locale the script happens to run under.
  # NOTE(review): -type f skips symlinks; confirm bin/rocm-smi is a regular
  # file in the install trees this fallback is meant to find.
  candidate="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | LC_ALL=C sort -V | tail -n 1)"
  if [ -n "${candidate}" ]; then
    ln -sf "${candidate}" /usr/local/bin/rocm-smi
  fi
fi
rocm-smi --version 2>/dev/null || true
else
echo "WARN: rocm-smi-lib install failed — GPU monitoring unavailable"