refactor(iso): replace chroot hooks for DCGM/ROCm with live-build apt sources
Move datacenter-gpu-manager and rocm-smi-lib from dynamic chroot hooks into live-build's config/archives mechanism so lb caches the .deb files in cache/packages.chroot/ between builds, eliminating repeated 900+ MB downloads. Versions pinned via VERSIONS and substituted into package lists at build time. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
29
iso/builder/config/archives/nvidia-cuda.key.chroot
Normal file
29
iso/builder/config/archives/nvidia-cuda.key.chroot
Normal file
@@ -0,0 +1,29 @@
|
||||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
Version: GnuPG v2.0.22 (GNU/Linux)
|
||||
|
||||
mQINBGJYmlEBEAC6nJmeqByeReM+MSy4palACCnfOg4pOxffrrkldxz4jrDOZNK4
|
||||
q8KG+ZbXrkdP0e9qTFRvZzN+A6Jw3ySfoiKXRBw5l2Zp81AYkghV641OpWNjZOyL
|
||||
syKEtST9LR1ttHv1ZI71pj8NVG/EnpimZPOblEJ1OpibJJCXLrbn+qcJ8JNuGTSK
|
||||
6v2aLBmhR8VR/aSJpmkg7fFjcGklweTI8+Ibj72HuY9JRD/+dtUoSh7z037mWo56
|
||||
ee02lPFRD0pHOEAlLSXxFO/SDqRVMhcgHk0a8roCF+9h5Ni7ZUyxlGK/uHkqN7ED
|
||||
/U/ATpGKgvk4t23eTpdRC8FXAlBZQyf/xnhQXsyF/z7+RV5CL0o1zk1LKgo+5K32
|
||||
5ka5uZb6JSIrEPUaCPEMXu6EEY8zSFnCrRS/Vjkfvc9ViYZWzJ387WTjAhMdS7wd
|
||||
PmdDWw2ASGUP4FrfCireSZiFX+ZAOspKpZdh0P5iR5XSx14XDt3jNK2EQQboaJAD
|
||||
uqksItatOEYNu4JsCbc24roJvJtGhpjTnq1/dyoy6K433afU0DS2ZPLthLpGqeyK
|
||||
MKNY7a2WjxhRmCSu5Zok/fGKcO62XF8a3eSj4NzCRv8LM6mG1Oekz6Zz+tdxHg19
|
||||
ufHO0et7AKE5q+5VjE438Xpl4UWbM/Voj6VPJ9uzywDcnZXpeOqeTQh2pQARAQAB
|
||||
tCBjdWRhdG9vbHMgPGN1ZGF0b29sc0BudmlkaWEuY29tPokCOQQTAQIAIwUCYlia
|
||||
UQIbAwcLCQgHAwIBBhUIAgkKCwQWAgMBAh4BAheAAAoJEKS0aZY7+GPM1y4QALKh
|
||||
BqSozrYbe341Qu7SyxHQgjRCGi4YhI3bHCMj5F6vEOHnwiFH6YmFkxCYtqcGjca6
|
||||
iw7cCYMow/hgKLAPwkwSJ84EYpGLWx62+20rMM4OuZwauSUcY/kE2WgnQ74zbh3+
|
||||
MHs56zntJFfJ9G+NYidvwDWeZn5HIzR4CtxaxRgpiykg0s3ps6X0U+vuVcLnutBF
|
||||
7r81astvlVQERFbce/6KqHK+yj843Qrhb3JEolUoOETK06nD25bVtnAxe0QEyA90
|
||||
9MpRNLfR6BdjPpxqhphDcMOhJfyubAroQUxG/7S+Yw+mtEqHrL/dz9iEYqodYiSo
|
||||
zfi0b+HFI59sRkTfOBDBwb3kcARExwnvLJmqijiVqWkoJ3H67oA0XJN2nelucw+A
|
||||
Hb+Jt9BWjyzKWlLFDnVHdGicyRJ0I8yqi32w8hGeXmu3tU58VWJrkXEXadBftmci
|
||||
pemb6oZ/r5SCkW6kxr2PsNWcJoebUdynyOQGbVwpMtJAnjOYp0ObKOANbcIg+tsi
|
||||
kyCIO5TiY3ADbBDPCeZK8xdcugXoW5WFwACGC0z+Cn0mtw8z3VGIPAMSCYmLusgW
|
||||
t2+EpikwrP2inNp5Pc+YdczRAsa4s30Jpyv/UHEG5P9GKnvofaxJgnU56lJIRPzF
|
||||
iCUGy6cVI0Fq777X/ME1K6A/bzZ4vRYNx8rUmVE5
|
||||
=DO7z
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
||||
1
iso/builder/config/archives/nvidia-cuda.list.chroot
Normal file
1
iso/builder/config/archives/nvidia-cuda.list.chroot
Normal file
@@ -0,0 +1 @@
|
||||
deb https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /
|
||||
BIN
iso/builder/config/archives/rocm.key.chroot
Normal file
BIN
iso/builder/config/archives/rocm.key.chroot
Normal file
Binary file not shown.
1
iso/builder/config/archives/rocm.list.chroot
Normal file
1
iso/builder/config/archives/rocm.list.chroot
Normal file
@@ -0,0 +1 @@
|
||||
deb https://repo.radeon.com/rocm/apt/%%ROCM_VERSION%% jammy main
|
||||
@@ -46,6 +46,12 @@ chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
||||
# Reload udev rules
|
||||
udevadm control --reload-rules 2>/dev/null || true
|
||||
|
||||
# rocm-smi symlink (package installs to /opt/rocm-*/bin/rocm-smi)
|
||||
if [ ! -e /usr/local/bin/rocm-smi ]; then
|
||||
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
||||
[ -n "${smi_path}" ] && ln -sf "${smi_path}" /usr/local/bin/rocm-smi
|
||||
fi
|
||||
|
||||
# Create export directory
|
||||
mkdir -p /appdata/bee/export
|
||||
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
#!/bin/sh
|
||||
# 9001-amd-rocm.hook.chroot — install AMD ROCm SMI tool for Instinct GPU monitoring.
|
||||
# Runs inside the live-build chroot. Adds AMD's apt repository and installs
|
||||
# rocm-smi-lib which provides the `rocm-smi` CLI (analogous to nvidia-smi).
|
||||
#
|
||||
# AMD does NOT publish Debian Bookworm packages. The repo uses Ubuntu codenames
|
||||
# (jammy/noble). We use jammy (Ubuntu 22.04) — its packages install cleanly on
|
||||
# Debian 12 (Bookworm) due to compatible glibc/libstdc++.
|
||||
# Tried versions newest-first; falls back if a point release is missing.
|
||||
|
||||
set -e
|
||||
|
||||
# Ubuntu codename to use for the AMD repo (Debian has no AMD packages).
|
||||
ROCM_UBUNTU_DIST="jammy"
|
||||
|
||||
# ROCm point-releases to try newest-first. AMD drops old point releases
|
||||
# from the repo, so we walk backwards until one responds 200.
|
||||
ROCM_CANDIDATES="6.3.4 6.3.3 6.3.2 6.3.1 6.3 6.2.4 6.2.3 6.2.2 6.2.1 6.2"
|
||||
|
||||
ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
|
||||
ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
|
||||
APT_UPDATED=0
|
||||
|
||||
mkdir -p /etc/apt/keyrings
|
||||
|
||||
ensure_tool() {
|
||||
tool="$1"
|
||||
pkg="$2"
|
||||
if command -v "${tool}" >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
if [ "${APT_UPDATED}" -eq 0 ]; then
|
||||
apt-get update -qq
|
||||
APT_UPDATED=1
|
||||
fi
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
|
||||
}
|
||||
|
||||
ensure_cert_bundle() {
|
||||
if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
|
||||
return 0
|
||||
fi
|
||||
if [ "${APT_UPDATED}" -eq 0 ]; then
|
||||
apt-get update -qq
|
||||
APT_UPDATED=1
|
||||
fi
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
|
||||
}
|
||||
|
||||
# live-build chroot may not include fetch/signing tools yet
|
||||
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
|
||||
echo "WARN: failed to install wget/gpg/ca-certificates prerequisites — skipping ROCm install"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Download and import AMD GPG key
|
||||
if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
|
||||
| gpg --dearmor --yes --output "${ROCM_KEYRING}"; then
|
||||
echo "WARN: failed to fetch AMD ROCm GPG key — skipping ROCm install"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Try each ROCm version until apt-get update succeeds.
|
||||
# AMD repo uses Ubuntu codenames; bookworm is not published — use jammy.
|
||||
ROCM_VERSION=""
|
||||
for candidate in ${ROCM_CANDIDATES}; do
|
||||
cat > "${ROCM_LIST}" <<EOF
|
||||
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${candidate} ${ROCM_UBUNTU_DIST} main
|
||||
EOF
|
||||
if apt-get update -qq 2>/dev/null; then
|
||||
ROCM_VERSION="${candidate}"
|
||||
echo "=== AMD ROCm ${ROCM_VERSION} (${ROCM_UBUNTU_DIST}): repository available ==="
|
||||
break
|
||||
fi
|
||||
echo "WARN: ROCm ${candidate} not available, trying next..."
|
||||
rm -f "${ROCM_LIST}"
|
||||
done
|
||||
|
||||
if [ -z "${ROCM_VERSION}" ]; then
|
||||
echo "WARN: no ROCm apt repository available — skipping ROCm install"
|
||||
rm -f "${ROCM_KEYRING}"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
|
||||
if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib; then
|
||||
echo "=== AMD ROCm: rocm-smi-lib installed ==="
|
||||
if [ -x /opt/rocm/bin/rocm-smi ]; then
|
||||
ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
|
||||
else
|
||||
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
||||
if [ -n "${smi_path}" ]; then
|
||||
ln -sf "${smi_path}" /usr/local/bin/rocm-smi
|
||||
fi
|
||||
fi
|
||||
rocm-smi --version 2>/dev/null || true
|
||||
else
|
||||
echo "WARN: rocm-smi-lib install failed — AMD GPU monitoring unavailable"
|
||||
fi
|
||||
|
||||
# Clean up apt lists to keep ISO size down
|
||||
rm -f "${ROCM_LIST}"
|
||||
apt-get clean
|
||||
@@ -1,66 +0,0 @@
|
||||
#!/bin/sh
|
||||
# 9002-nvidia-dcgm.hook.chroot — install NVIDIA DCGM inside the live-build chroot.
|
||||
# DCGM (Data Center GPU Manager) provides dcgmi diag for acceptance testing.
|
||||
# Adds NVIDIA's CUDA apt repository (debian12/x86_64) and installs datacenter-gpu-manager.
|
||||
|
||||
set -e
|
||||
|
||||
NVIDIA_KEYRING="/usr/share/keyrings/nvidia-cuda.gpg"
|
||||
NVIDIA_LIST="/etc/apt/sources.list.d/nvidia-cuda.list"
|
||||
NVIDIA_KEY_URL="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/3bf863cc.pub"
|
||||
NVIDIA_REPO="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/"
|
||||
APT_UPDATED=0
|
||||
|
||||
mkdir -p /usr/share/keyrings /etc/apt/sources.list.d
|
||||
|
||||
ensure_tool() {
|
||||
tool="$1"
|
||||
pkg="$2"
|
||||
if command -v "${tool}" >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
if [ "${APT_UPDATED}" -eq 0 ]; then
|
||||
apt-get update -qq
|
||||
APT_UPDATED=1
|
||||
fi
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
|
||||
}
|
||||
|
||||
ensure_cert_bundle() {
|
||||
if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
|
||||
return 0
|
||||
fi
|
||||
if [ "${APT_UPDATED}" -eq 0 ]; then
|
||||
apt-get update -qq
|
||||
APT_UPDATED=1
|
||||
fi
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
|
||||
}
|
||||
|
||||
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
|
||||
echo "WARN: prerequisites missing — skipping DCGM install"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Download and import NVIDIA GPG key
|
||||
if ! wget -qO- "${NVIDIA_KEY_URL}" | gpg --dearmor --yes --output "${NVIDIA_KEYRING}"; then
|
||||
echo "WARN: failed to fetch NVIDIA GPG key — skipping DCGM install"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
cat > "${NVIDIA_LIST}" <<EOF
|
||||
deb [signed-by=${NVIDIA_KEYRING}] ${NVIDIA_REPO} /
|
||||
EOF
|
||||
|
||||
apt-get update -qq
|
||||
|
||||
if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends datacenter-gpu-manager; then
|
||||
echo "=== DCGM: datacenter-gpu-manager installed ==="
|
||||
dcgmi --version 2>/dev/null || true
|
||||
else
|
||||
echo "WARN: datacenter-gpu-manager install failed — DCGM unavailable"
|
||||
fi
|
||||
|
||||
# Clean up apt lists to keep ISO size down
|
||||
rm -f "${NVIDIA_LIST}"
|
||||
apt-get clean
|
||||
@@ -70,5 +70,11 @@ firmware-bnx2x
|
||||
firmware-cavium
|
||||
firmware-qlogic
|
||||
|
||||
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
|
||||
datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
||||
|
||||
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
|
||||
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
||||
|
||||
# glibc compat helpers (for any external binaries that need it)
|
||||
libc6
|
||||
|
||||
Reference in New Issue
Block a user