refactor(iso): replace chroot hooks for DCGM/ROCm with live-build apt sources

Move datacenter-gpu-manager and rocm-smi-lib from dynamic chroot hooks
into live-build's config/archives mechanism so lb caches the .deb files
in cache/packages.chroot/ between builds, eliminating repeated 900+ MB
downloads. Versions pinned via VERSIONS and substituted into package
lists at build time.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-28 13:01:10 +03:00
parent acfd2010d7
commit 911745e4da
11 changed files with 68 additions and 178 deletions

View File

@@ -0,0 +1,29 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: GnuPG v2.0.22 (GNU/Linux)
mQINBGJYmlEBEAC6nJmeqByeReM+MSy4palACCnfOg4pOxffrrkldxz4jrDOZNK4
q8KG+ZbXrkdP0e9qTFRvZzN+A6Jw3ySfoiKXRBw5l2Zp81AYkghV641OpWNjZOyL
syKEtST9LR1ttHv1ZI71pj8NVG/EnpimZPOblEJ1OpibJJCXLrbn+qcJ8JNuGTSK
6v2aLBmhR8VR/aSJpmkg7fFjcGklweTI8+Ibj72HuY9JRD/+dtUoSh7z037mWo56
ee02lPFRD0pHOEAlLSXxFO/SDqRVMhcgHk0a8roCF+9h5Ni7ZUyxlGK/uHkqN7ED
/U/ATpGKgvk4t23eTpdRC8FXAlBZQyf/xnhQXsyF/z7+RV5CL0o1zk1LKgo+5K32
5ka5uZb6JSIrEPUaCPEMXu6EEY8zSFnCrRS/Vjkfvc9ViYZWzJ387WTjAhMdS7wd
PmdDWw2ASGUP4FrfCireSZiFX+ZAOspKpZdh0P5iR5XSx14XDt3jNK2EQQboaJAD
uqksItatOEYNu4JsCbc24roJvJtGhpjTnq1/dyoy6K433afU0DS2ZPLthLpGqeyK
MKNY7a2WjxhRmCSu5Zok/fGKcO62XF8a3eSj4NzCRv8LM6mG1Oekz6Zz+tdxHg19
ufHO0et7AKE5q+5VjE438Xpl4UWbM/Voj6VPJ9uzywDcnZXpeOqeTQh2pQARAQAB
tCBjdWRhdG9vbHMgPGN1ZGF0b29sc0BudmlkaWEuY29tPokCOQQTAQIAIwUCYlia
UQIbAwcLCQgHAwIBBhUIAgkKCwQWAgMBAh4BAheAAAoJEKS0aZY7+GPM1y4QALKh
BqSozrYbe341Qu7SyxHQgjRCGi4YhI3bHCMj5F6vEOHnwiFH6YmFkxCYtqcGjca6
iw7cCYMow/hgKLAPwkwSJ84EYpGLWx62+20rMM4OuZwauSUcY/kE2WgnQ74zbh3+
MHs56zntJFfJ9G+NYidvwDWeZn5HIzR4CtxaxRgpiykg0s3ps6X0U+vuVcLnutBF
7r81astvlVQERFbce/6KqHK+yj843Qrhb3JEolUoOETK06nD25bVtnAxe0QEyA90
9MpRNLfR6BdjPpxqhphDcMOhJfyubAroQUxG/7S+Yw+mtEqHrL/dz9iEYqodYiSo
zfi0b+HFI59sRkTfOBDBwb3kcARExwnvLJmqijiVqWkoJ3H67oA0XJN2nelucw+A
Hb+Jt9BWjyzKWlLFDnVHdGicyRJ0I8yqi32w8hGeXmu3tU58VWJrkXEXadBftmci
pemb6oZ/r5SCkW6kxr2PsNWcJoebUdynyOQGbVwpMtJAnjOYp0ObKOANbcIg+tsi
kyCIO5TiY3ADbBDPCeZK8xdcugXoW5WFwACGC0z+Cn0mtw8z3VGIPAMSCYmLusgW
t2+EpikwrP2inNp5Pc+YdczRAsa4s30Jpyv/UHEG5P9GKnvofaxJgnU56lJIRPzF
iCUGy6cVI0Fq777X/ME1K6A/bzZ4vRYNx8rUmVE5
=DO7z
-----END PGP PUBLIC KEY BLOCK-----

View File

@@ -0,0 +1 @@
deb https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/ /

Binary file not shown.

View File

@@ -0,0 +1 @@
deb https://repo.radeon.com/rocm/apt/%%ROCM_VERSION%% jammy main

View File

@@ -46,6 +46,12 @@ chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
# Reload udev rules
udevadm control --reload-rules 2>/dev/null || true
# rocm-smi symlink (package installs to /opt/rocm-*/bin/rocm-smi)
if [ ! -e /usr/local/bin/rocm-smi ]; then
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
[ -n "${smi_path}" ] && ln -sf "${smi_path}" /usr/local/bin/rocm-smi
fi
# Create export directory
mkdir -p /appdata/bee/export

View File

@@ -1,103 +0,0 @@
#!/bin/sh
# 9001-amd-rocm.hook.chroot — install AMD ROCm SMI tool for Instinct GPU monitoring.
# Runs inside the live-build chroot. Adds AMD's apt repository and installs
# rocm-smi-lib which provides the `rocm-smi` CLI (analogous to nvidia-smi).
#
# AMD does NOT publish Debian Bookworm packages. The repo uses Ubuntu codenames
# (jammy/noble). We use jammy (Ubuntu 22.04) — its packages install cleanly on
# Debian 12 (Bookworm) due to compatible glibc/libstdc++.
# Tried versions newest-first; falls back if a point release is missing.
set -e
# Ubuntu codename to use for the AMD repo (Debian has no AMD packages).
ROCM_UBUNTU_DIST="jammy"
# ROCm point-releases to try newest-first. AMD drops old point releases
# from the repo, so we walk backwards until one responds 200.
ROCM_CANDIDATES="6.3.4 6.3.3 6.3.2 6.3.1 6.3 6.2.4 6.2.3 6.2.2 6.2.1 6.2"
ROCM_KEYRING="/etc/apt/keyrings/rocm.gpg"
ROCM_LIST="/etc/apt/sources.list.d/rocm.list"
APT_UPDATED=0
mkdir -p /etc/apt/keyrings
ensure_tool() {
tool="$1"
pkg="$2"
if command -v "${tool}" >/dev/null 2>&1; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
}
ensure_cert_bundle() {
if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
}
# live-build chroot may not include fetch/signing tools yet
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
echo "WARN: failed to install wget/gpg/ca-certificates prerequisites — skipping ROCm install"
exit 0
fi
# Download and import AMD GPG key
if ! wget -qO- "https://repo.radeon.com/rocm/rocm.gpg.key" \
| gpg --dearmor --yes --output "${ROCM_KEYRING}"; then
echo "WARN: failed to fetch AMD ROCm GPG key — skipping ROCm install"
exit 0
fi
# Try each ROCm version until apt-get update succeeds.
# AMD repo uses Ubuntu codenames; bookworm is not published — use jammy.
ROCM_VERSION=""
for candidate in ${ROCM_CANDIDATES}; do
cat > "${ROCM_LIST}" <<EOF
deb [arch=amd64 signed-by=${ROCM_KEYRING}] https://repo.radeon.com/rocm/apt/${candidate} ${ROCM_UBUNTU_DIST} main
EOF
if apt-get update -qq 2>/dev/null; then
ROCM_VERSION="${candidate}"
echo "=== AMD ROCm ${ROCM_VERSION} (${ROCM_UBUNTU_DIST}): repository available ==="
break
fi
echo "WARN: ROCm ${candidate} not available, trying next..."
rm -f "${ROCM_LIST}"
done
if [ -z "${ROCM_VERSION}" ]; then
echo "WARN: no ROCm apt repository available — skipping ROCm install"
rm -f "${ROCM_KEYRING}"
exit 0
fi
# rocm-smi-lib provides the rocm-smi CLI tool for GPU monitoring
if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends rocm-smi-lib; then
echo "=== AMD ROCm: rocm-smi-lib installed ==="
if [ -x /opt/rocm/bin/rocm-smi ]; then
ln -sf /opt/rocm/bin/rocm-smi /usr/local/bin/rocm-smi
else
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
if [ -n "${smi_path}" ]; then
ln -sf "${smi_path}" /usr/local/bin/rocm-smi
fi
fi
rocm-smi --version 2>/dev/null || true
else
echo "WARN: rocm-smi-lib install failed — AMD GPU monitoring unavailable"
fi
# Clean up apt lists to keep ISO size down
rm -f "${ROCM_LIST}"
apt-get clean

View File

@@ -1,66 +0,0 @@
#!/bin/sh
# 9002-nvidia-dcgm.hook.chroot — install NVIDIA DCGM inside the live-build chroot.
# DCGM (Data Center GPU Manager) provides dcgmi diag for acceptance testing.
# Adds NVIDIA's CUDA apt repository (debian12/x86_64) and installs datacenter-gpu-manager.
set -e
NVIDIA_KEYRING="/usr/share/keyrings/nvidia-cuda.gpg"
NVIDIA_LIST="/etc/apt/sources.list.d/nvidia-cuda.list"
NVIDIA_KEY_URL="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/3bf863cc.pub"
NVIDIA_REPO="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/"
APT_UPDATED=0
mkdir -p /usr/share/keyrings /etc/apt/sources.list.d
ensure_tool() {
tool="$1"
pkg="$2"
if command -v "${tool}" >/dev/null 2>&1; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
}
ensure_cert_bundle() {
if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
}
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
echo "WARN: prerequisites missing — skipping DCGM install"
exit 0
fi
# Download and import NVIDIA GPG key
if ! wget -qO- "${NVIDIA_KEY_URL}" | gpg --dearmor --yes --output "${NVIDIA_KEYRING}"; then
echo "WARN: failed to fetch NVIDIA GPG key — skipping DCGM install"
exit 0
fi
cat > "${NVIDIA_LIST}" <<EOF
deb [signed-by=${NVIDIA_KEYRING}] ${NVIDIA_REPO} /
EOF
apt-get update -qq
if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends datacenter-gpu-manager; then
echo "=== DCGM: datacenter-gpu-manager installed ==="
dcgmi --version 2>/dev/null || true
else
echo "WARN: datacenter-gpu-manager install failed — DCGM unavailable"
fi
# Clean up apt lists to keep ISO size down
rm -f "${NVIDIA_LIST}"
apt-get clean

View File

@@ -70,5 +70,11 @@ firmware-bnx2x
firmware-cavium
firmware-qlogic
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
datacenter-gpu-manager=1:%%DCGM_VERSION%%
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
rocm-smi-lib=%%ROCM_SMI_VERSION%%
# glibc compat helpers (for any external binaries that need it)
libc6