- Add 9002-nvidia-dcgm.hook.chroot: installs datacenter-gpu-manager from the NVIDIA apt repo during live-build
- Enable nvidia-dcgm.service in the chroot setup hook
- Replace bee-gpu-stress with dcgmi diag (levels 1-4) in NVIDIA SAT
- TUI: replace the GPU checkbox + duration UI with DCGM level selection
- Remove console=tty2 from boot params: KVM/VGA now shows tty1, where bee-tui runs, fixing the unresponsive console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
67 lines
2.0 KiB
Bash
Executable File
67 lines
2.0 KiB
Bash
Executable File
#!/bin/sh
# 9002-nvidia-dcgm.hook.chroot — install NVIDIA DCGM inside the live-build chroot.
#
# DCGM (Data Center GPU Manager) provides `dcgmi diag` for acceptance testing.
# This hook adds NVIDIA's CUDA apt repository (debian12/x86_64) and installs
# the datacenter-gpu-manager package from it.
#
# The hook is best-effort: when a prerequisite or the download is unavailable
# it prints a WARN line and exits 0 instead of failing the image build.

set -e

# Where the dearmored repository key is stored (referenced via signed-by=).
readonly NVIDIA_KEYRING="/usr/share/keyrings/nvidia-cuda.gpg"
# Temporary apt source entry; the hook removes it again after the install.
readonly NVIDIA_LIST="/etc/apt/sources.list.d/nvidia-cuda.list"
# ASCII-armored public key published alongside the repository.
readonly NVIDIA_KEY_URL="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/3bf863cc.pub"
# Flat repository root (used with the "/" suite in the sources entry).
readonly NVIDIA_REPO="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/"
# Flipped to 1 after the first `apt-get update` so the helpers refresh the
# package index at most once.
APT_UPDATED=0

# Make sure the target directories for the keyring and the source list exist.
mkdir -p /usr/share/keyrings /etc/apt/sources.list.d

# ensure_tool TOOL PKG — make sure TOOL is on PATH, installing PKG if it is not.
#   $1 - command name probed with `command -v`
#   $2 - apt package installed when the command is missing
# Globals: APT_UPDATED (read/written) — set to 1 once `apt-get update` has
#          succeeded, so the package index is refreshed at most once.
# Returns: 0 when the tool is available (already, or after the install),
#          non-zero when the index refresh or the install fails or the tool is
#          still missing afterwards.
ensure_tool() {
  if command -v "$1" >/dev/null 2>&1; then
    return 0
  fi
  if [ "${APT_UPDATED:-0}" -eq 0 ]; then
    # `set -e` is ignored while a function runs as part of an `if`/`||`
    # condition, so propagate the failure explicitly and only mark the index
    # as fresh when the update really succeeded.
    apt-get update -qq || return 1
    APT_UPDATED=1
  fi
  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "$2" || return 1
  # Confirm the package actually delivered the command we were asked for.
  command -v "$1" >/dev/null 2>&1
}

# ensure_cert_bundle — make sure a non-empty CA bundle exists at
# /etc/ssl/certs/ca-certificates.crt, installing ca-certificates if it does not.
# Globals: APT_UPDATED (read/written) — set to 1 once `apt-get update` has
#          succeeded, so the package index is refreshed at most once.
# Returns: 0 when the bundle is present (already, or after the install),
#          non-zero when the index refresh or the install fails or the bundle
#          is still missing/empty afterwards.
ensure_cert_bundle() {
  if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
    return 0
  fi
  if [ "${APT_UPDATED:-0}" -eq 0 ]; then
    # `set -e` is ignored while a function runs as part of an `if`/`||`
    # condition, so propagate the failure explicitly and only mark the index
    # as fresh when the update really succeeded.
    apt-get update -qq || return 1
    APT_UPDATED=1
  fi
  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates || return 1
  # Confirm the install really produced a usable bundle.
  [ -s /etc/ssl/certs/ca-certificates.crt ]
}

# TLS roots, wget and gpg are needed to fetch and import the repository key.
# A missing prerequisite skips DCGM instead of failing the image build.
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
  echo "WARN: prerequisites missing — skipping DCGM install"
  exit 0
fi

# Download the armored key to a scratch file first. POSIX sh has no `pipefail`,
# so in `wget | gpg` only gpg's exit status would be seen and a failed download
# could go unnoticed.
if ! NVIDIA_KEY_TMP="$(mktemp)"; then
  echo "WARN: could not create a temporary file — skipping DCGM install"
  exit 0
fi
# Remove the scratch file on every exit path.
trap 'rm -f "${NVIDIA_KEY_TMP}"' EXIT

# Download and import NVIDIA GPG key
if ! wget -qO "${NVIDIA_KEY_TMP}" "${NVIDIA_KEY_URL}" \
  || [ ! -s "${NVIDIA_KEY_TMP}" ] \
  || ! gpg --dearmor --yes --output "${NVIDIA_KEYRING}" "${NVIDIA_KEY_TMP}"; then
  # Do not leave a partial or empty keyring behind in the image.
  rm -f "${NVIDIA_KEYRING}"
  echo "WARN: failed to fetch NVIDIA GPG key — skipping DCGM install"
  exit 0
fi

# Temporarily register the NVIDIA repository, pinned to the imported key.
cat > "${NVIDIA_LIST}" <<EOF
deb [signed-by=${NVIDIA_KEYRING}] ${NVIDIA_REPO} /
EOF
# From here on the source entry must also disappear on every exit path, so an
# aborted hook never ships the extra repository in the image.
trap 'rm -f "${NVIDIA_KEY_TMP}" "${NVIDIA_LIST}"' EXIT

# Same best-effort policy as the other steps: an unreachable repository skips
# DCGM rather than aborting the whole build under `set -e`.
if ! apt-get update -qq; then
  echo "WARN: apt-get update failed with the NVIDIA repo enabled — skipping DCGM install"
  exit 0
fi

if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends datacenter-gpu-manager; then
  echo "=== DCGM: datacenter-gpu-manager installed ==="
  # Informational only; a missing or failing dcgmi must not fail the hook.
  dcgmi --version 2>/dev/null || true
else
  echo "WARN: datacenter-gpu-manager install failed — DCGM unavailable"
fi

# Drop the temporary NVIDIA source entry and the downloaded package cache to
# keep the ISO size down.
rm -f "${NVIDIA_LIST}"
apt-get clean