Files
bee/iso/builder/config/hooks/normal/9002-nvidia-dcgm.hook.chroot
Michael Chus eea98e6d76 feat(dcgm): add NVIDIA DCGM diagnostics, fix KVM console
- Add 9002-nvidia-dcgm.hook.chroot: installs datacenter-gpu-manager
  from NVIDIA apt repo during live-build
- Enable nvidia-dcgm.service in chroot setup hook
- Replace bee-gpu-stress with dcgmi diag (levels 1-4) in NVIDIA SAT
- TUI: replace GPU checkbox + duration UI with DCGM level selection
- Remove console=tty2 from boot params: KVM/VGA now shows tty1
  where bee-tui runs, fixing unresponsive console

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 23:08:12 +03:00

67 lines
2.0 KiB
Bash
Executable File

#!/bin/sh
# 9002-nvidia-dcgm.hook.chroot — install NVIDIA DCGM inside the live-build chroot.
# DCGM (Data Center GPU Manager) provides dcgmi diag for acceptance testing.
# Adds NVIDIA's CUDA apt repository (debian12/x86_64) and installs datacenter-gpu-manager.
set -e
NVIDIA_KEYRING="/usr/share/keyrings/nvidia-cuda.gpg"
NVIDIA_LIST="/etc/apt/sources.list.d/nvidia-cuda.list"
NVIDIA_KEY_URL="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/3bf863cc.pub"
NVIDIA_REPO="https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/"
APT_UPDATED=0
mkdir -p /usr/share/keyrings /etc/apt/sources.list.d
ensure_tool() {
tool="$1"
pkg="$2"
if command -v "${tool}" >/dev/null 2>&1; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${pkg}"
}
ensure_cert_bundle() {
if [ -s /etc/ssl/certs/ca-certificates.crt ]; then
return 0
fi
if [ "${APT_UPDATED}" -eq 0 ]; then
apt-get update -qq
APT_UPDATED=1
fi
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ca-certificates
}
if ! ensure_cert_bundle || ! ensure_tool wget wget || ! ensure_tool gpg gpg; then
echo "WARN: prerequisites missing — skipping DCGM install"
exit 0
fi
# Download and import NVIDIA GPG key
if ! wget -qO- "${NVIDIA_KEY_URL}" | gpg --dearmor --yes --output "${NVIDIA_KEYRING}"; then
echo "WARN: failed to fetch NVIDIA GPG key — skipping DCGM install"
exit 0
fi
cat > "${NVIDIA_LIST}" <<EOF
deb [signed-by=${NVIDIA_KEYRING}] ${NVIDIA_REPO} /
EOF
apt-get update -qq
if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends datacenter-gpu-manager; then
echo "=== DCGM: datacenter-gpu-manager installed ==="
dcgmi --version 2>/dev/null || true
else
echo "WARN: datacenter-gpu-manager install failed — DCGM unavailable"
fi
# Clean up apt lists to keep ISO size down
rm -f "${NVIDIA_LIST}"
apt-get clean