feat(iso): add rocm-bandwidth-test for AMD GPU burn-in
- Add rocm-bandwidth-test package to ISO
- Add bee user to 'render' group (/dev/kfd, /dev/dri/renderD* access)
- Add rocm-bandwidth-test symlink alongside rocm-smi

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,5 +11,6 @@ CUDA_USERSPACE_VERSION=13.0.96-1
|
|||||||
DCGM_VERSION=3.3.9
|
DCGM_VERSION=3.3.9
|
||||||
ROCM_VERSION=6.3.4
|
ROCM_VERSION=6.3.4
|
||||||
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
ROCM_SMI_VERSION=7.4.0.60304-76~22.04
|
||||||
|
ROCM_BANDWIDTH_TEST_VERSION=1.4.0.60304-76~22.04
|
||||||
GO_VERSION=1.24.0
|
GO_VERSION=1.24.0
|
||||||
AUDIT_VERSION=1.0.0
|
AUDIT_VERSION=1.0.0
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ ensure_bee_console_user() {
|
|||||||
mkdir -p /home/bee
|
mkdir -p /home/bee
|
||||||
chown -R bee:bee /home/bee
|
chown -R bee:bee /home/bee
|
||||||
echo "bee:eeb" | chpasswd
|
echo "bee:eeb" | chpasswd
|
||||||
usermod -aG sudo,video,input bee 2>/dev/null || true
|
usermod -aG sudo,video,input,render bee 2>/dev/null || true
|
||||||
}
|
}
|
||||||
|
|
||||||
ensure_bee_console_user
|
ensure_bee_console_user
|
||||||
@@ -46,11 +46,13 @@ chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
|||||||
# Reload udev rules
|
# Reload udev rules
|
||||||
udevadm control --reload-rules 2>/dev/null || true
|
udevadm control --reload-rules 2>/dev/null || true
|
||||||
|
|
||||||
# rocm-smi symlink (package installs to /opt/rocm-*/bin/rocm-smi)
|
# rocm symlinks (packages install to /opt/rocm-*/bin/)
|
||||||
if [ ! -e /usr/local/bin/rocm-smi ]; then
|
for tool in rocm-smi rocm-bandwidth-test; do
|
||||||
smi_path="$(find /opt -path '*/bin/rocm-smi' -type f 2>/dev/null | sort | tail -1)"
|
if [ ! -e /usr/local/bin/${tool} ]; then
|
||||||
[ -n "${smi_path}" ] && ln -sf "${smi_path}" /usr/local/bin/rocm-smi
|
bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
|
||||||
fi
|
[ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
# Create export directory
|
# Create export directory
|
||||||
mkdir -p /appdata/bee/export
|
mkdir -p /appdata/bee/export
|
||||||
|
|||||||
@@ -77,6 +77,8 @@ datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
|||||||
|
|
||||||
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
|
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
|
||||||
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
||||||
|
# AMD ROCm bandwidth test — used for GPU burn-in stress
|
||||||
|
rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
|
||||||
|
|
||||||
# glibc compat helpers (for any external binaries that need it)
|
# glibc compat helpers (for any external binaries that need it)
|
||||||
libc6
|
libc6
|
||||||
|
|||||||
Reference in New Issue
Block a user