feat: AMD GPU compute stress via rocm-validation-suite GST (GEMM)
- Add rocm-validation-suite, rocblas, rocrand, hip-runtime-amd, hipblaslt, comgr to ISO (~700MB, needed for HIP compute)
- RunAMDStressPack: run RVS GST (SGEMM ~31 TFLOPS/GPU) + bandwidth test
- Add rvs symlink in chroot setup hook
- Pin all new package versions in VERSIONS

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -47,7 +47,7 @@ chmod +x /usr/local/bin/bee-log-run 2>/dev/null || true
|
||||
udevadm control --reload-rules 2>/dev/null || true
|
||||
|
||||
# rocm symlinks (packages install to /opt/rocm-*/bin/)
|
||||
for tool in rocm-smi rocm-bandwidth-test; do
|
||||
for tool in rocm-smi rocm-bandwidth-test rvs; do
|
||||
if [ ! -e /usr/local/bin/${tool} ]; then
|
||||
bin_path="$(find /opt -path "*/bin/${tool}" -type f 2>/dev/null | sort | tail -1)"
|
||||
[ -n "${bin_path}" ] && ln -sf "${bin_path}" /usr/local/bin/${tool}
|
||||
|
||||
@@ -75,10 +75,15 @@ firmware-qlogic
|
||||
# NVIDIA DCGM (Data Center GPU Manager) — dcgmi diag for acceptance testing
|
||||
datacenter-gpu-manager=1:%%DCGM_VERSION%%
|
||||
|
||||
# AMD ROCm SMI — GPU monitoring for Instinct cards (repo: rocm/apt/6.3.4 jammy)
|
||||
# AMD ROCm — GPU monitoring, bandwidth test, and compute stress (RVS GST)
|
||||
rocm-smi-lib=%%ROCM_SMI_VERSION%%
|
||||
# AMD ROCm bandwidth test — used for GPU burn-in stress
|
||||
rocm-bandwidth-test=%%ROCM_BANDWIDTH_TEST_VERSION%%
|
||||
rocm-validation-suite=%%ROCM_VALIDATION_SUITE_VERSION%%
|
||||
rocblas=%%ROCBLAS_VERSION%%
|
||||
rocrand=%%ROCRAND_VERSION%%
|
||||
hip-runtime-amd=%%HIP_RUNTIME_AMD_VERSION%%
|
||||
hipblaslt=%%HIPBLASLT_VERSION%%
|
||||
comgr=%%COMGR_VERSION%%
|
||||
|
||||
# glibc compat helpers (for any external binaries that need it)
|
||||
libc6
|
||||
|
||||
Reference in New Issue
Block a user