feat: AMD GPU compute stress via rocm-validation-suite GST (GEMM)

- Add rocm-validation-suite, rocblas, rocrand, hip-runtime-amd,
  hipblaslt, comgr to ISO (~700MB, needed for HIP compute)
- RunAMDStressPack: run RVS GST (SGEMM ~31 TFLOPS/GPU) + bandwidth test
- Add rvs symlink in chroot setup hook
- Pin all new package versions in VERSIONS

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-29 10:56:32 +03:00
parent e69e9109da
commit a03312c286
4 changed files with 32 additions and 7 deletions

View File

@@ -142,13 +142,27 @@ func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationS
if err := ensureAMDRuntimeReady(); err != nil {
return "", err
}
// Write RVS GST config to a temp file
rvsCfg := fmt.Sprintf(`actions:
- name: gst_stress
device: all
module: gst
parallel: true
duration: %d
copy_matrix: false
target_stress: 90
matrix_size_a: 8640
matrix_size_b: 8640
matrix_size_c: 8640
`, seconds*1000)
cfgFile := "/tmp/bee-amd-gst.conf"
_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
{name: fmt.Sprintf("03-rocm-smi-monitor-%ds.log", seconds), cmd: []string{
"bash", "-lc",
fmt.Sprintf("end=$((SECONDS+%d)); while [ \"$SECONDS\" -lt \"$end\" ]; do rocm-smi --showtemp --showpower --csv; sleep 1; done", seconds),
}},
{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
{name: fmt.Sprintf("04-rocm-smi-after.log"), cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
}, logFunc)
}