fix(amd-stress): include VRAM load in GST burn
This commit is contained in:
@@ -146,28 +146,36 @@ func (s *System) RunAMDStressPack(ctx context.Context, baseDir string, durationS
|
||||
if err := ensureAMDRuntimeReady(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
// Write RVS GST config to a temp file
|
||||
rvsCfg := fmt.Sprintf(`actions:
|
||||
// Enable copy_matrix so the same GST run drives VRAM traffic in addition to compute.
|
||||
rvsCfg := amdStressRVSConfig(seconds)
|
||||
cfgFile := "/tmp/bee-amd-gst.conf"
|
||||
_ = os.WriteFile(cfgFile, []byte(rvsCfg), 0644)
|
||||
|
||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", amdStressJobs(seconds, cfgFile), logFunc)
|
||||
}
|
||||
|
||||
// amdStressRVSConfig renders the RVS configuration used for the GST stress
// action.
//
// seconds is the requested burn length; it is multiplied by 1000 before being
// written to the duration field (RVS documents that field in milliseconds).
// copy_matrix is enabled so the same GST run drives VRAM traffic in addition
// to compute. The key must appear exactly once: a second, conflicting
// copy_matrix entry would make the effective setting parser-dependent.
//
// NOTE(review): the two-space nesting under "- name" follows the layout of the
// upstream RVS sample configs — confirm it matches the deployed file.
func amdStressRVSConfig(seconds int) string {
	return fmt.Sprintf(`actions:
- name: gst_stress
  device: all
  module: gst
  parallel: true
  duration: %d
  copy_matrix: true
  target_stress: 90
  matrix_size_a: 8640
  matrix_size_b: 8640
  matrix_size_c: 8640
`, seconds*1000)
}
|
||||
|
||||
return runAcceptancePackCtx(ctx, baseDir, "gpu-amd-stress", []satJob{
|
||||
func amdStressJobs(seconds int, cfgFile string) []satJob {
|
||||
return []satJob{
|
||||
{name: "01-rocm-smi.log", cmd: []string{"rocm-smi"}},
|
||||
{name: "02-rocm-bandwidth-test.log", cmd: []string{"rocm-bandwidth-test"}},
|
||||
{name: fmt.Sprintf("03-rvs-gst-%ds.log", seconds), cmd: []string{"rvs", "-c", cfgFile}},
|
||||
{name: fmt.Sprintf("04-rocm-smi-after.log"), cmd: []string{"rocm-smi", "--showtemp", "--showpower", "--csv"}},
|
||||
}, logFunc)
|
||||
}
|
||||
}
|
||||
|
||||
// ListNvidiaGPUs returns GPUs visible to nvidia-smi.
|
||||
|
||||
Reference in New Issue
Block a user