Add NVIDIA stress loader selection and DCGM 4 support

This commit is contained in:
Mikhail Chusavitin
2026-03-31 11:15:15 +03:00
parent 20f834aa96
commit 6dee8f3509
31 changed files with 789 additions and 111 deletions

View File

@@ -425,14 +425,12 @@ type satStats struct {
}
func nvidiaSATJobs() []satJob {
seconds := envInt("BEE_GPU_STRESS_SECONDS", 5)
sizeMB := envInt("BEE_GPU_STRESS_SIZE_MB", 64)
return []satJob{
{name: "01-nvidia-smi-q.log", cmd: []string{"nvidia-smi", "-q"}},
{name: "02-dmidecode-baseboard.log", cmd: []string{"dmidecode", "-t", "baseboard"}},
{name: "03-dmidecode-system.log", cmd: []string{"dmidecode", "-t", "system"}},
{name: "04-nvidia-bug-report.log", cmd: []string{"nvidia-bug-report.sh", "--output-file", "{{run_dir}}/nvidia-bug-report.log"}},
{name: "05-bee-gpu-stress.log", cmd: []string{"bee-gpu-stress", "--seconds", fmt.Sprintf("%d", seconds), "--size-mb", fmt.Sprintf("%d", sizeMB)}},
{name: "05-bee-gpu-burn.log", cmd: []string{"bee-gpu-burn", "--seconds", "5", "--size-mb", "64"}},
}
}