Fix platform thermal cycling leaving GPU load running after test ends

bee-gpu-burn is a shell script that spawns bee-gpu-burn-worker children.
The default exec.CommandContext cancellation kills only the shell parent;
the worker processes survive and keep loading the GPU indefinitely.

Fix: set Setpgid=true and install a custom Cancel that sends SIGKILL to
the entire process group (-pid), the same pattern already used in
runSATCommandCtx. Applied to the Nvidia, AMD, and CPU stress commands for
consistency.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-05 20:19:20 +03:00
parent 5b9015451e
commit c5d6b30177

View File

@@ -392,6 +392,13 @@ func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) {
cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb))
}
cmd := exec.CommandContext(ctx, path, cmdArgs...)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
if err := startLowPriorityCmd(cmd, 15); err != nil {
@@ -433,6 +440,13 @@ func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd {
cfgFile := "/tmp/bee-platform-gst.conf"
_ = os.WriteFile(cfgFile, []byte(cfg), 0644)
cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile)
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
_ = startLowPriorityCmd(cmd, 10)
@@ -448,6 +462,16 @@ func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd {
return nil
}
cmd := exec.CommandContext(ctx, path, "--seconds", "86400")
// bee-gpu-burn is a shell script that spawns bee-gpu-burn-worker children.
// Put the whole tree in its own process group so context cancellation kills
// all workers, not just the shell parent.
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Cancel = func() error {
if cmd.Process != nil {
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL)
}
return nil
}
cmd.Stdout = nil
cmd.Stderr = nil
_ = startLowPriorityCmd(cmd, 10)