From c5d6b30177d03491671b8789dc26ef82b5681cc7 Mon Sep 17 00:00:00 2001 From: Michael Chus Date: Sun, 5 Apr 2026 20:19:20 +0300 Subject: [PATCH] Fix platform thermal cycling leaving GPU load running after test ends bee-gpu-burn is a shell script that spawns bee-gpu-burn-worker children. exec.CommandContext default cancel only kills the shell parent; the worker processes survive and keep loading the GPU indefinitely. Fix: set Setpgid=true and a custom Cancel that sends SIGKILL to the entire process group (-pid), same pattern already used in runSATCommandCtx. Applied to Nvidia, AMD, and CPU stress commands for consistency. Co-Authored-By: Claude Sonnet 4.6 --- audit/internal/platform/platform_stress.go | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/audit/internal/platform/platform_stress.go b/audit/internal/platform/platform_stress.go index d7e909f..239cfad 100644 --- a/audit/internal/platform/platform_stress.go +++ b/audit/internal/platform/platform_stress.go @@ -392,6 +392,13 @@ func buildCPUStressCmd(ctx context.Context) (*exec.Cmd, error) { cmdArgs = append(cmdArgs, "-M", strconv.Itoa(mb)) } cmd := exec.CommandContext(ctx, path, cmdArgs...) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } + return nil + } cmd.Stdout = nil cmd.Stderr = nil if err := startLowPriorityCmd(cmd, 15); err != nil { @@ -433,6 +440,13 @@ func buildAMDGPUStressCmd(ctx context.Context) *exec.Cmd { cfgFile := "/tmp/bee-platform-gst.conf" _ = os.WriteFile(cfgFile, []byte(cfg), 0644) cmd := exec.CommandContext(ctx, rvsPath, "-c", cfgFile) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } + return nil + } cmd.Stdout = nil cmd.Stderr = nil _ = startLowPriorityCmd(cmd, 10) @@ -448,6 +462,16 @@ func buildNvidiaGPUStressCmd(ctx context.Context) *exec.Cmd { return nil } cmd := exec.CommandContext(ctx, path, "--seconds", "86400") + // bee-gpu-burn is a shell script that spawns bee-gpu-burn-worker children. + // Put the whole tree in its own process group so context cancellation kills + // all workers, not just the shell parent. + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + cmd.Cancel = func() error { + if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } + return nil + } cmd.Stdout = nil cmd.Stderr = nil _ = startLowPriorityCmd(cmd, 10)