// File: bee/audit/internal/platform/kill_workers.go (91 lines, 2.3 KiB, Go)

package platform
import (
"context"
"fmt"
"log/slog"
"os"
"strconv"
"strings"
"syscall"
"time"
)
// workerPatterns lists the substrings that mark a process as a bee test
// worker. KillTestWorkers compares them (via shouldKillWorkerProcess) against
// the executable name read from /proc/<pid>/cmdline; any process whose name
// contains one of these substrings is sent SIGKILL.
var workerPatterns = []string{
	"bee-gpu-burn",
	"stress-ng",
	"stressapptest",
	"memtester",
	"nvbandwidth",

	// DCGM diagnostic workers: nvvs is spawned by `dcgmi diag` and survives
	// if dcgmi is killed mid-run, which leaves the GPU occupied
	// (DCGM_ST_IN_USE). Both names are listed so neither is left behind.
	"nvvs",
	"dcgmi",
}
// KilledProcess is one entry in the result of KillTestWorkers: a process that
// was targeted with SIGKILL. It marshals to JSON with the keys "pid" and
// "name".
type KilledProcess struct {
	// PID is the process ID, parsed from the /proc/<pid> directory name.
	PID int `json:"pid"`
	// Name is the base name (text after the last '/') of the process's
	// executable as read from /proc/<pid>/cmdline.
	Name string `json:"name"`
}
// KillTestWorkers scans /proc for running test worker processes and sends
// SIGKILL to each one found. It returns the processes the signal was actually
// delivered to, or nil if /proc cannot be read.
//
// A process is a candidate when the first word of its /proc/<pid>/cmdline
// (argv[0], cut at the first NUL or space) is accepted by
// shouldKillWorkerProcess. The calling process itself is never signalled.
//
// Handling of individual processes is best-effort: a process that exits
// mid-scan, whose cmdline cannot be read, or that cannot be signalled
// (e.g. ESRCH, EPERM) is skipped silently and left out of the result.
//
// The scan runs under a 5-second deadline to avoid blocking if the process
// table is very large (e.g. after a stress test with thousands of children).
// On timeout a warning is logged and the partial result is returned.
func KillTestWorkers() []KilledProcess {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	entries, err := os.ReadDir("/proc")
	if err != nil {
		return nil
	}

	self := os.Getpid()
	var killed []KilledProcess
	for _, e := range entries {
		// Non-blocking deadline check once per entry.
		select {
		case <-ctx.Done():
			slog.Warn("KillTestWorkers scan timed out", "killed_so_far", len(killed))
			return killed
		default:
		}

		if !e.IsDir() {
			continue
		}
		pid, err := strconv.Atoi(e.Name())
		if err != nil {
			continue // not a per-process directory
		}
		if pid == self {
			// Never SIGKILL ourselves, even if our own argv[0] happens to
			// contain one of the worker patterns.
			continue
		}

		cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid))
		if err != nil {
			continue // process exited or is not readable
		}

		// /proc/*/cmdline uses NUL bytes as argument separators. Keep only
		// the first word: everything before the first NUL or space.
		exe := string(cmdline)
		if end := strings.IndexAny(exe, "\x00 "); end >= 0 {
			exe = exe[:end]
		}
		exe = strings.TrimSpace(exe)

		// LastIndexByte returns -1 when there is no '/', so +1 yields 0 and
		// base is the whole string in that case.
		base := exe[strings.LastIndexByte(exe, '/')+1:]

		if !shouldKillWorkerProcess(exe, base) {
			continue
		}
		if err := syscall.Kill(pid, syscall.SIGKILL); err != nil {
			// Signal was not delivered; do not report the process as killed.
			continue
		}
		killed = append(killed, KilledProcess{PID: pid, Name: base})
	}
	return killed
}
// shouldKillWorkerProcess reports whether a process should be treated as a
// test worker. exe is the process's executable string and base is its final
// path component; the result is true when either one contains any entry of
// workerPatterns as a substring, and false otherwise.
func shouldKillWorkerProcess(exe, base string) bool {
	for i := range workerPatterns {
		needle := workerPatterns[i]
		if strings.Contains(exe, needle) {
			return true
		}
		if strings.Contains(base, needle) {
			return true
		}
	}
	return false
}