Reset GPUs before power benchmark

This commit is contained in:
Mikhail Chusavitin
2026-04-20 09:42:19 +03:00
parent 5dc711de23
commit 1cfabc9230
2 changed files with 121 additions and 8 deletions

View File

@@ -1,8 +1,13 @@
package platform
import (
"context"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
)
func TestResolveBenchmarkProfile(t *testing.T) {
@@ -182,6 +187,75 @@ func TestBenchmarkCalibrationThrottleReasonIgnoresPowerReasons(t *testing.T) {
}
}
// TestResetBenchmarkGPUsSkipsWithoutRoot checks the unprivileged path of
// resetBenchmarkGPUs: with a non-zero effective UID it must not run any
// external command, must log that the reset was skipped, and must return every
// requested GPU index as failed.
//
// The test is intentionally NOT marked t.Parallel(): it overwrites the
// package-level benchmarkGeteuid and satExecCommand hooks, and running it
// concurrently with another test that swaps or reads those hooks is a data
// race that makes both tests flaky.
func TestResetBenchmarkGPUsSkipsWithoutRoot(t *testing.T) {
	oldGeteuid := benchmarkGeteuid
	oldExec := satExecCommand
	// Register the restore before mutating so the hooks are always put back.
	t.Cleanup(func() {
		benchmarkGeteuid = oldGeteuid
		satExecCommand = oldExec
	})

	// Pretend to be an ordinary user.
	benchmarkGeteuid = func() int { return 1000 }
	// Any attempt to spawn a process on this path is a test failure.
	satExecCommand = func(name string, args ...string) *exec.Cmd {
		t.Fatalf("unexpected command: %s %v", name, args)
		return nil
	}

	var logs []string
	failed := resetBenchmarkGPUs(context.Background(), filepath.Join(t.TempDir(), "verbose.log"), []int{0, 2}, func(line string) {
		logs = append(logs, line)
	})

	if got, want := strings.Join(logs, "\n"), "power benchmark pre-flight: root privileges unavailable, GPU reset skipped"; !strings.Contains(got, want) {
		t.Fatalf("logs=%q want substring %q", got, want)
	}
	if len(failed) != 2 || failed[0] != 0 || failed[1] != 2 {
		t.Fatalf("failed=%v want [0 2]", failed)
	}
}
// TestResetBenchmarkGPUsResetsEachGPU checks the privileged path of
// resetBenchmarkGPUs: with effective UID 0 and a fake nvidia-smi script
// resolved through satLookPath, the script must be invoked with "-i <index> -r"
// for each requested GPU, in order, and no GPU may be reported as failed.
//
// The test is intentionally NOT marked t.Parallel(): it overwrites the
// package-level benchmarkGeteuid, benchmarkSleep and satLookPath hooks, and
// running it concurrently with another test that swaps or reads those hooks is
// a data race that makes both tests flaky.
func TestResetBenchmarkGPUsResetsEachGPU(t *testing.T) {
	dir := t.TempDir()
	script := filepath.Join(dir, "nvidia-smi")
	argsLog := filepath.Join(dir, "args.log")

	// Single-quote the log path so a temp directory containing spaces or shell
	// metacharacters cannot break the redirect inside the generated script.
	quotedLog := "'" + strings.ReplaceAll(argsLog, "'", `'\''`) + "'"
	// The fake nvidia-smi appends its arguments to args.log and prints "ok".
	body := "#!/bin/sh\nprintf '%s\\n' \"$*\" >> " + quotedLog + "\nprintf 'ok\\n'\n"
	if err := os.WriteFile(script, []byte(body), 0755); err != nil {
		t.Fatalf("write script: %v", err)
	}

	oldGeteuid := benchmarkGeteuid
	oldSleep := benchmarkSleep
	oldLookPath := satLookPath
	// Register the restore before mutating so the hooks are always put back.
	t.Cleanup(func() {
		benchmarkGeteuid = oldGeteuid
		benchmarkSleep = oldSleep
		satLookPath = oldLookPath
	})

	// Pretend to be root and make any sleep a no-op so the test stays fast.
	benchmarkGeteuid = func() int { return 0 }
	benchmarkSleep = func(time.Duration) {}
	// Redirect only nvidia-smi to the fake script; other lookups stay real.
	satLookPath = func(file string) (string, error) {
		if file == "nvidia-smi" {
			return script, nil
		}
		return exec.LookPath(file)
	}

	failed := resetBenchmarkGPUs(context.Background(), filepath.Join(dir, "verbose.log"), []int{2, 5}, nil)
	if len(failed) != 0 {
		t.Fatalf("failed=%v want no failures", failed)
	}

	raw, err := os.ReadFile(argsLog)
	if err != nil {
		t.Fatalf("read args log: %v", err)
	}
	got := strings.Fields(string(raw))
	want := []string{"-i", "2", "-r", "-i", "5", "-r"}
	if strings.Join(got, " ") != strings.Join(want, " ") {
		t.Fatalf("args=%v want %v", got, want)
	}
}
func TestNormalizeNvidiaBenchmarkOptionsPreservesRunNCCLChoice(t *testing.T) {
t.Parallel()