Add NVIDIA stress loader selection and DCGM 4 support

This commit is contained in:
Mikhail Chusavitin
2026-03-31 11:15:15 +03:00
parent 20f834aa96
commit 6dee8f3509
31 changed files with 789 additions and 111 deletions

View File

@@ -356,6 +356,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
fs := flag.NewFlagSet("sat", flag.ContinueOnError)
fs.SetOutput(stderr)
duration := fs.Int("duration", 0, "stress-ng duration in seconds (cpu only; default: 60)")
diagLevel := fs.Int("diag-level", 0, "DCGM diagnostic level for nvidia (1=quick, 2=medium, 3=targeted stress, 4=extended stress; default: 1)")
if err := fs.Parse(args[1:]); err != nil {
if err == flag.ErrHelp {
return 0
@@ -370,7 +371,7 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
target := args[0]
if target != "nvidia" && target != "memory" && target != "storage" && target != "cpu" {
fmt.Fprintf(stderr, "bee sat: unknown target %q\n", target)
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>]")
fmt.Fprintln(stderr, "usage: bee sat nvidia|memory|storage|cpu [--duration <seconds>] [--diag-level <1-4>]")
return 2
}
@@ -382,7 +383,12 @@ func runSAT(args []string, stdout, stderr io.Writer) int {
logLine := func(s string) { fmt.Fprintln(os.Stderr, s) }
switch target {
case "nvidia":
archive, err = application.RunNvidiaAcceptancePack("", logLine)
level := *diagLevel
if level > 0 {
_, err = application.RunNvidiaAcceptancePackWithOptions(context.Background(), "", level, nil, logLine)
} else {
archive, err = application.RunNvidiaAcceptancePack("", logLine)
}
case "memory":
archive, err = application.RunMemoryAcceptancePackCtx(context.Background(), "", logLine)
case "storage":