Split bee-bench into perf and power workflows
This commit is contained in:
@@ -32,7 +32,8 @@ const (
|
||||
var taskNames = map[string]string{
|
||||
"nvidia": "NVIDIA SAT",
|
||||
"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
|
||||
"nvidia-benchmark": "NVIDIA Benchmark",
|
||||
"nvidia-bench-perf": "NVIDIA Bee Bench Perf",
|
||||
"nvidia-bench-power": "NVIDIA Bee Bench Power",
|
||||
"nvidia-compute": "NVIDIA Max Compute Load (dcgmproftester)",
|
||||
"nvidia-targeted-power": "NVIDIA Targeted Power (dcgmi diag targeted_power)",
|
||||
"nvidia-pulse": "NVIDIA Pulse Test (dcgmi diag pulse_test)",
|
||||
@@ -628,7 +629,7 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
||||
dur = 300
|
||||
}
|
||||
archive, err = a.RunNvidiaTargetedStressValidatePack(ctx, "", dur, t.params.GPUIndices, j.append)
|
||||
case "nvidia-benchmark":
|
||||
case "nvidia-bench-perf":
|
||||
if a == nil {
|
||||
err = fmt.Errorf("app not configured")
|
||||
break
|
||||
@@ -644,6 +645,31 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
||||
RampTotal: t.params.RampTotal,
|
||||
RampRunID: t.params.RampRunID,
|
||||
}, j.append)
|
||||
case "nvidia-bench-power":
|
||||
if a == nil {
|
||||
err = fmt.Errorf("app not configured")
|
||||
break
|
||||
}
|
||||
dur := t.params.Duration
|
||||
if dur <= 0 {
|
||||
switch strings.TrimSpace(strings.ToLower(t.params.BenchmarkProfile)) {
|
||||
case platform.NvidiaBenchmarkProfileStability:
|
||||
dur = 300
|
||||
case platform.NvidiaBenchmarkProfileOvernight:
|
||||
dur = 600
|
||||
default:
|
||||
dur = 120
|
||||
}
|
||||
}
|
||||
rampPlan, planErr := resolveNvidiaRampPlan(t.params.BenchmarkProfile, t.params.RampTotal > 0, t.params.GPUIndices)
|
||||
if planErr != nil {
|
||||
err = planErr
|
||||
break
|
||||
}
|
||||
if t.params.RampTotal > 0 && t.params.RampStep > 0 && dur <= 0 {
|
||||
dur = rampPlan.DurationSec
|
||||
}
|
||||
archive, err = a.RunNvidiaTargetedPowerPack(ctx, app.DefaultBeeBenchPowerDir, dur, t.params.GPUIndices, j.append)
|
||||
case "nvidia-compute":
|
||||
if a == nil {
|
||||
err = fmt.Errorf("app not configured")
|
||||
|
||||
Reference in New Issue
Block a user