audit: switch power benchmark load to dcgmproftester

2026-04-20 06:57:14 +03:00
parent 65bcc9ce81
commit 17118298bd
3 changed files with 962 additions and 161 deletions
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -240,6 +240,47 @@ func setBenchmarkPowerLimit(ctx context.Context, verboseLog string, gpuIndex, po
 	return nil
 }

+// benchmarkPowerEngine reads BEE_BENCH_POWER_ENGINE; any value other than the targeted-power constant selects dcgmproftester.
+func benchmarkPowerEngine() string {
+	requested := strings.ToLower(os.Getenv("BEE_BENCH_POWER_ENGINE"))
+	if strings.TrimSpace(requested) == BenchmarkPowerEngineTargetedPower {
+		return BenchmarkPowerEngineTargetedPower
+	}
+	return BenchmarkPowerEngineDCGMProfTester
+}
+
+// benchmarkPowerEngineLabel maps an engine name to the tool label used in log messages, notes and findings.
+func benchmarkPowerEngineLabel(engine string) string {
+	normalized := strings.TrimSpace(strings.ToLower(engine))
+	if normalized != BenchmarkPowerEngineTargetedPower {
+		return "dcgmproftester"
+	}
+	return "dcgmi diag targeted_power"
+}
+
+// resolveBenchmarkPowerLoadCommand returns the argv and extra environment for the
+// power load chosen by benchmarkPowerEngine, using the normalized durationSec.
+func resolveBenchmarkPowerLoadCommand(durationSec int, gpuIndices []int) ([]string, []string, error) {
+	engine := benchmarkPowerEngine()
+	seconds := normalizeNvidiaBurnDuration(durationSec)
+	if engine == BenchmarkPowerEngineTargetedPower {
+		return nvidiaDCGMNamedDiagCommand("targeted_power", seconds, gpuIndices), nil, nil
+	}
+	if len(gpuIndices) > 1 {
+		// More than one GPU: use bee-dcgmproftester-staggered with a zero stagger.
+		secondsArg := strconv.Itoa(seconds)
+		devicesArg := joinIndexList(gpuIndices)
+		argv := []string{"bee-dcgmproftester-staggered", "--seconds", secondsArg, "--stagger-seconds", "0", "--devices", devicesArg}
+		return argv, nil, nil
+	}
+	// Otherwise run dcgmproftester itself with the environment from nvidiaVisibleDevicesEnv.
+	argv, err := resolveDCGMProfTesterCommand("--no-dcgm-validation", "-t", "1004", "-d", strconv.Itoa(seconds))
+	if err != nil {
+		return nil, nil, err
+	}
+	return argv, nvidiaVisibleDevicesEnv(gpuIndices), nil
+}
+
 func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, logFunc func(string)) (string, error) {
 	if ctx == nil {
 		ctx = context.Background()
@@ -384,10 +425,10 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv

 			// Sample server idle power once (first GPU only — server state is global).
 			if !serverIdleOK {
-				if w, ok := sampleIPMIPowerSeries(ctx, maxInt(spec.BaselineSec, 10)); ok {
+				if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, maxInt(spec.BaselineSec, 10), benchmarkPowerAutotuneSampleInterval); ok {
 					serverIdleW = w
 					serverIdleOK = true
-					logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+					logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 				}
 			}

@@ -430,7 +471,16 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 				"--precision-plan-seconds", benchmarkPlanDurationsCSV(planPhases),
 			}
 			logFunc(fmt.Sprintf("GPU %d: uninterrupted precision plan (%d precision phases x %ds, mixed %ds)", idx, len(supportedPrecisions), basePhaseSec, mixedPhaseSec))
+			serverPowerStopCh := make(chan struct{})
+			serverPowerCh := startSelectedPowerSourceSampler(serverPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 			_, phaseRowsByStage, phaseLogs, planErr := runBenchmarkPlannedCommandWithMetrics(ctx, verboseLog, fmt.Sprintf("gpu-%d-precision-plan.log", idx), planCmd, nil, []int{idx}, planPhases, logFunc)
+			close(serverPowerStopCh)
+			if serverPowerSamples := <-serverPowerCh; len(serverPowerSamples) > 0 {
+				serverLoadedWSum += benchmarkMean(serverPowerSamples)
+				serverLoadedSamples++
+				serverLoadedOK = true
+				logFunc(fmt.Sprintf("GPU %d: server loaded power (%s avg): %.0f W", idx, opts.ServerPowerSource, benchmarkMean(serverPowerSamples)))
+			}
 			for _, phaseSpec := range planPhases {
 				if rows := phaseRowsByStage[phaseSpec.MetricStage]; len(rows) > 0 {
 					appendBenchmarkMetrics(&metricRows, rows, phaseSpec.MetricStage, &metricTimelineSec, float64(phaseSpec.DurationSec))
@@ -461,48 +511,6 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv

 			beforeThrottle, _ := queryThrottleCounters(idx)
 			logFunc(fmt.Sprintf("GPU %d: steady compute (combined, %ds)", idx, mixedPhaseSec))
-
-			// Sample server power via IPMI in parallel with the steady phase.
-			// We collect readings every 5s and average them.
-			ipmiStopCh := make(chan struct{})
-			ipmiResultCh := make(chan float64, 1)
-			go func() {
-				defer close(ipmiResultCh)
-				var samples []float64
-				ticker := time.NewTicker(5 * time.Second)
-				defer ticker.Stop()
-				// First sample after a short warmup delay.
-				select {
-				case <-ipmiStopCh:
-					return
-				case <-time.After(15 * time.Second):
-				}
-				for {
-					if w, err := queryIPMIServerPowerW(); err == nil {
-						samples = append(samples, w)
-					}
-					select {
-					case <-ipmiStopCh:
-						if len(samples) > 0 {
-							var sum float64
-							for _, w := range samples {
-								sum += w
-							}
-							ipmiResultCh <- sum / float64(len(samples))
-						}
-						return
-					case <-ticker.C:
-					}
-				}
-			}()
-
-			close(ipmiStopCh)
-			if loadedW, ok := <-ipmiResultCh; ok {
-				serverLoadedWSum += loadedW
-				serverLoadedSamples++
-				serverLoadedOK = true
-				logFunc(fmt.Sprintf("GPU %d: server loaded power (IPMI): %.0f W", idx, loadedW))
-			}
 			afterThrottle, _ := queryThrottleCounters(idx)
 			if planErr != nil {
 				gpuResult.Notes = append(gpuResult.Notes, "precision plan failed: "+planErr.Error())
@@ -652,7 +660,7 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
 	if serverLoadedSamples > 0 {
 		serverLoadedW = serverLoadedWSum / float64(serverLoadedSamples)
 	}
-	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuReportedSumW, serverIdleOK && serverLoadedOK)
+	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuReportedSumW, opts.ServerPowerSource, serverIdleOK && serverLoadedOK)
 	result.Cooling = summarizeBenchmarkCooling(metricRows)

 	// Apply server-power penalty when IPMI reports the server delta is much
@@ -707,6 +715,7 @@ func normalizeNvidiaBenchmarkOptionsForBenchmark(opts NvidiaBenchmarkOptions) Nv
 	if opts.SizeMB < 0 {
 		opts.SizeMB = 0
 	}
+	opts.ServerPowerSource = normalizeBenchmarkPowerSource(opts.ServerPowerSource)
 	opts.GPUIndices = dedupeSortedIndices(opts.GPUIndices)
 	opts.ExcludeGPUIndices = dedupeSortedIndices(opts.ExcludeGPUIndices)
 	return opts
@@ -2535,10 +2544,14 @@ loop:
 }

 // characterizeServerPower computes BenchmarkServerPower from idle and loaded
-// IPMI samples plus the GPU-reported average power during steady state.
-func characterizeServerPower(idleW, loadedW, gpuReportedSumW float64, ipmiAvailable bool) *BenchmarkServerPower {
-	sp := &BenchmarkServerPower{Available: ipmiAvailable}
-	if !ipmiAvailable {
+// samples plus the GPU-reported average power during steady state.
+func characterizeServerPower(idleW, loadedW, gpuReportedSumW float64, source string, available bool) *BenchmarkServerPower {
+	sp := &BenchmarkServerPower{
+		Available:         available,
+		Source:            normalizeBenchmarkPowerSource(source),
+		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
+	}
+	if !available {
 		sp.Notes = append(sp.Notes, "IPMI power reading unavailable; server-side power characterization skipped")
 		return sp
 	}
@@ -2671,10 +2684,10 @@ func runNvidiaBenchmarkParallel(

 	// Sample server idle power once.
 	if !*serverIdleOK {
-		if w, ok := sampleIPMIPowerSeries(ctx, maxInt(spec.BaselineSec, 10)); ok {
+		if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, maxInt(spec.BaselineSec, 10), benchmarkPowerAutotuneSampleInterval); ok {
 			*serverIdleW = w
 			*serverIdleOK = true
-			logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+			logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 		}
 	}

@@ -2728,7 +2741,16 @@ func runNvidiaBenchmarkParallel(
 		"--precision-plan-seconds", benchmarkPlanDurationsCSV(planPhases),
 	}
 	logFunc(fmt.Sprintf("GPUs %s: uninterrupted precision plan (%d precision phases x %ds, mixed %ds)", allDevices, len(supportedPrecisions), basePhaseSec, mixedPhaseSec))
+	serverPowerStopCh := make(chan struct{})
+	serverPowerCh := startSelectedPowerSourceSampler(serverPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 	_, phaseRowsByStage, phaseLogs, planErr := runBenchmarkPlannedCommandWithMetrics(ctx, verboseLog, "gpu-all-precision-plan.log", planCmd, nil, selected, planPhases, logFunc)
+	close(serverPowerStopCh)
+	if serverPowerSamples := <-serverPowerCh; len(serverPowerSamples) > 0 {
+		*serverLoadedWSum += benchmarkMean(serverPowerSamples)
+		(*serverLoadedSamples)++
+		*serverLoadedOK = true
+		logFunc(fmt.Sprintf("GPUs %s: server loaded power (%s avg): %.0f W", allDevices, opts.ServerPowerSource, benchmarkMean(serverPowerSamples)))
+	}
 	for _, phaseSpec := range planPhases {
 		if rows := phaseRowsByStage[phaseSpec.MetricStage]; len(rows) > 0 {
 			appendBenchmarkMetrics(allMetricRows, rows, phaseSpec.MetricStage, metricTimelineSec, float64(phaseSpec.DurationSec))
@@ -2770,46 +2792,6 @@ func runNvidiaBenchmarkParallel(
 	}

 	logFunc(fmt.Sprintf("GPUs %s: parallel steady compute (combined, %ds)", allDevices, mixedPhaseSec))
-
-	// Sample server power via IPMI in parallel with steady phase.
-	ipmiStopCh := make(chan struct{})
-	ipmiResultCh := make(chan float64, 1)
-	go func() {
-		defer close(ipmiResultCh)
-		var samples []float64
-		ticker := time.NewTicker(5 * time.Second)
-		defer ticker.Stop()
-		select {
-		case <-ipmiStopCh:
-			return
-		case <-time.After(15 * time.Second):
-		}
-		for {
-			if w, err := queryIPMIServerPowerW(); err == nil {
-				samples = append(samples, w)
-			}
-			select {
-			case <-ipmiStopCh:
-				if len(samples) > 0 {
-					var sum float64
-					for _, w := range samples {
-						sum += w
-					}
-					ipmiResultCh <- sum / float64(len(samples))
-				}
-				return
-			case <-ticker.C:
-			}
-		}
-	}()
-
-	close(ipmiStopCh)
-	if loadedW, ok := <-ipmiResultCh; ok {
-		*serverLoadedWSum += loadedW
-		(*serverLoadedSamples)++
-		*serverLoadedOK = true
-		logFunc(fmt.Sprintf("GPUs %s: server loaded power (IPMI): %.0f W", allDevices, loadedW))
-	}
 	afterThrottle := make(map[int]BenchmarkThrottleCounters, len(selected))
 	for _, idx := range selected {
 		afterThrottle[idx], _ = queryThrottleCounters(idx)
@@ -3040,8 +3022,8 @@ func summarizeCPULoad(samples []float64) *BenchmarkCPULoad {
 	return cl
 }

-// runBenchmarkPowerCalibration runs targeted_power for the supplied GPU set and
-// actively watches throttle counters. seedLimits, when provided, are treated as
+// runBenchmarkPowerCalibration runs the configured power-fit load for the supplied
+// GPU set and actively watches throttle counters. seedLimits, when provided, are treated as
 // the starting point for this calibration pass rather than as immutable fixed
 // limits. This matters during cumulative ramp-up: once an additional GPU is
 // introduced, every already-active GPU must be revalidated under the new
@@ -3070,10 +3052,19 @@ func runBenchmarkPowerCalibration(
 	// doubling each retry until it would exceed the cap, at which point the
 	// next busy response fails the calibration immediately.
 	const dcgmResourceBusyMaxDelaySec = 300
+	engine := benchmarkPowerEngine()
+	engineLabel := benchmarkPowerEngineLabel(engine)

-	if _, err := exec.LookPath("dcgmi"); err != nil {
-		logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
-		return map[int]benchmarkPowerCalibrationResult{}, nil, nil
+	if engine == BenchmarkPowerEngineTargetedPower {
+		if _, err := exec.LookPath("dcgmi"); err != nil {
+			logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
+			return map[int]benchmarkPowerCalibrationResult{}, nil, nil
+		}
+	} else {
+		if _, _, err := resolveBenchmarkPowerLoadCommand(calibDurationSec, gpuIndices); err != nil {
+			logFunc("power calibration: dcgmproftester not found, skipping (will use default power limit)")
+			return map[int]benchmarkPowerCalibrationResult{}, nil, nil
+		}
 	}
 	if killed := KillTestWorkers(); len(killed) > 0 {
 		for _, p := range killed {
@@ -3206,7 +3197,7 @@ calibDone:
 		sharedAttempt++
 		for _, s := range active {
 			s.calib.Attempts++
-			logFunc(fmt.Sprintf("power calibration: GPU %d targeted_power attempt %d at %d W for %ds", s.idx, s.calib.Attempts, s.appliedLimitW, calibDurationSec))
+			logFunc(fmt.Sprintf("power calibration: GPU %d %s attempt %d at %d W for %ds", s.idx, engineLabel, s.calib.Attempts, s.appliedLimitW, calibDurationSec))
 		}

 		// Snapshot throttle counters for all active GPUs before the run.
@@ -3215,14 +3206,22 @@ calibDone:
 			beforeThrottle[s.idx], _ = queryThrottleCounters(s.idx)
 		}

-		// Run targeted_power for ALL gpuIndices simultaneously so every card
+		// Run the selected power-fit load for ALL gpuIndices simultaneously so every card
 		// is under load during calibration — this reflects real server thermals.
 		logName := fmt.Sprintf("power-calibration-attempt-%d.log", sharedAttempt)
-		cmd := nvidiaDCGMNamedDiagCommand("targeted_power", calibDurationSec, gpuIndices)
+		cmd, env, err := resolveBenchmarkPowerLoadCommand(calibDurationSec, gpuIndices)
+		if err != nil {
+			for _, s := range active {
+				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("failed to resolve %s command: %v", engineLabel, err))
+				s.converged = true
+			}
+			logFunc(fmt.Sprintf("power calibration: failed to resolve %s command: %v", engineLabel, err))
+			break calibDone
+		}
 		attemptCtx, cancelAttempt := context.WithCancel(ctx)
 		doneCh := make(chan sharedAttemptResult, 1)
 		go func() {
-			out, rows, err := runBenchmarkCommandWithMetrics(attemptCtx, verboseLog, logName, cmd, nil, gpuIndices, logFunc)
+			out, rows, err := runBenchmarkCommandWithMetrics(attemptCtx, verboseLog, logName, cmd, env, gpuIndices, logFunc)
 			doneCh <- sharedAttemptResult{out: out, rows: rows, err: err}
 		}()

@@ -3245,8 +3244,8 @@ calibDone:
 					if err != nil {
 						continue
 					}
-					// Record throttle but do NOT cancel — let dcgmi finish so
-					// nv-hostengine releases the slot cleanly before the next attempt.
+					// Record throttle but do NOT cancel — let the load command finish so
+					// runtime resources release cleanly before the next attempt.
 					if reason := benchmarkCalibrationThrottleReason(beforeThrottle[s.idx], after); reason != "" {
 						throttleReasons[s.idx] = reason
 						logFunc(fmt.Sprintf("power calibration: GPU %d detected %s throttle at %d W, waiting for run to finish", s.idx, reason, s.appliedLimitW))
@@ -3359,9 +3358,9 @@ calibDone:
 				logFunc(fmt.Sprintf("power calibration: GPU %d throttled (%s) at %d W, reducing power limit", s.idx, throttle, s.appliedLimitW))
 			case ar.err != nil:
 				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("targeted_power attempt %d failed at %d W: %v", s.calib.Attempts, s.appliedLimitW, ar.err))
-				logFunc(fmt.Sprintf("power calibration: GPU %d targeted_power failed at %d W: %v", s.idx, s.appliedLimitW, ar.err))
+				logFunc(fmt.Sprintf("power calibration: GPU %d %s failed at %d W: %v", s.idx, engineLabel, s.appliedLimitW, ar.err))
 			default:
-				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("targeted_power attempt %d at %d W: no valid power telemetry", s.calib.Attempts, s.appliedLimitW))
+				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("%s attempt %d at %d W: no valid power telemetry", engineLabel, s.calib.Attempts, s.appliedLimitW))
 				logFunc(fmt.Sprintf("power calibration: GPU %d attempt %d at %d W: no valid telemetry", s.idx, s.calib.Attempts, s.appliedLimitW))
 			}

@@ -3384,7 +3383,7 @@ calibDone:
 						s.calib.Completed = true
 					}
 				} else {
-					s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable targeted_power limit within %d W of the default", maxDerateW))
+					s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable %s limit within %d W of the default", engineLabel, maxDerateW))
 				}
 				s.calib.MetricRows = filterRowsByGPU(ar.rows, s.idx)
 				s.converged = true
@@ -3399,7 +3398,7 @@ calibDone:
 				next = (s.lo + s.hi) / 2
 			}
 			if next < s.minLimitW {
-				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable targeted_power limit within %d W of the default", maxDerateW))
+				s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable %s limit within %d W of the default", engineLabel, maxDerateW))
 				s.converged = true
 				continue
 			}
@@ -4117,13 +4116,13 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 	}
 	durationSec := powerBenchDurationSec(opts.Profile)

-	// Sample IPMI idle power before any GPU load.
+	// Sample server idle power before any GPU load.
 	var serverIdleW float64
 	var serverIdleOK bool
-	if w, ok := sampleIPMIPowerSeries(ctx, 10); ok {
+	if w, ok := sampleBenchmarkPowerSourceSeries(ctx, opts.ServerPowerSource, 10, benchmarkPowerAutotuneSampleInterval); ok {
 		serverIdleW = w
 		serverIdleOK = true
-		logFunc(fmt.Sprintf("server idle power (IPMI): %.0f W", w))
+		logFunc(fmt.Sprintf("server idle power (%s): %.0f W", opts.ServerPowerSource, w))
 	}
 	sdrIdle := sampleIPMISDRPowerSensors()
 	psuBefore := psuStatusSnapshot()
@@ -4141,26 +4140,18 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		_ = os.MkdirAll(singleDir, 0755)
 		singleInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
 		logFunc(fmt.Sprintf("power calibration: GPU %d single-card baseline", idx))
-		ipmiSingleCtx, ipmiSingleCancel := context.WithCancel(ctx)
-		ipmiSingleDone := make(chan float64, 1)
-		go func() {
-			defer close(ipmiSingleDone)
-			if w, ok := sampleIPMIPowerSeries(ipmiSingleCtx, 3600); ok {
-				ipmiSingleDone <- w
-			}
-		}()
+		singlePowerStopCh := make(chan struct{})
+		singlePowerCh := startSelectedPowerSourceSampler(singlePowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 		c, restore, singleRows := runBenchmarkPowerCalibration(ctx, verboseLog, singleDir, []int{idx}, singleInfo, logFunc, nil, durationSec)
 		appendBenchmarkMetrics(&allPowerRows, singleRows, fmt.Sprintf("single-gpu-%d", idx), &powerCursor, 0)
-		ipmiSingleCancel()
+		close(singlePowerStopCh)
 		sdrSingle := sampleIPMISDRPowerSensors()
-		if sdrSingle.PSUInW > 0 {
+		if samples := <-singlePowerCh; len(samples) > 0 {
+			singleIPMILoadedW[idx] = benchmarkMean(samples)
+			logFunc(fmt.Sprintf("power calibration: GPU %d single-card server power (%s avg): %.0f W", idx, opts.ServerPowerSource, singleIPMILoadedW[idx]))
+		} else if opts.ServerPowerSource == BenchmarkPowerSourceSDRPSUInput && sdrSingle.PSUInW > 0 {
 			singleIPMILoadedW[idx] = sdrSingle.PSUInW
-			logFunc(fmt.Sprintf("power calibration: GPU %d single-card IPMI loaded: %.0f W (SDR PSU AC input)", idx, sdrSingle.PSUInW))
-		} else if w, ok := <-ipmiSingleDone; ok {
-			singleIPMILoadedW[idx] = w
-			logFunc(fmt.Sprintf("power calibration: GPU %d single-card IPMI loaded: %.0f W (DCMI)", idx, w))
-		} else {
-			<-ipmiSingleDone // drain channel
+			logFunc(fmt.Sprintf("power calibration: GPU %d single-card fallback server power (SDR snapshot): %.0f W", idx, sdrSingle.PSUInW))
 		}
 		allRestoreActions = append(allRestoreActions, restore...)
 		if r, ok := c[idx]; ok {
@@ -4234,11 +4225,11 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		result.RecommendedSlotOrder = append(result.RecommendedSlotOrder, gpu.Index)
 	}
 	if len(result.RecommendedSlotOrder) > 0 {
-		result.Findings = append(result.Findings, fmt.Sprintf("Recommended slot order for installation based on single-card targeted_power: %s.", joinIndexList(result.RecommendedSlotOrder)))
+		result.Findings = append(result.Findings, fmt.Sprintf("Recommended slot order for installation based on single-card %s: %s.", benchmarkPowerEngineLabel(benchmarkPowerEngine()), joinIndexList(result.RecommendedSlotOrder)))
 	}
 	for _, gpu := range gpus {
 		if gpu.Derated {
-			result.Findings = append(result.Findings, fmt.Sprintf("GPU %d required reduced power limit %.0f W to complete targeted_power.", gpu.Index, gpu.AppliedPowerLimitW))
+			result.Findings = append(result.Findings, fmt.Sprintf("GPU %d required reduced power limit %.0f W to complete %s.", gpu.Index, gpu.AppliedPowerLimitW, benchmarkPowerEngineLabel(benchmarkPowerEngine())))
 		}
 		if gpu.CoolingWarning != "" {
 			result.Findings = append(result.Findings, fmt.Sprintf(
@@ -4255,7 +4246,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 	// Phase 2: cumulative thermal ramp.
 	// Each step introduces one new GPU into an environment where all previously
 	// calibrated GPUs are already running at their fixed stable limits. The new
-	// GPU's stable TDP is searched via binary search (targeted_power) under real
+	// GPU's stable TDP is searched via binary search under real
 	// multi-GPU thermal load. Once found, its limit is fixed permanently for all
 	// subsequent steps. This ensures each GPU's limit reflects actual sustained
 	// power in the final full-system thermal state.
@@ -4294,7 +4285,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		}
 		if !firstCalib.Completed {
 			ramp.Status = "FAILED"
-			ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card targeted_power", firstIdx))
+			ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card %s", firstIdx, benchmarkPowerEngineLabel(benchmarkPowerEngine())))
 			result.OverallStatus = "PARTIAL"
 		} else if firstCalib.Derated {
 			ramp.Status = "PARTIAL"
@@ -4340,21 +4331,15 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			step, len(result.RecommendedSlotOrder), len(subset), newGPUIdx))

 		stepInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
-		ipmiStepCtx, ipmiStepCancel := context.WithCancel(ctx)
-		ipmiStepDone := make(chan float64, 1)
-		go func() {
-			defer close(ipmiStepDone)
-			if w, ok := sampleIPMIPowerSeries(ipmiStepCtx, 3600); ok {
-				ipmiStepDone <- w
-			}
-		}()
+		stepPowerStopCh := make(chan struct{})
+		stepPowerCh := startSelectedPowerSourceSampler(stepPowerStopCh, opts.ServerPowerSource, benchmarkPowerAutotuneSampleInterval)
 		stepCalib, stepRestore, stepRows := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, seedForStep, durationSec)
 		appendBenchmarkMetrics(&allPowerRows, stepRows, fmt.Sprintf("ramp-step-%d", step), &powerCursor, 0)
-		ipmiStepCancel()
+		close(stepPowerStopCh)
 		var stepIPMILoadedW float64
 		var stepIPMIOK bool
-		if w, ok := <-ipmiStepDone; ok {
-			stepIPMILoadedW = w
+		if samples := <-stepPowerCh; len(samples) > 0 {
+			stepIPMILoadedW = benchmarkMean(samples)
 			stepIPMIOK = true
 		}
 		// Accumulate restore actions; they all run in the outer defer.
@@ -4391,7 +4376,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 				}
 				ramp.Status = "FAILED"
 				ramp.Notes = append(ramp.Notes,
-					fmt.Sprintf("GPU %d did not complete targeted_power in ramp step %d; keeping previous stable limit %d W", idx, step, fallback))
+					fmt.Sprintf("GPU %d did not complete %s in ramp step %d; keeping previous stable limit %d W", idx, benchmarkPowerEngineLabel(benchmarkPowerEngine()), step, fallback))
 				result.OverallStatus = "PARTIAL"
 				continue
 			}
@@ -4427,24 +4412,24 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 			ramp.PSUSlotReadings = sdrStep.PSUSlots
 		}

-		if sdrStep.PSUInW > 0 {
-			// SDR PSU sum is available — use it for server power (includes all PSUs).
-			ramp.ServerLoadedW = sdrStep.PSUInW
-			ramp.ServerDeltaW = sdrStep.PSUInW - sdrIdle.PSUInW
-			logFunc(fmt.Sprintf("power ramp: step %d IPMI loaded: %.0f W (SDR PSU AC input)", step, sdrStep.PSUInW))
-			if step == len(result.RecommendedSlotOrder) {
-				serverLoadedW = sdrStep.PSUInW
-				serverLoadedOK = true
-				sdrLastStep = sdrStep
-			}
-		} else if stepIPMIOK && serverIdleOK && stepIPMILoadedW > 0 {
+		if stepIPMIOK && serverIdleOK && stepIPMILoadedW > 0 {
 			ramp.ServerLoadedW = stepIPMILoadedW
 			ramp.ServerDeltaW = stepIPMILoadedW - serverIdleW
-			logFunc(fmt.Sprintf("power ramp: step %d IPMI loaded: %.0f W (DCMI)", step, stepIPMILoadedW))
+			logFunc(fmt.Sprintf("power ramp: step %d server loaded power (%s avg): %.0f W", step, opts.ServerPowerSource, stepIPMILoadedW))
 			// The last step has all GPUs loaded — use it as the top-level loaded_w.
 			if step == len(result.RecommendedSlotOrder) {
 				serverLoadedW = stepIPMILoadedW
 				serverLoadedOK = true
+				sdrLastStep = sdrStep
+			}
+		} else if opts.ServerPowerSource == BenchmarkPowerSourceSDRPSUInput && sdrStep.PSUInW > 0 {
+			ramp.ServerLoadedW = sdrStep.PSUInW
+			ramp.ServerDeltaW = sdrStep.PSUInW - sdrIdle.PSUInW
+			logFunc(fmt.Sprintf("power ramp: step %d fallback server loaded power (SDR snapshot): %.0f W", step, sdrStep.PSUInW))
+			if step == len(result.RecommendedSlotOrder) {
+				serverLoadedW = sdrStep.PSUInW
+				serverLoadedOK = true
+				sdrLastStep = sdrStep
 			}
 		}

@@ -4502,7 +4487,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		gpuActualSumW = result.PlatformMaxTDPW
 	}
 	_ = serverIdleOK // used implicitly via characterizeServerPower
-	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuActualSumW, serverIdleOK && serverLoadedOK)
+	result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, gpuActualSumW, opts.ServerPowerSource, serverIdleOK && serverLoadedOK)
 	// Supplement DCMI with SDR multi-source data via collector's PSU slot patterns.
 	// Per-slot readings enable correlation with audit HardwarePowerSupply entries.
 	if result.ServerPower != nil {