diff --git a/audit/internal/platform/benchmark.go b/audit/internal/platform/benchmark.go index a182582..2fabde9 100644 --- a/audit/internal/platform/benchmark.go +++ b/audit/internal/platform/benchmark.go @@ -2476,9 +2476,6 @@ func runBenchmarkPowerCalibration( // calibSearchTolerance is the binary-search convergence threshold in watts. // When hi-lo ≤ this, the highest verified-stable limit (lo) is used. const calibSearchTolerance = 10 - // calibPreThrottleMarginW is subtracted from the telemetry-estimated - // pre-throttle power draw to produce a smarter initial search candidate. - const calibPreThrottleMarginW = 10 // dcgmResourceBusyMaxDelaySec caps the exponential back-off when DCGM // returns DCGM_ST_IN_USE (exit 222). The sequence is 1 s, 2 s, 4 s, … // doubling each retry until it would exceed the cap, at which point the @@ -2717,20 +2714,8 @@ func runBenchmarkPowerCalibration( break } - // Compute the next candidate. - // For thermal throttle: use the pre-throttle power draw from telemetry - // as a smarter initial estimate instead of the binary midpoint — it - // lands much closer to the true limit on the first attempt. - nextLimitW := (lo + hi) / 2 - if strings.Contains(throttleReason, "thermal") { - if onsetW := calibPreThrottlePowerW(perGPU); onsetW > 0 { - candidate := roundTo5W(int(math.Round(onsetW)) - calibPreThrottleMarginW) - if candidate > lo && candidate < hi { - nextLimitW = candidate - } - } - } - nextLimitW = roundTo5W(nextLimitW) + // Binary midpoint within the remaining search range. + nextLimitW := roundTo5W((lo + hi) / 2) // Ensure the candidate is strictly inside the search range. if nextLimitW <= lo { nextLimitW = lo + calibSearchTolerance @@ -2770,28 +2755,6 @@ func isDCGMResourceBusy(err error) bool { return errors.As(err, &exitErr) && exitErr.ExitCode() == 222 } -// calibPreThrottlePowerW estimates the GPU power draw just before thermal -// throttle onset by averaging the first quarter of telemetry rows. The early -// samples capture the GPU at peak before clock/power reduction kicks in. -func calibPreThrottlePowerW(rows []GPUMetricRow) float64 { - if len(rows) < 4 { - return 0 - } - n := len(rows) / 4 - var sum float64 - var cnt int - for _, r := range rows[:n] { - if r.PowerW > 0 { - sum += r.PowerW - cnt++ - } - } - if cnt == 0 { - return 0 - } - return sum / float64(cnt) -} - // roundTo5W rounds w to the nearest 5 W boundary. func roundTo5W(w int) int { return ((w + 2) / 5) * 5