Seed power ramp with single-card TDP limits

2026-04-16 11:43:01 +03:00
parent b4280941f5
commit dca4afb8d0
1 changed files with 15 additions and 4 deletions
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -3502,10 +3502,21 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 		_ = os.MkdirAll(stepDir, 0755)

 		// Reuse the latest stable limits as starting points, but re-check every
-		// active GPU in this hotter configuration.
-		seedForStep := make(map[int]int, len(stableLimits))
-		for k, v := range stableLimits {
-			seedForStep[k] = v
+		// active GPU in this hotter configuration. Any GPU lacking a positive
+		// stable limit (normally the newly introduced one) is seeded from its
+		// single-card calibration instead of restarting from the default TDP.
+		seedForStep := make(map[int]int, len(subset))
+		for _, idx := range subset {
+			if lim, ok := stableLimits[idx]; ok && lim > 0 {
+				seedForStep[idx] = lim
+				continue
+			}
+			if base, ok := calibByIndex[idx]; ok {
+				lim := int(math.Round(base.AppliedPowerLimitW))
+				if lim > 0 {
+					seedForStep[idx] = lim
+				}
+			}
 		}

 		logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",