Seed power ramp with single-card TDP limits

This commit is contained in:
Mikhail Chusavitin
2026-04-16 11:43:01 +03:00
parent b4280941f5
commit dca4afb8d0

View File

@@ -3502,10 +3502,21 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
 _ = os.MkdirAll(stepDir, 0755)
 // Reuse the latest stable limits as starting points, but re-check every
-// active GPU in this hotter configuration.
-seedForStep := make(map[int]int, len(stableLimits))
-for k, v := range stableLimits {
-seedForStep[k] = v
+// active GPU in this hotter configuration. For the newly introduced GPU,
+// seed from its single-card calibration so we do not restart from the
+// default TDP when a prior derated limit is already known.
+seedForStep := make(map[int]int, len(subset))
+for _, idx := range subset {
+if lim, ok := stableLimits[idx]; ok && lim > 0 {
+seedForStep[idx] = lim
+continue
+}
+if base, ok := calibByIndex[idx]; ok {
+lim := int(math.Round(base.AppliedPowerLimitW))
+if lim > 0 {
+seedForStep[idx] = lim
+}
+}
 }
 logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",