Seed power ramp with single-card TDP limits
This commit is contained in:
@@ -3502,10 +3502,21 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
|
||||
_ = os.MkdirAll(stepDir, 0755)
|
||||
|
||||
// Reuse the latest stable limits as starting points, but re-check every
|
||||
// active GPU in this hotter configuration.
|
||||
seedForStep := make(map[int]int, len(stableLimits))
|
||||
for k, v := range stableLimits {
|
||||
seedForStep[k] = v
|
||||
// active GPU in this hotter configuration. For the newly introduced GPU,
|
||||
// seed from its single-card calibration so we do not restart from the
|
||||
// default TDP when a prior derated limit is already known.
|
||||
seedForStep := make(map[int]int, len(subset))
|
||||
for _, idx := range subset {
|
||||
if lim, ok := stableLimits[idx]; ok && lim > 0 {
|
||||
seedForStep[idx] = lim
|
||||
continue
|
||||
}
|
||||
if base, ok := calibByIndex[idx]; ok {
|
||||
lim := int(math.Round(base.AppliedPowerLimitW))
|
||||
if lim > 0 {
|
||||
seedForStep[idx] = lim
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",
|
||||
|
||||
Reference in New Issue
Block a user