Seed power ramp with single-card TDP limits
This commit is contained in:
@@ -3502,10 +3502,21 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
|
|||||||
_ = os.MkdirAll(stepDir, 0755)
|
_ = os.MkdirAll(stepDir, 0755)
|
||||||
|
|
||||||
// Reuse the latest stable limits as starting points, but re-check every
|
// Reuse the latest stable limits as starting points, but re-check every
|
||||||
// active GPU in this hotter configuration.
|
// active GPU in this hotter configuration. For the newly introduced GPU,
|
||||||
seedForStep := make(map[int]int, len(stableLimits))
|
// seed from its single-card calibration so we do not restart from the
|
||||||
for k, v := range stableLimits {
|
// default TDP when a prior derated limit is already known.
|
||||||
seedForStep[k] = v
|
seedForStep := make(map[int]int, len(subset))
|
||||||
|
for _, idx := range subset {
|
||||||
|
if lim, ok := stableLimits[idx]; ok && lim > 0 {
|
||||||
|
seedForStep[idx] = lim
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if base, ok := calibByIndex[idx]; ok {
|
||||||
|
lim := int(math.Round(base.AppliedPowerLimitW))
|
||||||
|
if lim > 0 {
|
||||||
|
seedForStep[idx] = lim
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",
|
logFunc(fmt.Sprintf("power ramp: step %d/%d — revalidating %d active GPU(s) including new GPU %d",
|
||||||
|
|||||||
Reference in New Issue
Block a user