Unify NVIDIA GPU recovery paths
This commit is contained in:
@@ -404,14 +404,7 @@ func normalizeNvidiaBusID(v string) string {
|
||||
}
|
||||
|
||||
func (s *System) ResetNvidiaGPU(index int) (string, error) {
|
||||
if index < 0 {
|
||||
return "", fmt.Errorf("gpu index must be >= 0")
|
||||
}
|
||||
out, err := runNvidiaRecover("reset-gpu", strconv.Itoa(index))
|
||||
if strings.TrimSpace(out) == "" && err == nil {
|
||||
out = "GPU reset completed.\n"
|
||||
}
|
||||
return out, err
|
||||
return resetNvidiaGPU(index)
|
||||
}
|
||||
|
||||
// RunNCCLTests runs nccl-tests all_reduce_perf across the selected NVIDIA GPUs.
|
||||
|
||||
Reference in New Issue
Block a user