feat(nccl): add nccl-tests all_reduce_perf for GPU bandwidth testing
- Dockerfile: install cuda-nvcc-13-0 from NVIDIA repo for compilation - build-nccl-tests.sh: downloads libnccl-dev for nccl.h, builds all_reduce_perf - build.sh: runs nccl-tests build, injects binary into /usr/local/bin/ - platform: RunNCCLTests() auto-detects GPU count, runs all_reduce_perf - TUI: NCCL bandwidth test entry in Burn-in Tests screen [N] hotkey Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -268,12 +268,9 @@ func TestHealthCheckGPUOpensNvidiaSATSetup(t *testing.T) {
|
||||
m.hcInitialized = true
|
||||
m.hcSel = [4]bool{true, true, true, true}
|
||||
|
||||
next, cmd := m.hcRunSingle(hcGPU)
|
||||
next, _ := m.hcRunSingle(hcGPU)
|
||||
got := next.(model)
|
||||
|
||||
if cmd == nil {
|
||||
t.Fatal("expected non-nil cmd (GPU list loader)")
|
||||
}
|
||||
if got.screen != screenNvidiaSATSetup {
|
||||
t.Fatalf("screen=%q want %q", got.screen, screenNvidiaSATSetup)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user