WIP: checkpoint current tree
This commit is contained in:
@@ -30,22 +30,23 @@ const (
|
||||
|
||||
// taskNames maps target → human-readable name for validate (SAT) runs.
|
||||
var taskNames = map[string]string{
|
||||
"nvidia": "NVIDIA SAT",
|
||||
"nvidia-stress": "NVIDIA GPU Stress",
|
||||
"memory": "Memory SAT",
|
||||
"storage": "Storage SAT",
|
||||
"cpu": "CPU SAT",
|
||||
"amd": "AMD GPU SAT",
|
||||
"amd-mem": "AMD GPU MEM Integrity",
|
||||
"amd-bandwidth": "AMD GPU MEM Bandwidth",
|
||||
"amd-stress": "AMD GPU Burn-in",
|
||||
"memory-stress": "Memory Burn-in",
|
||||
"sat-stress": "SAT Stress (stressapptest)",
|
||||
"platform-stress": "Platform Thermal Cycling",
|
||||
"audit": "Audit",
|
||||
"support-bundle": "Support Bundle",
|
||||
"install": "Install to Disk",
|
||||
"install-to-ram": "Install to RAM",
|
||||
"nvidia": "NVIDIA SAT",
|
||||
"nvidia-benchmark": "NVIDIA Benchmark",
|
||||
"nvidia-stress": "NVIDIA GPU Stress",
|
||||
"memory": "Memory SAT",
|
||||
"storage": "Storage SAT",
|
||||
"cpu": "CPU SAT",
|
||||
"amd": "AMD GPU SAT",
|
||||
"amd-mem": "AMD GPU MEM Integrity",
|
||||
"amd-bandwidth": "AMD GPU MEM Bandwidth",
|
||||
"amd-stress": "AMD GPU Burn-in",
|
||||
"memory-stress": "Memory Burn-in",
|
||||
"sat-stress": "SAT Stress (stressapptest)",
|
||||
"platform-stress": "Platform Thermal Cycling",
|
||||
"audit": "Audit",
|
||||
"support-bundle": "Support Bundle",
|
||||
"install": "Install to Disk",
|
||||
"install-to-ram": "Install to RAM",
|
||||
}
|
||||
|
||||
// burnNames maps target → human-readable name when a burn profile is set.
|
||||
@@ -108,8 +109,11 @@ type taskParams struct {
|
||||
DiagLevel int `json:"diag_level,omitempty"`
|
||||
GPUIndices []int `json:"gpu_indices,omitempty"`
|
||||
ExcludeGPUIndices []int `json:"exclude_gpu_indices,omitempty"`
|
||||
SizeMB int `json:"size_mb,omitempty"`
|
||||
Loader string `json:"loader,omitempty"`
|
||||
BurnProfile string `json:"burn_profile,omitempty"`
|
||||
BenchmarkProfile string `json:"benchmark_profile,omitempty"`
|
||||
RunNCCL bool `json:"run_nccl,omitempty"`
|
||||
DisplayName string `json:"display_name,omitempty"`
|
||||
Device string `json:"device,omitempty"` // for install
|
||||
PlatformComponents []string `json:"platform_components,omitempty"`
|
||||
@@ -547,6 +551,18 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
|
||||
} else {
|
||||
archive, err = a.RunNvidiaAcceptancePack("", j.append)
|
||||
}
|
||||
case "nvidia-benchmark":
|
||||
if a == nil {
|
||||
err = fmt.Errorf("app not configured")
|
||||
break
|
||||
}
|
||||
archive, err = a.RunNvidiaBenchmarkCtx(ctx, "", platform.NvidiaBenchmarkOptions{
|
||||
Profile: t.params.BenchmarkProfile,
|
||||
SizeMB: t.params.SizeMB,
|
||||
GPUIndices: t.params.GPUIndices,
|
||||
ExcludeGPUIndices: t.params.ExcludeGPUIndices,
|
||||
RunNCCL: t.params.RunNCCL,
|
||||
}, j.append)
|
||||
case "nvidia-stress":
|
||||
if a == nil {
|
||||
err = fmt.Errorf("app not configured")
|
||||
|
||||
Reference in New Issue
Block a user