Refactor validate modes, fix benchmark report and IPMI power
- Replace diag level 1-4 dropdown with Validate/Stress radio buttons
- Validate: dcgmi L2, 60s CPU, 256MB/1p memtester, SMART short
- Stress: dcgmi L3 + targeted_stress in Run All, 30min CPU, 1GB/3p memtester, SMART long/NVMe extended
- Parallel GPU mode: spawn single task for all GPUs instead of splitting per model
- Benchmark table: per-GPU columns for sequential runs, server-wide column for parallel
- Benchmark report converted to Markdown with server model, GPU model, version in header; only steady-state charts
- Fix IPMI power parsing in benchmark (was looking for 'Current Power', correct field is 'Instantaneous power reading')

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -326,8 +326,8 @@ func (s *System) RunNvidiaBenchmark(ctx context.Context, baseDir string, opts Nv
|
||||
}
|
||||
|
||||
report := renderBenchmarkReportWithCharts(result, loadBenchmarkReportCharts(runDir, selected))
|
||||
if err := os.WriteFile(filepath.Join(runDir, "report.txt"), []byte(report), 0644); err != nil {
|
||||
return "", fmt.Errorf("write report.txt: %w", err)
|
||||
if err := os.WriteFile(filepath.Join(runDir, "report.md"), []byte(report), 0644); err != nil {
|
||||
return "", fmt.Errorf("write report.md: %w", err)
|
||||
}
|
||||
|
||||
summary := renderBenchmarkSummary(result)
|
||||
@@ -1183,18 +1183,8 @@ func queryIPMIServerPowerW() (float64, error) {
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("ipmitool dcmi power reading: %w", err)
|
||||
}
|
||||
for _, line := range strings.Split(string(out), "\n") {
|
||||
if strings.Contains(line, "Current Power") {
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) == 2 {
|
||||
val := strings.TrimSpace(strings.TrimSuffix(strings.TrimSpace(parts[1]), "Watts"))
|
||||
val = strings.TrimSpace(val)
|
||||
w, err := strconv.ParseFloat(val, 64)
|
||||
if err == nil && w > 0 {
|
||||
return w, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if w := parseDCMIPowerReading(string(out)); w > 0 {
|
||||
return w, nil
|
||||
}
|
||||
return 0, fmt.Errorf("could not parse ipmitool dcmi power reading output")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user