Refactor validate modes, fix benchmark report and IPMI power
- Replace diag level 1-4 dropdown with Validate/Stress radio buttons
- Validate: dcgmi L2, 60s CPU, 256MB/1p memtester, SMART short
- Stress: dcgmi L3 + targeted_stress in Run All, 30min CPU, 1GB/3p memtester, SMART long/NVMe extended
- Parallel GPU mode: spawn single task for all GPUs instead of splitting per model
- Benchmark table: per-GPU columns for sequential runs, server-wide column for parallel
- Benchmark report converted to Markdown with server model, GPU model, version in header; only steady-state charts
- Fix IPMI power parsing in benchmark (was looking for 'Current Power', correct field is 'Instantaneous power reading')

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -137,8 +137,9 @@ func TestRenderBenchmarkReportIncludesFindingsAndScores(t *testing.T) {
|
||||
for _, needle := range []string{
|
||||
"Executive Summary",
|
||||
"GPU 0 spent measurable time under SW power cap.",
|
||||
"Composite score: 1176.00",
|
||||
"fp16_tensor: 700.00 TOPS",
|
||||
"1176.00",
|
||||
"fp16_tensor",
|
||||
"700.00",
|
||||
} {
|
||||
if !strings.Contains(report, needle) {
|
||||
t.Fatalf("report missing %q\n%s", needle, report)
|
||||
@@ -164,7 +165,7 @@ func TestRenderBenchmarkReportIncludesTerminalChartsWithoutANSI(t *testing.T) {
|
||||
})
|
||||
|
||||
for _, needle := range []string{
|
||||
"Terminal Charts",
|
||||
"Steady-State Charts",
|
||||
"GPU 0 Steady State",
|
||||
"GPU 0 chart",
|
||||
"42┤───",
|
||||
|
||||
Reference in New Issue
Block a user