Detect PSU faults during perf and power benchmarks

Snapshot the IPMI "Power Supply" sensor states before and after each benchmark
run, then compare the two snapshots to surface only *new* anomalies (faults that
were already present before the run are excluded). Results land in
NvidiaBenchmarkResult.PSUIssues and NvidiaPowerBenchResult.PSUIssues
(JSON: psu_issues) and are printed in the text benchmark report under a
"PSU Issues" section.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 15:08:41 +03:00
parent 7d64e5d215
commit 028bb30333
3 changed files with 82 additions and 0 deletions

View File

@@ -383,6 +383,16 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
}
}
// ── PSU Issues ────────────────────────────────────────────────────────────
if len(result.PSUIssues) > 0 {
b.WriteString("## PSU Issues\n\n")
b.WriteString("The following power supply anomalies were detected during the benchmark:\n\n")
for _, issue := range result.PSUIssues {
fmt.Fprintf(&b, "- ⛔ %s\n", issue)
}
b.WriteString("\n")
}
// ── Cooling ───────────────────────────────────────────────────────────────
if cooling := result.Cooling; cooling != nil {
b.WriteString("## Cooling\n\n")