Add power results table, fix benchmark results refresh, bound memtester
- Benchmark page now shows two result sections: Performance (scores) and Power / Thermal Fit (slot table). After any benchmark task completes the results section auto-refreshes via GET /api/benchmark/results without a full page reload. - Power results table shows each GPU slot with nominal TDP, achieved stable power limit, and P95 observed power. Rows with derated cards are highlighted amber so under-performing slots stand out at a glance. Older runs are collapsed in a <details> summary. - memtester is now wrapped with timeout(1) so a stuck memory controller cannot cause Validate Memory to hang indefinitely. Wall-clock limit is ~2.5 min per 100 MB per pass plus a 2-minute buffer. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1529,6 +1529,11 @@ func (h *handler) handleAPINetworkRollback(w http.ResponseWriter, _ *http.Reques
|
||||
writeJSON(w, map[string]string{"status": "rolled back"})
|
||||
}
|
||||
|
||||
func (h *handler) handleAPIBenchmarkResults(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprint(w, renderBenchmarkResultsCard(h.opts.ExportDir))
|
||||
}
|
||||
|
||||
func (h *handler) rollbackPendingNetworkChange() error {
|
||||
h.pendingNetMu.Lock()
|
||||
pnc := h.pendingNet
|
||||
|
||||
@@ -2002,7 +2002,7 @@ func renderBenchmark(opts HandlerOptions) string {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
` + renderBenchmarkResultsCard(opts.ExportDir) + `
|
||||
`+`<div id="benchmark-results-section">`+renderBenchmarkResultsCard(opts.ExportDir)+`</div>`+`
|
||||
|
||||
<div id="benchmark-output" style="display:none;margin-top:16px" class="card">
|
||||
<div class="card-head">Benchmark Output <span id="benchmark-title"></span></div>
|
||||
@@ -2188,7 +2188,9 @@ function runNvidiaBenchmark(kind) {
|
||||
if (e.data) failures += 1;
|
||||
term.textContent += (e.data ? '\nERROR: ' + e.data : '\nCompleted.') + '\n';
|
||||
term.scrollTop = term.scrollHeight;
|
||||
const isLast = (idx + 1 >= taskIds.length);
|
||||
streamNext(idx + 1, failures);
|
||||
if (isLast) { benchmarkRefreshResults(); }
|
||||
});
|
||||
benchmarkES.onerror = function() {
|
||||
if (benchmarkES) {
|
||||
@@ -2208,18 +2210,30 @@ function runNvidiaBenchmark(kind) {
|
||||
}
|
||||
|
||||
benchmarkLoadGPUs();
|
||||
|
||||
function benchmarkRefreshResults() {
|
||||
fetch('/api/benchmark/results')
|
||||
.then(function(r) { return r.text(); })
|
||||
.then(function(html) {
|
||||
const el = document.getElementById('benchmark-results-section');
|
||||
if (el) el.innerHTML = html;
|
||||
})
|
||||
.catch(function() {});
|
||||
}
|
||||
</script>`
|
||||
}
|
||||
|
||||
func renderBenchmarkResultsCard(exportDir string) string {
|
||||
maxIdx, runs := loadBenchmarkHistory(exportDir)
|
||||
return renderBenchmarkResultsCardFromRuns(
|
||||
"Perf Results",
|
||||
perf := renderBenchmarkResultsCardFromRuns(
|
||||
"Performance Results",
|
||||
"Composite score by saved benchmark run and GPU.",
|
||||
"No saved benchmark runs yet.",
|
||||
"No saved performance benchmark runs yet.",
|
||||
maxIdx,
|
||||
runs,
|
||||
)
|
||||
power := renderPowerBenchmarkResultsCard(exportDir)
|
||||
return perf + "\n" + power
|
||||
}
|
||||
|
||||
func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string, maxGPUIndex int, runs []benchmarkHistoryRun) string {
|
||||
@@ -2299,6 +2313,126 @@ func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun)
|
||||
return maxGPUIndex, runs
|
||||
}
|
||||
|
||||
func renderPowerBenchmarkResultsCard(exportDir string) string {
|
||||
baseDir := app.DefaultBeeBenchPowerDir
|
||||
if strings.TrimSpace(exportDir) != "" {
|
||||
baseDir = filepath.Join(exportDir, "bee-bench", "power")
|
||||
}
|
||||
paths, err := filepath.Glob(filepath.Join(baseDir, "power-*", "result.json"))
|
||||
if err != nil || len(paths) == 0 {
|
||||
return `<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body"><p style="color:var(--muted);font-size:13px">No saved power benchmark runs yet.</p></div></div>`
|
||||
}
|
||||
sort.Strings(paths)
|
||||
|
||||
type powerRun struct {
|
||||
generatedAt time.Time
|
||||
displayTime string
|
||||
result platform.NvidiaPowerBenchResult
|
||||
}
|
||||
var runs []powerRun
|
||||
for _, path := range paths {
|
||||
raw, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
var r platform.NvidiaPowerBenchResult
|
||||
if err := json.Unmarshal(raw, &r); err != nil {
|
||||
continue
|
||||
}
|
||||
runs = append(runs, powerRun{
|
||||
generatedAt: r.GeneratedAt,
|
||||
displayTime: r.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
|
||||
result: r,
|
||||
})
|
||||
}
|
||||
sort.Slice(runs, func(i, j int) bool {
|
||||
return runs[i].generatedAt.After(runs[j].generatedAt)
|
||||
})
|
||||
|
||||
// Show only the most recent run's GPU slot table, plus a run history summary.
|
||||
var b strings.Builder
|
||||
b.WriteString(`<div class="card" style="margin-top:16px"><div class="card-head">Power / Thermal Fit Results</div><div class="card-body">`)
|
||||
|
||||
latest := runs[0].result
|
||||
b.WriteString(`<p style="font-size:12px;color:var(--muted);margin-bottom:10px">Latest run: ` + html.EscapeString(runs[0].displayTime))
|
||||
if latest.Hostname != "" {
|
||||
b.WriteString(` — ` + html.EscapeString(latest.Hostname))
|
||||
}
|
||||
if latest.OverallStatus != "" {
|
||||
statusColor := "var(--ok)"
|
||||
if latest.OverallStatus != "OK" {
|
||||
statusColor = "var(--warn)"
|
||||
}
|
||||
b.WriteString(` — <span style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(latest.OverallStatus) + `</span>`)
|
||||
}
|
||||
b.WriteString(`</p>`)
|
||||
|
||||
if len(latest.GPUs) > 0 {
|
||||
b.WriteString(`<div style="overflow-x:auto"><table><thead><tr>`)
|
||||
b.WriteString(`<th>GPU</th><th>Model</th><th>Nominal W</th><th>Achieved W</th><th>P95 Observed W</th><th>Status</th>`)
|
||||
b.WriteString(`</tr></thead><tbody>`)
|
||||
for _, gpu := range latest.GPUs {
|
||||
derated := gpu.Derated || (gpu.DefaultPowerLimitW > 0 && gpu.AppliedPowerLimitW < gpu.DefaultPowerLimitW-1)
|
||||
rowStyle := ""
|
||||
achievedStyle := ""
|
||||
if derated {
|
||||
rowStyle = ` style="background:rgba(255,180,0,0.08)"`
|
||||
achievedStyle = ` style="color:#e6a000;font-weight:600"`
|
||||
}
|
||||
statusLabel := gpu.Status
|
||||
if statusLabel == "" {
|
||||
statusLabel = "OK"
|
||||
}
|
||||
statusColor := "var(--ok)"
|
||||
if statusLabel != "OK" {
|
||||
statusColor = "var(--warn)"
|
||||
}
|
||||
nominalStr := "-"
|
||||
if gpu.DefaultPowerLimitW > 0 {
|
||||
nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
|
||||
}
|
||||
achievedStr := "-"
|
||||
if gpu.AppliedPowerLimitW > 0 {
|
||||
achievedStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
|
||||
}
|
||||
p95Str := "-"
|
||||
if gpu.MaxObservedPowerW > 0 {
|
||||
p95Str = fmt.Sprintf("%.0f", gpu.MaxObservedPowerW)
|
||||
}
|
||||
b.WriteString(`<tr` + rowStyle + `>`)
|
||||
b.WriteString(`<td>` + strconv.Itoa(gpu.Index) + `</td>`)
|
||||
b.WriteString(`<td>` + html.EscapeString(gpu.Name) + `</td>`)
|
||||
b.WriteString(`<td>` + nominalStr + `</td>`)
|
||||
b.WriteString(`<td` + achievedStyle + `>` + achievedStr + `</td>`)
|
||||
b.WriteString(`<td>` + p95Str + `</td>`)
|
||||
b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(statusLabel) + `</td>`)
|
||||
b.WriteString(`</tr>`)
|
||||
}
|
||||
b.WriteString(`</tbody></table></div>`)
|
||||
}
|
||||
|
||||
if len(runs) > 1 {
|
||||
b.WriteString(`<details style="margin-top:12px"><summary style="font-size:12px;color:var(--muted);cursor:pointer">` + strconv.Itoa(len(runs)) + ` runs total</summary>`)
|
||||
b.WriteString(`<div style="overflow-x:auto;margin-top:8px"><table><thead><tr><th>#</th><th>Time</th><th>GPUs</th><th>Status</th></tr></thead><tbody>`)
|
||||
for i, run := range runs {
|
||||
statusColor := "var(--ok)"
|
||||
if run.result.OverallStatus != "OK" {
|
||||
statusColor = "var(--warn)"
|
||||
}
|
||||
b.WriteString(`<tr>`)
|
||||
b.WriteString(`<td>#` + strconv.Itoa(i+1) + `</td>`)
|
||||
b.WriteString(`<td>` + html.EscapeString(run.displayTime) + `</td>`)
|
||||
b.WriteString(`<td>` + strconv.Itoa(len(run.result.GPUs)) + `</td>`)
|
||||
b.WriteString(`<td style="color:` + statusColor + `;font-weight:600">` + html.EscapeString(run.result.OverallStatus) + `</td>`)
|
||||
b.WriteString(`</tr>`)
|
||||
}
|
||||
b.WriteString(`</tbody></table></div></details>`)
|
||||
}
|
||||
|
||||
b.WriteString(`</div></div>`)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
// ── Burn ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
func renderBurn() string {
|
||||
|
||||
@@ -263,6 +263,7 @@ func NewHandler(opts HandlerOptions) http.Handler {
|
||||
mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
|
||||
mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
|
||||
mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
|
||||
mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)
|
||||
|
||||
// Tasks
|
||||
mux.HandleFunc("GET /api/tasks", h.handleAPITasksList)
|
||||
|
||||
Reference in New Issue
Block a user