diff --git a/audit/internal/platform/benchmark.go b/audit/internal/platform/benchmark.go
index 29f7db4..48b863f 100644
--- a/audit/internal/platform/benchmark.go
+++ b/audit/internal/platform/benchmark.go
@@ -59,6 +59,9 @@ type benchmarkPowerCalibrationResult struct {
// ≥20% while server fans were below 100% duty cycle — a signal that the
// cooling system may not be correctly configured for full GPU load.
CoolingWarning string
+ // MetricRows holds the telemetry rows from the final (converged) attempt
+ // for this GPU. Summarized into NvidiaPowerBenchGPU.Telemetry by the power bench.
+ MetricRows []GPUMetricRow
}
type benchmarkBurnProfile struct {
@@ -2781,7 +2784,7 @@ func runBenchmarkPowerCalibration(
infoByIndex map[int]benchmarkGPUInfo,
logFunc func(string),
seedLimits map[int]int,
-) (map[int]benchmarkPowerCalibrationResult, []benchmarkRestoreAction) {
+) (map[int]benchmarkPowerCalibrationResult, []benchmarkRestoreAction, []GPUMetricRow) {
const calibDurationSec = 120
const maxDerateW = 150
// calibSearchTolerance is the binary-search convergence threshold in watts.
@@ -2795,7 +2798,7 @@ func runBenchmarkPowerCalibration(
if _, err := exec.LookPath("dcgmi"); err != nil {
logFunc("power calibration: dcgmi not found, skipping (will use default power limit)")
- return map[int]benchmarkPowerCalibrationResult{}, nil
+ return map[int]benchmarkPowerCalibrationResult{}, nil, nil
}
if killed := KillTestWorkers(); len(killed) > 0 {
for _, p := range killed {
@@ -2829,6 +2832,8 @@ func runBenchmarkPowerCalibration(
results := make(map[int]benchmarkPowerCalibrationResult, len(gpuIndices))
var restore []benchmarkRestoreAction
+ var allCalibRows []GPUMetricRow // accumulated telemetry across all attempts
+ var calibCursor float64
// Initialise per-GPU state.
states := make([]*gpuCalibState, 0, len(gpuIndices))
@@ -2981,6 +2986,8 @@ calibDone:
ticker.Stop()
cancelAttempt()
_ = os.WriteFile(filepath.Join(runDir, logName), ar.out, 0644)
+ // Accumulate telemetry rows with attempt stage label.
+ appendBenchmarkMetrics(&allCalibRows, ar.rows, fmt.Sprintf("attempt-%d", sharedAttempt), &calibCursor, float64(calibDurationSec))
// Resource busy: retry with exponential back-off (shared — one DCGM session).
if ar.err != nil && isDCGMResourceBusy(ar.err) {
@@ -3065,6 +3072,7 @@ calibDone:
}
}
}
+ s.calib.MetricRows = filterRowsByGPU(ar.rows, s.idx)
s.converged = true
continue
}
@@ -3103,6 +3111,7 @@ calibDone:
} else {
s.calib.Notes = append(s.calib.Notes, fmt.Sprintf("could not find a stable targeted_power limit within %d W of the default", maxDerateW))
}
+ s.calib.MetricRows = filterRowsByGPU(ar.rows, s.idx)
s.converged = true
continue
}
@@ -3140,7 +3149,8 @@ calibDone:
results[s.idx] = s.calib
}
}
- return results, restore
+ writeBenchmarkMetricsFiles(runDir, allCalibRows)
+ return results, restore, allCalibRows
}
// isDCGMResourceBusy returns true when dcgmi exits with DCGM_ST_IN_USE (222),
@@ -3230,21 +3240,25 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
}
if len(result.RampSteps) > 0 {
b.WriteString("## Ramp Sequence\n\n")
- b.WriteString("| Step | New GPU | Stable Limit | Total Observed | Derated | Status |\n")
- b.WriteString("|------|---------|--------------|----------------|---------|--------|\n")
+ b.WriteString("| Step | New GPU | Stable Limit | Total Observed | Server Δ (IPMI) | Derated | Status |\n")
+ b.WriteString("|------|---------|--------------|----------------|-----------------|---------|--------|\n")
for _, step := range result.RampSteps {
derated := "-"
if step.Derated {
derated = "⚠ yes"
}
- fmt.Fprintf(&b, "| %d | GPU %d | %.0f W | %.0f W | %s | %s |\n",
- step.StepIndex, step.NewGPUIndex, step.NewGPUStableLimitW, step.TotalObservedPowerW, derated, step.Status)
+ serverDelta := "-"
+ if step.ServerDeltaW > 0 {
+ serverDelta = fmt.Sprintf("%.0f W", step.ServerDeltaW)
+ }
+ fmt.Fprintf(&b, "| %d | GPU %d | %.0f W | %.0f W | %s | %s | %s |\n",
+ step.StepIndex, step.NewGPUIndex, step.NewGPUStableLimitW, step.TotalObservedPowerW, serverDelta, derated, step.Status)
}
b.WriteString("\n")
}
b.WriteString("## Per-Slot Results\n\n")
- b.WriteString("| GPU | Status | Single-card Limit | Stable Limit | Temp | Attempts |\n")
- b.WriteString("|-----|--------|-------------------|--------------|------|----------|\n")
+ b.WriteString("| GPU | Status | Single-card Limit | Stable Limit | Server Δ (IPMI) | Temp | Attempts |\n")
+ b.WriteString("|-----|--------|-------------------|--------------|-----------------|------|----------|\n")
for _, gpu := range result.GPUs {
stableLimit := "-"
if gpu.StablePowerLimitW > 0 {
@@ -3254,8 +3268,12 @@ func renderPowerBenchReport(result NvidiaPowerBenchResult) string {
stableLimit = fmt.Sprintf("%.0f W", gpu.StablePowerLimitW)
}
}
- fmt.Fprintf(&b, "| GPU %d | %s | %.0f W | %s | %.1f C | %d |\n",
- gpu.Index, gpu.Status, gpu.AppliedPowerLimitW, stableLimit, gpu.MaxObservedTempC, gpu.CalibrationAttempts)
+ serverDelta := "-"
+ if gpu.ServerDeltaW > 0 {
+ serverDelta = fmt.Sprintf("%.0f W", gpu.ServerDeltaW)
+ }
+ fmt.Fprintf(&b, "| GPU %d | %s | %.0f W | %s | %s | %.1f C | %d |\n",
+ gpu.Index, gpu.Status, gpu.AppliedPowerLimitW, stableLimit, serverDelta, gpu.MaxObservedTempC, gpu.CalibrationAttempts)
}
b.WriteString("\n")
for _, gpu := range result.GPUs {
@@ -3284,11 +3302,19 @@ func renderPowerBenchSummary(result NvidiaPowerBenchResult) string {
fmt.Fprintf(&b, "ramp_step_%d_new_gpu=%d\n", step.StepIndex, step.NewGPUIndex)
fmt.Fprintf(&b, "ramp_step_%d_stable_limit_w=%.0f\n", step.StepIndex, step.NewGPUStableLimitW)
fmt.Fprintf(&b, "ramp_step_%d_total_power_w=%.0f\n", step.StepIndex, step.TotalObservedPowerW)
+ if step.ServerLoadedW > 0 {
+ fmt.Fprintf(&b, "ramp_step_%d_server_loaded_w=%.0f\n", step.StepIndex, step.ServerLoadedW)
+ fmt.Fprintf(&b, "ramp_step_%d_server_delta_w=%.0f\n", step.StepIndex, step.ServerDeltaW)
+ }
}
for _, gpu := range result.GPUs {
if gpu.StablePowerLimitW > 0 {
fmt.Fprintf(&b, "gpu_%d_stable_limit_w=%.0f\n", gpu.Index, gpu.StablePowerLimitW)
}
+ if gpu.ServerLoadedW > 0 {
+ fmt.Fprintf(&b, "gpu_%d_server_loaded_w=%.0f\n", gpu.Index, gpu.ServerLoadedW)
+ fmt.Fprintf(&b, "gpu_%d_server_delta_w=%.0f\n", gpu.Index, gpu.ServerDeltaW)
+ }
}
if sp := result.ServerPower; sp != nil && sp.Available {
fmt.Fprintf(&b, "server_idle_w=%.0f\n", sp.IdleW)
@@ -3327,6 +3353,10 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
if infoErr != nil {
return "", infoErr
}
+ // Capture full nvidia-smi -q snapshot at the start of the run.
+ if out, err := runSATCommandCtx(ctx, verboseLog, "00-nvidia-smi-q.log", []string{"nvidia-smi", "-q"}, nil, nil); err == nil {
+ _ = os.WriteFile(filepath.Join(runDir, "00-nvidia-smi-q.log"), out, 0644)
+ }
hostname, _ := os.Hostname()
result := NvidiaPowerBenchResult{
BenchmarkVersion: benchmarkVersion,
@@ -3352,13 +3382,31 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
// Phase 1: calibrate each GPU individually (sequentially, one at a time) to
// establish a true single-card power baseline unaffected by neighbour heat.
calibByIndex := make(map[int]benchmarkPowerCalibrationResult, len(selected))
+ singleIPMILoadedW := make(map[int]float64, len(selected))
var allRestoreActions []benchmarkRestoreAction
+ // allPowerRows accumulates telemetry from all phases for the top-level gpu-metrics.csv.
+ var allPowerRows []GPUMetricRow
+ var powerCursor float64
for _, idx := range selected {
singleDir := filepath.Join(runDir, fmt.Sprintf("single-%02d", idx))
_ = os.MkdirAll(singleDir, 0755)
singleInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
logFunc(fmt.Sprintf("power calibration: GPU %d single-card baseline", idx))
- c, restore := runBenchmarkPowerCalibration(ctx, verboseLog, singleDir, []int{idx}, singleInfo, logFunc, nil)
+ ipmiSingleCtx, ipmiSingleCancel := context.WithCancel(ctx)
+ ipmiSingleDone := make(chan float64, 1)
+ go func() {
+ defer close(ipmiSingleDone)
+ if w, ok := sampleIPMIPowerSeries(ipmiSingleCtx, 3600); ok {
+ ipmiSingleDone <- w
+ }
+ }()
+ c, restore, singleRows := runBenchmarkPowerCalibration(ctx, verboseLog, singleDir, []int{idx}, singleInfo, logFunc, nil)
+ appendBenchmarkMetrics(&allPowerRows, singleRows, fmt.Sprintf("single-gpu-%d", idx), &powerCursor, 0)
+ ipmiSingleCancel()
+ if w, ok := <-ipmiSingleDone; ok {
+ singleIPMILoadedW[idx] = w
+ logFunc(fmt.Sprintf("power calibration: GPU %d single-card IPMI loaded: %.0f W", idx, w))
+ }
allRestoreActions = append(allRestoreActions, restore...)
if r, ok := c[idx]; ok {
calibByIndex[idx] = r
@@ -3383,7 +3431,7 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
result.OverallStatus = "PARTIAL"
}
}
- gpus = append(gpus, NvidiaPowerBenchGPU{
+ gpu := NvidiaPowerBenchGPU{
Index: idx,
Name: info.Name,
BusID: info.BusID,
@@ -3396,7 +3444,16 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
Status: status,
Notes: append([]string(nil), calib.Notes...),
CoolingWarning: calib.CoolingWarning,
- })
+ }
+ if w, ok := singleIPMILoadedW[idx]; ok && serverIdleOK && w > 0 {
+ gpu.ServerLoadedW = w
+ gpu.ServerDeltaW = w - serverIdleW
+ }
+ if len(calib.MetricRows) > 0 {
+ t := summarizeBenchmarkTelemetry(calib.MetricRows)
+ gpu.Telemetry = &t
+ }
+ gpus = append(gpus, gpu)
}
sort.Slice(gpus, func(i, j int) bool {
if gpus[i].MaxObservedPowerW != gpus[j].MaxObservedPowerW {
@@ -3445,20 +3502,11 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
// stableLimits accumulates GPU index → fixed stable limit (W) across steps.
stableLimits := make(map[int]int, len(result.RecommendedSlotOrder))
- // Start an IPMI sampling goroutine that runs throughout Phase 2 to capture
- // server-side loaded power while GPUs are under stress. The goroutine is
- // cancelled as soon as Phase 2 finishes, and the average is used to compare
- // against PlatformMaxTDPW (GPU-reported stable limits sum).
+ // serverLoadedW tracks the IPMI server power from the final ramp step
+ // (all GPUs simultaneously loaded). Earlier steps' values are stored
+ // per-step in NvidiaPowerBenchStep.ServerLoadedW.
var serverLoadedW float64
var serverLoadedOK bool
- ipmiPhase2Ctx, ipmiPhase2Cancel := context.WithCancel(ctx)
- ipmiPhase2Done := make(chan float64, 1)
- go func() {
- defer close(ipmiPhase2Done)
- if w, ok := sampleIPMIPowerSeries(ipmiPhase2Ctx, 3600); ok {
- ipmiPhase2Done <- w
- }
- }()
// Step 1: reuse single-card calibration result directly.
if len(result.RecommendedSlotOrder) > 0 {
@@ -3475,6 +3523,10 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
Derated: firstCalib.Derated,
Status: "OK",
}
+ if w, ok := singleIPMILoadedW[firstIdx]; ok && serverIdleOK && w > 0 {
+ ramp.ServerLoadedW = w
+ ramp.ServerDeltaW = w - serverIdleW
+ }
if !firstCalib.Completed {
ramp.Status = "FAILED"
ramp.Notes = append(ramp.Notes, fmt.Sprintf("GPU %d did not complete single-card targeted_power", firstIdx))
@@ -3523,7 +3575,24 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
step, len(result.RecommendedSlotOrder), len(subset), newGPUIdx))
stepInfo := cloneBenchmarkGPUInfoMap(infoByIndex)
- stepCalib, stepRestore := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, seedForStep)
+ ipmiStepCtx, ipmiStepCancel := context.WithCancel(ctx)
+ ipmiStepDone := make(chan float64, 1)
+ go func() {
+ defer close(ipmiStepDone)
+ if w, ok := sampleIPMIPowerSeries(ipmiStepCtx, 3600); ok {
+ ipmiStepDone <- w
+ }
+ }()
+ stepCalib, stepRestore, stepRows := runBenchmarkPowerCalibration(ctx, verboseLog, stepDir, subset, stepInfo, logFunc, seedForStep)
+ appendBenchmarkMetrics(&allPowerRows, stepRows, fmt.Sprintf("ramp-step-%d", step), &powerCursor, 0)
+ ipmiStepCancel()
+ var stepIPMILoadedW float64
+ var stepIPMIOK bool
+ if w, ok := <-ipmiStepDone; ok {
+ stepIPMILoadedW = w
+ stepIPMIOK = true
+ logFunc(fmt.Sprintf("power ramp: step %d IPMI loaded: %.0f W", step, w))
+ }
// Accumulate restore actions; they all run in the outer defer.
allRestoreActions = append(allRestoreActions, stepRestore...)
@@ -3586,15 +3655,17 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
result.Findings = append(result.Findings, fmt.Sprintf("Ramp step %d (GPU %d) required derating to %.0f W under combined thermal load.", step, newGPUIdx, c.AppliedPowerLimitW))
}
- result.RampSteps = append(result.RampSteps, ramp)
- }
+ if stepIPMIOK && serverIdleOK && stepIPMILoadedW > 0 {
+ ramp.ServerLoadedW = stepIPMILoadedW
+ ramp.ServerDeltaW = stepIPMILoadedW - serverIdleW
+ // The last step has all GPUs loaded — use it as the top-level loaded_w.
+ if step == len(result.RecommendedSlotOrder) {
+ serverLoadedW = stepIPMILoadedW
+ serverLoadedOK = true
+ }
+ }
- // Stop IPMI Phase 2 sampling and collect result.
- ipmiPhase2Cancel()
- if w, ok := <-ipmiPhase2Done; ok {
- serverLoadedW = w
- serverLoadedOK = true
- logFunc(fmt.Sprintf("server loaded power (IPMI, Phase 2 avg): %.0f W", w))
+ result.RampSteps = append(result.RampSteps, ramp)
}
// Populate StablePowerLimitW on each GPU entry from the accumulated stable limits.
@@ -3624,6 +3695,8 @@ func (s *System) RunNvidiaPowerBench(ctx context.Context, baseDir string, opts N
// ~1.0 → GPU telemetry matches wall power; <0.75 → GPU over-reports its TDP.
_ = serverIdleOK // used implicitly via characterizeServerPower
result.ServerPower = characterizeServerPower(serverIdleW, serverLoadedW, result.PlatformMaxTDPW, serverIdleOK && serverLoadedOK)
+ // Write top-level gpu-metrics.csv/.html aggregating all phases.
+ writeBenchmarkMetricsFiles(runDir, allPowerRows)
resultJSON, err := json.MarshalIndent(result, "", " ")
if err != nil {
return "", fmt.Errorf("marshal power result: %w", err)
diff --git a/audit/internal/platform/benchmark_types.go b/audit/internal/platform/benchmark_types.go
index c78d9d7..902eeb7 100644
--- a/audit/internal/platform/benchmark_types.go
+++ b/audit/internal/platform/benchmark_types.go
@@ -331,6 +331,13 @@ type NvidiaPowerBenchGPU struct {
Notes []string `json:"notes,omitempty"`
// CoolingWarning mirrors BenchmarkGPUResult.CoolingWarning for the power workflow.
CoolingWarning string `json:"cooling_warning,omitempty"`
+ // ServerLoadedW is the IPMI server power reading captured during this
+ // GPU's single-card calibration run. ServerDeltaW = ServerLoadedW − idle.
+ ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
+ ServerDeltaW float64 `json:"server_delta_w,omitempty"`
+ // Telemetry holds the aggregated stats from the final converged calibration
+ // attempt for this GPU (temperature, power, fan, clock percentiles).
+ Telemetry *BenchmarkTelemetrySummary `json:"telemetry,omitempty"`
}
type NvidiaPowerBenchStep struct {
@@ -345,6 +352,10 @@ type NvidiaPowerBenchStep struct {
Derated bool `json:"derated,omitempty"`
Status string `json:"status"`
Notes []string `json:"notes,omitempty"`
+ // ServerLoadedW is the IPMI server power reading captured during this
+ // ramp step's calibration run. ServerDeltaW = ServerLoadedW − idle.
+ ServerLoadedW float64 `json:"server_loaded_w,omitempty"`
+ ServerDeltaW float64 `json:"server_delta_w,omitempty"`
}
// NvidiaPerformanceRampStep holds per-step performance data for the
diff --git a/audit/internal/webui/pages.go b/audit/internal/webui/pages.go
index 67942c1..c7720e0 100644
--- a/audit/internal/webui/pages.go
+++ b/audit/internal/webui/pages.go
@@ -2014,9 +2014,11 @@ func renderSATCard(id, label, runAction, headerActions, body string) string {
// ── Benchmark ─────────────────────────────────────────────────────────────────
type benchmarkHistoryRun struct {
- generatedAt time.Time
- displayTime string
- gpuScores map[int]float64 // GPU index → composite score
+ generatedAt time.Time
+ displayTime string
+ gpuScores map[int]float64 // GPU index → composite score
+ gpuStatuses map[int]string // GPU index → status ("OK", "WARNING", "FAILED", …)
+ overallStatus string
}
func renderBenchmark(opts HandlerOptions) string {
@@ -2324,7 +2326,7 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
b.WriteString(`
` + html.EscapeString(description) + `
`)
}
b.WriteString(``)
- b.WriteString(`
| Run | Time | `)
+ b.WriteString(`| Run | Time | Status | `)
for i := 0; i <= maxGPUIndex; i++ {
b.WriteString(`GPU ` + strconv.Itoa(i) + ` | `)
}
@@ -2333,13 +2335,36 @@ func renderBenchmarkResultsCardFromRuns(title, description, emptyMessage string,
b.WriteString(`
|---|
`)
b.WriteString(`| #` + strconv.Itoa(i+1) + ` | `)
b.WriteString(`` + html.EscapeString(run.displayTime) + ` | `)
+ overallColor := "var(--ok)"
+ overallLabel := run.overallStatus
+ if overallLabel == "" {
+ overallLabel = "OK"
+ }
+ if overallLabel == "FAILED" {
+ overallColor = "var(--crit-fg,#9f3a38)"
+ } else if overallLabel != "OK" {
+ overallColor = "var(--warn)"
+ }
+ b.WriteString(`` + html.EscapeString(overallLabel) + ` | `)
for idx := 0; idx <= maxGPUIndex; idx++ {
score, ok := run.gpuScores[idx]
if !ok {
b.WriteString(`- | `)
continue
}
- b.WriteString(`` + fmt.Sprintf("%.2f", score) + ` | `)
+ gpuStatus := run.gpuStatuses[idx]
+ scoreColor := ""
+ switch gpuStatus {
+ case "FAILED":
+ scoreColor = ` style="color:var(--crit-fg,#9f3a38);font-weight:600"`
+ case "WARNING", "PARTIAL":
+ scoreColor = ` style="color:var(--warn);font-weight:600"`
+ case "", "OK":
+ // no override
+ default:
+ scoreColor = ` style="color:var(--warn);font-weight:600"`
+ }
+ b.WriteString(`` + fmt.Sprintf("%.2f", score) + ` | `)
}
b.WriteString(`
`)
}
@@ -2373,12 +2398,15 @@ func loadBenchmarkHistoryFromPaths(paths []string) (int, []benchmarkHistoryRun)
continue
}
run := benchmarkHistoryRun{
- generatedAt: result.GeneratedAt,
- displayTime: result.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
- gpuScores: make(map[int]float64),
+ generatedAt: result.GeneratedAt,
+ displayTime: result.GeneratedAt.Local().Format("2006-01-02 15:04:05"),
+ gpuScores: make(map[int]float64),
+ gpuStatuses: make(map[int]string),
+ overallStatus: result.OverallStatus,
}
for _, gpu := range result.GPUs {
run.gpuScores[gpu.Index] = gpu.Scores.CompositeScore
+ run.gpuStatuses[gpu.Index] = gpu.Status
if gpu.Index > maxGPUIndex {
maxGPUIndex = gpu.Index
}
@@ -2447,31 +2475,45 @@ func renderPowerBenchmarkResultsCard(exportDir string) string {
if len(latest.GPUs) > 0 {
b.WriteString(``)
- b.WriteString(`| GPU | Model | Nominal W | Achieved W | P95 Observed W | Status | `)
+ b.WriteString(`GPU | Model | Nominal W | Single-card W | Multi-GPU W | P95 Observed W | Status | `)
b.WriteString(`
`)
for _, gpu := range latest.GPUs {
- derated := gpu.Derated || (gpu.DefaultPowerLimitW > 0 && gpu.AppliedPowerLimitW < gpu.DefaultPowerLimitW-1)
+ // finalLimitW is the definitive TDP: multi-GPU stable limit from the ramp,
+ // falling back to single-card applied limit if the ramp hasn't run.
+ finalLimitW := gpu.StablePowerLimitW
+ if finalLimitW <= 0 {
+ finalLimitW = gpu.AppliedPowerLimitW
+ }
+ // Derate is relative to nominal (DefaultPowerLimitW), using the final limit.
+ derated := gpu.Derated ||
+ (gpu.DefaultPowerLimitW > 0 && finalLimitW > 0 && finalLimitW < gpu.DefaultPowerLimitW-1)
rowStyle := ""
- achievedStyle := ""
+ finalStyle := ""
if derated {
rowStyle = ` style="background:rgba(255,180,0,0.08)"`
- achievedStyle = ` style="color:#e6a000;font-weight:600"`
+ finalStyle = ` style="color:#e6a000;font-weight:600"`
}
statusLabel := gpu.Status
if statusLabel == "" {
statusLabel = "OK"
}
statusColor := "var(--ok)"
- if statusLabel != "OK" {
+ if statusLabel == "FAILED" {
+ statusColor = "var(--crit-fg,#9f3a38)"
+ } else if statusLabel != "OK" {
statusColor = "var(--warn)"
}
nominalStr := "-"
if gpu.DefaultPowerLimitW > 0 {
nominalStr = fmt.Sprintf("%.0f", gpu.DefaultPowerLimitW)
}
- achievedStr := "-"
+ singleStr := "-"
if gpu.AppliedPowerLimitW > 0 {
- achievedStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
+ singleStr = fmt.Sprintf("%.0f", gpu.AppliedPowerLimitW)
+ }
+ multiStr := "-"
+ if gpu.StablePowerLimitW > 0 {
+ multiStr = fmt.Sprintf("%.0f", gpu.StablePowerLimitW)
}
p95Str := "-"
if gpu.MaxObservedPowerW > 0 {
@@ -2481,7 +2523,8 @@ func renderPowerBenchmarkResultsCard(exportDir string) string {
b.WriteString(`` + strconv.Itoa(gpu.Index) + ` | `)
b.WriteString(`` + html.EscapeString(gpu.Name) + ` | `)
b.WriteString(`` + nominalStr + ` | `)
- b.WriteString(`` + achievedStr + ` | `)
+ b.WriteString(`` + singleStr + ` | `)
+ b.WriteString(`` + multiStr + ` | `)
b.WriteString(`` + p95Str + ` | `)
b.WriteString(`` + html.EscapeString(statusLabel) + ` | `)
b.WriteString(``)
|---|