diff --git a/audit/internal/platform/live_metrics.go b/audit/internal/platform/live_metrics.go index ebecc78..783e7ef 100644 --- a/audit/internal/platform/live_metrics.go +++ b/audit/internal/platform/live_metrics.go @@ -18,11 +18,19 @@ type LiveMetricSample struct { Fans []FanReading `json:"fans"` Temps []TempReading `json:"temps"` PowerW float64 `json:"power_w"` + PSUs []PSUReading `json:"psus,omitempty"` CPULoadPct float64 `json:"cpu_load_pct"` MemLoadPct float64 `json:"mem_load_pct"` GPUs []GPUMetricRow `json:"gpus"` } +// PSUReading is a per-slot power supply input power reading. +type PSUReading struct { + Slot int `json:"slot"` + Name string `json:"name"` + PowerW float64 `json:"power_w"` +} + // TempReading is a named temperature sensor value. type TempReading struct { Name string `json:"name"` @@ -57,6 +65,9 @@ func SampleLiveMetrics() LiveMetricSample { // System power — returns 0 if unavailable s.PowerW = sampleSystemPower() + // Per-PSU power — populated when IPMI SDR has Power Supply entities with Watt readings + s.PSUs = samplePSUPower() + // CPU load — from /proc/stat s.CPULoadPct = sampleCPULoadPct() @@ -326,3 +337,65 @@ func compactAmbientTempName(chip, name string) string { } return chip + " / " + name } + +// samplePSUPower reads per-PSU input power via IPMI SDR. +// It parses `ipmitool sdr elist full` output looking for Power Supply entity +// sensors (entity ID "10.N") that report a value in Watts. +// Returns nil when IPMI is unavailable or no PSU Watt sensors exist. 
+func samplePSUPower() []PSUReading {
+	out, err := exec.Command("ipmitool", "sdr", "elist", "full").Output() // NOTE(review): no timeout — consider exec.CommandContext if ipmitool can hang
+	if err != nil || len(out) == 0 {
+		return nil
+	}
+	const maxPlausibleWatts = 1e6 // sanity cap per PSU; combined with w > 0 below it also filters NaN and +Inf
+	// map slot → reading (keep highest-watt value per slot in case of duplicates)
+	type entry struct {
+		name   string
+		powerW float64
+	}
+	bySlot := map[int]entry{}
+	for _, line := range strings.Split(string(out), "\n") {
+		parts := strings.Split(line, "|") // parts[0] = sensor name, parts[3] = entity ID, parts[4] = reading
+		if len(parts) < 5 {
+			continue
+		}
+		entityID := strings.TrimSpace(parts[3]) // e.g. "10.1"
+		if !strings.HasPrefix(entityID, "10.") {
+			continue // not a Power Supply entity
+		}
+		slot, err := strconv.Atoi(strings.TrimPrefix(entityID, "10."))
+		if err != nil {
+			continue
+		}
+		valueField := strings.TrimSpace(parts[4]) // e.g. "740.00 Watts"
+		if !strings.Contains(strings.ToLower(valueField), "watts") {
+			continue
+		}
+		valueFields := strings.Fields(valueField)
+		if len(valueFields) < 2 {
+			continue
+		}
+		w, err := strconv.ParseFloat(valueFields[0], 64)
+		if err != nil || !(w > 0 && w <= maxPlausibleWatts) { // ParseFloat accepts "NaN"/"Inf", which encoding/json cannot marshal
+			continue
+		}
+		sensorName := strings.TrimSpace(parts[0])
+		if existing, ok := bySlot[slot]; !ok || w > existing.powerW {
+			bySlot[slot] = entry{name: sensorName, powerW: w}
+		}
+	}
+	if len(bySlot) == 0 {
+		return nil
+	}
+	slots := make([]int, 0, len(bySlot))
+	for s := range bySlot {
+		slots = append(slots, s)
+	}
+	sort.Ints(slots) // map iteration order is random; sort so the result is ordered by slot
+	psus := make([]PSUReading, 0, len(slots))
+	for _, s := range slots {
+		e := bySlot[s]
+		psus = append(psus, PSUReading{Slot: s, Name: e.name, PowerW: e.powerW})
+	}
+	return psus
+}
diff --git a/audit/internal/webui/charts_svg.go b/audit/internal/webui/charts_svg.go
index 6d01cbc..80c519b 100644
--- a/audit/internal/webui/charts_svg.go
+++ b/audit/internal/webui/charts_svg.go
@@ -462,6 +462,127 @@ func synthesizeChartTimes(times []time.Time, count int) []time.Time {
 	return out
 }
 
+// renderStackedMetricChartSVG renders a stacked area chart where each dataset
+// is visually "stacked" on top of the previous one.
Intended for multi-PSU
+// power charts where the filled area of each PSU shows its individual
+// contribution and the total height equals the combined draw. names[i] labels datasets[i] (a length mismatch is tolerated); a nil yMax is derived from the stacked total.
+func renderStackedMetricChartSVG(title string, labels []string, times []time.Time, datasets [][]float64, names []string, yMax *float64, canvasHeight int, timeline []chartTimelineSegment) ([]byte, error) {
+	pointCount := len(labels)
+	if len(times) > pointCount {
+		pointCount = len(times)
+	}
+	if pointCount == 0 {
+		pointCount = 1 // render an empty chart with a single placeholder point
+		labels = []string{""}
+		times = []time.Time{{}}
+	}
+	if len(labels) < pointCount {
+		padded := make([]string, pointCount)
+		copy(padded, labels)
+		labels = padded
+	}
+	if len(times) < pointCount {
+		times = synthesizeChartTimes(times, pointCount)
+	}
+	for i := range datasets {
+		if len(datasets[i]) == 0 {
+			datasets[i] = make([]float64, pointCount) // an empty series is drawn as all zeros
+		}
+	}
+
+	times, datasets = downsampleTimeSeries(times, datasets, 1400)
+	pointCount = len(times)
+
+	// Build cumulative sums per time point: row 0 is the zero baseline, row i+1 is row i plus datasets[i].
+	cumulative := make([][]float64, len(datasets)+1)
+	for i := range cumulative {
+		cumulative[i] = make([]float64, pointCount)
+	}
+	for i, ds := range datasets {
+		copy(cumulative[i+1], ds) // copies min(len(ds), pointCount) values, so ragged input cannot index out of range
+		for j, base := range cumulative[i] {
+			cumulative[i+1][j] += base // a missing tail value counts as 0 and simply carries the running total
+		}
+	}
+
+	// Scale is based on the total (top cumulative row).
+	total := cumulative[len(cumulative)-1]
+	yMin := floatPtr(0)
+	if yMax == nil {
+		yMax = autoMax120(total)
+	}
+	scale := singleAxisChartScale([][]float64{total}, yMin, yMax)
+
+	seriesNames := make([]string, len(datasets))
+	copy(seriesNames, names) // surplus names are dropped; datasets without a name get an empty legend label
+	legendItems := make([]metricChartSeries, len(datasets))
+	for i, name := range seriesNames {
+		legendItems[i] = metricChartSeries{Name: name, Color: metricChartPalette[i%len(metricChartPalette)], Values: datasets[i]}
+	}
+	statsLabel := chartStatsLabel([][]float64{total}) // stats label is computed from the stacked total
+
+	layout := singleAxisChartLayout(canvasHeight, len(legendItems))
+	start, end := chartTimeBounds(times)
+
+	var b strings.Builder
+	writeSVGOpen(&b, layout.Width, layout.Height)
+	writeChartFrame(&b, title, statsLabel, layout.Width, layout.Height)
+	writeTimelineIdleSpans(&b, layout, start, end, timeline)
+	writeVerticalGrid(&b, layout, times, pointCount, 8)
+	writeHorizontalGrid(&b, layout, scale)
+	writeTimelineBoundaries(&b, layout, start, end, timeline)
+	writePlotBorder(&b, layout)
+	writeSingleAxisY(&b, layout, scale)
+	writeXAxisLabels(&b, layout, times, labels, start, end, 8)
+
+	// Draw stacked areas from top to bottom so lower layers are visible.
+	for i := len(datasets) - 1; i >= 0; i-- {
+		writeStackedArea(&b, layout, times, start, end, cumulative[i], cumulative[i+1], scale, legendItems[i].Color)
+	}
+	// Draw border polylines on top.
+	for i := len(datasets) - 1; i >= 0; i-- {
+		writeSeriesPolyline(&b, layout, times, start, end, cumulative[i+1], scale, legendItems[i].Color)
+	}
+
+	writeLegend(&b, layout, legendItems)
+	writeSVGClose(&b)
+	return []byte(b.String()), nil
+}
+
+// writeStackedArea draws a filled polygon between two cumulative value arrays
+// (baseline and top), using the given color at 55% opacity.
+func writeStackedArea(b *strings.Builder, layout chartLayout, times []time.Time, start, end time.Time, baseline, top []float64, scale chartScale, color string) {
+	n := len(top)
+	if n == 0 {
+		return // nothing to draw
+	}
+	if len(baseline) < n {
+		baseline = make([]float64, n) // a short baseline is replaced by an all-zero one
+	}
+
+	// Forward path along top values, then backward along baseline values.
+	var points strings.Builder
+	for i := 0; i < n; i++ {
+		x := chartXForTime(chartPointTime(times, i), start, end, layout.PlotLeft, layout.PlotRight)
+		y := chartYForValue(valueClamp(top[i], scale), scale, layout.PlotTop, layout.PlotBottom)
+		if i > 0 {
+			points.WriteByte(' ')
+		}
+		points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
+		points.WriteByte(',')
+		points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
+	}
+	for i := n - 1; i >= 0; i-- {
+		x := chartXForTime(chartPointTime(times, i), start, end, layout.PlotLeft, layout.PlotRight)
+		y := chartYForValue(valueClamp(baseline[i], scale), scale, layout.PlotTop, layout.PlotBottom)
+		points.WriteByte(' ')
+		points.WriteString(strconv.FormatFloat(x, 'f', 1, 64))
+		points.WriteByte(',')
+		points.WriteString(strconv.FormatFloat(y, 'f', 1, 64))
+	}
+	fmt.Fprintf(b, `<polygon points="%s" fill="%s" fill-opacity="0.55"/>`+"\n", points.String(), color)
+}
+
 func writeSVGOpen(b *strings.Builder, width, height int) {
 	fmt.Fprintf(b, ``+"\n", width, height, width, height)
 }
diff --git a/audit/internal/webui/server.go b/audit/internal/webui/server.go
index 595c233..c87582b 100644
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -575,12 +575,14 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 	}
 	timeline := metricsTimelineSegments(samples, time.Now())
 	if idx, sub, ok := parseGPUChartPath(path); ok && sub == "overview" {
-		buf, ok, err := renderGPUOverviewChartSVG(idx, samples, timeline)
+		var overviewOk bool
+		var buf []byte
+		buf, overviewOk, err = renderGPUOverviewChartSVG(idx, samples, timeline)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
-		if !ok {
+		if !overviewOk {
 			http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable)
 			return
 		}
@@ -589,23 +591,37 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request)
 		_, _ = w.Write(buf)
 		return
 	}
-	datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples)
+
datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples) if !ok { http.Error(w, "metrics history unavailable", http.StatusServiceUnavailable) return } - buf, err := renderMetricChartSVG( - title, - labels, - sampleTimes(samples), - datasets, - names, - yMin, - yMax, - chartCanvasHeightForPath(path, len(names)), - timeline, - ) + var buf []byte + if stacked { + buf, err = renderStackedMetricChartSVG( + title, + labels, + sampleTimes(samples), + datasets, + names, + yMax, + chartCanvasHeightForPath(path, len(names)), + timeline, + ) + } else { + buf, err = renderMetricChartSVG( + title, + labels, + sampleTimes(samples), + datasets, + names, + yMin, + yMax, + chartCanvasHeightForPath(path, len(names)), + timeline, + ) + } if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return @@ -615,12 +631,8 @@ func (h *handler) handleMetricsChartSVG(w http.ResponseWriter, r *http.Request) _, _ = w.Write(buf) } -func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][]float64, []string, []string, string, *float64, *float64, bool) { - var datasets [][]float64 - var names []string - var title string - var yMin, yMax *float64 - labels := sampleTimeLabels(samples) +func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (datasets [][]float64, names []string, labels []string, title string, yMin, yMax *float64, stacked bool, ok bool) { + labels = sampleTimeLabels(samples) switch { case path == "server-load": @@ -656,15 +668,41 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ case path == "server-power": title = "System Power" - power := make([]float64, len(samples)) - for i, s := range samples { - power[i] = s.PowerW + // Use per-PSU stacked chart when PSU SDR data is available. + // Collect the union of PSU slots seen across all samples. + psuSlots := psuSlotsFromSamples(samples) + if len(psuSlots) > 1 { + // Build one dataset per PSU slot. 
+ psuDatasets := make([][]float64, len(psuSlots)) + psuNames := make([]string, len(psuSlots)) + for si, slot := range psuSlots { + ds := make([]float64, len(samples)) + for i, s := range samples { + for _, psu := range s.PSUs { + if psu.Slot == slot { + ds[i] = psu.PowerW + break + } + } + } + psuDatasets[si] = normalizePowerSeries(ds) + psuNames[si] = fmt.Sprintf("PSU %d", slot) + } + datasets = psuDatasets + names = psuNames + stacked = true + yMax = autoMax120(psuStackedTotal(psuDatasets)) + } else { + power := make([]float64, len(samples)) + for i, s := range samples { + power[i] = s.PowerW + } + power = normalizePowerSeries(power) + datasets = [][]float64{power} + names = []string{"Power W"} + yMin = floatPtr(0) + yMax = autoMax120(power) } - power = normalizePowerSeries(power) - datasets = [][]float64{power} - names = []string{"Power W"} - yMin = floatPtr(0) - yMax = autoMax120(power) case path == "server-fans": title = "Fan RPM" @@ -707,7 +745,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ case strings.HasPrefix(path, "gpu/"): idx, sub, ok := parseGPUChartPath(path) if !ok { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } switch sub { case "load": @@ -715,7 +753,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ util := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.UsagePct }) mem := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemUsagePct }) if util == nil && mem == nil { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } datasets = [][]float64{coalesceDataset(util, len(samples)), coalesceDataset(mem, len(samples))} names = []string{"Load %", "Mem %"} @@ -725,7 +763,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ title = gpuDisplayLabel(idx) + " Temperature" temp := 
gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.TempC }) if temp == nil { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } datasets = [][]float64{temp} names = []string{"Temp °C"} @@ -735,7 +773,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ title = gpuDisplayLabel(idx) + " Core Clock" clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.ClockMHz }) if clock == nil { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } datasets = [][]float64{clock} names = []string{"Core Clock MHz"} @@ -744,7 +782,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ title = gpuDisplayLabel(idx) + " Memory Clock" clock := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.MemClockMHz }) if clock == nil { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } datasets = [][]float64{clock} names = []string{"Memory Clock MHz"} @@ -753,7 +791,7 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ title = gpuDisplayLabel(idx) + " Power" power := gpuDatasetByIndex(samples, idx, func(g platform.GPUMetricRow) float64 { return g.PowerW }) if power == nil { - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } datasets = [][]float64{power} names = []string{"Power W"} @@ -761,10 +799,10 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) ([][ } default: - return nil, nil, nil, "", nil, nil, false + return nil, nil, nil, "", nil, nil, false, false } - return datasets, names, labels, title, yMin, yMax, len(datasets) > 0 + return datasets, names, labels, title, yMin, yMax, stacked, len(datasets) > 0 } func parseGPUChartPath(path string) (idx int, sub string, ok bool) { @@ -930,6 +968,37 @@ func 
normalizePowerSeries(ds []float64) []float64 {
 	return out
 }
 
+// psuSlotsFromSamples returns the sorted list of PSU slot numbers seen across samples.
+func psuSlotsFromSamples(samples []platform.LiveMetricSample) []int {
+	seen := map[int]struct{}{} // set of every slot number reported by any sample
+	for _, s := range samples {
+		for _, p := range s.PSUs {
+			seen[p.Slot] = struct{}{}
+		}
+	}
+	slots := make([]int, 0, len(seen))
+	for s := range seen {
+		slots = append(slots, s)
+	}
+	sort.Ints(slots) // map iteration order is random; sort for a stable series order
+	return slots
+}
+
+// psuStackedTotal returns the point-by-point sum of all PSU datasets (for scale calculation).
+// The result is as long as the longest dataset; shorter datasets contribute nothing past their end.
+func psuStackedTotal(datasets [][]float64) []float64 {
+	var total []float64 // stays nil when there is nothing to sum
+	for _, ds := range datasets {
+		if len(ds) > len(total) {
+			total = append(total, make([]float64, len(ds)-len(total))...) // grow so ragged input cannot index out of range
+		}
+		for i, v := range ds {
+			total[i] += v
+		}
+	}
+	return total
+}
+
 func normalizeFanSeries(ds []float64) []float64 {
 	if len(ds) == 0 {
 		return nil
diff --git a/audit/internal/webui/server_test.go b/audit/internal/webui/server_test.go
index d1c95aa..763d92f 100644
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -120,7 +120,7 @@ func TestChartDataFromSamplesUsesFullHistory(t *testing.T) {
 		},
 	}
 
-	datasets, names, labels, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, labels, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -164,7 +164,7 @@ func TestChartDataFromSamplesKeepsStableGPUSeriesOrder(t *testing.T) {
 		},
 	}
 
-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
+	datasets, names, _, title, _, _, _, ok := chartDataFromSamples("gpu-all-power", samples)
 	if !ok {
 		t.Fatal("chartDataFromSamples returned ok=false")
 	}
@@ -209,7 +209,7 @@ func TestChartDataFromSamplesIncludesGPUClockCharts(t *testing.T) {
 		},
 	}
 
-	datasets, names, _, title, _, _, ok := chartDataFromSamples("gpu-all-clock", samples)
+	datasets, names, _, title, _, _, _, ok
:= chartDataFromSamples("gpu-all-clock", samples) if !ok { t.Fatal("gpu-all-clock returned ok=false") } diff --git a/audit/internal/webui/task_report.go b/audit/internal/webui/task_report.go index 535c32b..90b93f4 100644 --- a/audit/internal/webui/task_report.go +++ b/audit/internal/webui/task_report.go @@ -171,21 +171,17 @@ func renderTaskChartSVG(path string, samples []platform.LiveMetricSample, timeli } return gpuDisplayLabel(idx) + " Overview", buf, true } - datasets, names, labels, title, yMin, yMax, ok := chartDataFromSamples(path, samples) + datasets, names, labels, title, yMin, yMax, stacked, ok := chartDataFromSamples(path, samples) if !ok { return "", nil, false } - buf, err := renderMetricChartSVG( - title, - labels, - sampleTimes(samples), - datasets, - names, - yMin, - yMax, - chartCanvasHeightForPath(path, len(names)), - timeline, - ) + var buf []byte + var err error + if stacked { + buf, err = renderStackedMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMax, chartCanvasHeightForPath(path, len(names)), timeline) + } else { + buf, err = renderMetricChartSVG(title, labels, sampleTimes(samples), datasets, names, yMin, yMax, chartCanvasHeightForPath(path, len(names)), timeline) + } if err != nil { return "", nil, false }