Globalize autotuned system power source

2026-04-20 07:02:12 +03:00
parent 17118298bd
commit b3cf8e3893
14 changed files with 327 additions and 108 deletions
--- a/audit/internal/app/app.go
+++ b/audit/internal/app/app.go
@@ -19,20 +19,22 @@ import (
 )
 var (
-	DefaultExportDir        = "/appdata/bee/export"
+	DefaultExportDir                     = "/appdata/bee/export"
-	DefaultAuditJSONPath    = DefaultExportDir + "/bee-audit.json"
+	DefaultAuditJSONPath                 = DefaultExportDir + "/bee-audit.json"
-	DefaultAuditLogPath     = DefaultExportDir + "/bee-audit.log"
+	DefaultAuditLogPath                  = DefaultExportDir + "/bee-audit.log"
-	DefaultWebLogPath       = DefaultExportDir + "/bee-web.log"
+	DefaultWebLogPath                    = DefaultExportDir + "/bee-web.log"
-	DefaultNetworkLogPath   = DefaultExportDir + "/bee-network.log"
+	DefaultNetworkLogPath                = DefaultExportDir + "/bee-network.log"
-	DefaultNvidiaLogPath    = DefaultExportDir + "/bee-nvidia.log"
+	DefaultNvidiaLogPath                 = DefaultExportDir + "/bee-nvidia.log"
-	DefaultSSHLogPath       = DefaultExportDir + "/bee-sshsetup.log"
+	DefaultSSHLogPath                    = DefaultExportDir + "/bee-sshsetup.log"
-	DefaultRuntimeJSONPath  = DefaultExportDir + "/runtime-health.json"
+	DefaultRuntimeJSONPath               = DefaultExportDir + "/runtime-health.json"
-	DefaultRuntimeLogPath   = DefaultExportDir + "/runtime-health.log"
+	DefaultRuntimeLogPath                = DefaultExportDir + "/runtime-health.log"
-	DefaultTechDumpDir      = DefaultExportDir + "/techdump"
+	DefaultTechDumpDir                   = DefaultExportDir + "/techdump"
-	DefaultSATBaseDir       = DefaultExportDir + "/bee-sat"
+	DefaultSATBaseDir                    = DefaultExportDir + "/bee-sat"
-	DefaultBeeBenchBaseDir  = DefaultExportDir + "/bee-bench"
+	DefaultBeeBenchBaseDir               = DefaultExportDir + "/bee-bench"
-	DefaultBeeBenchPerfDir  = DefaultBeeBenchBaseDir + "/perf"
+	DefaultBeeBenchAutotuneDir           = DefaultBeeBenchBaseDir + "/autotune"
-	DefaultBeeBenchPowerDir = DefaultBeeBenchBaseDir + "/power"
+	DefaultBeeBenchPerfDir               = DefaultBeeBenchBaseDir + "/perf"
 	DefaultBeeBenchPowerDir              = DefaultBeeBenchBaseDir + "/power"
 	DefaultBeeBenchPowerSourceConfigPath = DefaultBeeBenchBaseDir + "/power-source-autotune.json"
 )
 type App struct {
@@ -125,6 +127,7 @@ type satRunner interface {
 	RunNvidiaTargetedStressValidatePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaBenchmark(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaPowerBench(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, logFunc func(string)) (string, error)
 	RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error)
 	RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error)
 	RunNvidiaTargetedPowerPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
 	RunNvidiaPulseTestPack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, logFunc func(string)) (string, error)
@@ -572,6 +575,11 @@ func (a *App) RunNvidiaBenchmarkCtx(ctx context.Context, baseDir string, opts pl
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultBeeBenchPerfDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "performance", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaBenchmark(ctx, baseDir, opts, logFunc)
 }
@@ -579,9 +587,47 @@ func (a *App) RunNvidiaPowerBenchCtx(ctx context.Context, baseDir string, opts p
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultBeeBenchPowerDir
 	}
 	resolved, err := a.ensureBenchmarkPowerAutotune(ctx, baseDir, opts, "power-fit", logFunc)
 	if err != nil {
 		return "", err
 	}
 	opts.ServerPowerSource = resolved.SelectedSource
 	return a.sat.RunNvidiaPowerBench(ctx, baseDir, opts, logFunc)
 }
 // RunNvidiaPowerSourceAutotuneCtx forwards a power-source autotune request to
 // the SAT runner. A blank (whitespace-only) baseDir is replaced with
 // DefaultBeeBenchAutotuneDir; all other arguments are passed through as-is.
 // It returns whatever the runner returns.
 func (a *App) RunNvidiaPowerSourceAutotuneCtx(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
 	dir := baseDir
 	if len(strings.TrimSpace(dir)) == 0 {
 		dir = DefaultBeeBenchAutotuneDir
 	}
 	return a.sat.RunNvidiaPowerSourceAutotune(ctx, dir, opts, benchmarkKind, logFunc)
 }
 // LoadBenchmarkPowerAutotune loads the power-source autotune config from
 // DefaultBeeBenchPowerSourceConfigPath and returns the loader's result unchanged.
 func (a *App) LoadBenchmarkPowerAutotune() (*platform.BenchmarkPowerAutotuneConfig, error) {
 	cfgPath := DefaultBeeBenchPowerSourceConfigPath
 	return platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 }
 // ensureBenchmarkPowerAutotune returns the power-source autotune config that
 // belongs to baseDir, running the autotune first when no config can be loaded.
 //
 // The config path is derived from baseDir via
 // platform.BenchmarkPowerSourceConfigPath. When the config is missing (or cannot
 // be loaded for any reason) the autotune is run into an "autotune" directory
 // that is a sibling of baseDir, and the config is then loaded again.
 //
 // logFunc may be nil. On error the returned config is the zero value.
 func (a *App) ensureBenchmarkPowerAutotune(ctx context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (platform.BenchmarkPowerAutotuneConfig, error) {
 	cfgPath := platform.BenchmarkPowerSourceConfigPath(baseDir)
 	// Guard against a nil config as well as an error so that a (nil, nil)
 	// result from the loader can never panic on dereference below.
 	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath); err == nil && cfg != nil {
 		if logFunc != nil {
 			logFunc(fmt.Sprintf("benchmark autotune: using saved server power source %s", cfg.SelectedSource))
 		}
 		return *cfg, nil
 	}
 	if logFunc != nil {
 		logFunc("benchmark autotune: no saved power source config, running autotune first")
 	}
 	autotuneDir := filepath.Join(filepath.Dir(baseDir), "autotune")
 	// The runner's error is returned untouched so callers can keep matching it
 	// directly (for example against context cancellation).
 	if _, err := a.RunNvidiaPowerSourceAutotuneCtx(ctx, autotuneDir, opts, benchmarkKind, logFunc); err != nil {
 		return platform.BenchmarkPowerAutotuneConfig{}, err
 	}
 	cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(cfgPath)
 	if err != nil {
 		// The autotune reported success but its config is still unreadable;
 		// say which file was expected instead of surfacing a bare I/O error.
 		return platform.BenchmarkPowerAutotuneConfig{}, fmt.Errorf("benchmark autotune: load config %q after autotune: %w", cfgPath, err)
 	}
 	if cfg == nil {
 		return platform.BenchmarkPowerAutotuneConfig{}, fmt.Errorf("benchmark autotune: config %q is empty after autotune", cfgPath)
 	}
 	return *cfg, nil
 }
 func (a *App) RunNvidiaOfficialComputePack(ctx context.Context, baseDir string, durationSec int, gpuIndices []int, staggerSec int, logFunc func(string)) (string, error) {
 	if strings.TrimSpace(baseDir) == "" {
 		baseDir = DefaultSATBaseDir
--- a/audit/internal/app/app_test.go
+++ b/audit/internal/app/app_test.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"bee/audit/internal/platform"
@@ -123,6 +124,7 @@ type fakeSAT struct {
 	runNvidiaFn               func(string) (string, error)
 	runNvidiaBenchmarkFn      func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaPowerBenchFn     func(string, platform.NvidiaBenchmarkOptions) (string, error)
 	runNvidiaAutotuneFn       func(string, platform.NvidiaBenchmarkOptions, string) (string, error)
 	runNvidiaStressFn         func(string, platform.NvidiaStressOptions) (string, error)
 	runNvidiaComputeFn        func(string, int, []int) (string, error)
 	runNvidiaPowerFn          func(string, int, []int) (string, error)
@@ -163,6 +165,13 @@ func (f fakeSAT) RunNvidiaPowerBench(_ context.Context, baseDir string, opts pla
 	return f.runNvidiaFn(baseDir)
 }
 // RunNvidiaPowerSourceAutotune is the fake autotune entry point: it calls
 // runNvidiaAutotuneFn when that hook is set, otherwise it falls back to
 // runNvidiaFn with only the base directory. Context and log callback are ignored.
 func (f fakeSAT) RunNvidiaPowerSourceAutotune(_ context.Context, baseDir string, opts platform.NvidiaBenchmarkOptions, benchmarkKind string, _ func(string)) (string, error) {
 	hook := f.runNvidiaAutotuneFn
 	if hook == nil {
 		return f.runNvidiaFn(baseDir)
 	}
 	return hook(baseDir, opts, benchmarkKind)
 }
 func (f fakeSAT) RunNvidiaTargetedStressValidatePack(_ context.Context, baseDir string, durationSec int, gpuIndices []int, _ func(string)) (string, error) {
 	if f.runNvidiaTargetedStressFn != nil {
 		return f.runNvidiaTargetedStressFn(baseDir, durationSec, gpuIndices)
@@ -809,6 +818,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run", "verbose.log"), []byte("sat verbose"), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.MkdirAll(filepath.Join(exportDir, "bee-bench"), 0755); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json"), []byte(`{"version":1,"updated_at":"2026-04-20T01:02:03Z","selected_source":"sdr_psu_input","reason":"selected lowest relative error"}`), 0644); err != nil {
 		t.Fatal(err)
 	}
 	if err := os.WriteFile(filepath.Join(exportDir, "bee-sat", "memory-run.tar.gz"), []byte("nested sat archive"), 0644); err != nil {
 		t.Fatal(err)
 	}
@@ -836,6 +851,7 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	tr := tar.NewReader(gzr)
 	var names []string
 	var auditJSON string
 	var manifest string
 	for {
 		hdr, err := tr.Next()
 		if errors.Is(err, io.EOF) {
@@ -852,6 +868,13 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 			}
 			auditJSON = string(body)
 		}
 		if strings.HasSuffix(hdr.Name, "/manifest.txt") {
 			body, err := io.ReadAll(tr)
 			if err != nil {
 				t.Fatalf("read manifest entry: %v", err)
 			}
 			manifest = string(body)
 		}
 	}
 	for _, want := range []string{
@@ -895,6 +918,12 @@ func TestBuildSupportBundleIncludesExportDirContents(t *testing.T) {
 	if !contains(auditJSON, "PASCARI") || !contains(auditJSON, "NVIDIA H100") {
 		t.Fatalf("support bundle should keep real devices:\n%s", auditJSON)
 	}
 	if !contains(manifest, "files:") {
 		t.Fatalf("support bundle manifest missing files section:\n%s", manifest)
 	}
 	if !strings.Contains(manifest, "power_autotune_selected_source=sdr_psu_input") {
 		t.Fatalf("support bundle manifest missing autotune source:\n%s", manifest)
 	}
 }
 func TestMainBanner(t *testing.T) {
--- a/audit/internal/app/support_bundle.go
+++ b/audit/internal/app/support_bundle.go
@@ -2,6 +2,7 @@ package app
 import (
 	"archive/tar"
 	"bee/audit/internal/platform"
 	"compress/gzip"
 	"fmt"
 	"io"
@@ -424,6 +425,13 @@ func writeManifest(dst, exportDir, stageRoot string) error {
 	fmt.Fprintf(&body, "host=%s\n", hostnameOr("unknown"))
 	fmt.Fprintf(&body, "generated_at_utc=%s\n", time.Now().UTC().Format(time.RFC3339))
 	fmt.Fprintf(&body, "export_dir=%s\n", exportDir)
 	if cfg, err := platform.LoadBenchmarkPowerAutotuneConfig(filepath.Join(exportDir, "bee-bench", "power-source-autotune.json")); err == nil && cfg != nil {
 		fmt.Fprintf(&body, "power_autotune_selected_source=%s\n", cfg.SelectedSource)
 		fmt.Fprintf(&body, "power_autotune_updated_at=%s\n", cfg.UpdatedAt.UTC().Format(time.RFC3339))
 		if strings.TrimSpace(cfg.Reason) != "" {
 			fmt.Fprintf(&body, "power_autotune_reason=%s\n", cfg.Reason)
 		}
 	}
 	fmt.Fprintf(&body, "\nfiles:\n")
 	var files []string
--- a/audit/internal/platform/benchmark_report.go
+++ b/audit/internal/platform/benchmark_report.go
@@ -401,11 +401,15 @@ func renderBenchmarkReportWithCharts(result NvidiaBenchmarkResult) string {
 		}
 	}
-	// ── Server Power (IPMI) ───────────────────────────────────────────────────
+	// ── Server Power ───────────────────────────────────────────────────────────
 	if sp := result.ServerPower; sp != nil {
-		b.WriteString("## Server Power (IPMI)\n\n")
+		title := "## Server Power\n\n"
 		if sp.Source != "" {
 			title = fmt.Sprintf("## Server Power (`%s`)\n\n", sp.Source)
 		}
 		b.WriteString(title)
 		if !sp.Available {
-			b.WriteString("IPMI power measurement unavailable.\n\n")
+			b.WriteString("Server power measurement unavailable.\n\n")
 		} else {
 			spRows := [][]string{
 				{"Server idle", fmt.Sprintf("%.0f W", sp.IdleW)},
--- a/audit/internal/platform/live_metrics.go
+++ b/audit/internal/platform/live_metrics.go
@@ -16,14 +16,17 @@ import (
 // LiveMetricSample is a single point-in-time snapshot of server metrics
 // collected for the web UI metrics page.
 type LiveMetricSample struct {
-	Timestamp  time.Time      `json:"ts"`
+	Timestamp   time.Time      `json:"ts"`
-	Fans       []FanReading   `json:"fans"`
+	Fans        []FanReading   `json:"fans"`
-	Temps      []TempReading  `json:"temps"`
+	Temps       []TempReading  `json:"temps"`
-	PowerW     float64        `json:"power_w"`
+	PowerW      float64        `json:"power_w"`
-	PSUs       []PSUReading   `json:"psus,omitempty"`
+	PowerSource string         `json:"power_source,omitempty"`
-	CPULoadPct float64        `json:"cpu_load_pct"`
+	PowerMode   string         `json:"power_mode,omitempty"`
-	MemLoadPct float64        `json:"mem_load_pct"`
+	PowerReason string         `json:"power_reason,omitempty"`
-	GPUs       []GPUMetricRow `json:"gpus"`
+	PSUs        []PSUReading   `json:"psus,omitempty"`
 	CPULoadPct  float64        `json:"cpu_load_pct"`
 	MemLoadPct  float64        `json:"mem_load_pct"`
 	GPUs        []GPUMetricRow `json:"gpus"`
 }
 // PSUReading is a per-slot power supply input power reading.
@@ -67,15 +70,13 @@ func SampleLiveMetrics() LiveMetricSample {
 	// Per-PSU power — populated when IPMI SDR has Power Supply entities with Watt readings
 	s.PSUs = samplePSUPower()
-	// System power: prefer sum of PSU AC inputs (full wall draw); fall back to DCMI.
+	// System power: use the global autotune-selected source when configured,
-	if len(s.PSUs) > 0 {
+	// otherwise fall back to the historical heuristic and mark the mode.
-		var total float64
+	if powerW, decision, err := SampleSystemPowerResolved(""); err == nil {
-		for _, p := range s.PSUs {
+		s.PowerW = powerW
-			total += p.PowerW
+		s.PowerSource = decision.EffectiveSource
-		}
+		s.PowerMode = decision.Mode
-		s.PowerW = total
+		s.PowerReason = decision.Reason
 	} else {
 		s.PowerW = sampleSystemPower()
 	}
 	// CPU load — from /proc/stat
--- a/audit/internal/platform/sat_fan_stress.go
+++ b/audit/internal/platform/sat_fan_stress.go
@@ -43,17 +43,22 @@ type GPUStressMetric struct {
 // FanStressRow is one second-interval telemetry sample covering all monitored dimensions.
 type FanStressRow struct {
-	TimestampUTC string
+	TimestampUTC   string
-	ElapsedSec   float64
+	ElapsedSec     float64
-	Phase        string // "baseline", "load1", "pause", "load2", "cooldown"
+	Phase          string // "baseline", "load1", "pause", "load2", "cooldown"
-	GPUs         []GPUStressMetric
+	GPUs           []GPUStressMetric
-	Fans         []FanReading
+	Fans           []FanReading
-	CPUMaxTempC  float64 // highest CPU temperature from ipmitool / sensors
+	CPUMaxTempC    float64 // highest CPU temperature from ipmitool / sensors
-	SysPowerW    float64 // DCMI system power reading
+	SysPowerW      float64
 	SysPowerSource string
 	SysPowerMode   string
 }
 // cachedPowerReading holds the most recent system power reading together with
 // the metadata describing how it was obtained. effectiveSystemPowerReading
 // replaces it whenever a reading greater than zero arrives and otherwise may
 // keep serving it for up to systemPowerHoldTTL.
 type cachedPowerReading struct {
 	// Value is the cached power reading (presumably watts — confirm against the sampler).
 	Value     float64
 	// Source is the power source label passed in alongside the reading.
 	Source    string
 	// Mode is the resolution mode passed in alongside the reading.
 	Mode      string
 	// Reason is the explanation passed in alongside the reading.
 	Reason    string
 	// UpdatedAt is the time the reading was stored; it is compared against
 	// systemPowerHoldTTL to decide whether the cached value may still be used.
 	UpdatedAt time.Time
 }
@@ -278,7 +283,7 @@ func sampleFanStressRow(gpuIndices []int, phase string, elapsed float64) FanStre
 	row.GPUs = sampleGPUStressMetrics(gpuIndices)
 	row.Fans, _ = sampleFanSpeeds()
 	row.CPUMaxTempC = sampleCPUMaxTemp()
-	row.SysPowerW = sampleSystemPower()
+	row.SysPowerW, row.SysPowerSource, row.SysPowerMode = sampleSystemPowerResolved()
 	return row
 }
@@ -763,19 +768,19 @@ func sampleCPUTempViaSensors() float64 {
 	return max
 }
-// sampleSystemPower reads system power draw via DCMI.
+// sampleSystemPowerResolved reads system power via the global autotune source,
-func sampleSystemPower() float64 {
+// falling back to the historical heuristic before autotune or when degraded.
 func sampleSystemPowerResolved() (float64, string, string) {
 	now := time.Now()
-	current := 0.0
+	current, decision, err := SampleSystemPowerResolved("")
 	out, err := exec.Command("ipmitool", "dcmi", "power", "reading").Output()
 	if err == nil {
 		current = parseDCMIPowerReading(string(out))
 	}
 	systemPowerCacheMu.Lock()
 	defer systemPowerCacheMu.Unlock()
-	value, updated := effectiveSystemPowerReading(systemPowerCache, current, now)
+	if err != nil {
 		current = 0
 	}
 	value, updated := effectiveSystemPowerReading(systemPowerCache, current, decision.EffectiveSource, decision.Mode, decision.Reason, now)
 	systemPowerCache = updated
-	return value
+	return value, updated.Source, updated.Mode
 }
 // parseDCMIPowerReading extracts the instantaneous power reading from ipmitool dcmi output.
@@ -798,9 +803,9 @@ func parseDCMIPowerReading(raw string) float64 {
 	return 0
 }
-func effectiveSystemPowerReading(cache cachedPowerReading, current float64, now time.Time) (float64, cachedPowerReading) {
+func effectiveSystemPowerReading(cache cachedPowerReading, current float64, source, mode, reason string, now time.Time) (float64, cachedPowerReading) {
 	if current > 0 {
-		cache = cachedPowerReading{Value: current, UpdatedAt: now}
+		cache = cachedPowerReading{Value: current, Source: source, Mode: mode, Reason: reason, UpdatedAt: now}
 		return current, cache
 	}
 	if cache.Value > 0 && !cache.UpdatedAt.IsZero() && now.Sub(cache.UpdatedAt) <= systemPowerHoldTTL {
--- a/audit/internal/platform/sat_fan_stress_test.go
+++ b/audit/internal/platform/sat_fan_stress_test.go
@@ -112,7 +112,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	now := time.Now()
 	cache := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-5 * time.Second)}
-	got, updated := effectiveSystemPowerReading(cache, 0, now)
+	got, updated := effectiveSystemPowerReading(cache, 0, "", "", "", now)
 	if got != 480 {
 		t.Fatalf("got=%v want cached 480", got)
 	}
@@ -120,7 +120,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 		t.Fatalf("updated=%+v", updated)
 	}
-	got, updated = effectiveSystemPowerReading(cache, 530, now)
+	got, updated = effectiveSystemPowerReading(cache, 530, "dcmi", "fallback", "test", now)
 	if got != 530 {
 		t.Fatalf("got=%v want 530", got)
 	}
@@ -129,7 +129,7 @@ func TestEffectiveSystemPowerReading(t *testing.T) {
 	}
 	expired := cachedPowerReading{Value: 480, UpdatedAt: now.Add(-systemPowerHoldTTL - time.Second)}
-	got, _ = effectiveSystemPowerReading(expired, 0, now)
+	got, _ = effectiveSystemPowerReading(expired, 0, "", "", "", now)
 	if got != 0 {
 		t.Fatalf("expired cache returned %v want 0", got)
 	}
--- a/audit/internal/webui/api.go
+++ b/audit/internal/webui/api.go
@@ -127,7 +127,7 @@ func defaultTaskPriority(target string, params taskParams) int {
 		return taskPriorityInstallToRAM
 	case "audit":
 		return taskPriorityAudit
-	case "nvidia-bench-perf", "nvidia-bench-power":
+	case "nvidia-bench-perf", "nvidia-bench-power", "nvidia-bench-autotune":
 		return taskPriorityBenchmark
 	case "nvidia-stress", "amd-stress", "memory-stress", "sat-stress", "platform-stress", "nvidia-compute":
 		return taskPriorityBurn
@@ -701,6 +701,78 @@ func (h *handler) handleAPIBenchmarkNvidiaRunKind(target string) http.HandlerFun
 	}
 }
 // handleAPIBenchmarkAutotuneRun builds the HTTP handler that queues one
 // "nvidia-bench-autotune" task.
 //
 // The optional JSON request body may carry "profile", "benchmark_kind" and
 // "size_mb". A blank profile defaults to "standard" and a blank benchmark kind
 // to "power-fit"; an empty body is accepted. The handler answers 503 when no
 // App is configured and 400 when the body is not valid JSON; otherwise it
 // enqueues the task on globalQueue and writes the task-run response.
 func (h *handler) handleAPIBenchmarkAutotuneRun() http.HandlerFunc {
 	const target = "nvidia-bench-autotune"
 	return func(w http.ResponseWriter, r *http.Request) {
 		if h.opts.App == nil {
 			writeError(w, http.StatusServiceUnavailable, "app not configured")
 			return
 		}
 		var payload struct {
 			Profile       string `json:"profile"`
 			BenchmarkKind string `json:"benchmark_kind"`
 			SizeMB        int    `json:"size_mb"`
 		}
 		if r.Body != nil {
 			decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 			// io.EOF only means the body was empty, which is allowed.
 			if decodeErr != nil && !errors.Is(decodeErr, io.EOF) {
 				writeError(w, http.StatusBadRequest, "invalid request body")
 				return
 			}
 		}
 		profile, kind := strings.TrimSpace(payload.Profile), strings.TrimSpace(payload.BenchmarkKind)
 		if profile == "" {
 			profile = "standard"
 		}
 		if kind == "" {
 			kind = "power-fit"
 		}
 		createdAt := time.Now()
 		displayName := fmt.Sprintf("NVIDIA Benchmark Autotune · %s · %s", profile, kind)
 		params := taskParams{
 			BenchmarkProfile: profile,
 			BenchmarkKind:    kind,
 			SizeMB:           payload.SizeMB,
 			DisplayName:      displayName,
 		}
 		task := &Task{
 			ID:        newJobID("bee-bench-autotune"),
 			Name:      displayName,
 			Target:    target,
 			Priority:  defaultTaskPriority(target, taskParams{}),
 			Status:    TaskPending,
 			CreatedAt: createdAt,
 			params:    params,
 		}
 		globalQueue.enqueue(task)
 		writeTaskRunResponse(w, []*Task{task})
 	}
 }
 // handleAPIBenchmarkAutotuneStatus reports whether a power-source autotune
 // config has been saved, together with the currently resolved system-power
 // decision for the handler's export directory.
 //
 // Responses:
 //   - 503 when no App is configured;
 //   - 200 {"configured": false, "decision": ...} when the config file does not exist;
 //   - 200 {"configured": true, "config": ..., "decision": ...} when it loads;
 //   - 500 with the error text for any other load failure.
 //
 // NOTE(review): the config is loaded through App.LoadBenchmarkPowerAutotune
 // while the decision is resolved from h.opts.ExportDir — confirm both refer to
 // the same location in every deployment.
 func (h *handler) handleAPIBenchmarkAutotuneStatus(w http.ResponseWriter, r *http.Request) {
 	if h.opts.App == nil {
 		writeError(w, http.StatusServiceUnavailable, "app not configured")
 		return
 	}
 	cfg, err := h.opts.App.LoadBenchmarkPowerAutotune()
 	// No explicit WriteHeader(200) before writeJSON: 200 is the implicit status,
 	// and net/http ignores header changes made after WriteHeader, so writing the
 	// status first would drop any header writeJSON sets afterwards.
 	switch {
 	case err == nil:
 		writeJSON(w, map[string]any{
 			"configured": true,
 			"config":     cfg,
 			"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
 		})
 	case errors.Is(err, os.ErrNotExist):
 		// errors.Is also matches a not-exist error that was wrapped by the
 		// loader, which os.IsNotExist would miss.
 		writeJSON(w, map[string]any{
 			"configured": false,
 			"decision":   platform.ResolveSystemPowerDecision(h.opts.ExportDir),
 		})
 	default:
 		writeError(w, http.StatusInternalServerError, err.Error())
 	}
 }
 // handleAPIBenchmarkNvidiaRun serves the request with the run handler built
 // for the "nvidia-bench-perf" target.
 func (h *handler) handleAPIBenchmarkNvidiaRun(w http.ResponseWriter, r *http.Request) {
 	perfRun := h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf")
 	perfRun.ServeHTTP(w, r)
 }
--- a/audit/internal/webui/api_test.go
+++ b/audit/internal/webui/api_test.go
@@ -195,6 +195,40 @@ func TestHandleAPIBenchmarkPowerFitRampQueuesBenchmarkPowerFitTasks(t *testing.T
 	}
 }
 // TestHandleAPIBenchmarkAutotuneRunQueuesTask posts an autotune run request and
 // checks that exactly one "nvidia-bench-autotune" task carrying the requested
 // benchmark kind lands on the global queue.
 func TestHandleAPIBenchmarkAutotuneRunQueuesTask(t *testing.T) {
 	// Start from an empty queue and put the previous contents back afterwards.
 	globalQueue.mu.Lock()
 	savedTasks := globalQueue.tasks
 	globalQueue.tasks = nil
 	globalQueue.mu.Unlock()
 	t.Cleanup(func() {
 		globalQueue.mu.Lock()
 		defer globalQueue.mu.Unlock()
 		globalQueue.tasks = savedTasks
 	})
 	const payload = `{"profile":"standard","benchmark_kind":"power-fit"}`
 	srv := &handler{opts: HandlerOptions{App: &app.App{}}}
 	recorder := httptest.NewRecorder()
 	request := httptest.NewRequest("POST", "/api/bee-bench/nvidia/autotune/run", strings.NewReader(payload))
 	srv.handleAPIBenchmarkAutotuneRun().ServeHTTP(recorder, request)
 	if got := recorder.Code; got != 200 {
 		t.Fatalf("status=%d body=%s", got, recorder.Body.String())
 	}
 	globalQueue.mu.Lock()
 	defer globalQueue.mu.Unlock()
 	if queued := len(globalQueue.tasks); queued != 1 {
 		t.Fatalf("tasks=%d want 1", queued)
 	}
 	queuedTask := globalQueue.tasks[0]
 	if queuedTask.Target != "nvidia-bench-autotune" {
 		t.Fatalf("task target=%q want nvidia-bench-autotune", queuedTask.Target)
 	}
 	if kind := queuedTask.params.BenchmarkKind; kind != "power-fit" {
 		t.Fatalf("task benchmark kind=%q want power-fit", kind)
 	}
 }
 func TestHandleAPISATRunSplitsMixedNvidiaTaskSet(t *testing.T) {
 	globalQueue.mu.Lock()
 	originalTasks := globalQueue.tasks
--- a/audit/internal/webui/metricsdb.go
+++ b/audit/internal/webui/metricsdb.go
@@ -53,6 +53,9 @@ CREATE TABLE IF NOT EXISTS sys_metrics (
  cpu_load_pct REAL,
  mem_load_pct REAL,
  power_w      REAL,
  power_source TEXT,
  power_mode   TEXT,
  power_reason TEXT,
  PRIMARY KEY (ts)
 );
 CREATE TABLE IF NOT EXISTS gpu_metrics (
@@ -86,7 +89,16 @@ CREATE TABLE IF NOT EXISTS temp_metrics (
 	if err := ensureMetricsColumn(db, "gpu_metrics", "clock_mhz", "REAL"); err != nil {
 		return err
 	}
-	return ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL")
+	if err := ensureMetricsColumn(db, "gpu_metrics", "mem_clock_mhz", "REAL"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_source", "TEXT"); err != nil {
 		return err
 	}
 	if err := ensureMetricsColumn(db, "sys_metrics", "power_mode", "TEXT"); err != nil {
 		return err
 	}
 	return ensureMetricsColumn(db, "sys_metrics", "power_reason", "TEXT")
 }
 func ensureMetricsColumn(db *sql.DB, table, column, definition string) error {
@@ -125,8 +137,8 @@ func (m *MetricsDB) Write(s platform.LiveMetricSample) error {
 	defer func() { _ = tx.Rollback() }()
 	_, err = tx.Exec(
-		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w) VALUES(?,?,?,?)`,
+		`INSERT OR REPLACE INTO sys_metrics(ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason) VALUES(?,?,?,?,?,?,?)`,
-		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW,
+		ts, s.CPULoadPct, s.MemLoadPct, s.PowerW, s.PowerSource, s.PowerMode, s.PowerReason,
 	)
 	if err != nil {
 		return err
@@ -213,12 +225,12 @@ func (m *MetricsDB) Prune(before time.Time) error {
 // LoadRecent returns up to n samples in chronological order (oldest first).
 func (m *MetricsDB) LoadRecent(n int) ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM (SELECT ts,cpu_load_pct,mem_load_pct,power_w,power_source,power_mode,power_reason FROM sys_metrics ORDER BY ts DESC LIMIT ?) ORDER BY ts`, n)
 }
 // LoadAll returns all persisted samples in chronological order (oldest first).
 func (m *MetricsDB) LoadAll() ([]platform.LiveMetricSample, error) {
-	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics ORDER BY ts`, nil)
+	return m.loadSamples(`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics ORDER BY ts`, nil)
 }
 // LoadBetween returns samples in chronological order within the given time window.
@@ -233,7 +245,7 @@ func (m *MetricsDB) LoadBetween(start, end time.Time) ([]platform.LiveMetricSamp
 		start, end = end, start
 	}
 	return m.loadSamples(
-		`SELECT ts,cpu_load_pct,mem_load_pct,power_w FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
+		`SELECT ts,cpu_load_pct,mem_load_pct,power_w,IFNULL(power_source,''),IFNULL(power_mode,''),IFNULL(power_reason,'') FROM sys_metrics WHERE ts>=? AND ts<=? ORDER BY ts`,
 		start.Unix(), end.Unix(),
 	)
 }
@@ -249,11 +261,14 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	type sysRow struct {
 		ts            int64
 		cpu, mem, pwr float64
 		powerSource   string
 		powerMode     string
 		powerReason   string
 	}
 	var sysRows []sysRow
 	for rows.Next() {
 		var r sysRow
-		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr); err != nil {
+		if err := rows.Scan(&r.ts, &r.cpu, &r.mem, &r.pwr, &r.powerSource, &r.powerMode, &r.powerReason); err != nil {
 			continue
 		}
 		sysRows = append(sysRows, r)
@@ -363,10 +378,13 @@ func (m *MetricsDB) loadSamples(query string, args ...any) ([]platform.LiveMetri
 	samples := make([]platform.LiveMetricSample, len(sysRows))
 	for i, r := range sysRows {
 		s := platform.LiveMetricSample{
-			Timestamp:  time.Unix(r.ts, 0).UTC(),
+			Timestamp:   time.Unix(r.ts, 0).UTC(),
-			CPULoadPct: r.cpu,
+			CPULoadPct:  r.cpu,
-			MemLoadPct: r.mem,
+			MemLoadPct:  r.mem,
-			PowerW:     r.pwr,
+			PowerW:      r.pwr,
 			PowerSource: r.powerSource,
 			PowerMode:   r.powerMode,
 			PowerReason: r.powerReason,
 		}
 		for _, idx := range gpuIndices {
 			if g, ok := gpuData[gpuKey{r.ts, idx}]; ok {
--- a/audit/internal/webui/page_benchmark.go
+++ b/audit/internal/webui/page_benchmark.go
@@ -69,6 +69,7 @@ func renderBenchmark(opts HandlerOptions) string {
      <span id="benchmark-run-nccl" hidden>nccl-auto</span>
      <span id="benchmark-run-status" style="margin-left:10px;font-size:12px;color:var(--muted)"></span>
      <div id="benchmark-autotune-status" style="margin-top:10px;font-size:12px;color:var(--muted)">Autotune status: loading…</div>
      <div style="margin-top:6px;font-size:12px;color:var(--muted)">Autotune overwrites the saved system-power source and applies it to all new power charts and tests.</div>
    </div>
  </div>
--- a/audit/internal/webui/server.go
+++ b/audit/internal/webui/server.go
@@ -271,6 +271,8 @@ func NewHandler(opts HandlerOptions) http.Handler {
 	mux.HandleFunc("POST /api/sat/abort", h.handleAPISATAbort)
 	mux.HandleFunc("POST /api/bee-bench/nvidia/perf/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-perf"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/power/run", h.handleAPIBenchmarkNvidiaRunKind("nvidia-bench-power"))
 	mux.HandleFunc("POST /api/bee-bench/nvidia/autotune/run", h.handleAPIBenchmarkAutotuneRun())
 	mux.HandleFunc("GET /api/bee-bench/nvidia/autotune/status", h.handleAPIBenchmarkAutotuneStatus)
 	mux.HandleFunc("GET /api/benchmark/results", h.handleAPIBenchmarkResults)
 	// Tasks
@@ -687,41 +689,22 @@ func chartDataFromSamples(path string, samples []platform.LiveMetricSample) (dat
 	case path == "server-power":
 		title = "System Power"
-		// Use per-PSU stacked chart when PSU SDR data is available.
+		power := make([]float64, len(samples))
-		// Collect the union of PSU slots seen across all samples.
+		label := "Power W"
-		psuSlots := psuSlotsFromSamples(samples)
+		for i, s := range samples {
-		if len(psuSlots) > 0 {
+			power[i] = s.PowerW
-			// Build one dataset per PSU slot.
+			if strings.TrimSpace(s.PowerSource) != "" {
-			psuDatasets := make([][]float64, len(psuSlots))
+				label = fmt.Sprintf("Power W · %s", s.PowerSource)
-			psuNames := make([]string, len(psuSlots))
+				if strings.TrimSpace(s.PowerMode) != "" {
-			for si, slot := range psuSlots {
+					label += fmt.Sprintf(" (%s)", s.PowerMode)
 				ds := make([]float64, len(samples))
 				for i, s := range samples {
 					for _, psu := range s.PSUs {
 						if psu.Slot == slot {
 							ds[i] = psu.PowerW
 							break
 						}
 					}
 				}
 				psuDatasets[si] = normalizePowerSeries(ds)
 				psuNames[si] = fmt.Sprintf("PSU %d", slot)
 			}
 			datasets = psuDatasets
 			names = psuNames
 			stacked = len(psuDatasets) > 0
 			yMax = autoMax120(psuStackedTotal(psuDatasets))
 		} else {
 			power := make([]float64, len(samples))
 			for i, s := range samples {
 				power[i] = s.PowerW
 			}
 			power = normalizePowerSeries(power)
 			datasets = [][]float64{power}
 			names = []string{"Power W"}
 			yMin = floatPtr(0)
 			yMax = autoMax120(power)
 		}
 		power = normalizePowerSeries(power)
 		datasets = [][]float64{power}
 		names = []string{label}
 		yMin = floatPtr(0)
 		yMax = autoMax120(power)
 	case path == "server-fans":
 		title = "Fan RPM"
--- a/audit/internal/webui/server_test.go
+++ b/audit/internal/webui/server_test.go
@@ -420,7 +420,7 @@ func TestHandleMetricsChartSVGRendersCustomSVG(t *testing.T) {
 	}
 }
-func TestChartDataFromSamplesServerPowerUsesPerPSUDatasets(t *testing.T) {
+func TestChartDataFromSamplesServerPowerUsesResolvedSystemPower(t *testing.T) {
 	start := time.Date(2026, 4, 5, 12, 0, 0, 0, time.UTC)
 	samples := []platform.LiveMetricSample{
 		{
@@ -429,7 +429,9 @@ func TestChartDataFromSamplesServerPowerUsesPerPSUDatasets(t *testing.T) {
 				{Slot: 1, PowerW: 120},
 				{Slot: 2, PowerW: 130},
 			},
-			PowerW: 250,
+			PowerW:      250,
 			PowerSource: "sdr_psu_input",
 			PowerMode:   "autotuned",
 		},
 		{
 			Timestamp: start.Add(time.Minute),
@@ -437,7 +439,9 @@ func TestChartDataFromSamplesServerPowerUsesPerPSUDatasets(t *testing.T) {
 				{Slot: 1, PowerW: 140},
 				{Slot: 2, PowerW: 135},
 			},
-			PowerW: 275,
+			PowerW:      275,
 			PowerSource: "sdr_psu_input",
 			PowerMode:   "autotuned",
 		},
 	}
@@ -448,13 +452,13 @@ func TestChartDataFromSamplesServerPowerUsesPerPSUDatasets(t *testing.T) {
 	if title != "System Power" {
 		t.Fatalf("title=%q", title)
 	}
-	if !stacked {
+	if stacked {
-		t.Fatal("expected stacked PSU chart")
+		t.Fatal("server-power should use resolved system power, not stacked PSU inputs")
 	}
-	if len(datasets) != 2 || len(names) != 2 {
+	if len(datasets) != 1 || len(names) != 1 {
-		t.Fatalf("datasets=%d names=%d want 2/2", len(datasets), len(names))
+		t.Fatalf("datasets=%d names=%d want 1/1", len(datasets), len(names))
 	}
-	if names[0] != "PSU 1" || names[1] != "PSU 2" {
+	if names[0] != "Power W · sdr_psu_input (autotuned)" {
 		t.Fatalf("names=%v", names)
 	}
 }
@@ -689,9 +693,12 @@ func TestBenchmarkPageRendersGPUSelectionControls(t *testing.T) {
 		`/api/gpu/nvidia`,
 		`/api/bee-bench/nvidia/perf/run`,
 		`/api/bee-bench/nvidia/power/run`,
 		`/api/bee-bench/nvidia/autotune/run`,
 		`/api/bee-bench/nvidia/autotune/status`,
 		`benchmark-run-nccl`,
 		`Run Performance Benchmark`,
 		`Run Power / Thermal Fit`,
 		`Autotune`,
 	} {
 		if !strings.Contains(body, needle) {
 			t.Fatalf("benchmark page missing %q: %s", needle, body)
--- a/audit/internal/webui/tasks.go
+++ b/audit/internal/webui/tasks.go
@@ -34,6 +34,7 @@ var taskNames = map[string]string{
 	"nvidia-targeted-stress": "NVIDIA Targeted Stress Validate (dcgmi diag targeted_stress)",
 	"nvidia-bench-perf":      "NVIDIA Bee Bench Perf",
 	"nvidia-bench-power":     "NVIDIA Bee Bench Power",
 	"nvidia-bench-autotune":  "NVIDIA Bee Bench Power Source Autotune",
 	"nvidia-compute":         "NVIDIA Max Compute Load (dcgmproftester)",
 	"nvidia-targeted-power":  "NVIDIA Targeted Power (dcgmi diag targeted_power)",
 	"nvidia-pulse":           "NVIDIA Pulse Test (dcgmi diag pulse_test)",
@@ -125,6 +126,7 @@ type taskParams struct {
 	Loader             string   `json:"loader,omitempty"`
 	BurnProfile        string   `json:"burn_profile,omitempty"`
 	BenchmarkProfile   string   `json:"benchmark_profile,omitempty"`
 	BenchmarkKind      string   `json:"benchmark_kind,omitempty"`
 	RunNCCL            bool     `json:"run_nccl,omitempty"`
 	ParallelGPUs       bool     `json:"parallel_gpus,omitempty"`
 	RampStep           int      `json:"ramp_step,omitempty"`
@@ -686,6 +688,15 @@ func (q *taskQueue) runTask(t *Task, j *jobState, ctx context.Context) {
 			RampTotal:         t.params.RampTotal,
 			RampRunID:         t.params.RampRunID,
 		}, j.append)
 	case "nvidia-bench-autotune":
 		if a == nil {
 			err = fmt.Errorf("app not configured")
 			break
 		}
 		archive, err = a.RunNvidiaPowerSourceAutotuneCtx(ctx, app.DefaultBeeBenchAutotuneDir, platform.NvidiaBenchmarkOptions{
 			Profile: t.params.BenchmarkProfile,
 			SizeMB:  t.params.SizeMB,
 		}, t.params.BenchmarkKind, j.append)
 	case "nvidia-compute":
 		if a == nil {
 			err = fmt.Errorf("app not configured")