package platform

import (
	"context"
	"encoding/json"
	"fmt"
	"math"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

// Tunables and defaults for the benchmark power-source autotune procedure.
const (
	// benchmarkPowerAutotuneVersion is the version stamped into an autotune
	// config that is saved without a positive Version of its own.
	benchmarkPowerAutotuneVersion         = 1
	// benchmarkPowerAutotuneIdleSec is the length, in seconds, of the idle
	// sampling window that precedes the load stage.
	benchmarkPowerAutotuneIdleSec         = 60
	// benchmarkPowerAutotuneLoadSec is the length, in seconds, requested for
	// the load command and for the load sampling window.
	benchmarkPowerAutotuneLoadSec         = 90
	// benchmarkPowerAutotuneSampleInterval is the pause, in seconds, between
	// telemetry samples; it is also the sampler default when a caller passes
	// a non-positive interval.
	benchmarkPowerAutotuneSampleInterval  = 3
	// defaultBenchmarkPowerSourceConfigPath is the config location used when
	// no base directory is supplied to BenchmarkPowerSourceConfigPath.
	defaultBenchmarkPowerSourceConfigPath = "/appdata/bee/export/bee-bench/power-source-autotune.json"
)

// BenchmarkPowerSourceConfigPath returns the location of the power-source
// autotune config that belongs to baseDir.
//
// A blank (empty or whitespace-only) baseDir yields the package default path.
// Otherwise the config file is placed in the parent directory of the trimmed
// baseDir.
func BenchmarkPowerSourceConfigPath(baseDir string) string {
	trimmed := strings.TrimSpace(baseDir)
	if trimmed != "" {
		parent := filepath.Dir(trimmed)
		return filepath.Join(parent, "power-source-autotune.json")
	}
	return defaultBenchmarkPowerSourceConfigPath
}

// LoadBenchmarkPowerAutotuneConfig reads the file at path and decodes it as a
// JSON autotune config.
//
// Read and decode errors are returned unchanged. A config whose
// SelectedSource is blank after trimming is rejected with an error.
func LoadBenchmarkPowerAutotuneConfig(path string) (*BenchmarkPowerAutotuneConfig, error) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, readErr
	}
	parsed := new(BenchmarkPowerAutotuneConfig)
	if decodeErr := json.Unmarshal(contents, parsed); decodeErr != nil {
		return nil, decodeErr
	}
	if source := strings.TrimSpace(parsed.SelectedSource); source == "" {
		return nil, fmt.Errorf("autotune config missing selected_source")
	}
	return parsed, nil
}

// SaveBenchmarkPowerAutotuneConfig writes cfg as indented JSON to path.
//
// The parent directory of path is created if needed. A non-positive
// cfg.Version is replaced with benchmarkPowerAutotuneVersion before encoding
// (cfg is passed by value, so the caller's copy is untouched). The data is
// first written to "<path>.tmp" and then renamed over path, so readers never
// observe a partially written config.
//
// It returns an error when path is blank, or the underlying error when the
// directory cannot be created, cfg cannot be encoded, or the file cannot be
// written or renamed. On a failed write or rename the staging file is
// removed so that a stale "<path>.tmp" is not left behind.
func SaveBenchmarkPowerAutotuneConfig(path string, cfg BenchmarkPowerAutotuneConfig) error {
	if strings.TrimSpace(path) == "" {
		return fmt.Errorf("empty autotune config path")
	}
	if cfg.Version <= 0 {
		cfg.Version = benchmarkPowerAutotuneVersion
	}
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return err
	}
	data, err := json.MarshalIndent(cfg, "", "  ")
	if err != nil {
		return err
	}
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, data, 0644); err != nil {
		// Best-effort cleanup of a possibly truncated staging file; the
		// write error is the one worth reporting.
		_ = os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		_ = os.Remove(tmp)
		return err
	}
	return nil
}

// LoadSystemPowerSourceConfig loads the autotune config whose path is derived
// from exportDir via BenchmarkPowerSourceConfigPath. Errors from
// LoadBenchmarkPowerAutotuneConfig are returned unchanged.
func LoadSystemPowerSourceConfig(exportDir string) (*BenchmarkPowerAutotuneConfig, error) {
	configPath := BenchmarkPowerSourceConfigPath(exportDir)
	return LoadBenchmarkPowerAutotuneConfig(configPath)
}

// ResetBenchmarkPowerAutotuneConfig deletes the autotune config at path.
//
// A blank path is rejected with an error. A file that does not exist counts
// as success; any other removal error is returned as-is.
func ResetBenchmarkPowerAutotuneConfig(path string) error {
	if strings.TrimSpace(path) == "" {
		return fmt.Errorf("empty autotune config path")
	}
	removeErr := os.Remove(path)
	if removeErr == nil || os.IsNotExist(removeErr) {
		return nil
	}
	return removeErr
}

// normalizeBenchmarkPowerSource maps a free-form source name onto one of the
// two known source identifiers. The comparison ignores case and surrounding
// whitespace; anything that is not the SDR PSU input source is treated as
// DCMI.
func normalizeBenchmarkPowerSource(source string) string {
	canonical := strings.TrimSpace(strings.ToLower(source))
	if canonical == BenchmarkPowerSourceSDRPSUInput {
		return BenchmarkPowerSourceSDRPSUInput
	}
	return BenchmarkPowerSourceDCMI
}

// ResolveSystemPowerDecision decides which server power source to use for
// exportDir.
//
// When a saved autotune config loads successfully and names a source, that
// source (normalized) is returned with Mode "autotuned" and Configured set.
// Otherwise the function samples the live sources and returns a "fallback"
// decision: the SDR PSU input source if it currently reports a positive
// reading, else DCMI.
func ResolveSystemPowerDecision(exportDir string) SystemPowerSourceDecision {
	if cfg, err := LoadSystemPowerSourceConfig(exportDir); err == nil && cfg != nil {
		if strings.TrimSpace(cfg.SelectedSource) != "" {
			source := normalizeBenchmarkPowerSource(cfg.SelectedSource)
			return SystemPowerSourceDecision{
				Configured:      true,
				SelectedSource:  source,
				EffectiveSource: source,
				Mode:            "autotuned",
				Reason:          strings.TrimSpace(cfg.Reason),
				ConfiguredAt:    cfg.UpdatedAt,
			}
		}
	}

	// No usable config: start from the DCMI fallback and upgrade to the SDR
	// PSU input source only when it answers with a positive value right now.
	fallback := SystemPowerSourceDecision{
		Configured:      false,
		EffectiveSource: BenchmarkPowerSourceDCMI,
		Mode:            "fallback",
		Reason:          "autotune config not found; using temporary fallback source dcmi",
	}
	readings := sampleBenchmarkPowerSources()
	if readings[BenchmarkPowerSourceSDRPSUInput] > 0 {
		fallback.EffectiveSource = BenchmarkPowerSourceSDRPSUInput
		fallback.Reason = "autotune config not found; using temporary fallback source sdr_psu_input"
	}
	return fallback
}

// SampleSystemPowerResolved resolves the power source for exportDir and takes
// one reading from it.
//
// It returns the reading, the decision that produced it, and an error:
//
//   - When the effective source yields a positive value, that value is
//     returned with the decision unchanged.
//   - When the source was configured (autotuned) but yields no usable value,
//     the other known source is tried. On success the decision is marked
//     "degraded" and its EffectiveSource switched to the fallback. If the
//     fallback also fails, the decision is marked "degraded" and an error is
//     returned.
//   - For an unconfigured decision with no usable value, a generic
//     "system power source unavailable" error is returned.
//
// A zero reading is always accompanied by a non-nil error, including when
// the configured source answered without error but with a non-positive
// value.
func SampleSystemPowerResolved(exportDir string) (float64, SystemPowerSourceDecision, error) {
	decision := ResolveSystemPowerDecision(exportDir)
	if decision.EffectiveSource == "" {
		return 0, decision, fmt.Errorf("system power source unavailable")
	}

	value, err := queryBenchmarkPowerSourceW(decision.EffectiveSource)
	if err == nil && value > 0 {
		return value, decision, nil
	}
	if !decision.Configured {
		return 0, decision, fmt.Errorf("system power source unavailable")
	}

	// The configured source failed; try whichever known source it is not.
	fallback := BenchmarkPowerSourceDCMI
	if decision.EffectiveSource == BenchmarkPowerSourceDCMI {
		fallback = BenchmarkPowerSourceSDRPSUInput
	}
	if fallbackValue, fallbackErr := queryBenchmarkPowerSourceW(fallback); fallbackErr == nil && fallbackValue > 0 {
		decision.Mode = "degraded"
		decision.Reason = fmt.Sprintf("configured source %s unavailable; using degraded fallback %s", decision.SelectedSource, fallback)
		decision.EffectiveSource = fallback
		return fallbackValue, decision, nil
	}

	decision.Mode = "degraded"
	decision.Reason = fmt.Sprintf("configured source %s unavailable and no fallback source responded", decision.SelectedSource)
	if err == nil {
		// The primary query succeeded but reported a non-positive value;
		// never hand back a zero reading together with a nil error.
		err = fmt.Errorf("configured source %s returned no usable power reading", decision.SelectedSource)
	}
	return 0, decision, err
}

// queryBenchmarkPowerSourceW takes one reading from the named power source.
//
// The name is normalized first. The SDR PSU input source is read from the
// IPMI SDR power sensors and reported as unavailable unless its value is
// positive; every other name is answered by queryIPMIServerPowerW.
func queryBenchmarkPowerSourceW(source string) (float64, error) {
	if normalizeBenchmarkPowerSource(source) != BenchmarkPowerSourceSDRPSUInput {
		return queryIPMIServerPowerW()
	}
	reading := sampleIPMISDRPowerSensors()
	if watts := reading.PSUInW; watts > 0 {
		return watts, nil
	}
	return 0, fmt.Errorf("sdr psu input unavailable")
}

// sampleBenchmarkPowerSources queries both known power sources once and
// returns the readings keyed by source name. A source that errors or reports
// a non-positive value is left out of the map.
func sampleBenchmarkPowerSources() map[string]float64 {
	readings := make(map[string]float64)

	dcmiW, dcmiErr := queryIPMIServerPowerW()
	if dcmiErr == nil && dcmiW > 0 {
		readings[BenchmarkPowerSourceDCMI] = dcmiW
	}

	sdrW, sdrErr := queryBenchmarkPowerSourceW(BenchmarkPowerSourceSDRPSUInput)
	if sdrErr == nil && sdrW > 0 {
		readings[BenchmarkPowerSourceSDRPSUInput] = sdrW
	}

	return readings
}

// sampleBenchmarkPowerSourceSeries samples source for durationSec seconds at
// intervalSec spacing and returns the mean of the collected readings. The
// boolean is false when durationSec is not positive or no reading was
// collected.
func sampleBenchmarkPowerSourceSeries(ctx context.Context, source string, durationSec, intervalSec int) (float64, bool) {
	if durationSec > 0 {
		if readings := collectSelectedPowerSourceSamples(ctx, source, durationSec, intervalSec); len(readings) > 0 {
			return benchmarkMean(readings), true
		}
	}
	return 0, false
}

// collectSelectedPowerSourceSamples runs a background sampler on source and
// blocks until durationSec seconds have passed or ctx is done, whichever
// comes first. It then stops the sampler and returns the readings gathered
// so far. A non-positive durationSec returns nil without sampling.
func collectSelectedPowerSourceSamples(ctx context.Context, source string, durationSec, intervalSec int) []float64 {
	if durationSec <= 0 {
		return nil
	}
	stop := make(chan struct{})
	results := startSelectedPowerSourceSampler(stop, source, intervalSec)

	window := time.Duration(durationSec) * time.Second
	select {
	case <-ctx.Done():
	case <-time.After(window):
	}

	// Closing stop asks the sampler goroutine to hand over what it has.
	close(stop)
	return <-results
}

// startSelectedPowerSourceSampler launches a goroutine that reads source
// immediately and then once every intervalSec seconds, keeping only
// error-free positive readings. A non-positive intervalSec falls back to
// benchmarkPowerAutotuneSampleInterval.
//
// When stopCh is closed (or receives a value) the goroutine sends the
// collected readings on the returned channel exactly once and then closes it.
func startSelectedPowerSourceSampler(stopCh <-chan struct{}, source string, intervalSec int) <-chan []float64 {
	period := intervalSec
	if period <= 0 {
		period = benchmarkPowerAutotuneSampleInterval
	}
	// Buffered so the final send never blocks on a slow receiver.
	results := make(chan []float64, 1)
	go func() {
		defer close(results)

		var readings []float64
		takeReading := func() {
			watts, err := queryBenchmarkPowerSourceW(source)
			if err == nil && watts > 0 {
				readings = append(readings, watts)
			}
		}

		takeReading()
		tick := time.NewTicker(time.Duration(period) * time.Second)
		defer tick.Stop()
		for {
			select {
			case <-stopCh:
				results <- readings
				return
			case <-tick.C:
				takeReading()
			}
		}
	}()
	return results
}

// benchmarkPowerAutotuneSample is one telemetry row captured during an
// autotune phase by collectBenchmarkPowerAutotuneSamples.
type benchmarkPowerAutotuneSample struct {
	// ElapsedSec is the time, in seconds, since sampling of the phase began.
	ElapsedSec     float64
	// GPUAvgUsagePct is the mean UsagePct over the sampled GPUs; it stays
	// zero when GPU metrics could not be read for this row.
	GPUAvgUsagePct float64
	// CPUUsagePct is the value reported by sampleCPULoadPct for this row.
	CPUUsagePct    float64
	// GPUSumPowerW is the sum of PowerW over the sampled GPUs; it stays zero
	// when GPU metrics could not be read for this row.
	GPUSumPowerW   float64
	// Sources holds the readings returned by sampleBenchmarkPowerSources,
	// keyed by power source name; sources without a positive reading are
	// absent.
	Sources        map[string]float64
}

// collectBenchmarkPowerAutotuneSamples gathers telemetry rows for one
// autotune phase.
//
// Each iteration records CPU load, the server power sources and, when GPU
// metrics are readable, the summed GPU power and mean GPU usage for
// gpuIndices. Every row is passed to logBenchmarkPowerAutotuneSample with the
// given phase label. Sampling repeats every
// benchmarkPowerAutotuneSampleInterval seconds and stops once a row completes
// after durationSec seconds have elapsed, or as soon as ctx is done. A
// non-positive durationSec returns nil.
func collectBenchmarkPowerAutotuneSamples(ctx context.Context, phase string, gpuIndices []int, durationSec int, logFunc func(string)) []benchmarkPowerAutotuneSample {
	if durationSec <= 0 {
		return nil
	}
	var rows []benchmarkPowerAutotuneSample
	deadline := time.Now().Add(time.Duration(durationSec) * time.Second)
	begin := time.Now()
	for ctx.Err() == nil {
		sample := benchmarkPowerAutotuneSample{
			ElapsedSec:  time.Since(begin).Seconds(),
			CPUUsagePct: sampleCPULoadPct(),
			Sources:     sampleBenchmarkPowerSources(),
		}
		gpuRows, gpuErr := sampleGPUMetrics(gpuIndices)
		if gpuErr == nil && len(gpuRows) > 0 {
			var totalUsage float64
			for _, gpu := range gpuRows {
				sample.GPUSumPowerW += gpu.PowerW
				totalUsage += gpu.UsagePct
			}
			sample.GPUAvgUsagePct = totalUsage / float64(len(gpuRows))
		}
		rows = append(rows, sample)
		logBenchmarkPowerAutotuneSample(phase, sample, logFunc)

		// The deadline is only checked after a row has been taken, so at
		// least one row is always recorded.
		if time.Now().After(deadline) {
			break
		}
		select {
		case <-ctx.Done():
			return rows
		case <-time.After(benchmarkPowerAutotuneSampleInterval * time.Second):
		}
	}
	return rows
}

// logBenchmarkPowerAutotuneSample formats one telemetry row as a single log
// line and passes it to logFunc. Both known power sources are always listed,
// DCMI first; a source without a positive reading is shown as "n/a". A nil
// logFunc makes the call a no-op.
func logBenchmarkPowerAutotuneSample(phase string, sample benchmarkPowerAutotuneSample, logFunc func(string)) {
	if logFunc == nil {
		return
	}
	order := [...]string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput}
	parts := make([]string, 0, len(order))
	for _, name := range order {
		text := fmt.Sprintf("%s=n/a", name)
		if watts, found := sample.Sources[name]; found && watts > 0 {
			text = fmt.Sprintf("%s=%.0fW", name, watts)
		}
		parts = append(parts, text)
	}
	line := fmt.Sprintf(
		"autotune %s sample t=%.0fs gpu_avg_util=%.1f%% gpu_sum_power=%.0fW cpu_load=%.1f%% %s",
		phase,
		sample.ElapsedSec,
		sample.GPUAvgUsagePct,
		sample.GPUSumPowerW,
		sample.CPUUsagePct,
		strings.Join(parts, " "),
	)
	logFunc(line)
}

// logBenchmarkPowerAutotunePhaseSummary logs one aggregate line for a phase:
// sample count, mean and 95th-percentile GPU usage, mean GPU power, mean and
// 95th-percentile CPU load, and the mean of every positive reading per power
// source ("n/a" when a source has none). Nothing is logged when logFunc is
// nil or samples is empty.
func logBenchmarkPowerAutotunePhaseSummary(phase string, samples []benchmarkPowerAutotuneSample, logFunc func(string)) {
	if logFunc == nil || len(samples) == 0 {
		return
	}
	var (
		gpuUsage []float64
		cpuUsage []float64
		gpuPower []float64
	)
	perSource := map[string][]float64{}
	for i := range samples {
		row := samples[i]
		gpuUsage = append(gpuUsage, row.GPUAvgUsagePct)
		cpuUsage = append(cpuUsage, row.CPUUsagePct)
		gpuPower = append(gpuPower, row.GPUSumPowerW)
		for name, watts := range row.Sources {
			if watts > 0 {
				perSource[name] = append(perSource[name], watts)
			}
		}
	}

	var parts []string
	for _, name := range []string{BenchmarkPowerSourceDCMI, BenchmarkPowerSourceSDRPSUInput} {
		if readings := perSource[name]; len(readings) > 0 {
			parts = append(parts, fmt.Sprintf("%s_avg=%.0fW", name, benchmarkMean(readings)))
		} else {
			parts = append(parts, fmt.Sprintf("%s_avg=n/a", name))
		}
	}

	// Aggregates are computed in the same order in which they are printed.
	gpuMean := benchmarkMean(gpuUsage)
	gpuP95 := benchmarkPercentile(gpuUsage, 95)
	gpuPowerMean := benchmarkMean(gpuPower)
	cpuMean := benchmarkMean(cpuUsage)
	cpuP95 := benchmarkPercentile(cpuUsage, 95)
	logFunc(fmt.Sprintf(
		"autotune %s summary samples=%d gpu_avg_util=%.1f%% gpu_p95_util=%.1f%% gpu_avg_power=%.0fW cpu_avg=%.1f%% cpu_p95=%.1f%% %s",
		phase,
		len(samples),
		gpuMean,
		gpuP95,
		gpuPowerMean,
		cpuMean,
		cpuP95,
		strings.Join(parts, " "),
	))
}

// logBenchmarkPowerAutotuneSelection logs one line per candidate: either that
// it is unavailable, or its idle/load averages, delta, the GPU delta it was
// compared against, relative error and confidence. The candidate whose Source
// equals selectedSource is tagged " SELECTED". A candidate with non-blank
// SelectionNotes gets an extra "reason" line. A nil logFunc makes the call a
// no-op.
func logBenchmarkPowerAutotuneSelection(candidates []BenchmarkPowerAutotuneCandidate, selectedSource string, gpuDelta float64, logFunc func(string)) {
	if logFunc == nil {
		return
	}
	for i := range candidates {
		c := candidates[i]
		if !c.Available {
			logFunc(fmt.Sprintf("autotune candidate %s unavailable", c.Source))
			continue
		}
		marker := ""
		if c.Source == selectedSource {
			marker = " SELECTED"
		}
		logFunc(fmt.Sprintf(
			"autotune candidate %s idle_avg=%.0fW load_avg=%.0fW delta=%.0fW gpu_delta=%.0fW relative_error=%.3f confidence=%.0f%%%s",
			c.Source,
			c.IdleAvgW,
			c.LoadAvgW,
			c.DeltaW,
			gpuDelta,
			c.RelativeError,
			c.Confidence*100,
			marker,
		))
		// The notes are printed untrimmed; trimming only decides whether
		// there is anything worth printing.
		if strings.TrimSpace(c.SelectionNotes) == "" {
			continue
		}
		logFunc(fmt.Sprintf("autotune candidate %s reason: %s", c.Source, c.SelectionNotes))
	}
}

// validateBenchmarkPowerAutotuneIdle checks whether the idle-phase samples
// describe a sufficiently quiet machine.
//
// GPU usage is taken from every sample; CPU usage only from samples with a
// positive CPU value. Mean and 95th-percentile figures are rounded to one
// decimal place and compared against fixed limits, checked in this order:
// GPU mean > 5, GPU p95 > 10, CPU mean > 20, CPU p95 > 35. The first limit
// that is exceeded sets Reason; otherwise Valid is set. With no samples the
// result is invalid with a corresponding Reason.
func validateBenchmarkPowerAutotuneIdle(samples []benchmarkPowerAutotuneSample) *BenchmarkPowerAutotuneValidation {
	if len(samples) == 0 {
		return &BenchmarkPowerAutotuneValidation{Reason: "no idle telemetry samples collected"}
	}

	var gpuUsage []float64
	var cpuUsage []float64
	for i := range samples {
		gpuUsage = append(gpuUsage, samples[i].GPUAvgUsagePct)
		if cpu := samples[i].CPUUsagePct; cpu > 0 {
			cpuUsage = append(cpuUsage, cpu)
		}
	}

	oneDecimal := func(v float64) float64 { return math.Round(v*10) / 10 }
	verdict := &BenchmarkPowerAutotuneValidation{}
	verdict.GPUSamples = len(gpuUsage)
	verdict.CPUSamples = len(cpuUsage)
	verdict.GPUAvgUsagePct = oneDecimal(benchmarkMean(gpuUsage))
	verdict.GPUP95UsagePct = oneDecimal(benchmarkPercentile(gpuUsage, 95))
	verdict.CPUAvgUsagePct = oneDecimal(benchmarkMean(cpuUsage))
	verdict.CPUP95UsagePct = oneDecimal(benchmarkPercentile(cpuUsage, 95))

	if verdict.GPUAvgUsagePct > 5 {
		verdict.Reason = fmt.Sprintf("idle validation failed: average GPU load %.1f%% exceeds 5%%", verdict.GPUAvgUsagePct)
	} else if verdict.GPUP95UsagePct > 10 {
		verdict.Reason = fmt.Sprintf("idle validation failed: p95 GPU load %.1f%% exceeds 10%%", verdict.GPUP95UsagePct)
	} else if verdict.CPUAvgUsagePct > 20 {
		verdict.Reason = fmt.Sprintf("idle validation failed: average CPU load %.1f%% exceeds 20%%", verdict.CPUAvgUsagePct)
	} else if verdict.CPUP95UsagePct > 35 {
		verdict.Reason = fmt.Sprintf("idle validation failed: p95 CPU load %.1f%% exceeds 35%%", verdict.CPUP95UsagePct)
	} else {
		verdict.Valid = true
	}
	return verdict
}

// chooseBenchmarkPowerAutotuneSource picks the server power source whose
// idle-to-load delta best tracks the GPU-reported power delta.
//
// It returns the selected source name, the candidate list for both known
// sources (DCMI first, then SDR PSU input), the mean GPU power over the idle
// and load samples, and an error when no candidate has readings in both
// phases with a positive delta.
//
// The GPU delta is load mean minus idle mean; when that is not positive the
// load mean itself is used. Candidates are ranked by relative error against
// that delta, except that candidates whose relative errors differ by at most
// 0.10 are treated as tied and the SDR PSU input source wins the tie.
// Remaining ties go to the candidate with more samples. The winning
// candidate is flagged Selected and receives SelectionNotes describing why
// it won, including the case where it won the tie-break without having the
// smallest relative error.
func chooseBenchmarkPowerAutotuneSource(idle, load []benchmarkPowerAutotuneSample) (string, []BenchmarkPowerAutotuneCandidate, float64, float64, error) {
	// Relative errors closer than this are considered equivalent.
	const tieTolerance = 0.10

	// split separates a phase's samples into per-source positive readings
	// and the series of summed GPU power values.
	split := func(samples []benchmarkPowerAutotuneSample) (map[string][]float64, []float64) {
		bySource := map[string][]float64{}
		var gpu []float64
		for _, sample := range samples {
			gpu = append(gpu, sample.GPUSumPowerW)
			for source, value := range sample.Sources {
				if value > 0 {
					bySource[source] = append(bySource[source], value)
				}
			}
		}
		return bySource, gpu
	}
	idleBySource, idleGPU := split(idle)
	loadBySource, loadGPU := split(load)

	idleGPUAvg := benchmarkMean(idleGPU)
	loadGPUAvg := benchmarkMean(loadGPU)
	gpuDelta := loadGPUAvg - idleGPUAvg
	if gpuDelta <= 0 {
		gpuDelta = loadGPUAvg
	}

	candidates := []BenchmarkPowerAutotuneCandidate{
		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceDCMI, idleBySource[BenchmarkPowerSourceDCMI], loadBySource[BenchmarkPowerSourceDCMI], gpuDelta),
		buildBenchmarkPowerAutotuneCandidate(BenchmarkPowerSourceSDRPSUInput, idleBySource[BenchmarkPowerSourceSDRPSUInput], loadBySource[BenchmarkPowerSourceSDRPSUInput], gpuDelta),
	}
	available := make([]BenchmarkPowerAutotuneCandidate, 0, len(candidates))
	for _, candidate := range candidates {
		if candidate.Available && candidate.DeltaW > 0 {
			available = append(available, candidate)
		}
	}
	if len(available) == 0 {
		return "", candidates, idleGPUAvg, loadGPUAvg, fmt.Errorf("no usable server power source samples collected")
	}
	sort.Slice(available, func(i, j int) bool {
		if math.Abs(available[i].RelativeError-available[j].RelativeError) <= tieTolerance {
			if available[i].Source != available[j].Source {
				return available[i].Source == BenchmarkPowerSourceSDRPSUInput
			}
		}
		if available[i].RelativeError != available[j].RelativeError {
			return available[i].RelativeError < available[j].RelativeError
		}
		return available[i].Samples > available[j].Samples
	})
	selected := available[0]

	// Find the candidate with the strictly smallest relative error so the
	// notes can say truthfully whether the winner was actually the closest
	// match or merely won the tie-break.
	closest := selected
	for _, candidate := range available[1:] {
		if candidate.RelativeError < closest.RelativeError {
			closest = candidate
		}
	}
	notes := fmt.Sprintf("selected because delta %.0f W is closest to GPU delta %.0f W (relative error %.3f)", selected.DeltaW, gpuDelta, selected.RelativeError)
	if closest.Source != selected.Source {
		notes = fmt.Sprintf(
			"selected because %s is preferred when relative errors are within %.2f of each other: delta %.0f W vs GPU delta %.0f W (relative error %.3f; %s has %.3f)",
			selected.Source, tieTolerance, selected.DeltaW, gpuDelta, selected.RelativeError, closest.Source, closest.RelativeError,
		)
	}

	for idx := range candidates {
		if candidates[idx].Source == selected.Source {
			candidates[idx].Selected = true
			candidates[idx].SelectionNotes = notes
		}
	}
	return selected.Source, candidates, idleGPUAvg, loadGPUAvg, nil
}

// buildBenchmarkPowerAutotuneCandidate summarizes one power source for the
// autotune comparison.
//
// The candidate is Available only when both idle and load contain readings;
// Samples is the smaller of the two lengths. For an available candidate the
// idle/load means and their difference are filled in, and, when gpuDelta is
// positive, RelativeError is |DeltaW - gpuDelta| / gpuDelta and Confidence is
// 1 - RelativeError floored at zero.
func buildBenchmarkPowerAutotuneCandidate(source string, idle, load []float64, gpuDelta float64) BenchmarkPowerAutotuneCandidate {
	haveBoth := len(idle) > 0 && len(load) > 0
	result := BenchmarkPowerAutotuneCandidate{
		Source:    source,
		Available: haveBoth,
		Samples:   minInt(len(idle), len(load)),
	}
	if haveBoth {
		idleAvg := benchmarkMean(idle)
		loadAvg := benchmarkMean(load)
		result.IdleAvgW, result.LoadAvgW = idleAvg, loadAvg
		result.DeltaW = loadAvg - idleAvg
		if gpuDelta > 0 {
			relErr := math.Abs(result.DeltaW-gpuDelta) / gpuDelta
			result.RelativeError = relErr
			result.Confidence = math.Max(0, 1-relErr)
		}
	}
	return result
}

// renderBenchmarkPowerAutotuneSummary renders result as newline-terminated
// key=value lines: run metadata first, then the selected source (if any), the
// idle validation figures (if present), and finally availability plus
// averages for each candidate.
func renderBenchmarkPowerAutotuneSummary(result BenchmarkPowerAutotuneResult) string {
	var out strings.Builder
	put := func(format string, args ...interface{}) {
		fmt.Fprintf(&out, format, args...)
	}

	put("generated_at=%s\n", result.GeneratedAt.UTC().Format(time.RFC3339))
	put("status=%s\n", result.Status)
	put("benchmark_kind=%s\n", result.BenchmarkKind)
	put("profile=%s\n", result.Profile)
	put("idle_duration_sec=%d\n", result.IdleDurationSec)
	put("load_duration_sec=%d\n", result.LoadDurationSec)
	put("sample_interval_sec=%d\n", result.SampleIntervalSec)
	if result.SelectedSource != "" {
		put("selected_source=%s\n", result.SelectedSource)
	}

	if validation := result.IdleValidation; validation != nil {
		put("idle_valid=%t\n", validation.Valid)
		put("idle_gpu_avg_usage_pct=%.1f\n", validation.GPUAvgUsagePct)
		put("idle_gpu_p95_usage_pct=%.1f\n", validation.GPUP95UsagePct)
		put("idle_cpu_avg_usage_pct=%.1f\n", validation.CPUAvgUsagePct)
		put("idle_cpu_p95_usage_pct=%.1f\n", validation.CPUP95UsagePct)
		if validation.Reason != "" {
			put("idle_validation_error=%s\n", validation.Reason)
		}
	}

	for i := range result.Candidates {
		c := result.Candidates[i]
		put("candidate_%s_available=%t\n", c.Source, c.Available)
		if !c.Available {
			continue
		}
		put("candidate_%s_idle_avg_w=%.0f\n", c.Source, c.IdleAvgW)
		put("candidate_%s_load_avg_w=%.0f\n", c.Source, c.LoadAvgW)
		put("candidate_%s_delta_w=%.0f\n", c.Source, c.DeltaW)
		put("candidate_%s_relative_error=%.3f\n", c.Source, c.RelativeError)
	}
	return out.String()
}

// renderBenchmarkPowerAutotuneReport renders result as a Markdown report: a
// title and run metadata, an "Idle Validation" section when validation data
// is present, a "Candidates" table when there are candidates (unavailable
// ones shown with dashes), and the notes as a bullet list.
func renderBenchmarkPowerAutotuneReport(result BenchmarkPowerAutotuneResult) string {
	var out strings.Builder
	put := func(format string, args ...interface{}) {
		fmt.Fprintf(&out, format, args...)
	}

	out.WriteString("# Bee Bench Power Source Autotune\n\n")
	put("**Status:** %s  \n", result.Status)
	put("**Benchmark kind:** %s  \n", result.BenchmarkKind)
	put("**Profile:** %s  \n", result.Profile)
	put("**Idle window:** %ds  \n", result.IdleDurationSec)
	put("**Load window:** %ds  \n", result.LoadDurationSec)
	put("**Sample interval:** %ds  \n", result.SampleIntervalSec)
	if result.SelectedSource != "" {
		put("**Selected source:** `%s`  \n", result.SelectedSource)
	}
	out.WriteString("\n")

	if validation := result.IdleValidation; validation != nil {
		out.WriteString("## Idle Validation\n\n")
		put("- valid: %t\n", validation.Valid)
		put("- GPU avg usage: %.1f%%\n", validation.GPUAvgUsagePct)
		put("- GPU p95 usage: %.1f%%\n", validation.GPUP95UsagePct)
		put("- CPU avg usage: %.1f%%\n", validation.CPUAvgUsagePct)
		put("- CPU p95 usage: %.1f%%\n", validation.CPUP95UsagePct)
		if validation.Reason != "" {
			put("- reason: %s\n", validation.Reason)
		}
		out.WriteString("\n")
	}

	if len(result.Candidates) > 0 {
		out.WriteString("## Candidates\n\n")
		out.WriteString("| Source | Idle avg W | Load avg W | Delta W | Relative error | Selected |\n")
		out.WriteString("|--------|------------|------------|---------|----------------|----------|\n")
		for i := range result.Candidates {
			c := result.Candidates[i]
			if !c.Available {
				put("| %s | — | — | — | — | no |\n", c.Source)
				continue
			}
			mark := "no"
			if c.Selected {
				mark = "yes"
			}
			put("| %s | %.0f | %.0f | %.0f | %.2f | %s |\n",
				c.Source, c.IdleAvgW, c.LoadAvgW, c.DeltaW, c.RelativeError, mark)
		}
		out.WriteString("\n")
	}

	for i := range result.Notes {
		put("- %s\n", result.Notes[i])
	}
	return out.String()
}

// benchmarkAutotuneLoadCommand builds the command line for the autotune load
// stage and returns it together with the normalized benchmark kind.
//
// The kinds "power-fit", "power" and "nvidia-bench-power" (matched ignoring
// case and surrounding whitespace) use the command from
// resolveBenchmarkPowerLoadCommand, falling back to the DCGM "targeted_power"
// diagnostic when that fails; the kind is reported as "power-fit". Every
// other kind runs bee-gpu-burn on all gpuIndices for durationSec seconds,
// adding --size-mb when sizeMB is positive; the kind is reported as
// "performance".
func benchmarkAutotuneLoadCommand(kind string, durationSec int, gpuIndices []int, sizeMB int) ([]string, string) {
	deviceList := joinIndexList(gpuIndices)
	normalized := strings.TrimSpace(strings.ToLower(kind))

	if normalized == "power-fit" || normalized == "power" || normalized == "nvidia-bench-power" {
		if cmd, _, err := resolveBenchmarkPowerLoadCommand(durationSec, gpuIndices); err == nil {
			return cmd, "power-fit"
		}
		return nvidiaDCGMNamedDiagCommand("targeted_power", durationSec, gpuIndices), "power-fit"
	}

	args := []string{
		"bee-gpu-burn",
		"--seconds", fmt.Sprintf("%d", durationSec),
		"--devices", deviceList,
	}
	if sizeMB > 0 {
		args = append(args, "--size-mb", fmt.Sprintf("%d", sizeMB))
	}
	return args, "performance"
}

// RunNvidiaPowerSourceAutotune runs the full power-source autotune procedure
// and returns the directory holding the run's artifacts.
//
// Steps:
//  1. Create baseDir (default "/var/log/bee-bench/autotune" when blank) and a
//     timestamped run directory inside it, and resolve the NVIDIA GPUs.
//  2. Sample telemetry for the idle window and validate that the machine is
//     idle; stop with an error if it is not.
//  3. Run the load command for benchmarkKind while sampling telemetry in the
//     background. Sampling is cancelled as soon as the load command returns,
//     so a command that fails early does not stall the run for the whole load
//     window and readings taken after the load ended are not counted as load
//     samples.
//  4. Choose the power source whose idle-to-load delta best matches the GPU
//     delta, save it to the autotune config and write the run artifacts.
//
// A nil ctx is replaced by context.Background and a nil logFunc by a no-op.
//
// Once the run directory exists, every exit path first writes the artifacts
// (result.json, summary.txt, report.md) and returns the run directory along
// with the stage error, if any. An empty path is returned only when setup
// fails before the run directory is created or when the artifacts themselves
// cannot be written.
func (s *System) RunNvidiaPowerSourceAutotune(ctx context.Context, baseDir string, opts NvidiaBenchmarkOptions, benchmarkKind string, logFunc func(string)) (string, error) {
	if ctx == nil {
		ctx = context.Background()
	}
	if logFunc == nil {
		logFunc = func(string) {}
	}
	if strings.TrimSpace(baseDir) == "" {
		baseDir = "/var/log/bee-bench/autotune"
	}
	if err := os.MkdirAll(baseDir, 0755); err != nil {
		return "", fmt.Errorf("mkdir %s: %w", baseDir, err)
	}
	selected, err := resolveNvidiaGPUSelection(nil, nil)
	if err != nil {
		return "", err
	}
	if len(selected) == 0 {
		return "", fmt.Errorf("no NVIDIA GPUs detected for autotune")
	}
	ts := time.Now().UTC().Format("20060102-150405")
	runDir := filepath.Join(baseDir, "autotune-"+ts)
	if err := os.MkdirAll(runDir, 0755); err != nil {
		return "", fmt.Errorf("mkdir %s: %w", runDir, err)
	}
	verboseLog := filepath.Join(runDir, "verbose.log")
	hostname, _ := os.Hostname() // best effort; an empty hostname is acceptable in the report
	loadCmd, normalizedKind := benchmarkAutotuneLoadCommand(benchmarkKind, benchmarkPowerAutotuneLoadSec, selected, opts.SizeMB)
	result := BenchmarkPowerAutotuneResult{
		GeneratedAt:       time.Now().UTC(),
		Hostname:          hostname,
		ServerModel:       readServerModel(),
		BenchmarkKind:     normalizedKind,
		Profile:           opts.Profile,
		Status:            "FAILED",
		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
	}

	// finish persists the current state of result as run artifacts and
	// returns runDir together with cause. A failure to write the artifacts
	// takes precedence over cause.
	finish := func(cause error) (string, error) {
		if err := writeBenchmarkPowerAutotuneArtifacts(runDir, result); err != nil {
			return "", err
		}
		return runDir, cause
	}

	logFunc(fmt.Sprintf("autotune: idle validation window %ds on GPUs %s", benchmarkPowerAutotuneIdleSec, joinIndexList(selected)))
	idleSamples := collectBenchmarkPowerAutotuneSamples(ctx, "idle", selected, benchmarkPowerAutotuneIdleSec, logFunc)
	logBenchmarkPowerAutotunePhaseSummary("idle", idleSamples, logFunc)
	result.IdleValidation = validateBenchmarkPowerAutotuneIdle(idleSamples)
	if result.IdleValidation == nil || !result.IdleValidation.Valid {
		if result.IdleValidation != nil {
			result.IdleValidationError = result.IdleValidation.Reason
			logFunc(result.IdleValidation.Reason)
		}
		result.Notes = append(result.Notes, "autotune stopped before load stage because idle validation failed")
		return finish(fmt.Errorf("%s", result.IdleValidationError))
	}

	logFunc(fmt.Sprintf("autotune: full-load stage using %s for %ds", normalizedKind, benchmarkPowerAutotuneLoadSec))
	// The sampler gets its own cancelable context so it can be stopped the
	// moment the load command returns, successfully or not.
	sampleCtx, stopSampling := context.WithCancel(ctx)
	defer stopSampling()
	loadSamplesCh := make(chan []benchmarkPowerAutotuneSample, 1)
	go func() {
		loadSamplesCh <- collectBenchmarkPowerAutotuneSamples(sampleCtx, "load", selected, benchmarkPowerAutotuneLoadSec, logFunc)
	}()
	out, runErr := runSATCommandCtx(ctx, verboseLog, "autotune-load.log", loadCmd, nil, logFunc)
	stopSampling()
	// Keeping a copy of the load output is best-effort: a failure is logged
	// but does not abort the run.
	if err := os.WriteFile(filepath.Join(runDir, "autotune-load.log"), out, 0644); err != nil {
		logFunc(fmt.Sprintf("autotune: could not write autotune-load.log: %v", err))
	}
	loadSamples := <-loadSamplesCh
	logBenchmarkPowerAutotunePhaseSummary("load", loadSamples, logFunc)
	if runErr != nil {
		result.Notes = append(result.Notes, "full-load stage failed: "+runErr.Error())
		return finish(fmt.Errorf("autotune load stage: %w", runErr))
	}

	selectedSource, candidates, idleGPUAvg, loadGPUAvg, chooseErr := chooseBenchmarkPowerAutotuneSource(idleSamples, loadSamples)
	result.Candidates = candidates
	result.GPUPowerIdleW = idleGPUAvg
	result.GPUPowerLoadW = loadGPUAvg
	if chooseErr != nil {
		result.Notes = append(result.Notes, chooseErr.Error())
		return finish(chooseErr)
	}
	// Same GPU delta rule as chooseBenchmarkPowerAutotuneSource: fall back to
	// the load average when the difference is not positive.
	gpuDelta := loadGPUAvg - idleGPUAvg
	if gpuDelta <= 0 {
		gpuDelta = loadGPUAvg
	}
	logBenchmarkPowerAutotuneSelection(candidates, selectedSource, gpuDelta, logFunc)
	result.SelectedSource = selectedSource
	result.Status = "OK"
	var confidence float64
	selectionReason := fmt.Sprintf("selected %s after comparing full-load average against GPU-reported delta", selectedSource)
	for _, candidate := range candidates {
		if candidate.Selected {
			confidence = candidate.Confidence
			if strings.TrimSpace(candidate.SelectionNotes) != "" {
				selectionReason = candidate.SelectionNotes
			}
			break
		}
	}
	cfg := BenchmarkPowerAutotuneConfig{
		Version:           benchmarkPowerAutotuneVersion,
		UpdatedAt:         time.Now().UTC(),
		SelectedSource:    selectedSource,
		BenchmarkKind:     normalizedKind,
		Profile:           opts.Profile,
		IdleDurationSec:   benchmarkPowerAutotuneIdleSec,
		LoadDurationSec:   benchmarkPowerAutotuneLoadSec,
		SampleIntervalSec: benchmarkPowerAutotuneSampleInterval,
		Confidence:        confidence,
		Reason:            selectionReason,
	}
	result.Config = &cfg
	configPath := BenchmarkPowerSourceConfigPath(baseDir)
	if err := SaveBenchmarkPowerAutotuneConfig(configPath, cfg); err != nil {
		result.Status = "FAILED"
		result.Notes = append(result.Notes, "failed to save autotune config: "+err.Error())
		return finish(err)
	}
	logFunc(fmt.Sprintf("autotune conclusion: selected source %s; reason: %s", selectedSource, cfg.Reason))
	result.Notes = append(result.Notes, "saved autotune config to "+configPath)
	return finish(nil)
}

// writeBenchmarkPowerAutotuneArtifacts stores result in runDir as three
// files, written in this order: result.json (indented JSON), summary.txt
// (key=value lines) and report.md (Markdown). It stops at the first failure
// and returns that error wrapped with the name of the step.
func writeBenchmarkPowerAutotuneArtifacts(runDir string, result BenchmarkPowerAutotuneResult) error {
	save := func(name string, contents []byte) error {
		return os.WriteFile(filepath.Join(runDir, name), contents, 0644)
	}

	encoded, marshalErr := json.MarshalIndent(result, "", "  ")
	if marshalErr != nil {
		return fmt.Errorf("marshal autotune result: %w", marshalErr)
	}
	if err := save("result.json", encoded); err != nil {
		return fmt.Errorf("write autotune result.json: %w", err)
	}

	summary := renderBenchmarkPowerAutotuneSummary(result)
	if err := save("summary.txt", []byte(summary)); err != nil {
		return fmt.Errorf("write autotune summary.txt: %w", err)
	}

	report := renderBenchmarkPowerAutotuneReport(result)
	if err := save("report.md", []byte(report)); err != nil {
		return fmt.Errorf("write autotune report.md: %w", err)
	}
	return nil
}

// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}

// Blank reference that keeps the os/exec import in use; no other code visible
// in this file refers to the exec package.
var _ = exec.ErrNotFound